The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * sendfile(2) and related extensions:
    6  * Copyright (c) 1998, David Greenman. All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/8.3/sys/kern/uipc_syscalls.c 233270 2012-03-21 08:08:23Z tuexen $");
   37 
   38 #include "opt_inet.h"
   39 #include "opt_inet6.h"
   40 #include "opt_sctp.h"
   41 #include "opt_compat.h"
   42 #include "opt_ktrace.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/kernel.h>
   47 #include <sys/lock.h>
   48 #include <sys/mutex.h>
   49 #include <sys/sysproto.h>
   50 #include <sys/malloc.h>
   51 #include <sys/filedesc.h>
   52 #include <sys/event.h>
   53 #include <sys/proc.h>
   54 #include <sys/fcntl.h>
   55 #include <sys/file.h>
   56 #include <sys/filio.h>
   57 #include <sys/jail.h>
   58 #include <sys/mount.h>
   59 #include <sys/mbuf.h>
   60 #include <sys/protosw.h>
   61 #include <sys/sf_buf.h>
   62 #include <sys/sysent.h>
   63 #include <sys/socket.h>
   64 #include <sys/socketvar.h>
   65 #include <sys/signalvar.h>
   66 #include <sys/syscallsubr.h>
   67 #include <sys/sysctl.h>
   68 #include <sys/uio.h>
   69 #include <sys/vnode.h>
   70 #ifdef KTRACE
   71 #include <sys/ktrace.h>
   72 #endif
   73 #ifdef COMPAT_FREEBSD32
   74 #include <compat/freebsd32/freebsd32_util.h>
   75 #endif
   76 
   77 #include <net/vnet.h>
   78 
   79 #include <security/audit/audit.h>
   80 #include <security/mac/mac_framework.h>
   81 
   82 #include <vm/vm.h>
   83 #include <vm/vm_object.h>
   84 #include <vm/vm_page.h>
   85 #include <vm/vm_pageout.h>
   86 #include <vm/vm_kern.h>
   87 #include <vm/vm_extern.h>
   88 
   89 #if defined(INET) || defined(INET6)
   90 #ifdef SCTP
   91 #include <netinet/sctp.h>
   92 #include <netinet/sctp_peeloff.h>
   93 #endif /* SCTP */
   94 #endif /* INET || INET6 */
   95 
/* Back ends shared by the message send and receive system calls. */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

/*
 * Common implementations that take a `compat' argument.  accept1() is
 * called with compat == 0 from accept() and compat == 1 from oaccept().
 */
static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
			int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
			int compat);
  105 
/*
 * NSFBUFS-related variables and associated sysctls.
 *
 * The three counters below are exported under kern.ipc; the description
 * string attached to each sysctl states what the counter tracks.
 */
int nsfbufs;		/* maximum number of sf_bufs available */
int nsfbufspeak;	/* number of sf_bufs at peak usage */
int nsfbufsused;	/* number of sf_bufs currently in use */

SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
  119 
  120 /*
  121  * Convert a user file descriptor to a kernel file entry.  A reference on the
  122  * file entry is held upon returning.  This is lighter weight than
  123  * fgetsock(), which bumps the socket reference drops the file reference
  124  * count instead, as this approach avoids several additional mutex operations
  125  * associated with the additional reference count.  If requested, return the
  126  * open file flags.
  127  */
  128 static int
  129 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
  130 {
  131         struct file *fp;
  132         int error;
  133 
  134         fp = NULL;
  135         if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) {
  136                 error = EBADF;
  137         } else if (fp->f_type != DTYPE_SOCKET) {
  138                 fdrop(fp, curthread);
  139                 fp = NULL;
  140                 error = ENOTSOCK;
  141         } else {
  142                 if (fflagp != NULL)
  143                         *fflagp = fp->f_flag;
  144                 error = 0;
  145         }
  146         *fpp = fp;
  147         return (error);
  148 }
  149 
  150 /*
  151  * System call interface to the socket abstraction.
  152  */
  153 #if defined(COMPAT_43)
  154 #define COMPAT_OLDSOCK
  155 #endif
  156 
  157 int
  158 socket(td, uap)
  159         struct thread *td;
  160         struct socket_args /* {
  161                 int     domain;
  162                 int     type;
  163                 int     protocol;
  164         } */ *uap;
  165 {
  166         struct filedesc *fdp;
  167         struct socket *so;
  168         struct file *fp;
  169         int fd, error;
  170 
  171         AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
  172 #ifdef MAC
  173         error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
  174             uap->protocol);
  175         if (error)
  176                 return (error);
  177 #endif
  178         fdp = td->td_proc->p_fd;
  179         error = falloc(td, &fp, &fd);
  180         if (error)
  181                 return (error);
  182         /* An extra reference on `fp' has been held for us by falloc(). */
  183         error = socreate(uap->domain, &so, uap->type, uap->protocol,
  184             td->td_ucred, td);
  185         if (error) {
  186                 fdclose(fdp, fp, fd, td);
  187         } else {
  188                 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
  189                 td->td_retval[0] = fd;
  190         }
  191         fdrop(fp, td);
  192         return (error);
  193 }
  194 
  195 /* ARGSUSED */
  196 int
  197 bind(td, uap)
  198         struct thread *td;
  199         struct bind_args /* {
  200                 int     s;
  201                 caddr_t name;
  202                 int     namelen;
  203         } */ *uap;
  204 {
  205         struct sockaddr *sa;
  206         int error;
  207 
  208         if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
  209                 return (error);
  210 
  211         error = kern_bind(td, uap->s, sa);
  212         free(sa, M_SONAME);
  213         return (error);
  214 }
  215 
  216 int
  217 kern_bind(td, fd, sa)
  218         struct thread *td;
  219         int fd;
  220         struct sockaddr *sa;
  221 {
  222         struct socket *so;
  223         struct file *fp;
  224         int error;
  225 
  226         AUDIT_ARG_FD(fd);
  227         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
  228         if (error)
  229                 return (error);
  230         so = fp->f_data;
  231 #ifdef KTRACE
  232         if (KTRPOINT(td, KTR_STRUCT))
  233                 ktrsockaddr(sa);
  234 #endif
  235 #ifdef MAC
  236         error = mac_socket_check_bind(td->td_ucred, so, sa);
  237         if (error == 0)
  238 #endif
  239                 error = sobind(so, sa, td);
  240         fdrop(fp, td);
  241         return (error);
  242 }
  243 
  244 /* ARGSUSED */
  245 int
  246 listen(td, uap)
  247         struct thread *td;
  248         struct listen_args /* {
  249                 int     s;
  250                 int     backlog;
  251         } */ *uap;
  252 {
  253         struct socket *so;
  254         struct file *fp;
  255         int error;
  256 
  257         AUDIT_ARG_FD(uap->s);
  258         error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
  259         if (error == 0) {
  260                 so = fp->f_data;
  261 #ifdef MAC
  262                 error = mac_socket_check_listen(td->td_ucred, so);
  263                 if (error == 0)
  264 #endif
  265                         error = solisten(so, uap->backlog, td);
  266                 fdrop(fp, td);
  267         }
  268         return(error);
  269 }
  270 
  271 /*
  272  * accept1()
  273  */
  274 static int
  275 accept1(td, uap, compat)
  276         struct thread *td;
  277         struct accept_args /* {
  278                 int     s;
  279                 struct sockaddr * __restrict name;
  280                 socklen_t       * __restrict anamelen;
  281         } */ *uap;
  282         int compat;
  283 {
  284         struct sockaddr *name;
  285         socklen_t namelen;
  286         struct file *fp;
  287         int error;
  288 
  289         if (uap->name == NULL)
  290                 return (kern_accept(td, uap->s, NULL, NULL, NULL));
  291 
  292         error = copyin(uap->anamelen, &namelen, sizeof (namelen));
  293         if (error)
  294                 return (error);
  295 
  296         error = kern_accept(td, uap->s, &name, &namelen, &fp);
  297 
  298         /*
  299          * return a namelen of zero for older code which might
  300          * ignore the return value from accept.
  301          */
  302         if (error) {
  303                 (void) copyout(&namelen,
  304                     uap->anamelen, sizeof(*uap->anamelen));
  305                 return (error);
  306         }
  307 
  308         if (error == 0 && name != NULL) {
  309 #ifdef COMPAT_OLDSOCK
  310                 if (compat)
  311                         ((struct osockaddr *)name)->sa_family =
  312                             name->sa_family;
  313 #endif
  314                 error = copyout(name, uap->name, namelen);
  315         }
  316         if (error == 0)
  317                 error = copyout(&namelen, uap->anamelen,
  318                     sizeof(namelen));
  319         if (error)
  320                 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
  321         fdrop(fp, td);
  322         free(name, M_SONAME);
  323         return (error);
  324 }
  325 
/*
 * Accept a completed connection from the listening socket named by
 * descriptor `s'.
 *
 * A new file descriptor is allocated with falloc() and its number is stored
 * in td->td_retval[0].  When `name' is not NULL, *name receives the peer
 * address produced by soaccept() (it stays NULL if none was produced) and
 * *namelen is reduced to the address's sa_len if it was larger; the address
 * then belongs to the caller, which releases it with free(..., M_SONAME)
 * as accept1() does.  When `fp' is not NULL, a successful call returns the
 * falloc() reference on the new file through *fp; on the error paths that
 * go through the common exit, *fp is set to NULL.
 *
 * Errors generated here: EINVAL when the socket does not have
 * SO_ACCEPTCONN set, EWOULDBLOCK when the socket is non-blocking and no
 * completed connection is queued, ECONNABORTED when the socket can no
 * longer receive, and any pending so_error of the listening socket.  Errors
 * from getsock(), the MAC accept check, falloc(), msleep() and soaccept()
 * are passed through.
 */
int
kern_accept(struct thread *td, int s, struct sockaddr **name,
    socklen_t *namelen, struct file **fp)
{
	struct filedesc *fdp;
	struct file *headfp, *nfp = NULL;
	struct sockaddr *sa = NULL;
	int error;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;
	int tmp;

	if (name) {
		*name = NULL;
		if (*namelen < 0)
			return (EINVAL);
	}

	AUDIT_ARG_FD(s);
	fdp = td->td_proc->p_fd;
	error = getsock(fdp, s, &headfp, &fflag);
	if (error)
		return (error);
	head = headfp->f_data;
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto done;
	}
#ifdef MAC
	error = mac_socket_check_accept(td->td_ucred, head);
	if (error != 0)
		goto done;
#endif
	/* Reserve the descriptor before looking at the completed queue. */
	error = falloc(td, &nfp, &fd);
	if (error)
		goto done;
	ACCEPT_LOCK();
	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
		ACCEPT_UNLOCK();
		error = EWOULDBLOCK;
		goto noconnection;
	}
	/*
	 * Sleep until a completed connection is queued or an error is
	 * posted on the listening socket.  msleep() is interruptible
	 * (PCATCH), so a non-zero return ends the wait.
	 */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			ACCEPT_UNLOCK();
			goto noconnection;
		}
	}
	/* Report a pending socket error once and clear it. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		ACCEPT_UNLOCK();
		goto noconnection;
	}
	so = TAILQ_FIRST(&head->so_comp);
	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

	/*
	 * Before changing the flags on the socket, we have to bump the
	 * reference count.  Otherwise, if the protocol calls sofree(),
	 * the socket will be released due to a zero refcount.
	 */
	SOCK_LOCK(so);			/* soref() and so_state update */
	soref(so);			/* file descriptor reference */

	/* Detach the connection from the listening socket's queue. */
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	so->so_state |= (head->so_state & SS_NBIO);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;

	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();

	/* An extra reference on `nfp' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

	/* Carry the listening socket's SIGIO owner over to the new socket. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	/* The new file starts out with the listening file's flags. */
	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
	tmp = fflag & FASYNC;
	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
	sa = 0;
	error = soaccept(so, &sa);
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (name)
			*namelen = 0;
		goto noconnection;
	}
	/* No address was returned: success with a zero-length name. */
	if (sa == NULL) {
		if (name)
			*namelen = 0;
		goto done;
	}
	if (name) {
		/* check sa_len before it is destroyed */
		if (*namelen > sa->sa_len)
			*namelen = sa->sa_len;
#ifdef KTRACE
		if (KTRPOINT(td, KTR_STRUCT))
			ktrsockaddr(sa);
#endif
		/* Ownership of the address passes to the caller. */
		*name = sa;
		sa = NULL;
	}
noconnection:
	/* Free the address unless it was handed to the caller above. */
	if (sa)
		free(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error)
		fdclose(fdp, nfp, fd, td);

	/*
	 * Release explicitly held references before returning.  We return
	 * a reference on nfp to the caller on success if they request it.
	 */
done:
	if (fp != NULL) {
		if (error == 0) {
			*fp = nfp;
			nfp = NULL;
		} else
			*fp = NULL;
	}
	if (nfp != NULL)
		fdrop(nfp, td);
	fdrop(headfp, td);
	return (error);
}
  480 
  481 int
  482 accept(td, uap)
  483         struct thread *td;
  484         struct accept_args *uap;
  485 {
  486 
  487         return (accept1(td, uap, 0));
  488 }
  489 
  490 #ifdef COMPAT_OLDSOCK
  491 int
  492 oaccept(td, uap)
  493         struct thread *td;
  494         struct accept_args *uap;
  495 {
  496 
  497         return (accept1(td, uap, 1));
  498 }
  499 #endif /* COMPAT_OLDSOCK */
  500 
  501 /* ARGSUSED */
  502 int
  503 connect(td, uap)
  504         struct thread *td;
  505         struct connect_args /* {
  506                 int     s;
  507                 caddr_t name;
  508                 int     namelen;
  509         } */ *uap;
  510 {
  511         struct sockaddr *sa;
  512         int error;
  513 
  514         error = getsockaddr(&sa, uap->name, uap->namelen);
  515         if (error)
  516                 return (error);
  517 
  518         error = kern_connect(td, uap->s, sa);
  519         free(sa, M_SONAME);
  520         return (error);
  521 }
  522 
  523 
  524 int
  525 kern_connect(td, fd, sa)
  526         struct thread *td;
  527         int fd;
  528         struct sockaddr *sa;
  529 {
  530         struct socket *so;
  531         struct file *fp;
  532         int error;
  533         int interrupted = 0;
  534 
  535         AUDIT_ARG_FD(fd);
  536         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
  537         if (error)
  538                 return (error);
  539         so = fp->f_data;
  540         if (so->so_state & SS_ISCONNECTING) {
  541                 error = EALREADY;
  542                 goto done1;
  543         }
  544 #ifdef KTRACE
  545         if (KTRPOINT(td, KTR_STRUCT))
  546                 ktrsockaddr(sa);
  547 #endif
  548 #ifdef MAC
  549         error = mac_socket_check_connect(td->td_ucred, so, sa);
  550         if (error)
  551                 goto bad;
  552 #endif
  553         error = soconnect(so, sa, td);
  554         if (error)
  555                 goto bad;
  556         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
  557                 error = EINPROGRESS;
  558                 goto done1;
  559         }
  560         SOCK_LOCK(so);
  561         while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  562                 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
  563                     "connec", 0);
  564                 if (error) {
  565                         if (error == EINTR || error == ERESTART)
  566                                 interrupted = 1;
  567                         break;
  568                 }
  569         }
  570         if (error == 0) {
  571                 error = so->so_error;
  572                 so->so_error = 0;
  573         }
  574         SOCK_UNLOCK(so);
  575 bad:
  576         if (!interrupted)
  577                 so->so_state &= ~SS_ISCONNECTING;
  578         if (error == ERESTART)
  579                 error = EINTR;
  580 done1:
  581         fdrop(fp, td);
  582         return (error);
  583 }
  584 
  585 int
  586 kern_socketpair(struct thread *td, int domain, int type, int protocol,
  587     int *rsv)
  588 {
  589         struct filedesc *fdp = td->td_proc->p_fd;
  590         struct file *fp1, *fp2;
  591         struct socket *so1, *so2;
  592         int fd, error;
  593 
  594         AUDIT_ARG_SOCKET(domain, type, protocol);
  595 #ifdef MAC
  596         /* We might want to have a separate check for socket pairs. */
  597         error = mac_socket_check_create(td->td_ucred, domain, type,
  598             protocol);
  599         if (error)
  600                 return (error);
  601 #endif
  602         error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
  603         if (error)
  604                 return (error);
  605         error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
  606         if (error)
  607                 goto free1;
  608         /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
  609         error = falloc(td, &fp1, &fd);
  610         if (error)
  611                 goto free2;
  612         rsv[0] = fd;
  613         fp1->f_data = so1;      /* so1 already has ref count */
  614         error = falloc(td, &fp2, &fd);
  615         if (error)
  616                 goto free3;
  617         fp2->f_data = so2;      /* so2 already has ref count */
  618         rsv[1] = fd;
  619         error = soconnect2(so1, so2);
  620         if (error)
  621                 goto free4;
  622         if (type == SOCK_DGRAM) {
  623                 /*
  624                  * Datagram socket connection is asymmetric.
  625                  */
  626                  error = soconnect2(so2, so1);
  627                  if (error)
  628                         goto free4;
  629         }
  630         finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
  631         finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
  632         fdrop(fp1, td);
  633         fdrop(fp2, td);
  634         return (0);
  635 free4:
  636         fdclose(fdp, fp2, rsv[1], td);
  637         fdrop(fp2, td);
  638 free3:
  639         fdclose(fdp, fp1, rsv[0], td);
  640         fdrop(fp1, td);
  641 free2:
  642         if (so2 != NULL)
  643                 (void)soclose(so2);
  644 free1:
  645         if (so1 != NULL)
  646                 (void)soclose(so1);
  647         return (error);
  648 }
  649 
  650 int
  651 socketpair(struct thread *td, struct socketpair_args *uap)
  652 {
  653         int error, sv[2];
  654 
  655         error = kern_socketpair(td, uap->domain, uap->type,
  656             uap->protocol, sv);
  657         if (error)
  658                 return (error);
  659         error = copyout(sv, uap->rsv, 2 * sizeof(int));
  660         if (error) {
  661                 (void)kern_close(td, sv[0]);
  662                 (void)kern_close(td, sv[1]);
  663         }
  664         return (error);
  665 }
  666 
  667 static int
  668 sendit(td, s, mp, flags)
  669         struct thread *td;
  670         int s;
  671         struct msghdr *mp;
  672         int flags;
  673 {
  674         struct mbuf *control;
  675         struct sockaddr *to;
  676         int error;
  677 
  678         if (mp->msg_name != NULL) {
  679                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
  680                 if (error) {
  681                         to = NULL;
  682                         goto bad;
  683                 }
  684                 mp->msg_name = to;
  685         } else {
  686                 to = NULL;
  687         }
  688 
  689         if (mp->msg_control) {
  690                 if (mp->msg_controllen < sizeof(struct cmsghdr)
  691 #ifdef COMPAT_OLDSOCK
  692                     && mp->msg_flags != MSG_COMPAT
  693 #endif
  694                 ) {
  695                         error = EINVAL;
  696                         goto bad;
  697                 }
  698                 error = sockargs(&control, mp->msg_control,
  699                     mp->msg_controllen, MT_CONTROL);
  700                 if (error)
  701                         goto bad;
  702 #ifdef COMPAT_OLDSOCK
  703                 if (mp->msg_flags == MSG_COMPAT) {
  704                         struct cmsghdr *cm;
  705 
  706                         M_PREPEND(control, sizeof(*cm), M_WAIT);
  707                         cm = mtod(control, struct cmsghdr *);
  708                         cm->cmsg_len = control->m_len;
  709                         cm->cmsg_level = SOL_SOCKET;
  710                         cm->cmsg_type = SCM_RIGHTS;
  711                 }
  712 #endif
  713         } else {
  714                 control = NULL;
  715         }
  716 
  717         error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
  718 
  719 bad:
  720         if (to)
  721                 free(to, M_SONAME);
  722         return (error);
  723 }
  724 
  725 int
  726 kern_sendit(td, s, mp, flags, control, segflg)
  727         struct thread *td;
  728         int s;
  729         struct msghdr *mp;
  730         int flags;
  731         struct mbuf *control;
  732         enum uio_seg segflg;
  733 {
  734         struct file *fp;
  735         struct uio auio;
  736         struct iovec *iov;
  737         struct socket *so;
  738         int i;
  739         int len, error;
  740 #ifdef KTRACE
  741         struct uio *ktruio = NULL;
  742 #endif
  743 
  744         AUDIT_ARG_FD(s);
  745         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
  746         if (error)
  747                 return (error);
  748         so = (struct socket *)fp->f_data;
  749 
  750 #ifdef KTRACE
  751         if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
  752                 ktrsockaddr(mp->msg_name);
  753 #endif
  754 #ifdef MAC
  755         if (mp->msg_name != NULL) {
  756                 error = mac_socket_check_connect(td->td_ucred, so,
  757                     mp->msg_name);
  758                 if (error)
  759                         goto bad;
  760         }
  761         error = mac_socket_check_send(td->td_ucred, so);
  762         if (error)
  763                 goto bad;
  764 #endif
  765 
  766         auio.uio_iov = mp->msg_iov;
  767         auio.uio_iovcnt = mp->msg_iovlen;
  768         auio.uio_segflg = segflg;
  769         auio.uio_rw = UIO_WRITE;
  770         auio.uio_td = td;
  771         auio.uio_offset = 0;                    /* XXX */
  772         auio.uio_resid = 0;
  773         iov = mp->msg_iov;
  774         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  775                 if ((auio.uio_resid += iov->iov_len) < 0) {
  776                         error = EINVAL;
  777                         goto bad;
  778                 }
  779         }
  780 #ifdef KTRACE
  781         if (KTRPOINT(td, KTR_GENIO))
  782                 ktruio = cloneuio(&auio);
  783 #endif
  784         len = auio.uio_resid;
  785         error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
  786         if (error) {
  787                 if (auio.uio_resid != len && (error == ERESTART ||
  788                     error == EINTR || error == EWOULDBLOCK))
  789                         error = 0;
  790                 /* Generation of SIGPIPE can be controlled per socket */
  791                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
  792                     !(flags & MSG_NOSIGNAL)) {
  793                         PROC_LOCK(td->td_proc);
  794                         tdksignal(td, SIGPIPE, NULL);
  795                         PROC_UNLOCK(td->td_proc);
  796                 }
  797         }
  798         if (error == 0)
  799                 td->td_retval[0] = len - auio.uio_resid;
  800 #ifdef KTRACE
  801         if (ktruio != NULL) {
  802                 ktruio->uio_resid = td->td_retval[0];
  803                 ktrgenio(s, UIO_WRITE, ktruio, error);
  804         }
  805 #endif
  806 bad:
  807         fdrop(fp, td);
  808         return (error);
  809 }
  810 
  811 int
  812 sendto(td, uap)
  813         struct thread *td;
  814         struct sendto_args /* {
  815                 int     s;
  816                 caddr_t buf;
  817                 size_t  len;
  818                 int     flags;
  819                 caddr_t to;
  820                 int     tolen;
  821         } */ *uap;
  822 {
  823         struct msghdr msg;
  824         struct iovec aiov;
  825         int error;
  826 
  827         msg.msg_name = uap->to;
  828         msg.msg_namelen = uap->tolen;
  829         msg.msg_iov = &aiov;
  830         msg.msg_iovlen = 1;
  831         msg.msg_control = 0;
  832 #ifdef COMPAT_OLDSOCK
  833         msg.msg_flags = 0;
  834 #endif
  835         aiov.iov_base = uap->buf;
  836         aiov.iov_len = uap->len;
  837         error = sendit(td, uap->s, &msg, uap->flags);
  838         return (error);
  839 }
  840 
  841 #ifdef COMPAT_OLDSOCK
  842 int
  843 osend(td, uap)
  844         struct thread *td;
  845         struct osend_args /* {
  846                 int     s;
  847                 caddr_t buf;
  848                 int     len;
  849                 int     flags;
  850         } */ *uap;
  851 {
  852         struct msghdr msg;
  853         struct iovec aiov;
  854         int error;
  855 
  856         msg.msg_name = 0;
  857         msg.msg_namelen = 0;
  858         msg.msg_iov = &aiov;
  859         msg.msg_iovlen = 1;
  860         aiov.iov_base = uap->buf;
  861         aiov.iov_len = uap->len;
  862         msg.msg_control = 0;
  863         msg.msg_flags = 0;
  864         error = sendit(td, uap->s, &msg, uap->flags);
  865         return (error);
  866 }
  867 
  868 int
  869 osendmsg(td, uap)
  870         struct thread *td;
  871         struct osendmsg_args /* {
  872                 int     s;
  873                 caddr_t msg;
  874                 int     flags;
  875         } */ *uap;
  876 {
  877         struct msghdr msg;
  878         struct iovec *iov;
  879         int error;
  880 
  881         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
  882         if (error)
  883                 return (error);
  884         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  885         if (error)
  886                 return (error);
  887         msg.msg_iov = iov;
  888         msg.msg_flags = MSG_COMPAT;
  889         error = sendit(td, uap->s, &msg, uap->flags);
  890         free(iov, M_IOV);
  891         return (error);
  892 }
  893 #endif
  894 
  895 int
  896 sendmsg(td, uap)
  897         struct thread *td;
  898         struct sendmsg_args /* {
  899                 int     s;
  900                 caddr_t msg;
  901                 int     flags;
  902         } */ *uap;
  903 {
  904         struct msghdr msg;
  905         struct iovec *iov;
  906         int error;
  907 
  908         error = copyin(uap->msg, &msg, sizeof (msg));
  909         if (error)
  910                 return (error);
  911         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  912         if (error)
  913                 return (error);
  914         msg.msg_iov = iov;
  915 #ifdef COMPAT_OLDSOCK
  916         msg.msg_flags = 0;
  917 #endif
  918         error = sendit(td, uap->s, &msg, uap->flags);
  919         free(iov, M_IOV);
  920         return (error);
  921 }
  922 
  923 int
  924 kern_recvit(td, s, mp, fromseg, controlp)
  925         struct thread *td;
  926         int s;
  927         struct msghdr *mp;
  928         enum uio_seg fromseg;
  929         struct mbuf **controlp;
  930 {
  931         struct uio auio;
  932         struct iovec *iov;
  933         int i;
  934         socklen_t len;
  935         int error;
  936         struct mbuf *m, *control = 0;
  937         caddr_t ctlbuf;
  938         struct file *fp;
  939         struct socket *so;
  940         struct sockaddr *fromsa = 0;
  941 #ifdef KTRACE
  942         struct uio *ktruio = NULL;
  943 #endif
  944 
  945         if(controlp != NULL)
  946                 *controlp = 0;
  947 
  948         AUDIT_ARG_FD(s);
  949         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
  950         if (error)
  951                 return (error);
  952         so = fp->f_data;
  953 
  954 #ifdef MAC
  955         error = mac_socket_check_receive(td->td_ucred, so);
  956         if (error) {
  957                 fdrop(fp, td);
  958                 return (error);
  959         }
  960 #endif
  961 
  962         auio.uio_iov = mp->msg_iov;
  963         auio.uio_iovcnt = mp->msg_iovlen;
  964         auio.uio_segflg = UIO_USERSPACE;
  965         auio.uio_rw = UIO_READ;
  966         auio.uio_td = td;
  967         auio.uio_offset = 0;                    /* XXX */
  968         auio.uio_resid = 0;
  969         iov = mp->msg_iov;
  970         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  971                 if ((auio.uio_resid += iov->iov_len) < 0) {
  972                         fdrop(fp, td);
  973                         return (EINVAL);
  974                 }
  975         }
  976 #ifdef KTRACE
  977         if (KTRPOINT(td, KTR_GENIO))
  978                 ktruio = cloneuio(&auio);
  979 #endif
  980         len = auio.uio_resid;
  981         error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
  982             (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
  983             &mp->msg_flags);
  984         if (error) {
  985                 if (auio.uio_resid != (int)len && (error == ERESTART ||
  986                     error == EINTR || error == EWOULDBLOCK))
  987                         error = 0;
  988         }
  989 #ifdef KTRACE
  990         if (ktruio != NULL) {
  991                 ktruio->uio_resid = (int)len - auio.uio_resid;
  992                 ktrgenio(s, UIO_READ, ktruio, error);
  993         }
  994 #endif
  995         if (error)
  996                 goto out;
  997         td->td_retval[0] = (int)len - auio.uio_resid;
  998         if (mp->msg_name) {
  999                 len = mp->msg_namelen;
 1000                 if (len <= 0 || fromsa == 0)
 1001                         len = 0;
 1002                 else {
 1003                         /* save sa_len before it is destroyed by MSG_COMPAT */
 1004                         len = MIN(len, fromsa->sa_len);
 1005 #ifdef COMPAT_OLDSOCK
 1006                         if (mp->msg_flags & MSG_COMPAT)
 1007                                 ((struct osockaddr *)fromsa)->sa_family =
 1008                                     fromsa->sa_family;
 1009 #endif
 1010                         if (fromseg == UIO_USERSPACE) {
 1011                                 error = copyout(fromsa, mp->msg_name,
 1012                                     (unsigned)len);
 1013                                 if (error)
 1014                                         goto out;
 1015                         } else
 1016                                 bcopy(fromsa, mp->msg_name, len);
 1017                 }
 1018                 mp->msg_namelen = len;
 1019         }
 1020         if (mp->msg_control && controlp == NULL) {
 1021 #ifdef COMPAT_OLDSOCK
 1022                 /*
 1023                  * We assume that old recvmsg calls won't receive access
 1024                  * rights and other control info, esp. as control info
 1025                  * is always optional and those options didn't exist in 4.3.
 1026                  * If we receive rights, trim the cmsghdr; anything else
 1027                  * is tossed.
 1028                  */
 1029                 if (control && mp->msg_flags & MSG_COMPAT) {
 1030                         if (mtod(control, struct cmsghdr *)->cmsg_level !=
 1031                             SOL_SOCKET ||
 1032                             mtod(control, struct cmsghdr *)->cmsg_type !=
 1033                             SCM_RIGHTS) {
 1034                                 mp->msg_controllen = 0;
 1035                                 goto out;
 1036                         }
 1037                         control->m_len -= sizeof (struct cmsghdr);
 1038                         control->m_data += sizeof (struct cmsghdr);
 1039                 }
 1040 #endif
 1041                 len = mp->msg_controllen;
 1042                 m = control;
 1043                 mp->msg_controllen = 0;
 1044                 ctlbuf = mp->msg_control;
 1045 
 1046                 while (m && len > 0) {
 1047                         unsigned int tocopy;
 1048 
 1049                         if (len >= m->m_len)
 1050                                 tocopy = m->m_len;
 1051                         else {
 1052                                 mp->msg_flags |= MSG_CTRUNC;
 1053                                 tocopy = len;
 1054                         }
 1055 
 1056                         if ((error = copyout(mtod(m, caddr_t),
 1057                                         ctlbuf, tocopy)) != 0)
 1058                                 goto out;
 1059 
 1060                         ctlbuf += tocopy;
 1061                         len -= tocopy;
 1062                         m = m->m_next;
 1063                 }
 1064                 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
 1065         }
 1066 out:
 1067         fdrop(fp, td);
 1068 #ifdef KTRACE
 1069         if (fromsa && KTRPOINT(td, KTR_STRUCT))
 1070                 ktrsockaddr(fromsa);
 1071 #endif
 1072         if (fromsa)
 1073                 free(fromsa, M_SONAME);
 1074 
 1075         if (error == 0 && controlp != NULL)  
 1076                 *controlp = control;
 1077         else  if (control)
 1078                 m_freem(control);
 1079 
 1080         return (error);
 1081 }
 1082 
 1083 static int
 1084 recvit(td, s, mp, namelenp)
 1085         struct thread *td;
 1086         int s;
 1087         struct msghdr *mp;
 1088         void *namelenp;
 1089 {
 1090         int error;
 1091 
 1092         error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
 1093         if (error)
 1094                 return (error);
 1095         if (namelenp) {
 1096                 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
 1097 #ifdef COMPAT_OLDSOCK
 1098                 if (mp->msg_flags & MSG_COMPAT)
 1099                         error = 0;      /* old recvfrom didn't check */
 1100 #endif
 1101         }
 1102         return (error);
 1103 }
 1104 
 1105 int
 1106 recvfrom(td, uap)
 1107         struct thread *td;
 1108         struct recvfrom_args /* {
 1109                 int     s;
 1110                 caddr_t buf;
 1111                 size_t  len;
 1112                 int     flags;
 1113                 struct sockaddr * __restrict    from;
 1114                 socklen_t * __restrict fromlenaddr;
 1115         } */ *uap;
 1116 {
 1117         struct msghdr msg;
 1118         struct iovec aiov;
 1119         int error;
 1120 
 1121         if (uap->fromlenaddr) {
 1122                 error = copyin(uap->fromlenaddr,
 1123                     &msg.msg_namelen, sizeof (msg.msg_namelen));
 1124                 if (error)
 1125                         goto done2;
 1126         } else {
 1127                 msg.msg_namelen = 0;
 1128         }
 1129         msg.msg_name = uap->from;
 1130         msg.msg_iov = &aiov;
 1131         msg.msg_iovlen = 1;
 1132         aiov.iov_base = uap->buf;
 1133         aiov.iov_len = uap->len;
 1134         msg.msg_control = 0;
 1135         msg.msg_flags = uap->flags;
 1136         error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 1137 done2:
 1138         return(error);
 1139 }
 1140 
 1141 #ifdef COMPAT_OLDSOCK
 1142 int
 1143 orecvfrom(td, uap)
 1144         struct thread *td;
 1145         struct recvfrom_args *uap;
 1146 {
 1147 
 1148         uap->flags |= MSG_COMPAT;
 1149         return (recvfrom(td, uap));
 1150 }
 1151 #endif
 1152 
 1153 #ifdef COMPAT_OLDSOCK
 1154 int
 1155 orecv(td, uap)
 1156         struct thread *td;
 1157         struct orecv_args /* {
 1158                 int     s;
 1159                 caddr_t buf;
 1160                 int     len;
 1161                 int     flags;
 1162         } */ *uap;
 1163 {
 1164         struct msghdr msg;
 1165         struct iovec aiov;
 1166         int error;
 1167 
 1168         msg.msg_name = 0;
 1169         msg.msg_namelen = 0;
 1170         msg.msg_iov = &aiov;
 1171         msg.msg_iovlen = 1;
 1172         aiov.iov_base = uap->buf;
 1173         aiov.iov_len = uap->len;
 1174         msg.msg_control = 0;
 1175         msg.msg_flags = uap->flags;
 1176         error = recvit(td, uap->s, &msg, NULL);
 1177         return (error);
 1178 }
 1179 
 1180 /*
 1181  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 1182  * overlays the new one, missing only the flags, and with the (old) access
 1183  * rights where the control fields are now.
 1184  */
 1185 int
 1186 orecvmsg(td, uap)
 1187         struct thread *td;
 1188         struct orecvmsg_args /* {
 1189                 int     s;
 1190                 struct  omsghdr *msg;
 1191                 int     flags;
 1192         } */ *uap;
 1193 {
 1194         struct msghdr msg;
 1195         struct iovec *iov;
 1196         int error;
 1197 
 1198         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 1199         if (error)
 1200                 return (error);
 1201         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1202         if (error)
 1203                 return (error);
 1204         msg.msg_flags = uap->flags | MSG_COMPAT;
 1205         msg.msg_iov = iov;
 1206         error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
 1207         if (msg.msg_controllen && error == 0)
 1208                 error = copyout(&msg.msg_controllen,
 1209                     &uap->msg->msg_accrightslen, sizeof (int));
 1210         free(iov, M_IOV);
 1211         return (error);
 1212 }
 1213 #endif
 1214 
 1215 int
 1216 recvmsg(td, uap)
 1217         struct thread *td;
 1218         struct recvmsg_args /* {
 1219                 int     s;
 1220                 struct  msghdr *msg;
 1221                 int     flags;
 1222         } */ *uap;
 1223 {
 1224         struct msghdr msg;
 1225         struct iovec *uiov, *iov;
 1226         int error;
 1227 
 1228         error = copyin(uap->msg, &msg, sizeof (msg));
 1229         if (error)
 1230                 return (error);
 1231         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1232         if (error)
 1233                 return (error);
 1234         msg.msg_flags = uap->flags;
 1235 #ifdef COMPAT_OLDSOCK
 1236         msg.msg_flags &= ~MSG_COMPAT;
 1237 #endif
 1238         uiov = msg.msg_iov;
 1239         msg.msg_iov = iov;
 1240         error = recvit(td, uap->s, &msg, NULL);
 1241         if (error == 0) {
 1242                 msg.msg_iov = uiov;
 1243                 error = copyout(&msg, uap->msg, sizeof(msg));
 1244         }
 1245         free(iov, M_IOV);
 1246         return (error);
 1247 }
 1248 
 1249 /* ARGSUSED */
 1250 int
 1251 shutdown(td, uap)
 1252         struct thread *td;
 1253         struct shutdown_args /* {
 1254                 int     s;
 1255                 int     how;
 1256         } */ *uap;
 1257 {
 1258         struct socket *so;
 1259         struct file *fp;
 1260         int error;
 1261 
 1262         AUDIT_ARG_FD(uap->s);
 1263         error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
 1264         if (error == 0) {
 1265                 so = fp->f_data;
 1266                 error = soshutdown(so, uap->how);
 1267                 fdrop(fp, td);
 1268         }
 1269         return (error);
 1270 }
 1271 
 1272 /* ARGSUSED */
 1273 int
 1274 setsockopt(td, uap)
 1275         struct thread *td;
 1276         struct setsockopt_args /* {
 1277                 int     s;
 1278                 int     level;
 1279                 int     name;
 1280                 caddr_t val;
 1281                 int     valsize;
 1282         } */ *uap;
 1283 {
 1284 
 1285         return (kern_setsockopt(td, uap->s, uap->level, uap->name,
 1286             uap->val, UIO_USERSPACE, uap->valsize));
 1287 }
 1288 
 1289 int
 1290 kern_setsockopt(td, s, level, name, val, valseg, valsize)
 1291         struct thread *td;
 1292         int s;
 1293         int level;
 1294         int name;
 1295         void *val;
 1296         enum uio_seg valseg;
 1297         socklen_t valsize;
 1298 {
 1299         int error;
 1300         struct socket *so;
 1301         struct file *fp;
 1302         struct sockopt sopt;
 1303 
 1304         if (val == NULL && valsize != 0)
 1305                 return (EFAULT);
 1306         if ((int)valsize < 0)
 1307                 return (EINVAL);
 1308 
 1309         sopt.sopt_dir = SOPT_SET;
 1310         sopt.sopt_level = level;
 1311         sopt.sopt_name = name;
 1312         sopt.sopt_val = val;
 1313         sopt.sopt_valsize = valsize;
 1314         switch (valseg) {
 1315         case UIO_USERSPACE:
 1316                 sopt.sopt_td = td;
 1317                 break;
 1318         case UIO_SYSSPACE:
 1319                 sopt.sopt_td = NULL;
 1320                 break;
 1321         default:
 1322                 panic("kern_setsockopt called with bad valseg");
 1323         }
 1324 
 1325         AUDIT_ARG_FD(s);
 1326         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
 1327         if (error == 0) {
 1328                 so = fp->f_data;
 1329                 error = sosetopt(so, &sopt);
 1330                 fdrop(fp, td);
 1331         }
 1332         return(error);
 1333 }
 1334 
 1335 /* ARGSUSED */
 1336 int
 1337 getsockopt(td, uap)
 1338         struct thread *td;
 1339         struct getsockopt_args /* {
 1340                 int     s;
 1341                 int     level;
 1342                 int     name;
 1343                 void * __restrict       val;
 1344                 socklen_t * __restrict avalsize;
 1345         } */ *uap;
 1346 {
 1347         socklen_t valsize;
 1348         int     error;
 1349 
 1350         if (uap->val) {
 1351                 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 1352                 if (error)
 1353                         return (error);
 1354         }
 1355 
 1356         error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 1357             uap->val, UIO_USERSPACE, &valsize);
 1358 
 1359         if (error == 0)
 1360                 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 1361         return (error);
 1362 }
 1363 
 1364 /*
 1365  * Kernel version of getsockopt.
 1366  * optval can be a userland or userspace. optlen is always a kernel pointer.
 1367  */
 1368 int
 1369 kern_getsockopt(td, s, level, name, val, valseg, valsize)
 1370         struct thread *td;
 1371         int s;
 1372         int level;
 1373         int name;
 1374         void *val;
 1375         enum uio_seg valseg;
 1376         socklen_t *valsize;
 1377 {
 1378         int error;
 1379         struct  socket *so;
 1380         struct file *fp;
 1381         struct  sockopt sopt;
 1382 
 1383         if (val == NULL)
 1384                 *valsize = 0;
 1385         if ((int)*valsize < 0)
 1386                 return (EINVAL);
 1387 
 1388         sopt.sopt_dir = SOPT_GET;
 1389         sopt.sopt_level = level;
 1390         sopt.sopt_name = name;
 1391         sopt.sopt_val = val;
 1392         sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
 1393         switch (valseg) {
 1394         case UIO_USERSPACE:
 1395                 sopt.sopt_td = td;
 1396                 break;
 1397         case UIO_SYSSPACE:
 1398                 sopt.sopt_td = NULL;
 1399                 break;
 1400         default:
 1401                 panic("kern_getsockopt called with bad valseg");
 1402         }
 1403 
 1404         AUDIT_ARG_FD(s);
 1405         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
 1406         if (error == 0) {
 1407                 so = fp->f_data;
 1408                 error = sogetopt(so, &sopt);
 1409                 *valsize = sopt.sopt_valsize;
 1410                 fdrop(fp, td);
 1411         }
 1412         return (error);
 1413 }
 1414 
 1415 /*
 1416  * getsockname1() - Get socket name.
 1417  */
 1418 /* ARGSUSED */
 1419 static int
 1420 getsockname1(td, uap, compat)
 1421         struct thread *td;
 1422         struct getsockname_args /* {
 1423                 int     fdes;
 1424                 struct sockaddr * __restrict asa;
 1425                 socklen_t * __restrict alen;
 1426         } */ *uap;
 1427         int compat;
 1428 {
 1429         struct sockaddr *sa;
 1430         socklen_t len;
 1431         int error;
 1432 
 1433         error = copyin(uap->alen, &len, sizeof(len));
 1434         if (error)
 1435                 return (error);
 1436 
 1437         error = kern_getsockname(td, uap->fdes, &sa, &len);
 1438         if (error)
 1439                 return (error);
 1440 
 1441         if (len != 0) {
 1442 #ifdef COMPAT_OLDSOCK
 1443                 if (compat)
 1444                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1445 #endif
 1446                 error = copyout(sa, uap->asa, (u_int)len);
 1447         }
 1448         free(sa, M_SONAME);
 1449         if (error == 0)
 1450                 error = copyout(&len, uap->alen, sizeof(len));
 1451         return (error);
 1452 }
 1453 
 1454 int
 1455 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
 1456     socklen_t *alen)
 1457 {
 1458         struct socket *so;
 1459         struct file *fp;
 1460         socklen_t len;
 1461         int error;
 1462 
 1463         if (*alen < 0)
 1464                 return (EINVAL);
 1465 
 1466         AUDIT_ARG_FD(fd);
 1467         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
 1468         if (error)
 1469                 return (error);
 1470         so = fp->f_data;
 1471         *sa = NULL;
 1472         CURVNET_SET(so->so_vnet);
 1473         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
 1474         CURVNET_RESTORE();
 1475         if (error)
 1476                 goto bad;
 1477         if (*sa == NULL)
 1478                 len = 0;
 1479         else
 1480                 len = MIN(*alen, (*sa)->sa_len);
 1481         *alen = len;
 1482 #ifdef KTRACE
 1483         if (KTRPOINT(td, KTR_STRUCT))
 1484                 ktrsockaddr(*sa);
 1485 #endif
 1486 bad:
 1487         fdrop(fp, td);
 1488         if (error && *sa) {
 1489                 free(*sa, M_SONAME);
 1490                 *sa = NULL;
 1491         }
 1492         return (error);
 1493 }
 1494 
 1495 int
 1496 getsockname(td, uap)
 1497         struct thread *td;
 1498         struct getsockname_args *uap;
 1499 {
 1500 
 1501         return (getsockname1(td, uap, 0));
 1502 }
 1503 
 1504 #ifdef COMPAT_OLDSOCK
 1505 int
 1506 ogetsockname(td, uap)
 1507         struct thread *td;
 1508         struct getsockname_args *uap;
 1509 {
 1510 
 1511         return (getsockname1(td, uap, 1));
 1512 }
 1513 #endif /* COMPAT_OLDSOCK */
 1514 
 1515 /*
 1516  * getpeername1() - Get name of peer for connected socket.
 1517  */
 1518 /* ARGSUSED */
 1519 static int
 1520 getpeername1(td, uap, compat)
 1521         struct thread *td;
 1522         struct getpeername_args /* {
 1523                 int     fdes;
 1524                 struct sockaddr * __restrict    asa;
 1525                 socklen_t * __restrict  alen;
 1526         } */ *uap;
 1527         int compat;
 1528 {
 1529         struct sockaddr *sa;
 1530         socklen_t len;
 1531         int error;
 1532 
 1533         error = copyin(uap->alen, &len, sizeof (len));
 1534         if (error)
 1535                 return (error);
 1536 
 1537         error = kern_getpeername(td, uap->fdes, &sa, &len);
 1538         if (error)
 1539                 return (error);
 1540 
 1541         if (len != 0) {
 1542 #ifdef COMPAT_OLDSOCK
 1543                 if (compat)
 1544                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1545 #endif
 1546                 error = copyout(sa, uap->asa, (u_int)len);
 1547         }
 1548         free(sa, M_SONAME);
 1549         if (error == 0)
 1550                 error = copyout(&len, uap->alen, sizeof(len));
 1551         return (error);
 1552 }
 1553 
 1554 int
 1555 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
 1556     socklen_t *alen)
 1557 {
 1558         struct socket *so;
 1559         struct file *fp;
 1560         socklen_t len;
 1561         int error;
 1562 
 1563         if (*alen < 0)
 1564                 return (EINVAL);
 1565 
 1566         AUDIT_ARG_FD(fd);
 1567         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
 1568         if (error)
 1569                 return (error);
 1570         so = fp->f_data;
 1571         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 1572                 error = ENOTCONN;
 1573                 goto done;
 1574         }
 1575         *sa = NULL;
 1576         CURVNET_SET(so->so_vnet);
 1577         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
 1578         CURVNET_RESTORE();
 1579         if (error)
 1580                 goto bad;
 1581         if (*sa == NULL)
 1582                 len = 0;
 1583         else
 1584                 len = MIN(*alen, (*sa)->sa_len);
 1585         *alen = len;
 1586 #ifdef KTRACE
 1587         if (KTRPOINT(td, KTR_STRUCT))
 1588                 ktrsockaddr(*sa);
 1589 #endif
 1590 bad:
 1591         if (error && *sa) {
 1592                 free(*sa, M_SONAME);
 1593                 *sa = NULL;
 1594         }
 1595 done:
 1596         fdrop(fp, td);
 1597         return (error);
 1598 }
 1599 
 1600 int
 1601 getpeername(td, uap)
 1602         struct thread *td;
 1603         struct getpeername_args *uap;
 1604 {
 1605 
 1606         return (getpeername1(td, uap, 0));
 1607 }
 1608 
 1609 #ifdef COMPAT_OLDSOCK
 1610 int
 1611 ogetpeername(td, uap)
 1612         struct thread *td;
 1613         struct ogetpeername_args *uap;
 1614 {
 1615 
 1616         /* XXX uap should have type `getpeername_args *' to begin with. */
 1617         return (getpeername1(td, (struct getpeername_args *)uap, 1));
 1618 }
 1619 #endif /* COMPAT_OLDSOCK */
 1620 
 1621 int
 1622 sockargs(mp, buf, buflen, type)
 1623         struct mbuf **mp;
 1624         caddr_t buf;
 1625         int buflen, type;
 1626 {
 1627         struct sockaddr *sa;
 1628         struct mbuf *m;
 1629         int error;
 1630 
 1631         if ((u_int)buflen > MLEN) {
 1632 #ifdef COMPAT_OLDSOCK
 1633                 if (type == MT_SONAME && (u_int)buflen <= 112)
 1634                         buflen = MLEN;          /* unix domain compat. hack */
 1635                 else
 1636 #endif
 1637                         if ((u_int)buflen > MCLBYTES)
 1638                                 return (EINVAL);
 1639         }
 1640         m = m_get(M_WAIT, type);
 1641         if ((u_int)buflen > MLEN)
 1642                 MCLGET(m, M_WAIT);
 1643         m->m_len = buflen;
 1644         error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 1645         if (error)
 1646                 (void) m_free(m);
 1647         else {
 1648                 *mp = m;
 1649                 if (type == MT_SONAME) {
 1650                         sa = mtod(m, struct sockaddr *);
 1651 
 1652 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1653                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1654                                 sa->sa_family = sa->sa_len;
 1655 #endif
 1656                         sa->sa_len = buflen;
 1657                 }
 1658         }
 1659         return (error);
 1660 }
 1661 
 1662 int
 1663 getsockaddr(namp, uaddr, len)
 1664         struct sockaddr **namp;
 1665         caddr_t uaddr;
 1666         size_t len;
 1667 {
 1668         struct sockaddr *sa;
 1669         int error;
 1670 
 1671         if (len > SOCK_MAXADDRLEN)
 1672                 return (ENAMETOOLONG);
 1673         if (len < offsetof(struct sockaddr, sa_data[0]))
 1674                 return (EINVAL);
 1675         sa = malloc(len, M_SONAME, M_WAITOK);
 1676         error = copyin(uaddr, sa, len);
 1677         if (error) {
 1678                 free(sa, M_SONAME);
 1679         } else {
 1680 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1681                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1682                         sa->sa_family = sa->sa_len;
 1683 #endif
 1684                 sa->sa_len = len;
 1685                 *namp = sa;
 1686         }
 1687         return (error);
 1688 }
 1689 
 1690 #include <sys/condvar.h>
 1691 
/*
 * Completion tracking shared between a sendfile request and
 * sf_buf_mext(), which decrements 'count' under 'mtx' and signals 'cv'
 * when it reaches zero.
 */
struct sendfile_sync {
	struct mtx	mtx;	/* held while 'count' is examined/changed */
	struct cv	cv;	/* signalled when 'count' drops to zero */
	unsigned	count;	/* outstanding references; see sf_buf_mext() */
};
 1697 
 1698 /*
 1699  * Detach mapped page and release resources back to the system.
 1700  */
 1701 void
 1702 sf_buf_mext(void *addr, void *args)
 1703 {
 1704         vm_page_t m;
 1705         struct sendfile_sync *sfs;
 1706 
 1707         m = sf_buf_page(args);
 1708         sf_buf_free(args);
 1709         vm_page_lock_queues();
 1710         vm_page_unwire(m, 0);
 1711         /*
 1712          * Check for the object going away on us. This can
 1713          * happen since we don't hold a reference to it.
 1714          * If so, we're responsible for freeing the page.
 1715          */
 1716         if (m->wire_count == 0 && m->object == NULL)
 1717                 vm_page_free(m);
 1718         vm_page_unlock_queues();
 1719         if (addr == NULL)
 1720                 return;
 1721         sfs = addr;
 1722         mtx_lock(&sfs->mtx);
 1723         KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
 1724         if (--sfs->count == 0)
 1725                 cv_signal(&sfs->cv);
 1726         mtx_unlock(&sfs->mtx);
 1727 }
 1728 
 1729 /*
 1730  * sendfile(2)
 1731  *
 1732  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 1733  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 1734  *
 1735  * Send a file specified by 'fd' and starting at 'offset' to a socket
 1736  * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
 1737  * 0.  Optionally add a header and/or trailer to the socket output.  If
 1738  * specified, write the total number of bytes sent into *sbytes.
 1739  */
 1740 int
 1741 sendfile(struct thread *td, struct sendfile_args *uap)
 1742 {
 1743 
 1744         return (do_sendfile(td, uap, 0));
 1745 }
 1746 
 1747 static int
 1748 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
 1749 {
 1750         struct sf_hdtr hdtr;
 1751         struct uio *hdr_uio, *trl_uio;
 1752         int error;
 1753 
 1754         hdr_uio = trl_uio = NULL;
 1755 
 1756         if (uap->hdtr != NULL) {
 1757                 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 1758                 if (error)
 1759                         goto out;
 1760                 if (hdtr.headers != NULL) {
 1761                         error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
 1762                         if (error)
 1763                                 goto out;
 1764                 }
 1765                 if (hdtr.trailers != NULL) {
 1766                         error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
 1767                         if (error)
 1768                                 goto out;
 1769 
 1770                 }
 1771         }
 1772 
 1773         error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
 1774 out:
 1775         if (hdr_uio)
 1776                 free(hdr_uio, M_IOV);
 1777         if (trl_uio)
 1778                 free(trl_uio, M_IOV);
 1779         return (error);
 1780 }
 1781 
 1782 #ifdef COMPAT_FREEBSD4
 1783 int
 1784 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
 1785 {
 1786         struct sendfile_args args;
 1787 
 1788         args.fd = uap->fd;
 1789         args.s = uap->s;
 1790         args.offset = uap->offset;
 1791         args.nbytes = uap->nbytes;
 1792         args.hdtr = uap->hdtr;
 1793         args.sbytes = uap->sbytes;
 1794         args.flags = uap->flags;
 1795 
 1796         return (do_sendfile(td, &args, 1));
 1797 }
 1798 #endif /* COMPAT_FREEBSD4 */
 1799 
 1800 int
 1801 kern_sendfile(struct thread *td, struct sendfile_args *uap,
 1802     struct uio *hdr_uio, struct uio *trl_uio, int compat)
 1803 {
 1804         struct file *sock_fp;
 1805         struct vnode *vp;
 1806         struct vm_object *obj = NULL;
 1807         struct socket *so = NULL;
 1808         struct mbuf *m = NULL;
 1809         struct sf_buf *sf;
 1810         struct vm_page *pg;
 1811         off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
 1812         int error, hdrlen = 0, mnw = 0;
 1813         int vfslocked;
 1814         struct sendfile_sync *sfs = NULL;
 1815 
 1816         /*
 1817          * The file descriptor must be a regular file and have a
 1818          * backing VM object.
 1819          * File offset must be positive.  If it goes beyond EOF
 1820          * we send only the header/trailer and no payload data.
 1821          */
 1822         AUDIT_ARG_FD(uap->fd);
 1823         if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
 1824                 goto out;
 1825         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1826         vn_lock(vp, LK_SHARED | LK_RETRY);
 1827         if (vp->v_type == VREG) {
 1828                 obj = vp->v_object;
 1829                 if (obj != NULL) {
 1830                         /*
 1831                          * Temporarily increase the backing VM
 1832                          * object's reference count so that a forced
 1833                          * reclamation of its vnode does not
 1834                          * immediately destroy it.
 1835                          */
 1836                         VM_OBJECT_LOCK(obj);
 1837                         if ((obj->flags & OBJ_DEAD) == 0) {
 1838                                 vm_object_reference_locked(obj);
 1839                                 VM_OBJECT_UNLOCK(obj);
 1840                         } else {
 1841                                 VM_OBJECT_UNLOCK(obj);
 1842                                 obj = NULL;
 1843                         }
 1844                 }
 1845         }
 1846         VOP_UNLOCK(vp, 0);
 1847         VFS_UNLOCK_GIANT(vfslocked);
 1848         if (obj == NULL) {
 1849                 error = EINVAL;
 1850                 goto out;
 1851         }
 1852         if (uap->offset < 0) {
 1853                 error = EINVAL;
 1854                 goto out;
 1855         }
 1856 
 1857         /*
 1858          * The socket must be a stream socket and connected.
 1859          * Remember if it a blocking or non-blocking socket.
 1860          */
 1861         if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
 1862             NULL)) != 0)
 1863                 goto out;
 1864         so = sock_fp->f_data;
 1865         if (so->so_type != SOCK_STREAM) {
 1866                 error = EINVAL;
 1867                 goto out;
 1868         }
 1869         if ((so->so_state & SS_ISCONNECTED) == 0) {
 1870                 error = ENOTCONN;
 1871                 goto out;
 1872         }
 1873         /*
 1874          * Do not wait on memory allocations but return ENOMEM for
 1875          * caller to retry later.
 1876          * XXX: Experimental.
 1877          */
 1878         if (uap->flags & SF_MNOWAIT)
 1879                 mnw = 1;
 1880 
 1881         if (uap->flags & SF_SYNC) {
 1882                 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK);
 1883                 memset(sfs, 0, sizeof *sfs);
 1884                 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
 1885                 cv_init(&sfs->cv, "sendfile");
 1886         }
 1887 
 1888 #ifdef MAC
 1889         error = mac_socket_check_send(td->td_ucred, so);
 1890         if (error)
 1891                 goto out;
 1892 #endif
 1893 
 1894         /* If headers are specified copy them into mbufs. */
 1895         if (hdr_uio != NULL) {
 1896                 hdr_uio->uio_td = td;
 1897                 hdr_uio->uio_rw = UIO_WRITE;
 1898                 if (hdr_uio->uio_resid > 0) {
 1899                         /*
 1900                          * In FBSD < 5.0 the nbytes to send also included
 1901                          * the header.  If compat is specified subtract the
 1902                          * header size from nbytes.
 1903                          */
 1904                         if (compat) {
 1905                                 if (uap->nbytes > hdr_uio->uio_resid)
 1906                                         uap->nbytes -= hdr_uio->uio_resid;
 1907                                 else
 1908                                         uap->nbytes = 0;
 1909                         }
 1910                         m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
 1911                             0, 0, 0);
 1912                         if (m == NULL) {
 1913                                 error = mnw ? EAGAIN : ENOBUFS;
 1914                                 goto out;
 1915                         }
 1916                         hdrlen = m_length(m, NULL);
 1917                 }
 1918         }
 1919 
 1920         /*
 1921          * Protect against multiple writers to the socket.
 1922          *
 1923          * XXXRW: Historically this has assumed non-interruptibility, so now
 1924          * we implement that, but possibly shouldn't.
 1925          */
 1926         (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
 1927 
 1928         /*
 1929          * Loop through the pages of the file, starting with the requested
 1930          * offset. Get a file page (do I/O if necessary), map the file page
 1931          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 1932          * it on the socket.
 1933          * This is done in two loops.  The inner loop turns as many pages
 1934          * as it can, up to available socket buffer space, without blocking
 1935          * into mbufs to have it bulk delivered into the socket send buffer.
 1936          * The outer loop checks the state and available space of the socket
 1937          * and takes care of the overall progress.
 1938          */
 1939         for (off = uap->offset, rem = uap->nbytes; ; ) {
 1940                 int loopbytes = 0;
 1941                 int space = 0;
 1942                 int done = 0;
 1943 
 1944                 /*
 1945                  * Check the socket state for ongoing connection,
 1946                  * no errors and space in socket buffer.
 1947                  * If space is low allow for the remainder of the
 1948                  * file to be processed if it fits the socket buffer.
 1949                  * Otherwise block in waiting for sufficient space
 1950                  * to proceed, or if the socket is nonblocking, return
 1951                  * to userland with EAGAIN while reporting how far
 1952                  * we've come.
 1953                  * We wait until the socket buffer has significant free
 1954                  * space to do bulk sends.  This makes good use of file
 1955                  * system read ahead and allows packet segmentation
 1956                  * offloading hardware to take over lots of work.  If
 1957                  * we were not careful here we would send off only one
 1958                  * sfbuf at a time.
 1959                  */
 1960                 SOCKBUF_LOCK(&so->so_snd);
 1961                 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
 1962                         so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
 1963 retry_space:
 1964                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 1965                         error = EPIPE;
 1966                         SOCKBUF_UNLOCK(&so->so_snd);
 1967                         goto done;
 1968                 } else if (so->so_error) {
 1969                         error = so->so_error;
 1970                         so->so_error = 0;
 1971                         SOCKBUF_UNLOCK(&so->so_snd);
 1972                         goto done;
 1973                 }
 1974                 space = sbspace(&so->so_snd);
 1975                 if (space < rem &&
 1976                     (space <= 0 ||
 1977                      space < so->so_snd.sb_lowat)) {
 1978                         if (so->so_state & SS_NBIO) {
 1979                                 SOCKBUF_UNLOCK(&so->so_snd);
 1980                                 error = EAGAIN;
 1981                                 goto done;
 1982                         }
 1983                         /*
 1984                          * sbwait drops the lock while sleeping.
 1985                          * When we loop back to retry_space the
 1986                          * state may have changed and we retest
 1987                          * for it.
 1988                          */
 1989                         error = sbwait(&so->so_snd);
 1990                         /*
 1991                          * An error from sbwait usually indicates that we've
 1992                          * been interrupted by a signal. If we've sent anything
 1993                          * then return bytes sent, otherwise return the error.
 1994                          */
 1995                         if (error) {
 1996                                 SOCKBUF_UNLOCK(&so->so_snd);
 1997                                 goto done;
 1998                         }
 1999                         goto retry_space;
 2000                 }
 2001                 SOCKBUF_UNLOCK(&so->so_snd);
 2002 
 2003                 /*
 2004                  * Reduce space in the socket buffer by the size of
 2005                  * the header mbuf chain.
 2006                  * hdrlen is set to 0 after the first loop.
 2007                  */
 2008                 space -= hdrlen;
 2009 
 2010                 /*
 2011                  * Loop and construct maximum sized mbuf chain to be bulk
 2012                  * dumped into socket buffer.
 2013                  */
 2014                 while(space > loopbytes) {
 2015                         vm_pindex_t pindex;
 2016                         vm_offset_t pgoff;
 2017                         struct mbuf *m0;
 2018 
 2019                         VM_OBJECT_LOCK(obj);
 2020                         /*
 2021                          * Calculate the amount to transfer.
 2022                          * Not to exceed a page, the EOF,
 2023                          * or the passed in nbytes.
 2024                          */
 2025                         pgoff = (vm_offset_t)(off & PAGE_MASK);
 2026                         xfsize = omin(PAGE_SIZE - pgoff,
 2027                             obj->un_pager.vnp.vnp_size - uap->offset -
 2028                             fsbytes - loopbytes);
 2029                         if (uap->nbytes)
 2030                                 rem = (uap->nbytes - fsbytes - loopbytes);
 2031                         else
 2032                                 rem = obj->un_pager.vnp.vnp_size -
 2033                                     uap->offset - fsbytes - loopbytes;
 2034                         xfsize = omin(rem, xfsize);
 2035                         xfsize = omin(space - loopbytes, xfsize);
 2036                         if (xfsize <= 0) {
 2037                                 VM_OBJECT_UNLOCK(obj);
 2038                                 done = 1;               /* all data sent */
 2039                                 break;
 2040                         }
 2041 
 2042                         /*
 2043                          * Attempt to look up the page.  Allocate
 2044                          * if not found or wait and loop if busy.
 2045                          */
 2046                         pindex = OFF_TO_IDX(off);
 2047                         pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
 2048                             VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
 2049 
 2050                         /*
 2051                          * Check if page is valid for what we need,
 2052                          * otherwise initiate I/O.
 2053                          * If we already turned some pages into mbufs,
 2054                          * send them off before we come here again and
 2055                          * block.
 2056                          */
 2057                         if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
 2058                                 VM_OBJECT_UNLOCK(obj);
 2059                         else if (m != NULL)
 2060                                 error = EAGAIN; /* send what we already got */
 2061                         else if (uap->flags & SF_NODISKIO)
 2062                                 error = EBUSY;
 2063                         else {
 2064                                 int bsize, resid;
 2065 
 2066                                 /*
 2067                                  * Ensure that our page is still around
 2068                                  * when the I/O completes.
 2069                                  */
 2070                                 vm_page_io_start(pg);
 2071                                 VM_OBJECT_UNLOCK(obj);
 2072 
 2073                                 /*
 2074                                  * Get the page from backing store.
 2075                                  */
 2076                                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2077                                 error = vn_lock(vp, LK_SHARED);
 2078                                 if (error != 0)
 2079                                         goto after_read;
 2080                                 bsize = vp->v_mount->mnt_stat.f_iosize;
 2081 
 2082                                 /*
 2083                                  * XXXMAC: Because we don't have fp->f_cred
 2084                                  * here, we pass in NOCRED.  This is probably
 2085                                  * wrong, but is consistent with our original
 2086                                  * implementation.
 2087                                  */
 2088                                 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
 2089                                     trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
 2090                                     IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 2091                                     td->td_ucred, NOCRED, &resid, td);
 2092                                 VOP_UNLOCK(vp, 0);
 2093                         after_read:
 2094                                 VFS_UNLOCK_GIANT(vfslocked);
 2095                                 VM_OBJECT_LOCK(obj);
 2096                                 vm_page_io_finish(pg);
 2097                                 if (!error)
 2098                                         VM_OBJECT_UNLOCK(obj);
 2099                                 mbstat.sf_iocnt++;
 2100                         }
 2101                         if (error) {
 2102                                 vm_page_lock_queues();
 2103                                 vm_page_unwire(pg, 0);
 2104                                 /*
 2105                                  * See if anyone else might know about
 2106                                  * this page.  If not and it is not valid,
 2107                                  * then free it.
 2108                                  */
 2109                                 if (pg->wire_count == 0 && pg->valid == 0 &&
 2110                                     pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
 2111                                     pg->hold_count == 0) {
 2112                                         vm_page_free(pg);
 2113                                 }
 2114                                 vm_page_unlock_queues();
 2115                                 VM_OBJECT_UNLOCK(obj);
 2116                                 if (error == EAGAIN)
 2117                                         error = 0;      /* not a real error */
 2118                                 break;
 2119                         }
 2120 
 2121                         /*
 2122                          * Get a sendfile buf.  When allocating the
 2123                          * first buffer for mbuf chain, we usually
 2124                          * wait as long as necessary, but this wait
 2125                          * can be interrupted.  For consequent
 2126                          * buffers, do not sleep, since several
 2127                          * threads might exhaust the buffers and then
 2128                          * deadlock.
 2129                          */
 2130                         sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
 2131                             SFB_CATCH);
 2132                         if (sf == NULL) {
 2133                                 mbstat.sf_allocfail++;
 2134                                 vm_page_lock_queues();
 2135                                 vm_page_unwire(pg, 0);
 2136                                 /*
 2137                                  * XXX: Not same check as above!?
 2138                                  */
 2139                                 if (pg->wire_count == 0 && pg->object == NULL)
 2140                                         vm_page_free(pg);
 2141                                 vm_page_unlock_queues();
 2142                                 if (m == NULL)
 2143                                         error = (mnw ? EAGAIN : EINTR);
 2144                                 break;
 2145                         }
 2146 
 2147                         /*
 2148                          * Get an mbuf and set it up as having
 2149                          * external storage.
 2150                          */
 2151                         m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
 2152                         if (m0 == NULL) {
 2153                                 error = (mnw ? EAGAIN : ENOBUFS);
 2154                                 sf_buf_mext((void *)sf_buf_kva(sf), sf);
 2155                                 break;
 2156                         }
 2157                         MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
 2158                             sfs, sf, M_RDONLY, EXT_SFBUF);
 2159                         m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
 2160                         m0->m_len = xfsize;
 2161 
 2162                         /* Append to mbuf chain. */
 2163                         if (m != NULL)
 2164                                 m_cat(m, m0);
 2165                         else
 2166                                 m = m0;
 2167 
 2168                         /* Keep track of bits processed. */
 2169                         loopbytes += xfsize;
 2170                         off += xfsize;
 2171 
 2172                         if (sfs != NULL) {
 2173                                 mtx_lock(&sfs->mtx);
 2174                                 sfs->count++;
 2175                                 mtx_unlock(&sfs->mtx);
 2176                         }
 2177                 }
 2178 
 2179                 /* Add the buffer chain to the socket buffer. */
 2180                 if (m != NULL) {
 2181                         int mlen, err;
 2182 
 2183                         mlen = m_length(m, NULL);
 2184                         SOCKBUF_LOCK(&so->so_snd);
 2185                         if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 2186                                 error = EPIPE;
 2187                                 SOCKBUF_UNLOCK(&so->so_snd);
 2188                                 goto done;
 2189                         }
 2190                         SOCKBUF_UNLOCK(&so->so_snd);
 2191                         CURVNET_SET(so->so_vnet);
 2192                         /* Avoid error aliasing. */
 2193                         err = (*so->so_proto->pr_usrreqs->pru_send)
 2194                                     (so, 0, m, NULL, NULL, td);
 2195                         CURVNET_RESTORE();
 2196                         if (err == 0) {
 2197                                 /*
 2198                                  * We need two counters to get the
 2199                                  * file offset and nbytes to send
 2200                                  * right:
 2201                                  * - sbytes contains the total amount
 2202                                  *   of bytes sent, including headers.
 2203                                  * - fsbytes contains the total amount
 2204                                  *   of bytes sent from the file.
 2205                                  */
 2206                                 sbytes += mlen;
 2207                                 fsbytes += mlen;
 2208                                 if (hdrlen) {
 2209                                         fsbytes -= hdrlen;
 2210                                         hdrlen = 0;
 2211                                 }
 2212                         } else if (error == 0)
 2213                                 error = err;
 2214                         m = NULL;       /* pru_send always consumes */
 2215                 }
 2216 
 2217                 /* Quit outer loop on error or when we're done. */
 2218                 if (done) 
 2219                         break;
 2220                 if (error)
 2221                         goto done;
 2222         }
 2223 
 2224         /*
 2225          * Send trailers. Wimp out and use writev(2).
 2226          */
 2227         if (trl_uio != NULL) {
 2228                 sbunlock(&so->so_snd);
 2229                 error = kern_writev(td, uap->s, trl_uio);
 2230                 if (error == 0)
 2231                         sbytes += td->td_retval[0];
 2232                 goto out;
 2233         }
 2234 
 2235 done:
 2236         sbunlock(&so->so_snd);
 2237 out:
 2238         /*
 2239          * If there was no error we have to clear td->td_retval[0]
 2240          * because it may have been set by writev.
 2241          */
 2242         if (error == 0) {
 2243                 td->td_retval[0] = 0;
 2244         }
 2245         if (uap->sbytes != NULL) {
 2246                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
 2247         }
 2248         if (obj != NULL)
 2249                 vm_object_deallocate(obj);
 2250         if (vp != NULL) {
 2251                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2252                 vrele(vp);
 2253                 VFS_UNLOCK_GIANT(vfslocked);
 2254         }
 2255         if (so)
 2256                 fdrop(sock_fp, td);
 2257         if (m)
 2258                 m_freem(m);
 2259 
 2260         if (sfs != NULL) {
 2261                 mtx_lock(&sfs->mtx);
 2262                 if (sfs->count != 0)
 2263                         cv_wait(&sfs->cv, &sfs->mtx);
 2264                 KASSERT(sfs->count == 0, ("sendfile sync still busy"));
 2265                 cv_destroy(&sfs->cv);
 2266                 mtx_destroy(&sfs->mtx);
 2267                 free(sfs, M_TEMP);
 2268         }
 2269 
 2270         if (error == ERESTART)
 2271                 error = EINTR;
 2272 
 2273         return (error);
 2274 }
 2275 
 2276 /*
 2277  * SCTP syscalls.
 2278  * Functionality only compiled in if SCTP is defined in the kernel Makefile,
 2279  * otherwise all return EOPNOTSUPP.
 2280  * XXX: We should make this loadable one day.
 2281  */
 2282 int
 2283 sctp_peeloff(td, uap)
 2284         struct thread *td;
 2285         struct sctp_peeloff_args /* {
 2286                 int     sd;
 2287                 caddr_t name;
 2288         } */ *uap;
 2289 {
 2290 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2291         struct filedesc *fdp;
 2292         struct file *nfp = NULL;
 2293         int error;
 2294         struct socket *head, *so;
 2295         int fd;
 2296         u_int fflag;
 2297 
 2298         fdp = td->td_proc->p_fd;
 2299         AUDIT_ARG_FD(uap->sd);
 2300         error = fgetsock(td, uap->sd, &head, &fflag);
 2301         if (error)
 2302                 goto done2;
 2303         if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
 2304                 error = EOPNOTSUPP;
 2305                 goto done2;
 2306         }
 2307         error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
 2308         if (error)
 2309                 goto done2;
 2310         /*
 2311          * At this point we know we do have a assoc to pull
 2312          * we proceed to get the fd setup. This may block
 2313          * but that is ok.
 2314          */
 2315 
 2316         error = falloc(td, &nfp, &fd);
 2317         if (error)
 2318                 goto done;
 2319         td->td_retval[0] = fd;
 2320 
 2321         CURVNET_SET(head->so_vnet);
 2322         so = sonewconn(head, SS_ISCONNECTED);
 2323         if (so == NULL) 
 2324                 goto noconnection;
 2325         /*
 2326          * Before changing the flags on the socket, we have to bump the
 2327          * reference count.  Otherwise, if the protocol calls sofree(),
 2328          * the socket will be released due to a zero refcount.
 2329          */
 2330         SOCK_LOCK(so);
 2331         soref(so);                      /* file descriptor reference */
 2332         SOCK_UNLOCK(so);
 2333 
 2334         ACCEPT_LOCK();
 2335 
 2336         TAILQ_REMOVE(&head->so_comp, so, so_list);
 2337         head->so_qlen--;
 2338         so->so_state |= (head->so_state & SS_NBIO);
 2339         so->so_state &= ~SS_NOFDREF;
 2340         so->so_qstate &= ~SQ_COMP;
 2341         so->so_head = NULL;
 2342         ACCEPT_UNLOCK();
 2343         finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 2344         error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
 2345         if (error)
 2346                 goto noconnection;
 2347         if (head->so_sigio != NULL)
 2348                 fsetown(fgetown(&head->so_sigio), &so->so_sigio);
 2349 
 2350 noconnection:
 2351         /*
 2352          * close the new descriptor, assuming someone hasn't ripped it
 2353          * out from under us.
 2354          */
 2355         if (error)
 2356                 fdclose(fdp, nfp, fd, td);
 2357 
 2358         /*
 2359          * Release explicitly held references before returning.
 2360          */
 2361         CURVNET_RESTORE();
 2362 done:
 2363         if (nfp != NULL)
 2364                 fdrop(nfp, td);
 2365         fputsock(head);
 2366 done2:
 2367         return (error);
 2368 #else  /* SCTP */
 2369         return (EOPNOTSUPP);
 2370 #endif /* SCTP */
 2371 }
 2372 
 2373 int
 2374 sctp_generic_sendmsg (td, uap)
 2375         struct thread *td;
 2376         struct sctp_generic_sendmsg_args /* {
 2377                 int sd, 
 2378                 caddr_t msg, 
 2379                 int mlen, 
 2380                 caddr_t to, 
 2381                 __socklen_t tolen, 
 2382                 struct sctp_sndrcvinfo *sinfo, 
 2383                 int flags
 2384         } */ *uap;
 2385 {
 2386 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2387         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2388         struct socket *so;
 2389         struct file *fp = NULL;
 2390         int error = 0, len;
 2391         struct sockaddr *to = NULL;
 2392 #ifdef KTRACE
 2393         struct uio *ktruio = NULL;
 2394 #endif
 2395         struct uio auio;
 2396         struct iovec iov[1];
 2397 
 2398         if (uap->sinfo) {
 2399                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2400                 if (error)
 2401                         return (error);
 2402                 u_sinfo = &sinfo;
 2403         }
 2404         if (uap->tolen) {
 2405                 error = getsockaddr(&to, uap->to, uap->tolen);
 2406                 if (error) {
 2407                         to = NULL;
 2408                         goto sctp_bad2;
 2409                 }
 2410         }
 2411 
 2412         AUDIT_ARG_FD(uap->sd);
 2413         error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
 2414         if (error)
 2415                 goto sctp_bad;
 2416 #ifdef KTRACE
 2417         if (to && (KTRPOINT(td, KTR_STRUCT)))
 2418                 ktrsockaddr(to);
 2419 #endif
 2420 
 2421         iov[0].iov_base = uap->msg;
 2422         iov[0].iov_len = uap->mlen;
 2423 
 2424         so = (struct socket *)fp->f_data;
 2425         if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
 2426                 error = EOPNOTSUPP;
 2427                 goto sctp_bad;
 2428         }
 2429 #ifdef MAC
 2430         error = mac_socket_check_send(td->td_ucred, so);
 2431         if (error)
 2432                 goto sctp_bad;
 2433 #endif /* MAC */
 2434 
 2435         auio.uio_iov =  iov;
 2436         auio.uio_iovcnt = 1;
 2437         auio.uio_segflg = UIO_USERSPACE;
 2438         auio.uio_rw = UIO_WRITE;
 2439         auio.uio_td = td;
 2440         auio.uio_offset = 0;                    /* XXX */
 2441         auio.uio_resid = 0;
 2442         len = auio.uio_resid = uap->mlen;
 2443         CURVNET_SET(so->so_vnet);
 2444         error = sctp_lower_sosend(so, to, &auio,
 2445                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2446                     uap->flags, u_sinfo, td);
 2447         CURVNET_RESTORE();
 2448         if (error) {
 2449                 if (auio.uio_resid != len && (error == ERESTART ||
 2450                     error == EINTR || error == EWOULDBLOCK))
 2451                         error = 0;
 2452                 /* Generation of SIGPIPE can be controlled per socket. */
 2453                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2454                     !(uap->flags & MSG_NOSIGNAL)) {
 2455                         PROC_LOCK(td->td_proc);
 2456                         tdksignal(td, SIGPIPE, NULL);
 2457                         PROC_UNLOCK(td->td_proc);
 2458                 }
 2459         }
 2460         if (error == 0)
 2461                 td->td_retval[0] = len - auio.uio_resid;
 2462 #ifdef KTRACE
 2463         if (ktruio != NULL) {
 2464                 ktruio->uio_resid = td->td_retval[0];
 2465                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2466         }
 2467 #endif /* KTRACE */
 2468 sctp_bad:
 2469         if (fp)
 2470                 fdrop(fp, td);
 2471 sctp_bad2:
 2472         if (to)
 2473                 free(to, M_SONAME);
 2474         return (error);
 2475 #else  /* SCTP */
 2476         return (EOPNOTSUPP);
 2477 #endif /* SCTP */
 2478 }
 2479 
 2480 int
 2481 sctp_generic_sendmsg_iov(td, uap)
 2482         struct thread *td;
 2483         struct sctp_generic_sendmsg_iov_args /* {
 2484                 int sd, 
 2485                 struct iovec *iov, 
 2486                 int iovlen, 
 2487                 caddr_t to, 
 2488                 __socklen_t tolen, 
 2489                 struct sctp_sndrcvinfo *sinfo, 
 2490                 int flags
 2491         } */ *uap;
 2492 {
 2493 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2494         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2495         struct socket *so;
 2496         struct file *fp = NULL;
 2497         int error=0, len, i;
 2498         struct sockaddr *to = NULL;
 2499 #ifdef KTRACE
 2500         struct uio *ktruio = NULL;
 2501 #endif
 2502         struct uio auio;
 2503         struct iovec *iov, *tiov;
 2504 
 2505         if (uap->sinfo) {
 2506                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2507                 if (error)
 2508                         return (error);
 2509                 u_sinfo = &sinfo;
 2510         }
 2511         if (uap->tolen) {
 2512                 error = getsockaddr(&to, uap->to, uap->tolen);
 2513                 if (error) {
 2514                         to = NULL;
 2515                         goto sctp_bad2;
 2516                 }
 2517         }
 2518 
 2519         AUDIT_ARG_FD(uap->sd);
 2520         error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
 2521         if (error)
 2522                 goto sctp_bad1;
 2523 
 2524 #ifdef COMPAT_FREEBSD32
 2525         if (SV_CURPROC_FLAG(SV_ILP32))
 2526                 error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 2527                     uap->iovlen, &iov, EMSGSIZE);
 2528         else
 2529 #endif
 2530                 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2531         if (error)
 2532                 goto sctp_bad1;
 2533 #ifdef KTRACE
 2534         if (to && (KTRPOINT(td, KTR_STRUCT)))
 2535                 ktrsockaddr(to);
 2536 #endif
 2537 
 2538         so = (struct socket *)fp->f_data;
 2539         if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
 2540                 error = EOPNOTSUPP;
 2541                 goto sctp_bad;
 2542         }
 2543 #ifdef MAC
 2544         error = mac_socket_check_send(td->td_ucred, so);
 2545         if (error)
 2546                 goto sctp_bad;
 2547 #endif /* MAC */
 2548 
 2549         auio.uio_iov = iov;
 2550         auio.uio_iovcnt = uap->iovlen;
 2551         auio.uio_segflg = UIO_USERSPACE;
 2552         auio.uio_rw = UIO_WRITE;
 2553         auio.uio_td = td;
 2554         auio.uio_offset = 0;                    /* XXX */
 2555         auio.uio_resid = 0;
 2556         tiov = iov;
 2557         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2558                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2559                         error = EINVAL;
 2560                         goto sctp_bad;
 2561                 }
 2562         }
 2563         len = auio.uio_resid;
 2564         CURVNET_SET(so->so_vnet);
 2565         error = sctp_lower_sosend(so, to, &auio,
 2566                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2567                     uap->flags, u_sinfo, td);
 2568         CURVNET_RESTORE();
 2569         if (error) {
 2570                 if (auio.uio_resid != len && (error == ERESTART ||
 2571                     error == EINTR || error == EWOULDBLOCK))
 2572                         error = 0;
 2573                 /* Generation of SIGPIPE can be controlled per socket */
 2574                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2575                     !(uap->flags & MSG_NOSIGNAL)) {
 2576                         PROC_LOCK(td->td_proc);
 2577                         tdksignal(td, SIGPIPE, NULL);
 2578                         PROC_UNLOCK(td->td_proc);
 2579                 }
 2580         }
 2581         if (error == 0)
 2582                 td->td_retval[0] = len - auio.uio_resid;
 2583 #ifdef KTRACE
 2584         if (ktruio != NULL) {
 2585                 ktruio->uio_resid = td->td_retval[0];
 2586                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2587         }
 2588 #endif /* KTRACE */
 2589 sctp_bad:
 2590         free(iov, M_IOV);
 2591 sctp_bad1:
 2592         if (fp)
 2593                 fdrop(fp, td);
 2594 sctp_bad2:
 2595         if (to)
 2596                 free(to, M_SONAME);
 2597         return (error);
 2598 #else  /* SCTP */
 2599         return (EOPNOTSUPP);
 2600 #endif /* SCTP */
 2601 }
 2602 
 2603 int
 2604 sctp_generic_recvmsg(td, uap)
 2605         struct thread *td;
 2606         struct sctp_generic_recvmsg_args /* {
 2607                 int sd, 
 2608                 struct iovec *iov, 
 2609                 int iovlen,
 2610                 struct sockaddr *from, 
 2611                 __socklen_t *fromlenaddr,
 2612                 struct sctp_sndrcvinfo *sinfo, 
 2613                 int *msg_flags
 2614         } */ *uap;
 2615 {
 2616 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2617         u_int8_t sockbufstore[256];
 2618         struct uio auio;
 2619         struct iovec *iov, *tiov;
 2620         struct sctp_sndrcvinfo sinfo;
 2621         struct socket *so;
 2622         struct file *fp = NULL;
 2623         struct sockaddr *fromsa;
 2624         int fromlen;
 2625         int len, i, msg_flags;
 2626         int error = 0;
 2627 #ifdef KTRACE
 2628         struct uio *ktruio = NULL;
 2629 #endif
 2630 
 2631         AUDIT_ARG_FD(uap->sd);
 2632         error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
 2633         if (error) {
 2634                 return (error);
 2635         }
 2636 #ifdef COMPAT_FREEBSD32
 2637         if (SV_CURPROC_FLAG(SV_ILP32))
 2638                 error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 2639                     uap->iovlen, &iov, EMSGSIZE);
 2640         else
 2641 #endif
 2642                 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2643         if (error)
 2644                 goto out1;
 2645 
 2646         so = fp->f_data;
 2647         if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
 2648                 error = EOPNOTSUPP;
 2649                 goto out;
 2650         }
 2651 #ifdef MAC
 2652         error = mac_socket_check_receive(td->td_ucred, so);
 2653         if (error) {
 2654                 goto out;
 2655         }
 2656 #endif /* MAC */
 2657 
 2658         if (uap->fromlenaddr) {
 2659                 error = copyin(uap->fromlenaddr,
 2660                     &fromlen, sizeof (fromlen));
 2661                 if (error) {
 2662                         goto out;
 2663                 }
 2664         } else {
 2665                 fromlen = 0;
 2666         }
 2667         if (uap->msg_flags) {
 2668                 error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
 2669                 if (error) {
 2670                         goto out;
 2671                 }
 2672         } else {
 2673                 msg_flags = 0;
 2674         }
 2675         auio.uio_iov = iov;
 2676         auio.uio_iovcnt = uap->iovlen;
 2677         auio.uio_segflg = UIO_USERSPACE;
 2678         auio.uio_rw = UIO_READ;
 2679         auio.uio_td = td;
 2680         auio.uio_offset = 0;                    /* XXX */
 2681         auio.uio_resid = 0;
 2682         tiov = iov;
 2683         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2684                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2685                         error = EINVAL;
 2686                         goto out;
 2687                 }
 2688         }
 2689         len = auio.uio_resid;
 2690         fromsa = (struct sockaddr *)sockbufstore;
 2691 
 2692 #ifdef KTRACE
 2693         if (KTRPOINT(td, KTR_GENIO))
 2694                 ktruio = cloneuio(&auio);
 2695 #endif /* KTRACE */
 2696         memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
 2697         CURVNET_SET(so->so_vnet);
 2698         error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
 2699                     fromsa, fromlen, &msg_flags,
 2700                     (struct sctp_sndrcvinfo *)&sinfo, 1);
 2701         CURVNET_RESTORE();
 2702         if (error) {
 2703                 if (auio.uio_resid != (int)len && (error == ERESTART ||
 2704                     error == EINTR || error == EWOULDBLOCK))
 2705                         error = 0;
 2706         } else {
 2707                 if (uap->sinfo)
 2708                         error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
 2709         }
 2710 #ifdef KTRACE
 2711         if (ktruio != NULL) {
 2712                 ktruio->uio_resid = (int)len - auio.uio_resid;
 2713                 ktrgenio(uap->sd, UIO_READ, ktruio, error);
 2714         }
 2715 #endif /* KTRACE */
 2716         if (error)
 2717                 goto out;
 2718         td->td_retval[0] = (int)len - auio.uio_resid;
 2719 
 2720         if (fromlen && uap->from) {
 2721                 len = fromlen;
 2722                 if (len <= 0 || fromsa == 0)
 2723                         len = 0;
 2724                 else {
 2725                         len = MIN(len, fromsa->sa_len);
 2726                         error = copyout(fromsa, uap->from, (unsigned)len);
 2727                         if (error)
 2728                                 goto out;
 2729                 }
 2730                 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
 2731                 if (error) {
 2732                         goto out;
 2733                 }
 2734         }
 2735 #ifdef KTRACE
 2736         if (KTRPOINT(td, KTR_STRUCT))
 2737                 ktrsockaddr(fromsa);
 2738 #endif
 2739         if (uap->msg_flags) {
 2740                 error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
 2741                 if (error) {
 2742                         goto out;
 2743                 }
 2744         }
 2745 out:
 2746         free(iov, M_IOV);
 2747 out1:
 2748         if (fp) 
 2749                 fdrop(fp, td);
 2750 
 2751         return (error);
 2752 #else  /* SCTP */
 2753         return (EOPNOTSUPP);
 2754 #endif /* SCTP */
 2755 }

Cache object: 7c3e87ebe51096d56023258dae024abd


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.