The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * sendfile(2) and related extensions:
    6  * Copyright (c) 1998, David Greenman. All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/9.0/sys/kern/uipc_syscalls.c 225617 2011-09-16 13:58:51Z kmacy $");
   37 
   38 #include "opt_capsicum.h"
   39 #include "opt_inet.h"
   40 #include "opt_inet6.h"
   41 #include "opt_sctp.h"
   42 #include "opt_compat.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/capability.h>
   48 #include <sys/kernel.h>
   49 #include <sys/lock.h>
   50 #include <sys/mutex.h>
   51 #include <sys/sysproto.h>
   52 #include <sys/malloc.h>
   53 #include <sys/filedesc.h>
   54 #include <sys/event.h>
   55 #include <sys/proc.h>
   56 #include <sys/fcntl.h>
   57 #include <sys/file.h>
   58 #include <sys/filio.h>
   59 #include <sys/jail.h>
   60 #include <sys/mount.h>
   61 #include <sys/mbuf.h>
   62 #include <sys/protosw.h>
   63 #include <sys/sf_buf.h>
   64 #include <sys/sysent.h>
   65 #include <sys/socket.h>
   66 #include <sys/socketvar.h>
   67 #include <sys/signalvar.h>
   68 #include <sys/syscallsubr.h>
   69 #include <sys/sysctl.h>
   70 #include <sys/uio.h>
   71 #include <sys/vnode.h>
   72 #ifdef KTRACE
   73 #include <sys/ktrace.h>
   74 #endif
   75 #ifdef COMPAT_FREEBSD32
   76 #include <compat/freebsd32/freebsd32_util.h>
   77 #endif
   78 
   79 #include <net/vnet.h>
   80 
   81 #include <security/audit/audit.h>
   82 #include <security/mac/mac_framework.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_object.h>
   86 #include <vm/vm_page.h>
   87 #include <vm/vm_pageout.h>
   88 #include <vm/vm_kern.h>
   89 #include <vm/vm_extern.h>
   90 
   91 #if defined(INET) || defined(INET6)
   92 #ifdef SCTP
   93 #include <netinet/sctp.h>
   94 #include <netinet/sctp_peeloff.h>
   95 #endif /* SCTP */
   96 #endif /* INET || INET6 */
   97 
   /*
    * Forward declarations for the file-local helpers that back the socket
    * system calls.
    */
   static int accept1(struct thread *td, struct accept_args *uap, int compat);
   static int do_sendfile(struct thread *td, struct sendfile_args *uap,
       int compat);
   static int getpeername1(struct thread *td, struct getpeername_args *uap,
       int compat);
   static int getsockname1(struct thread *td, struct getsockname_args *uap,
       int compat);
   static int recvit(struct thread *td, int s, struct msghdr *mp,
       void *namelenp);
   static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
  107 
  108 /*
  109  * NSFBUFS-related variables and associated sysctls
  110  */
  111 int nsfbufs;
  112 int nsfbufspeak;
  113 int nsfbufsused;
  114 
  115 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
  116     "Maximum number of sendfile(2) sf_bufs available");
  117 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
  118     "Number of sendfile(2) sf_bufs at peak usage");
  119 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
  120     "Number of sendfile(2) sf_bufs in use");
  121 
  122 /*
  123  * Convert a user file descriptor to a kernel file entry and check that, if
  124  * it is a capability, the right rights are present. A reference on the file
  125  * entry is held upon returning.
  126  */
  127 static int
  128 getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
  129     struct file **fpp, u_int *fflagp)
  130 {
  131         struct file *fp;
  132 #ifdef CAPABILITIES
  133         struct file *fp_fromcap;
  134         int error;
  135 #endif
  136 
  137         fp = NULL;
  138         if ((fdp == NULL) || ((fp = fget_unlocked(fdp, fd)) == NULL))
  139                 return (EBADF);
  140 #ifdef CAPABILITIES
  141         /*
  142          * If the file descriptor is for a capability, test rights and use
  143          * the file descriptor referenced by the capability.
  144          */
  145         error = cap_funwrap(fp, rights, &fp_fromcap);
  146         if (error) {
  147                 fdrop(fp, curthread);
  148                 return (error);
  149         }
  150         if (fp != fp_fromcap) {
  151                 fhold(fp_fromcap);
  152                 fdrop(fp, curthread);
  153                 fp = fp_fromcap;
  154         }
  155 #endif /* CAPABILITIES */
  156         if (fp->f_type != DTYPE_SOCKET) {
  157                 fdrop(fp, curthread);
  158                 return (ENOTSOCK);
  159         }
  160         if (fflagp != NULL)
  161                 *fflagp = fp->f_flag;
  162         *fpp = fp;
  163         return (0);
  164 }
  165 
  /*
   * System call interface to the socket abstraction.
   *
   * COMPAT_OLDSOCK is switched on whenever COMPAT_43 is configured; the
   * old-style socket compatibility code in this file is keyed off it.
   */
  #ifdef COMPAT_43
  #define COMPAT_OLDSOCK
  #endif
  172 
  173 int
  174 sys_socket(td, uap)
  175         struct thread *td;
  176         struct socket_args /* {
  177                 int     domain;
  178                 int     type;
  179                 int     protocol;
  180         } */ *uap;
  181 {
  182         struct filedesc *fdp;
  183         struct socket *so;
  184         struct file *fp;
  185         int fd, error;
  186 
  187         AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
  188 #ifdef MAC
  189         error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
  190             uap->protocol);
  191         if (error)
  192                 return (error);
  193 #endif
  194         fdp = td->td_proc->p_fd;
  195         error = falloc(td, &fp, &fd, 0);
  196         if (error)
  197                 return (error);
  198         /* An extra reference on `fp' has been held for us by falloc(). */
  199         error = socreate(uap->domain, &so, uap->type, uap->protocol,
  200             td->td_ucred, td);
  201         if (error) {
  202                 fdclose(fdp, fp, fd, td);
  203         } else {
  204                 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
  205                 td->td_retval[0] = fd;
  206         }
  207         fdrop(fp, td);
  208         return (error);
  209 }
  210 
  211 /* ARGSUSED */
  212 int
  213 sys_bind(td, uap)
  214         struct thread *td;
  215         struct bind_args /* {
  216                 int     s;
  217                 caddr_t name;
  218                 int     namelen;
  219         } */ *uap;
  220 {
  221         struct sockaddr *sa;
  222         int error;
  223 
  224         if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
  225                 return (error);
  226 
  227         error = kern_bind(td, uap->s, sa);
  228         free(sa, M_SONAME);
  229         return (error);
  230 }
  231 
  232 int
  233 kern_bind(td, fd, sa)
  234         struct thread *td;
  235         int fd;
  236         struct sockaddr *sa;
  237 {
  238         struct socket *so;
  239         struct file *fp;
  240         int error;
  241 
  242         AUDIT_ARG_FD(fd);
  243         error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL);
  244         if (error)
  245                 return (error);
  246         so = fp->f_data;
  247 #ifdef KTRACE
  248         if (KTRPOINT(td, KTR_STRUCT))
  249                 ktrsockaddr(sa);
  250 #endif
  251 #ifdef MAC
  252         error = mac_socket_check_bind(td->td_ucred, so, sa);
  253         if (error == 0)
  254 #endif
  255                 error = sobind(so, sa, td);
  256         fdrop(fp, td);
  257         return (error);
  258 }
  259 
  260 /* ARGSUSED */
  261 int
  262 sys_listen(td, uap)
  263         struct thread *td;
  264         struct listen_args /* {
  265                 int     s;
  266                 int     backlog;
  267         } */ *uap;
  268 {
  269         struct socket *so;
  270         struct file *fp;
  271         int error;
  272 
  273         AUDIT_ARG_FD(uap->s);
  274         error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL);
  275         if (error == 0) {
  276                 so = fp->f_data;
  277 #ifdef MAC
  278                 error = mac_socket_check_listen(td->td_ucred, so);
  279                 if (error == 0)
  280 #endif
  281                         error = solisten(so, uap->backlog, td);
  282                 fdrop(fp, td);
  283         }
  284         return(error);
  285 }
  286 
  287 /*
  288  * accept1()
  289  */
  290 static int
  291 accept1(td, uap, compat)
  292         struct thread *td;
  293         struct accept_args /* {
  294                 int     s;
  295                 struct sockaddr * __restrict name;
  296                 socklen_t       * __restrict anamelen;
  297         } */ *uap;
  298         int compat;
  299 {
  300         struct sockaddr *name;
  301         socklen_t namelen;
  302         struct file *fp;
  303         int error;
  304 
  305         if (uap->name == NULL)
  306                 return (kern_accept(td, uap->s, NULL, NULL, NULL));
  307 
  308         error = copyin(uap->anamelen, &namelen, sizeof (namelen));
  309         if (error)
  310                 return (error);
  311 
  312         error = kern_accept(td, uap->s, &name, &namelen, &fp);
  313 
  314         /*
  315          * return a namelen of zero for older code which might
  316          * ignore the return value from accept.
  317          */
  318         if (error) {
  319                 (void) copyout(&namelen,
  320                     uap->anamelen, sizeof(*uap->anamelen));
  321                 return (error);
  322         }
  323 
  324         if (error == 0 && name != NULL) {
  325 #ifdef COMPAT_OLDSOCK
  326                 if (compat)
  327                         ((struct osockaddr *)name)->sa_family =
  328                             name->sa_family;
  329 #endif
  330                 error = copyout(name, uap->name, namelen);
  331         }
  332         if (error == 0)
  333                 error = copyout(&namelen, uap->anamelen,
  334                     sizeof(namelen));
  335         if (error)
  336                 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
  337         fdrop(fp, td);
  338         free(name, M_SONAME);
  339         return (error);
  340 }
  341 
  342 int
  343 kern_accept(struct thread *td, int s, struct sockaddr **name,
  344     socklen_t *namelen, struct file **fp)
  345 {
  346         struct filedesc *fdp;
  347         struct file *headfp, *nfp = NULL;
  348         struct sockaddr *sa = NULL;
  349         int error;
  350         struct socket *head, *so;
  351         int fd;
  352         u_int fflag;
  353         pid_t pgid;
  354         int tmp;
  355 
  356         if (name) {
  357                 *name = NULL;
  358                 if (*namelen < 0)
  359                         return (EINVAL);
  360         }
  361 
  362         AUDIT_ARG_FD(s);
  363         fdp = td->td_proc->p_fd;
  364         error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag);
  365         if (error)
  366                 return (error);
  367         head = headfp->f_data;
  368         if ((head->so_options & SO_ACCEPTCONN) == 0) {
  369                 error = EINVAL;
  370                 goto done;
  371         }
  372 #ifdef MAC
  373         error = mac_socket_check_accept(td->td_ucred, head);
  374         if (error != 0)
  375                 goto done;
  376 #endif
  377         error = falloc(td, &nfp, &fd, 0);
  378         if (error)
  379                 goto done;
  380         ACCEPT_LOCK();
  381         if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
  382                 ACCEPT_UNLOCK();
  383                 error = EWOULDBLOCK;
  384                 goto noconnection;
  385         }
  386         while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
  387                 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
  388                         head->so_error = ECONNABORTED;
  389                         break;
  390                 }
  391                 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
  392                     "accept", 0);
  393                 if (error) {
  394                         ACCEPT_UNLOCK();
  395                         goto noconnection;
  396                 }
  397         }
  398         if (head->so_error) {
  399                 error = head->so_error;
  400                 head->so_error = 0;
  401                 ACCEPT_UNLOCK();
  402                 goto noconnection;
  403         }
  404         so = TAILQ_FIRST(&head->so_comp);
  405         KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
  406         KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
  407 
  408         /*
  409          * Before changing the flags on the socket, we have to bump the
  410          * reference count.  Otherwise, if the protocol calls sofree(),
  411          * the socket will be released due to a zero refcount.
  412          */
  413         SOCK_LOCK(so);                  /* soref() and so_state update */
  414         soref(so);                      /* file descriptor reference */
  415 
  416         TAILQ_REMOVE(&head->so_comp, so, so_list);
  417         head->so_qlen--;
  418         so->so_state |= (head->so_state & SS_NBIO);
  419         so->so_qstate &= ~SQ_COMP;
  420         so->so_head = NULL;
  421 
  422         SOCK_UNLOCK(so);
  423         ACCEPT_UNLOCK();
  424 
  425         /* An extra reference on `nfp' has been held for us by falloc(). */
  426         td->td_retval[0] = fd;
  427 
  428         /* connection has been removed from the listen queue */
  429         KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
  430 
  431         pgid = fgetown(&head->so_sigio);
  432         if (pgid != 0)
  433                 fsetown(pgid, &so->so_sigio);
  434 
  435         finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
  436         /* Sync socket nonblocking/async state with file flags */
  437         tmp = fflag & FNONBLOCK;
  438         (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
  439         tmp = fflag & FASYNC;
  440         (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
  441         sa = 0;
  442         error = soaccept(so, &sa);
  443         if (error) {
  444                 /*
  445                  * return a namelen of zero for older code which might
  446                  * ignore the return value from accept.
  447                  */
  448                 if (name)
  449                         *namelen = 0;
  450                 goto noconnection;
  451         }
  452         if (sa == NULL) {
  453                 if (name)
  454                         *namelen = 0;
  455                 goto done;
  456         }
  457         if (name) {
  458                 /* check sa_len before it is destroyed */
  459                 if (*namelen > sa->sa_len)
  460                         *namelen = sa->sa_len;
  461 #ifdef KTRACE
  462                 if (KTRPOINT(td, KTR_STRUCT))
  463                         ktrsockaddr(sa);
  464 #endif
  465                 *name = sa;
  466                 sa = NULL;
  467         }
  468 noconnection:
  469         if (sa)
  470                 free(sa, M_SONAME);
  471 
  472         /*
  473          * close the new descriptor, assuming someone hasn't ripped it
  474          * out from under us.
  475          */
  476         if (error)
  477                 fdclose(fdp, nfp, fd, td);
  478 
  479         /*
  480          * Release explicitly held references before returning.  We return
  481          * a reference on nfp to the caller on success if they request it.
  482          */
  483 done:
  484         if (fp != NULL) {
  485                 if (error == 0) {
  486                         *fp = nfp;
  487                         nfp = NULL;
  488                 } else
  489                         *fp = NULL;
  490         }
  491         if (nfp != NULL)
  492                 fdrop(nfp, td);
  493         fdrop(headfp, td);
  494         return (error);
  495 }
  496 
  /*
   * accept(2): accept1() with the compat flag clear.
   */
  int
  sys_accept(struct thread *td, struct accept_args *uap)
  {
          int error;

          error = accept1(td, uap, 0);
          return (error);
  }
  505 
  506 #ifdef COMPAT_OLDSOCK
  /*
   * Old-style accept entry point: accept1() with the compat flag set, so
   * the returned address uses the old sockaddr layout.
   */
  int
  oaccept(struct thread *td, struct accept_args *uap)
  {
          int error;

          error = accept1(td, uap, 1);
          return (error);
  }
  515 #endif /* COMPAT_OLDSOCK */
  516 
  517 /* ARGSUSED */
  518 int
  519 sys_connect(td, uap)
  520         struct thread *td;
  521         struct connect_args /* {
  522                 int     s;
  523                 caddr_t name;
  524                 int     namelen;
  525         } */ *uap;
  526 {
  527         struct sockaddr *sa;
  528         int error;
  529 
  530         error = getsockaddr(&sa, uap->name, uap->namelen);
  531         if (error)
  532                 return (error);
  533 
  534         error = kern_connect(td, uap->s, sa);
  535         free(sa, M_SONAME);
  536         return (error);
  537 }
  538 
  539 
  540 int
  541 kern_connect(td, fd, sa)
  542         struct thread *td;
  543         int fd;
  544         struct sockaddr *sa;
  545 {
  546         struct socket *so;
  547         struct file *fp;
  548         int error;
  549         int interrupted = 0;
  550 
  551         AUDIT_ARG_FD(fd);
  552         error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL);
  553         if (error)
  554                 return (error);
  555         so = fp->f_data;
  556         if (so->so_state & SS_ISCONNECTING) {
  557                 error = EALREADY;
  558                 goto done1;
  559         }
  560 #ifdef KTRACE
  561         if (KTRPOINT(td, KTR_STRUCT))
  562                 ktrsockaddr(sa);
  563 #endif
  564 #ifdef MAC
  565         error = mac_socket_check_connect(td->td_ucred, so, sa);
  566         if (error)
  567                 goto bad;
  568 #endif
  569         error = soconnect(so, sa, td);
  570         if (error)
  571                 goto bad;
  572         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
  573                 error = EINPROGRESS;
  574                 goto done1;
  575         }
  576         SOCK_LOCK(so);
  577         while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  578                 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
  579                     "connec", 0);
  580                 if (error) {
  581                         if (error == EINTR || error == ERESTART)
  582                                 interrupted = 1;
  583                         break;
  584                 }
  585         }
  586         if (error == 0) {
  587                 error = so->so_error;
  588                 so->so_error = 0;
  589         }
  590         SOCK_UNLOCK(so);
  591 bad:
  592         if (!interrupted)
  593                 so->so_state &= ~SS_ISCONNECTING;
  594         if (error == ERESTART)
  595                 error = EINTR;
  596 done1:
  597         fdrop(fp, td);
  598         return (error);
  599 }
  600 
  601 int
  602 kern_socketpair(struct thread *td, int domain, int type, int protocol,
  603     int *rsv)
  604 {
  605         struct filedesc *fdp = td->td_proc->p_fd;
  606         struct file *fp1, *fp2;
  607         struct socket *so1, *so2;
  608         int fd, error;
  609 
  610         AUDIT_ARG_SOCKET(domain, type, protocol);
  611 #ifdef MAC
  612         /* We might want to have a separate check for socket pairs. */
  613         error = mac_socket_check_create(td->td_ucred, domain, type,
  614             protocol);
  615         if (error)
  616                 return (error);
  617 #endif
  618         error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
  619         if (error)
  620                 return (error);
  621         error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
  622         if (error)
  623                 goto free1;
  624         /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
  625         error = falloc(td, &fp1, &fd, 0);
  626         if (error)
  627                 goto free2;
  628         rsv[0] = fd;
  629         fp1->f_data = so1;      /* so1 already has ref count */
  630         error = falloc(td, &fp2, &fd, 0);
  631         if (error)
  632                 goto free3;
  633         fp2->f_data = so2;      /* so2 already has ref count */
  634         rsv[1] = fd;
  635         error = soconnect2(so1, so2);
  636         if (error)
  637                 goto free4;
  638         if (type == SOCK_DGRAM) {
  639                 /*
  640                  * Datagram socket connection is asymmetric.
  641                  */
  642                  error = soconnect2(so2, so1);
  643                  if (error)
  644                         goto free4;
  645         }
  646         finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
  647         finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
  648         fdrop(fp1, td);
  649         fdrop(fp2, td);
  650         return (0);
  651 free4:
  652         fdclose(fdp, fp2, rsv[1], td);
  653         fdrop(fp2, td);
  654 free3:
  655         fdclose(fdp, fp1, rsv[0], td);
  656         fdrop(fp1, td);
  657 free2:
  658         if (so2 != NULL)
  659                 (void)soclose(so2);
  660 free1:
  661         if (so1 != NULL)
  662                 (void)soclose(so1);
  663         return (error);
  664 }
  665 
  666 int
  667 sys_socketpair(struct thread *td, struct socketpair_args *uap)
  668 {
  669         int error, sv[2];
  670 
  671         error = kern_socketpair(td, uap->domain, uap->type,
  672             uap->protocol, sv);
  673         if (error)
  674                 return (error);
  675         error = copyout(sv, uap->rsv, 2 * sizeof(int));
  676         if (error) {
  677                 (void)kern_close(td, sv[0]);
  678                 (void)kern_close(td, sv[1]);
  679         }
  680         return (error);
  681 }
  682 
  683 static int
  684 sendit(td, s, mp, flags)
  685         struct thread *td;
  686         int s;
  687         struct msghdr *mp;
  688         int flags;
  689 {
  690         struct mbuf *control;
  691         struct sockaddr *to;
  692         int error;
  693 
  694 #ifdef CAPABILITY_MODE
  695         if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL))
  696                 return (ECAPMODE);
  697 #endif
  698 
  699         if (mp->msg_name != NULL) {
  700                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
  701                 if (error) {
  702                         to = NULL;
  703                         goto bad;
  704                 }
  705                 mp->msg_name = to;
  706         } else {
  707                 to = NULL;
  708         }
  709 
  710         if (mp->msg_control) {
  711                 if (mp->msg_controllen < sizeof(struct cmsghdr)
  712 #ifdef COMPAT_OLDSOCK
  713                     && mp->msg_flags != MSG_COMPAT
  714 #endif
  715                 ) {
  716                         error = EINVAL;
  717                         goto bad;
  718                 }
  719                 error = sockargs(&control, mp->msg_control,
  720                     mp->msg_controllen, MT_CONTROL);
  721                 if (error)
  722                         goto bad;
  723 #ifdef COMPAT_OLDSOCK
  724                 if (mp->msg_flags == MSG_COMPAT) {
  725                         struct cmsghdr *cm;
  726 
  727                         M_PREPEND(control, sizeof(*cm), M_WAIT);
  728                         cm = mtod(control, struct cmsghdr *);
  729                         cm->cmsg_len = control->m_len;
  730                         cm->cmsg_level = SOL_SOCKET;
  731                         cm->cmsg_type = SCM_RIGHTS;
  732                 }
  733 #endif
  734         } else {
  735                 control = NULL;
  736         }
  737 
  738         error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
  739 
  740 bad:
  741         if (to)
  742                 free(to, M_SONAME);
  743         return (error);
  744 }
  745 
  746 int
  747 kern_sendit(td, s, mp, flags, control, segflg)
  748         struct thread *td;
  749         int s;
  750         struct msghdr *mp;
  751         int flags;
  752         struct mbuf *control;
  753         enum uio_seg segflg;
  754 {
  755         struct file *fp;
  756         struct uio auio;
  757         struct iovec *iov;
  758         struct socket *so;
  759         int i;
  760         int len, error;
  761         cap_rights_t rights;
  762 #ifdef KTRACE
  763         struct uio *ktruio = NULL;
  764 #endif
  765 
  766         AUDIT_ARG_FD(s);
  767         rights = CAP_WRITE;
  768         if (mp->msg_name != NULL)
  769                 rights |= CAP_CONNECT;
  770         error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL);
  771         if (error)
  772                 return (error);
  773         so = (struct socket *)fp->f_data;
  774 
  775 #ifdef KTRACE
  776         if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
  777                 ktrsockaddr(mp->msg_name);
  778 #endif
  779 #ifdef MAC
  780         if (mp->msg_name != NULL) {
  781                 error = mac_socket_check_connect(td->td_ucred, so,
  782                     mp->msg_name);
  783                 if (error)
  784                         goto bad;
  785         }
  786         error = mac_socket_check_send(td->td_ucred, so);
  787         if (error)
  788                 goto bad;
  789 #endif
  790 
  791         auio.uio_iov = mp->msg_iov;
  792         auio.uio_iovcnt = mp->msg_iovlen;
  793         auio.uio_segflg = segflg;
  794         auio.uio_rw = UIO_WRITE;
  795         auio.uio_td = td;
  796         auio.uio_offset = 0;                    /* XXX */
  797         auio.uio_resid = 0;
  798         iov = mp->msg_iov;
  799         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  800                 if ((auio.uio_resid += iov->iov_len) < 0) {
  801                         error = EINVAL;
  802                         goto bad;
  803                 }
  804         }
  805 #ifdef KTRACE
  806         if (KTRPOINT(td, KTR_GENIO))
  807                 ktruio = cloneuio(&auio);
  808 #endif
  809         len = auio.uio_resid;
  810         error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
  811         if (error) {
  812                 if (auio.uio_resid != len && (error == ERESTART ||
  813                     error == EINTR || error == EWOULDBLOCK))
  814                         error = 0;
  815                 /* Generation of SIGPIPE can be controlled per socket */
  816                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
  817                     !(flags & MSG_NOSIGNAL)) {
  818                         PROC_LOCK(td->td_proc);
  819                         tdsignal(td, SIGPIPE);
  820                         PROC_UNLOCK(td->td_proc);
  821                 }
  822         }
  823         if (error == 0)
  824                 td->td_retval[0] = len - auio.uio_resid;
  825 #ifdef KTRACE
  826         if (ktruio != NULL) {
  827                 ktruio->uio_resid = td->td_retval[0];
  828                 ktrgenio(s, UIO_WRITE, ktruio, error);
  829         }
  830 #endif
  831 bad:
  832         fdrop(fp, td);
  833         return (error);
  834 }
  835 
  836 int
  837 sys_sendto(td, uap)
  838         struct thread *td;
  839         struct sendto_args /* {
  840                 int     s;
  841                 caddr_t buf;
  842                 size_t  len;
  843                 int     flags;
  844                 caddr_t to;
  845                 int     tolen;
  846         } */ *uap;
  847 {
  848         struct msghdr msg;
  849         struct iovec aiov;
  850         int error;
  851 
  852         msg.msg_name = uap->to;
  853         msg.msg_namelen = uap->tolen;
  854         msg.msg_iov = &aiov;
  855         msg.msg_iovlen = 1;
  856         msg.msg_control = 0;
  857 #ifdef COMPAT_OLDSOCK
  858         msg.msg_flags = 0;
  859 #endif
  860         aiov.iov_base = uap->buf;
  861         aiov.iov_len = uap->len;
  862         error = sendit(td, uap->s, &msg, uap->flags);
  863         return (error);
  864 }
  865 
  866 #ifdef COMPAT_OLDSOCK
  867 int
  868 osend(td, uap)
  869         struct thread *td;
  870         struct osend_args /* {
  871                 int     s;
  872                 caddr_t buf;
  873                 int     len;
  874                 int     flags;
  875         } */ *uap;
  876 {
  877         struct msghdr msg;
  878         struct iovec aiov;
  879         int error;
  880 
  881         msg.msg_name = 0;
  882         msg.msg_namelen = 0;
  883         msg.msg_iov = &aiov;
  884         msg.msg_iovlen = 1;
  885         aiov.iov_base = uap->buf;
  886         aiov.iov_len = uap->len;
  887         msg.msg_control = 0;
  888         msg.msg_flags = 0;
  889         error = sendit(td, uap->s, &msg, uap->flags);
  890         return (error);
  891 }
  892 
  893 int
  894 osendmsg(td, uap)
  895         struct thread *td;
  896         struct osendmsg_args /* {
  897                 int     s;
  898                 caddr_t msg;
  899                 int     flags;
  900         } */ *uap;
  901 {
  902         struct msghdr msg;
  903         struct iovec *iov;
  904         int error;
  905 
  906         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
  907         if (error)
  908                 return (error);
  909         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  910         if (error)
  911                 return (error);
  912         msg.msg_iov = iov;
  913         msg.msg_flags = MSG_COMPAT;
  914         error = sendit(td, uap->s, &msg, uap->flags);
  915         free(iov, M_IOV);
  916         return (error);
  917 }
  918 #endif
  919 
  920 int
  921 sys_sendmsg(td, uap)
  922         struct thread *td;
  923         struct sendmsg_args /* {
  924                 int     s;
  925                 caddr_t msg;
  926                 int     flags;
  927         } */ *uap;
  928 {
  929         struct msghdr msg;
  930         struct iovec *iov;
  931         int error;
  932 
  933         error = copyin(uap->msg, &msg, sizeof (msg));
  934         if (error)
  935                 return (error);
  936         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  937         if (error)
  938                 return (error);
  939         msg.msg_iov = iov;
  940 #ifdef COMPAT_OLDSOCK
  941         msg.msg_flags = 0;
  942 #endif
  943         error = sendit(td, uap->s, &msg, uap->flags);
  944         free(iov, M_IOV);
  945         return (error);
  946 }
  947 
  948 int
  949 kern_recvit(td, s, mp, fromseg, controlp)
  950         struct thread *td;
  951         int s;
  952         struct msghdr *mp;
  953         enum uio_seg fromseg;
  954         struct mbuf **controlp;
  955 {
  956         struct uio auio;
  957         struct iovec *iov;
  958         int i;
  959         socklen_t len;
  960         int error;
  961         struct mbuf *m, *control = 0;
  962         caddr_t ctlbuf;
  963         struct file *fp;
  964         struct socket *so;
  965         struct sockaddr *fromsa = 0;
  966 #ifdef KTRACE
  967         struct uio *ktruio = NULL;
  968 #endif
  969 
  970         if (controlp != NULL)
  971                 *controlp = NULL;
  972 
  973         AUDIT_ARG_FD(s);
  974         error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL);
  975         if (error)
  976                 return (error);
  977         so = fp->f_data;
  978 
  979 #ifdef MAC
  980         error = mac_socket_check_receive(td->td_ucred, so);
  981         if (error) {
  982                 fdrop(fp, td);
  983                 return (error);
  984         }
  985 #endif
  986 
  987         auio.uio_iov = mp->msg_iov;
  988         auio.uio_iovcnt = mp->msg_iovlen;
  989         auio.uio_segflg = UIO_USERSPACE;
  990         auio.uio_rw = UIO_READ;
  991         auio.uio_td = td;
  992         auio.uio_offset = 0;                    /* XXX */
  993         auio.uio_resid = 0;
  994         iov = mp->msg_iov;
  995         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  996                 if ((auio.uio_resid += iov->iov_len) < 0) {
  997                         fdrop(fp, td);
  998                         return (EINVAL);
  999                 }
 1000         }
 1001 #ifdef KTRACE
 1002         if (KTRPOINT(td, KTR_GENIO))
 1003                 ktruio = cloneuio(&auio);
 1004 #endif
 1005         len = auio.uio_resid;
 1006         error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
 1007             (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
 1008             &mp->msg_flags);
 1009         if (error) {
 1010                 if (auio.uio_resid != (int)len && (error == ERESTART ||
 1011                     error == EINTR || error == EWOULDBLOCK))
 1012                         error = 0;
 1013         }
 1014 #ifdef KTRACE
 1015         if (ktruio != NULL) {
 1016                 ktruio->uio_resid = (int)len - auio.uio_resid;
 1017                 ktrgenio(s, UIO_READ, ktruio, error);
 1018         }
 1019 #endif
 1020         if (error)
 1021                 goto out;
 1022         td->td_retval[0] = (int)len - auio.uio_resid;
 1023         if (mp->msg_name) {
 1024                 len = mp->msg_namelen;
 1025                 if (len <= 0 || fromsa == 0)
 1026                         len = 0;
 1027                 else {
 1028                         /* save sa_len before it is destroyed by MSG_COMPAT */
 1029                         len = MIN(len, fromsa->sa_len);
 1030 #ifdef COMPAT_OLDSOCK
 1031                         if (mp->msg_flags & MSG_COMPAT)
 1032                                 ((struct osockaddr *)fromsa)->sa_family =
 1033                                     fromsa->sa_family;
 1034 #endif
 1035                         if (fromseg == UIO_USERSPACE) {
 1036                                 error = copyout(fromsa, mp->msg_name,
 1037                                     (unsigned)len);
 1038                                 if (error)
 1039                                         goto out;
 1040                         } else
 1041                                 bcopy(fromsa, mp->msg_name, len);
 1042                 }
 1043                 mp->msg_namelen = len;
 1044         }
 1045         if (mp->msg_control && controlp == NULL) {
 1046 #ifdef COMPAT_OLDSOCK
 1047                 /*
 1048                  * We assume that old recvmsg calls won't receive access
 1049                  * rights and other control info, esp. as control info
 1050                  * is always optional and those options didn't exist in 4.3.
 1051                  * If we receive rights, trim the cmsghdr; anything else
 1052                  * is tossed.
 1053                  */
 1054                 if (control && mp->msg_flags & MSG_COMPAT) {
 1055                         if (mtod(control, struct cmsghdr *)->cmsg_level !=
 1056                             SOL_SOCKET ||
 1057                             mtod(control, struct cmsghdr *)->cmsg_type !=
 1058                             SCM_RIGHTS) {
 1059                                 mp->msg_controllen = 0;
 1060                                 goto out;
 1061                         }
 1062                         control->m_len -= sizeof (struct cmsghdr);
 1063                         control->m_data += sizeof (struct cmsghdr);
 1064                 }
 1065 #endif
 1066                 len = mp->msg_controllen;
 1067                 m = control;
 1068                 mp->msg_controllen = 0;
 1069                 ctlbuf = mp->msg_control;
 1070 
 1071                 while (m && len > 0) {
 1072                         unsigned int tocopy;
 1073 
 1074                         if (len >= m->m_len)
 1075                                 tocopy = m->m_len;
 1076                         else {
 1077                                 mp->msg_flags |= MSG_CTRUNC;
 1078                                 tocopy = len;
 1079                         }
 1080 
 1081                         if ((error = copyout(mtod(m, caddr_t),
 1082                                         ctlbuf, tocopy)) != 0)
 1083                                 goto out;
 1084 
 1085                         ctlbuf += tocopy;
 1086                         len -= tocopy;
 1087                         m = m->m_next;
 1088                 }
 1089                 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
 1090         }
 1091 out:
 1092         fdrop(fp, td);
 1093 #ifdef KTRACE
 1094         if (fromsa && KTRPOINT(td, KTR_STRUCT))
 1095                 ktrsockaddr(fromsa);
 1096 #endif
 1097         if (fromsa)
 1098                 free(fromsa, M_SONAME);
 1099 
 1100         if (error == 0 && controlp != NULL)  
 1101                 *controlp = control;
 1102         else  if (control)
 1103                 m_freem(control);
 1104 
 1105         return (error);
 1106 }
 1107 
 1108 static int
 1109 recvit(td, s, mp, namelenp)
 1110         struct thread *td;
 1111         int s;
 1112         struct msghdr *mp;
 1113         void *namelenp;
 1114 {
 1115         int error;
 1116 
 1117         error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
 1118         if (error)
 1119                 return (error);
 1120         if (namelenp) {
 1121                 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
 1122 #ifdef COMPAT_OLDSOCK
 1123                 if (mp->msg_flags & MSG_COMPAT)
 1124                         error = 0;      /* old recvfrom didn't check */
 1125 #endif
 1126         }
 1127         return (error);
 1128 }
 1129 
 1130 int
 1131 sys_recvfrom(td, uap)
 1132         struct thread *td;
 1133         struct recvfrom_args /* {
 1134                 int     s;
 1135                 caddr_t buf;
 1136                 size_t  len;
 1137                 int     flags;
 1138                 struct sockaddr * __restrict    from;
 1139                 socklen_t * __restrict fromlenaddr;
 1140         } */ *uap;
 1141 {
 1142         struct msghdr msg;
 1143         struct iovec aiov;
 1144         int error;
 1145 
 1146         if (uap->fromlenaddr) {
 1147                 error = copyin(uap->fromlenaddr,
 1148                     &msg.msg_namelen, sizeof (msg.msg_namelen));
 1149                 if (error)
 1150                         goto done2;
 1151         } else {
 1152                 msg.msg_namelen = 0;
 1153         }
 1154         msg.msg_name = uap->from;
 1155         msg.msg_iov = &aiov;
 1156         msg.msg_iovlen = 1;
 1157         aiov.iov_base = uap->buf;
 1158         aiov.iov_len = uap->len;
 1159         msg.msg_control = 0;
 1160         msg.msg_flags = uap->flags;
 1161         error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 1162 done2:
 1163         return(error);
 1164 }
 1165 
 1166 #ifdef COMPAT_OLDSOCK
 1167 int
 1168 orecvfrom(td, uap)
 1169         struct thread *td;
 1170         struct recvfrom_args *uap;
 1171 {
 1172 
 1173         uap->flags |= MSG_COMPAT;
 1174         return (sys_recvfrom(td, uap));
 1175 }
 1176 #endif
 1177 
 1178 #ifdef COMPAT_OLDSOCK
 1179 int
 1180 orecv(td, uap)
 1181         struct thread *td;
 1182         struct orecv_args /* {
 1183                 int     s;
 1184                 caddr_t buf;
 1185                 int     len;
 1186                 int     flags;
 1187         } */ *uap;
 1188 {
 1189         struct msghdr msg;
 1190         struct iovec aiov;
 1191         int error;
 1192 
 1193         msg.msg_name = 0;
 1194         msg.msg_namelen = 0;
 1195         msg.msg_iov = &aiov;
 1196         msg.msg_iovlen = 1;
 1197         aiov.iov_base = uap->buf;
 1198         aiov.iov_len = uap->len;
 1199         msg.msg_control = 0;
 1200         msg.msg_flags = uap->flags;
 1201         error = recvit(td, uap->s, &msg, NULL);
 1202         return (error);
 1203 }
 1204 
 1205 /*
 1206  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 1207  * overlays the new one, missing only the flags, and with the (old) access
 1208  * rights where the control fields are now.
 1209  */
 1210 int
 1211 orecvmsg(td, uap)
 1212         struct thread *td;
 1213         struct orecvmsg_args /* {
 1214                 int     s;
 1215                 struct  omsghdr *msg;
 1216                 int     flags;
 1217         } */ *uap;
 1218 {
 1219         struct msghdr msg;
 1220         struct iovec *iov;
 1221         int error;
 1222 
 1223         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 1224         if (error)
 1225                 return (error);
 1226         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1227         if (error)
 1228                 return (error);
 1229         msg.msg_flags = uap->flags | MSG_COMPAT;
 1230         msg.msg_iov = iov;
 1231         error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
 1232         if (msg.msg_controllen && error == 0)
 1233                 error = copyout(&msg.msg_controllen,
 1234                     &uap->msg->msg_accrightslen, sizeof (int));
 1235         free(iov, M_IOV);
 1236         return (error);
 1237 }
 1238 #endif
 1239 
 1240 int
 1241 sys_recvmsg(td, uap)
 1242         struct thread *td;
 1243         struct recvmsg_args /* {
 1244                 int     s;
 1245                 struct  msghdr *msg;
 1246                 int     flags;
 1247         } */ *uap;
 1248 {
 1249         struct msghdr msg;
 1250         struct iovec *uiov, *iov;
 1251         int error;
 1252 
 1253         error = copyin(uap->msg, &msg, sizeof (msg));
 1254         if (error)
 1255                 return (error);
 1256         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1257         if (error)
 1258                 return (error);
 1259         msg.msg_flags = uap->flags;
 1260 #ifdef COMPAT_OLDSOCK
 1261         msg.msg_flags &= ~MSG_COMPAT;
 1262 #endif
 1263         uiov = msg.msg_iov;
 1264         msg.msg_iov = iov;
 1265         error = recvit(td, uap->s, &msg, NULL);
 1266         if (error == 0) {
 1267                 msg.msg_iov = uiov;
 1268                 error = copyout(&msg, uap->msg, sizeof(msg));
 1269         }
 1270         free(iov, M_IOV);
 1271         return (error);
 1272 }
 1273 
 1274 /* ARGSUSED */
 1275 int
 1276 sys_shutdown(td, uap)
 1277         struct thread *td;
 1278         struct shutdown_args /* {
 1279                 int     s;
 1280                 int     how;
 1281         } */ *uap;
 1282 {
 1283         struct socket *so;
 1284         struct file *fp;
 1285         int error;
 1286 
 1287         AUDIT_ARG_FD(uap->s);
 1288         error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp,
 1289             NULL);
 1290         if (error == 0) {
 1291                 so = fp->f_data;
 1292                 error = soshutdown(so, uap->how);
 1293                 fdrop(fp, td);
 1294         }
 1295         return (error);
 1296 }
 1297 
 1298 /* ARGSUSED */
 1299 int
 1300 sys_setsockopt(td, uap)
 1301         struct thread *td;
 1302         struct setsockopt_args /* {
 1303                 int     s;
 1304                 int     level;
 1305                 int     name;
 1306                 caddr_t val;
 1307                 int     valsize;
 1308         } */ *uap;
 1309 {
 1310 
 1311         return (kern_setsockopt(td, uap->s, uap->level, uap->name,
 1312             uap->val, UIO_USERSPACE, uap->valsize));
 1313 }
 1314 
 1315 int
 1316 kern_setsockopt(td, s, level, name, val, valseg, valsize)
 1317         struct thread *td;
 1318         int s;
 1319         int level;
 1320         int name;
 1321         void *val;
 1322         enum uio_seg valseg;
 1323         socklen_t valsize;
 1324 {
 1325         int error;
 1326         struct socket *so;
 1327         struct file *fp;
 1328         struct sockopt sopt;
 1329 
 1330         if (val == NULL && valsize != 0)
 1331                 return (EFAULT);
 1332         if ((int)valsize < 0)
 1333                 return (EINVAL);
 1334 
 1335         sopt.sopt_dir = SOPT_SET;
 1336         sopt.sopt_level = level;
 1337         sopt.sopt_name = name;
 1338         sopt.sopt_val = val;
 1339         sopt.sopt_valsize = valsize;
 1340         switch (valseg) {
 1341         case UIO_USERSPACE:
 1342                 sopt.sopt_td = td;
 1343                 break;
 1344         case UIO_SYSSPACE:
 1345                 sopt.sopt_td = NULL;
 1346                 break;
 1347         default:
 1348                 panic("kern_setsockopt called with bad valseg");
 1349         }
 1350 
 1351         AUDIT_ARG_FD(s);
 1352         error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL);
 1353         if (error == 0) {
 1354                 so = fp->f_data;
 1355                 error = sosetopt(so, &sopt);
 1356                 fdrop(fp, td);
 1357         }
 1358         return(error);
 1359 }
 1360 
 1361 /* ARGSUSED */
 1362 int
 1363 sys_getsockopt(td, uap)
 1364         struct thread *td;
 1365         struct getsockopt_args /* {
 1366                 int     s;
 1367                 int     level;
 1368                 int     name;
 1369                 void * __restrict       val;
 1370                 socklen_t * __restrict avalsize;
 1371         } */ *uap;
 1372 {
 1373         socklen_t valsize;
 1374         int     error;
 1375 
 1376         if (uap->val) {
 1377                 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 1378                 if (error)
 1379                         return (error);
 1380         }
 1381 
 1382         error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 1383             uap->val, UIO_USERSPACE, &valsize);
 1384 
 1385         if (error == 0)
 1386                 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 1387         return (error);
 1388 }
 1389 
 1390 /*
 1391  * Kernel version of getsockopt.
 1392  * optval can be a userland or userspace. optlen is always a kernel pointer.
 1393  */
 1394 int
 1395 kern_getsockopt(td, s, level, name, val, valseg, valsize)
 1396         struct thread *td;
 1397         int s;
 1398         int level;
 1399         int name;
 1400         void *val;
 1401         enum uio_seg valseg;
 1402         socklen_t *valsize;
 1403 {
 1404         int error;
 1405         struct  socket *so;
 1406         struct file *fp;
 1407         struct  sockopt sopt;
 1408 
 1409         if (val == NULL)
 1410                 *valsize = 0;
 1411         if ((int)*valsize < 0)
 1412                 return (EINVAL);
 1413 
 1414         sopt.sopt_dir = SOPT_GET;
 1415         sopt.sopt_level = level;
 1416         sopt.sopt_name = name;
 1417         sopt.sopt_val = val;
 1418         sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
 1419         switch (valseg) {
 1420         case UIO_USERSPACE:
 1421                 sopt.sopt_td = td;
 1422                 break;
 1423         case UIO_SYSSPACE:
 1424                 sopt.sopt_td = NULL;
 1425                 break;
 1426         default:
 1427                 panic("kern_getsockopt called with bad valseg");
 1428         }
 1429 
 1430         AUDIT_ARG_FD(s);
 1431         error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL);
 1432         if (error == 0) {
 1433                 so = fp->f_data;
 1434                 error = sogetopt(so, &sopt);
 1435                 *valsize = sopt.sopt_valsize;
 1436                 fdrop(fp, td);
 1437         }
 1438         return (error);
 1439 }
 1440 
 1441 /*
 1442  * getsockname1() - Get socket name.
 1443  */
 1444 /* ARGSUSED */
 1445 static int
 1446 getsockname1(td, uap, compat)
 1447         struct thread *td;
 1448         struct getsockname_args /* {
 1449                 int     fdes;
 1450                 struct sockaddr * __restrict asa;
 1451                 socklen_t * __restrict alen;
 1452         } */ *uap;
 1453         int compat;
 1454 {
 1455         struct sockaddr *sa;
 1456         socklen_t len;
 1457         int error;
 1458 
 1459         error = copyin(uap->alen, &len, sizeof(len));
 1460         if (error)
 1461                 return (error);
 1462 
 1463         error = kern_getsockname(td, uap->fdes, &sa, &len);
 1464         if (error)
 1465                 return (error);
 1466 
 1467         if (len != 0) {
 1468 #ifdef COMPAT_OLDSOCK
 1469                 if (compat)
 1470                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1471 #endif
 1472                 error = copyout(sa, uap->asa, (u_int)len);
 1473         }
 1474         free(sa, M_SONAME);
 1475         if (error == 0)
 1476                 error = copyout(&len, uap->alen, sizeof(len));
 1477         return (error);
 1478 }
 1479 
 1480 int
 1481 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
 1482     socklen_t *alen)
 1483 {
 1484         struct socket *so;
 1485         struct file *fp;
 1486         socklen_t len;
 1487         int error;
 1488 
 1489         if (*alen < 0)
 1490                 return (EINVAL);
 1491 
 1492         AUDIT_ARG_FD(fd);
 1493         error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL);
 1494         if (error)
 1495                 return (error);
 1496         so = fp->f_data;
 1497         *sa = NULL;
 1498         CURVNET_SET(so->so_vnet);
 1499         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
 1500         CURVNET_RESTORE();
 1501         if (error)
 1502                 goto bad;
 1503         if (*sa == NULL)
 1504                 len = 0;
 1505         else
 1506                 len = MIN(*alen, (*sa)->sa_len);
 1507         *alen = len;
 1508 #ifdef KTRACE
 1509         if (KTRPOINT(td, KTR_STRUCT))
 1510                 ktrsockaddr(*sa);
 1511 #endif
 1512 bad:
 1513         fdrop(fp, td);
 1514         if (error && *sa) {
 1515                 free(*sa, M_SONAME);
 1516                 *sa = NULL;
 1517         }
 1518         return (error);
 1519 }
 1520 
 1521 int
 1522 sys_getsockname(td, uap)
 1523         struct thread *td;
 1524         struct getsockname_args *uap;
 1525 {
 1526 
 1527         return (getsockname1(td, uap, 0));
 1528 }
 1529 
 1530 #ifdef COMPAT_OLDSOCK
 1531 int
 1532 ogetsockname(td, uap)
 1533         struct thread *td;
 1534         struct getsockname_args *uap;
 1535 {
 1536 
 1537         return (getsockname1(td, uap, 1));
 1538 }
 1539 #endif /* COMPAT_OLDSOCK */
 1540 
 1541 /*
 1542  * getpeername1() - Get name of peer for connected socket.
 1543  */
 1544 /* ARGSUSED */
 1545 static int
 1546 getpeername1(td, uap, compat)
 1547         struct thread *td;
 1548         struct getpeername_args /* {
 1549                 int     fdes;
 1550                 struct sockaddr * __restrict    asa;
 1551                 socklen_t * __restrict  alen;
 1552         } */ *uap;
 1553         int compat;
 1554 {
 1555         struct sockaddr *sa;
 1556         socklen_t len;
 1557         int error;
 1558 
 1559         error = copyin(uap->alen, &len, sizeof (len));
 1560         if (error)
 1561                 return (error);
 1562 
 1563         error = kern_getpeername(td, uap->fdes, &sa, &len);
 1564         if (error)
 1565                 return (error);
 1566 
 1567         if (len != 0) {
 1568 #ifdef COMPAT_OLDSOCK
 1569                 if (compat)
 1570                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1571 #endif
 1572                 error = copyout(sa, uap->asa, (u_int)len);
 1573         }
 1574         free(sa, M_SONAME);
 1575         if (error == 0)
 1576                 error = copyout(&len, uap->alen, sizeof(len));
 1577         return (error);
 1578 }
 1579 
 1580 int
 1581 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
 1582     socklen_t *alen)
 1583 {
 1584         struct socket *so;
 1585         struct file *fp;
 1586         socklen_t len;
 1587         int error;
 1588 
 1589         if (*alen < 0)
 1590                 return (EINVAL);
 1591 
 1592         AUDIT_ARG_FD(fd);
 1593         error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL);
 1594         if (error)
 1595                 return (error);
 1596         so = fp->f_data;
 1597         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 1598                 error = ENOTCONN;
 1599                 goto done;
 1600         }
 1601         *sa = NULL;
 1602         CURVNET_SET(so->so_vnet);
 1603         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
 1604         CURVNET_RESTORE();
 1605         if (error)
 1606                 goto bad;
 1607         if (*sa == NULL)
 1608                 len = 0;
 1609         else
 1610                 len = MIN(*alen, (*sa)->sa_len);
 1611         *alen = len;
 1612 #ifdef KTRACE
 1613         if (KTRPOINT(td, KTR_STRUCT))
 1614                 ktrsockaddr(*sa);
 1615 #endif
 1616 bad:
 1617         if (error && *sa) {
 1618                 free(*sa, M_SONAME);
 1619                 *sa = NULL;
 1620         }
 1621 done:
 1622         fdrop(fp, td);
 1623         return (error);
 1624 }
 1625 
 1626 int
 1627 sys_getpeername(td, uap)
 1628         struct thread *td;
 1629         struct getpeername_args *uap;
 1630 {
 1631 
 1632         return (getpeername1(td, uap, 0));
 1633 }
 1634 
 1635 #ifdef COMPAT_OLDSOCK
 1636 int
 1637 ogetpeername(td, uap)
 1638         struct thread *td;
 1639         struct ogetpeername_args *uap;
 1640 {
 1641 
 1642         /* XXX uap should have type `getpeername_args *' to begin with. */
 1643         return (getpeername1(td, (struct getpeername_args *)uap, 1));
 1644 }
 1645 #endif /* COMPAT_OLDSOCK */
 1646 
 1647 int
 1648 sockargs(mp, buf, buflen, type)
 1649         struct mbuf **mp;
 1650         caddr_t buf;
 1651         int buflen, type;
 1652 {
 1653         struct sockaddr *sa;
 1654         struct mbuf *m;
 1655         int error;
 1656 
 1657         if ((u_int)buflen > MLEN) {
 1658 #ifdef COMPAT_OLDSOCK
 1659                 if (type == MT_SONAME && (u_int)buflen <= 112)
 1660                         buflen = MLEN;          /* unix domain compat. hack */
 1661                 else
 1662 #endif
 1663                         if ((u_int)buflen > MCLBYTES)
 1664                                 return (EINVAL);
 1665         }
 1666         m = m_get(M_WAIT, type);
 1667         if ((u_int)buflen > MLEN)
 1668                 MCLGET(m, M_WAIT);
 1669         m->m_len = buflen;
 1670         error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 1671         if (error)
 1672                 (void) m_free(m);
 1673         else {
 1674                 *mp = m;
 1675                 if (type == MT_SONAME) {
 1676                         sa = mtod(m, struct sockaddr *);
 1677 
 1678 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1679                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1680                                 sa->sa_family = sa->sa_len;
 1681 #endif
 1682                         sa->sa_len = buflen;
 1683                 }
 1684         }
 1685         return (error);
 1686 }
 1687 
 1688 int
 1689 getsockaddr(namp, uaddr, len)
 1690         struct sockaddr **namp;
 1691         caddr_t uaddr;
 1692         size_t len;
 1693 {
 1694         struct sockaddr *sa;
 1695         int error;
 1696 
 1697         if (len > SOCK_MAXADDRLEN)
 1698                 return (ENAMETOOLONG);
 1699         if (len < offsetof(struct sockaddr, sa_data[0]))
 1700                 return (EINVAL);
 1701         sa = malloc(len, M_SONAME, M_WAITOK);
 1702         error = copyin(uaddr, sa, len);
 1703         if (error) {
 1704                 free(sa, M_SONAME);
 1705         } else {
 1706 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1707                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1708                         sa->sa_family = sa->sa_len;
 1709 #endif
 1710                 sa->sa_len = len;
 1711                 *namp = sa;
 1712         }
 1713         return (error);
 1714 }
 1715 
 1716 #include <sys/condvar.h>
 1717 
 1718 struct sendfile_sync {
 1719         struct mtx      mtx;
 1720         struct cv       cv;
 1721         unsigned        count;
 1722 };
 1723 
 1724 /*
 1725  * Detach mapped page and release resources back to the system.
 1726  */
 1727 void
 1728 sf_buf_mext(void *addr, void *args)
 1729 {
 1730         vm_page_t m;
 1731         struct sendfile_sync *sfs;
 1732 
 1733         m = sf_buf_page(args);
 1734         sf_buf_free(args);
 1735         vm_page_lock(m);
 1736         vm_page_unwire(m, 0);
 1737         /*
 1738          * Check for the object going away on us. This can
 1739          * happen since we don't hold a reference to it.
 1740          * If so, we're responsible for freeing the page.
 1741          */
 1742         if (m->wire_count == 0 && m->object == NULL)
 1743                 vm_page_free(m);
 1744         vm_page_unlock(m);
 1745         if (addr == NULL)
 1746                 return;
 1747         sfs = addr;
 1748         mtx_lock(&sfs->mtx);
 1749         KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
 1750         if (--sfs->count == 0)
 1751                 cv_signal(&sfs->cv);
 1752         mtx_unlock(&sfs->mtx);
 1753 }
 1754 
 1755 /*
 1756  * sendfile(2)
 1757  *
 1758  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 1759  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 1760  *
 1761  * Send a file specified by 'fd' and starting at 'offset' to a socket
 1762  * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
 1763  * 0.  Optionally add a header and/or trailer to the socket output.  If
 1764  * specified, write the total number of bytes sent into *sbytes.
 1765  */
 1766 int
 1767 sys_sendfile(struct thread *td, struct sendfile_args *uap)
 1768 {
 1769 
 1770         return (do_sendfile(td, uap, 0));
 1771 }
 1772 
 1773 static int
 1774 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
 1775 {
 1776         struct sf_hdtr hdtr;
 1777         struct uio *hdr_uio, *trl_uio;
 1778         int error;
 1779 
 1780         hdr_uio = trl_uio = NULL;
 1781 
 1782         if (uap->hdtr != NULL) {
 1783                 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 1784                 if (error)
 1785                         goto out;
 1786                 if (hdtr.headers != NULL) {
 1787                         error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
 1788                         if (error)
 1789                                 goto out;
 1790                 }
 1791                 if (hdtr.trailers != NULL) {
 1792                         error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
 1793                         if (error)
 1794                                 goto out;
 1795 
 1796                 }
 1797         }
 1798 
 1799         error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
 1800 out:
 1801         if (hdr_uio)
 1802                 free(hdr_uio, M_IOV);
 1803         if (trl_uio)
 1804                 free(trl_uio, M_IOV);
 1805         return (error);
 1806 }
 1807 
 1808 #ifdef COMPAT_FREEBSD4
 1809 int
 1810 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
 1811 {
 1812         struct sendfile_args args;
 1813 
 1814         args.fd = uap->fd;
 1815         args.s = uap->s;
 1816         args.offset = uap->offset;
 1817         args.nbytes = uap->nbytes;
 1818         args.hdtr = uap->hdtr;
 1819         args.sbytes = uap->sbytes;
 1820         args.flags = uap->flags;
 1821 
 1822         return (do_sendfile(td, &args, 1));
 1823 }
 1824 #endif /* COMPAT_FREEBSD4 */
 1825 
 1826 int
 1827 kern_sendfile(struct thread *td, struct sendfile_args *uap,
 1828     struct uio *hdr_uio, struct uio *trl_uio, int compat)
 1829 {
 1830         struct file *sock_fp;
 1831         struct vnode *vp;
 1832         struct vm_object *obj = NULL;
 1833         struct socket *so = NULL;
 1834         struct mbuf *m = NULL;
 1835         struct sf_buf *sf;
 1836         struct vm_page *pg;
 1837         off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
 1838         int error, hdrlen = 0, mnw = 0;
 1839         int vfslocked;
 1840         struct sendfile_sync *sfs = NULL;
 1841 
              /*
               * Overview (kern_sendfile): queue the contents of the file
               * referenced by uap->fd onto the socket uap->s, starting at
               * file offset uap->offset.  uap->nbytes limits the amount of
               * file data; zero means "up to the size of the backing object".
               *
               * hdr_uio, when non-NULL, is copied into mbufs that are sent
               * ahead of the file data; trl_uio, when non-NULL, is written
               * after it with kern_writev().  A non-zero compat makes
               * uap->nbytes include the header bytes (they are subtracted
               * below).
               *
               * The running byte total is copied out to uap->sbytes, when
               * that pointer is non-NULL, on success and on failure alike.
               * The return value is 0 or an errno value; ERESTART is
               * reported as EINTR.
               */

 1842         /*
 1843          * The file descriptor must be a regular file and have a
 1844          * backing VM object.
 1845          * File offset must be positive.  If it goes beyond EOF
 1846          * we send only the header/trailer and no payload data.
 1847          */
 1848         AUDIT_ARG_FD(uap->fd);
 1849         if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0)
 1850                 goto out;
 1851         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1852         vn_lock(vp, LK_SHARED | LK_RETRY);
 1853         if (vp->v_type == VREG) {
 1854                 obj = vp->v_object;
 1855                 if (obj != NULL) {
 1856                         /*
 1857                          * Temporarily increase the backing VM
 1858                          * object's reference count so that a forced
 1859                          * reclamation of its vnode does not
 1860                          * immediately destroy it.
 1861                          */
 1862                         VM_OBJECT_LOCK(obj);
 1863                         if ((obj->flags & OBJ_DEAD) == 0) {
 1864                                 vm_object_reference_locked(obj);
 1865                                 VM_OBJECT_UNLOCK(obj);
 1866                         } else {
 1867                                 VM_OBJECT_UNLOCK(obj);
 1868                                 obj = NULL;
 1869                         }
 1870                 }
 1871         }
 1872         VOP_UNLOCK(vp, 0);
 1873         VFS_UNLOCK_GIANT(vfslocked);
              /* Not a regular file, no VM object, or the object is dead. */
 1874         if (obj == NULL) {
 1875                 error = EINVAL;
 1876                 goto out;
 1877         }
 1878         if (uap->offset < 0) {
 1879                 error = EINVAL;
 1880                 goto out;
 1881         }
 1882 
 1883         /*
 1884          * The socket must be a stream socket and connected.
 1885          * Remember if it is a blocking or non-blocking socket.
 1886          */
 1887         if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE,
 1888             &sock_fp, NULL)) != 0)
 1889                 goto out;
 1890         so = sock_fp->f_data;
 1891         if (so->so_type != SOCK_STREAM) {
 1892                 error = EINVAL;
 1893                 goto out;
 1894         }
 1895         if ((so->so_state & SS_ISCONNECTED) == 0) {
 1896                 error = ENOTCONN;
 1897                 goto out;
 1898         }
 1899         /*
 1900          * Do not wait on memory allocations but return ENOMEM for
 1901          * caller to retry later.
 1902          * XXX: Experimental.
 1903          */
 1904         if (uap->flags & SF_MNOWAIT)
 1905                 mnw = 1;
 1906 
              /*
               * SF_SYNC: allocate a tracker whose count is bumped for every
               * file-backed mbuf created below.  Before returning, this
               * function waits on sfs->cv if the count is still non-zero.
               * NOTE(review): the decrement is not in this function;
               * presumably sf_buf_mext() performs it through the sfs
               * argument handed to MEXTADD -- confirm.
               */
 1907         if (uap->flags & SF_SYNC) {
 1908                 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
 1909                 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
 1910                 cv_init(&sfs->cv, "sendfile");
 1911         }
 1912 
 1913 #ifdef MAC
 1914         error = mac_socket_check_send(td->td_ucred, so);
 1915         if (error)
 1916                 goto out;
 1917 #endif
 1918 
 1919         /* If headers are specified copy them into mbufs. */
 1920         if (hdr_uio != NULL) {
 1921                 hdr_uio->uio_td = td;
 1922                 hdr_uio->uio_rw = UIO_WRITE;
 1923                 if (hdr_uio->uio_resid > 0) {
 1924                         /*
 1925                          * In FBSD < 5.0 the nbytes to send also included
 1926                          * the header.  If compat is specified subtract the
 1927                          * header size from nbytes.
 1928                          */
 1929                         if (compat) {
 1930                                 if (uap->nbytes > hdr_uio->uio_resid)
 1931                                         uap->nbytes -= hdr_uio->uio_resid;
 1932                                 else
 1933                                         uap->nbytes = 0;
 1934                         }
 1935                         m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
 1936                             0, 0, 0);
 1937                         if (m == NULL) {
 1938                                 error = mnw ? EAGAIN : ENOBUFS;
 1939                                 goto out;
 1940                         }
 1941                         hdrlen = m_length(m, NULL);
 1942                 }
 1943         }
 1944 
 1945         /*
 1946          * Protect against multiple writers to the socket.
 1947          *
 1948          * XXXRW: Historically this has assumed non-interruptibility, so now
 1949          * we implement that, but possibly shouldn't.
 1950          */
 1951         (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
 1952 
 1953         /*
 1954          * Loop through the pages of the file, starting with the requested
 1955          * offset. Get a file page (do I/O if necessary), map the file page
 1956          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 1957          * it on the socket.
 1958          * This is done in two loops.  The inner loop turns as many pages
 1959          * as it can, up to available socket buffer space, without blocking
 1960          * into mbufs to have it bulk delivered into the socket send buffer.
 1961          * The outer loop checks the state and available space of the socket
 1962          * and takes care of the overall progress.
 1963          */
 1964         for (off = uap->offset, rem = uap->nbytes; ; ) {
                      /*
                       * loopbytes: file bytes turned into mbufs in this pass.
                       * space:     room in the send buffer for this pass.
                       * done:      set once no file data is left to queue.
                       */
 1965                 int loopbytes = 0;
 1966                 int space = 0;
 1967                 int done = 0;
 1968 
 1969                 /*
 1970                  * Check the socket state for ongoing connection,
 1971                  * no errors and space in socket buffer.
 1972                  * If space is low allow for the remainder of the
 1973                  * file to be processed if it fits the socket buffer.
 1974                  * Otherwise block in waiting for sufficient space
 1975                  * to proceed, or if the socket is nonblocking, return
 1976                  * to userland with EAGAIN while reporting how far
 1977                  * we've come.
 1978                  * We wait until the socket buffer has significant free
 1979                  * space to do bulk sends.  This makes good use of file
 1980                  * system read ahead and allows packet segmentation
 1981                  * offloading hardware to take over lots of work.  If
 1982                  * we were not careful here we would send off only one
 1983                  * sfbuf at a time.
 1984                  */
 1985                 SOCKBUF_LOCK(&so->so_snd);
 1986                 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
 1987                         so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
 1988 retry_space:
 1989                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 1990                         error = EPIPE;
 1991                         SOCKBUF_UNLOCK(&so->so_snd);
 1992                         goto done;
 1993                 } else if (so->so_error) {
 1994                         error = so->so_error;
 1995                         so->so_error = 0;
 1996                         SOCKBUF_UNLOCK(&so->so_snd);
 1997                         goto done;
 1998                 }
 1999                 space = sbspace(&so->so_snd);
 2000                 if (space < rem &&
 2001                     (space <= 0 ||
 2002                      space < so->so_snd.sb_lowat)) {
 2003                         if (so->so_state & SS_NBIO) {
 2004                                 SOCKBUF_UNLOCK(&so->so_snd);
 2005                                 error = EAGAIN;
 2006                                 goto done;
 2007                         }
 2008                         /*
 2009                          * sbwait drops the lock while sleeping.
 2010                          * When we loop back to retry_space the
 2011                          * state may have changed and we retest
 2012                          * for it.
 2013                          */
 2014                         error = sbwait(&so->so_snd);
 2015                         /*
 2016                          * An error from sbwait usually indicates that we've
 2017                          * been interrupted by a signal. If we've sent anything
 2018                          * then return bytes sent, otherwise return the error.
 2019                          */
 2020                         if (error) {
 2021                                 SOCKBUF_UNLOCK(&so->so_snd);
 2022                                 goto done;
 2023                         }
 2024                         goto retry_space;
 2025                 }
 2026                 SOCKBUF_UNLOCK(&so->so_snd);
 2027 
 2028                 /*
 2029                  * Reduce space in the socket buffer by the size of
 2030                  * the header mbuf chain.
 2031                  * hdrlen is set to 0 after the first loop.
 2032                  */
 2033                 space -= hdrlen;
 2034 
 2035                 /*
 2036                  * Loop and construct maximum sized mbuf chain to be bulk
 2037                  * dumped into socket buffer.
 2038                  */
 2039                 while (space > loopbytes) {
 2040                         vm_pindex_t pindex;
 2041                         vm_offset_t pgoff;
 2042                         struct mbuf *m0;
 2043 
 2044                         VM_OBJECT_LOCK(obj);
 2045                         /*
 2046                          * Calculate the amount to transfer.
 2047                          * Not to exceed a page, the EOF,
 2048                          * or the passed in nbytes.
 2049                          */
 2050                         pgoff = (vm_offset_t)(off & PAGE_MASK);
 2051                         xfsize = omin(PAGE_SIZE - pgoff,
 2052                             obj->un_pager.vnp.vnp_size - uap->offset -
 2053                             fsbytes - loopbytes);
 2054                         if (uap->nbytes)
 2055                                 rem = (uap->nbytes - fsbytes - loopbytes);
 2056                         else
 2057                                 rem = obj->un_pager.vnp.vnp_size -
 2058                                     uap->offset - fsbytes - loopbytes;
 2059                         xfsize = omin(rem, xfsize);
 2060                         xfsize = omin(space - loopbytes, xfsize);
 2061                         if (xfsize <= 0) {
 2062                                 VM_OBJECT_UNLOCK(obj);
 2063                                 done = 1;               /* all data sent */
 2064                                 break;
 2065                         }
 2066 
 2067                         /*
 2068                          * Attempt to look up the page.  Allocate
 2069                          * if not found or wait and loop if busy.
 2070                          */
 2071                         pindex = OFF_TO_IDX(off);
 2072                         pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
 2073                             VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
 2074 
 2075                         /*
 2076                          * Check if page is valid for what we need,
 2077                          * otherwise initiate I/O.
 2078                          * If we already turned some pages into mbufs,
 2079                          * send them off before we come here again and
 2080                          * block.
 2081                          *
 2082                          * Locking note: the object lock is dropped on
 2083                          * the success paths only; when error is set the
 2084                          * lock stays held and is released by the
 2085                          * "if (error)" block further down.
 2086                          */
 2082                         if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
 2083                                 VM_OBJECT_UNLOCK(obj);
 2084                         else if (m != NULL)
 2085                                 error = EAGAIN; /* send what we already got */
 2086                         else if (uap->flags & SF_NODISKIO)
 2087                                 error = EBUSY;
 2088                         else {
 2089                                 int bsize, resid;
 2090 
 2091                                 /*
 2092                                  * Ensure that our page is still around
 2093                                  * when the I/O completes.
 2094                                  */
 2095                                 vm_page_io_start(pg);
 2096                                 VM_OBJECT_UNLOCK(obj);
 2097 
 2098                                 /*
 2099                                  * Get the page from backing store.
 2100                                  */
 2101                                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2102                                 error = vn_lock(vp, LK_SHARED);
 2103                                 if (error != 0)
 2104                                         goto after_read;
 2105                                 bsize = vp->v_mount->mnt_stat.f_iosize;
 2106 
 2107                                 /*
 2108                                  * XXXMAC: Because we don't have fp->f_cred
 2109                                  * here, we pass in NOCRED.  This is probably
 2110                                  * wrong, but is consistent with our original
 2111                                  * implementation.
 2112                                  */
 2113                                 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
 2114                                     trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
 2115                                     IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 2116                                     td->td_ucred, NOCRED, &resid, td);
 2117                                 VOP_UNLOCK(vp, 0);
 2118                         after_read:
 2119                                 VFS_UNLOCK_GIANT(vfslocked);
 2120                                 VM_OBJECT_LOCK(obj);
 2121                                 vm_page_io_finish(pg);
 2122                                 if (!error)
 2123                                         VM_OBJECT_UNLOCK(obj);
 2124                                 mbstat.sf_iocnt++;
 2125                         }
 2126                         if (error) {
 2127                                 vm_page_lock(pg);
 2128                                 vm_page_unwire(pg, 0);
 2129                                 /*
 2130                                  * See if anyone else might know about
 2131                                  * this page.  If not and it is not valid,
 2132                                  * then free it.
 2133                                  */
 2134                                 if (pg->wire_count == 0 && pg->valid == 0 &&
 2135                                     pg->busy == 0 && !(pg->oflags & VPO_BUSY))
 2136                                         vm_page_free(pg);
 2137                                 vm_page_unlock(pg);
 2138                                 VM_OBJECT_UNLOCK(obj);
 2139                                 if (error == EAGAIN)
 2140                                         error = 0;      /* not a real error */
 2141                                 break;
 2142                         }
 2143 
 2144                         /*
 2145                          * Get a sendfile buf.  When allocating the
 2146                          * first buffer for mbuf chain, we usually
 2147                          * wait as long as necessary, but this wait
 2148                          * can be interrupted.  For subsequent
 2149                          * buffers, do not sleep, since several
 2150                          * threads might exhaust the buffers and then
 2151                          * deadlock.
 2152                          */
 2153                         sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
 2154                             SFB_CATCH);
 2155                         if (sf == NULL) {
 2156                                 mbstat.sf_allocfail++;
 2157                                 vm_page_lock(pg);
 2158                                 vm_page_unwire(pg, 0);
 2159                                 KASSERT(pg->object != NULL,
 2160                                     ("kern_sendfile: object disappeared"));
 2161                                 vm_page_unlock(pg);
                                      /*
                                       * With mbufs already chained this is
                                       * not reported as an error; the chain
                                       * is sent below and the outer loop
                                       * tries again.
                                       */
 2162                                 if (m == NULL)
 2163                                         error = (mnw ? EAGAIN : EINTR);
 2164                                 break;
 2165                         }
 2166 
 2167                         /*
 2168                          * Get an mbuf and set it up as having
 2169                          * external storage.
 2170                          */
 2171                         m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
 2172                         if (m0 == NULL) {
 2173                                 error = (mnw ? EAGAIN : ENOBUFS);
                                      /*
                                       * NOTE(review): invoked directly here,
                                       * presumably to release the sf_buf and
                                       * the wired page -- confirm.
                                       */
 2174                                 sf_buf_mext((void *)sf_buf_kva(sf), sf);
 2175                                 break;
 2176                         }
 2177                         MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
 2178                             sfs, sf, M_RDONLY, EXT_SFBUF);
 2179                         m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
 2180                         m0->m_len = xfsize;
 2181 
 2182                         /* Append to mbuf chain. */
 2183                         if (m != NULL)
 2184                                 m_cat(m, m0);
 2185                         else
 2186                                 m = m0;
 2187 
 2188                         /* Keep track of bits processed. */
 2189                         loopbytes += xfsize;
 2190                         off += xfsize;
 2191 
                              /* SF_SYNC: one more outstanding file-backed mbuf. */
 2192                         if (sfs != NULL) {
 2193                                 mtx_lock(&sfs->mtx);
 2194                                 sfs->count++;
 2195                                 mtx_unlock(&sfs->mtx);
 2196                         }
 2197                 }
 2198 
 2199                 /* Add the buffer chain to the socket buffer. */
 2200                 if (m != NULL) {
 2201                         int mlen, err;
 2202 
 2203                         mlen = m_length(m, NULL);
 2204                         SOCKBUF_LOCK(&so->so_snd);
 2205                         if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 2206                                 error = EPIPE;
 2207                                 SOCKBUF_UNLOCK(&so->so_snd);
 2208                                 goto done;
 2209                         }
 2210                         SOCKBUF_UNLOCK(&so->so_snd);
 2211                         CURVNET_SET(so->so_vnet);
 2212                         /* Avoid error aliasing. */
 2213                         err = (*so->so_proto->pr_usrreqs->pru_send)
 2214                                     (so, 0, m, NULL, NULL, td);
 2215                         CURVNET_RESTORE();
 2216                         if (err == 0) {
 2217                                 /*
 2218                                  * We need two counters to get the
 2219                                  * file offset and nbytes to send
 2220                                  * right:
 2221                                  * - sbytes contains the total amount
 2222                                  *   of bytes sent, including headers.
 2223                                  * - fsbytes contains the total amount
 2224                                  *   of bytes sent from the file.
 2225                                  */
 2226                                 sbytes += mlen;
 2227                                 fsbytes += mlen;
 2228                                 if (hdrlen) {
 2229                                         fsbytes -= hdrlen;
 2230                                         hdrlen = 0;
 2231                                 }
 2232                         } else if (error == 0)
 2233                                 error = err;
 2234                         m = NULL;       /* pru_send always consumes */
 2235                 }
 2236 
 2237                 /* Quit outer loop on error or when we're done. */
 2238                 if (done) 
 2239                         break;
 2240                 if (error)
 2241                         goto done;
 2242         }
 2243 
 2244         /*
 2245          * Send trailers. Wimp out and use writev(2).
 2246          * The send buffer lock is released here, so this path jumps
 2247          * straight to "out" and bypasses "done".
 2248          */
 2247         if (trl_uio != NULL) {
 2248                 sbunlock(&so->so_snd);
 2249                 error = kern_writev(td, uap->s, trl_uio);
 2250                 if (error == 0)
 2251                         sbytes += td->td_retval[0];
 2252                 goto out;
 2253         }
 2254 
 2255 done:
              /* Every path arriving here still holds the so_snd sblock(). */
 2256         sbunlock(&so->so_snd);
 2257 out:
 2258         /*
 2259          * If there was no error we have to clear td->td_retval[0]
 2260          * because it may have been set by writev.
 2261          */
 2262         if (error == 0) {
 2263                 td->td_retval[0] = 0;
 2264         }
              /* The copyout() result is not checked. */
 2265         if (uap->sbytes != NULL) {
 2266                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
 2267         }
 2268         if (obj != NULL)
 2269                 vm_object_deallocate(obj);
 2270         if (vp != NULL) {
 2271                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2272                 vrele(vp);
 2273                 VFS_UNLOCK_GIANT(vfslocked);
 2274         }
 2275         if (so)
 2276                 fdrop(sock_fp, td);
              /* A chain that was built but never handed to pru_send. */
 2277         if (m)
 2278                 m_freem(m);
 2279 
              /*
               * SF_SYNC: wait on the condition variable if file-backed mbufs
               * are still counted as outstanding, then destroy the tracker.
               */
 2280         if (sfs != NULL) {
 2281                 mtx_lock(&sfs->mtx);
 2282                 if (sfs->count != 0)
 2283                         cv_wait(&sfs->cv, &sfs->mtx);
 2284                 KASSERT(sfs->count == 0, ("sendfile sync still busy"));
 2285                 cv_destroy(&sfs->cv);
 2286                 mtx_destroy(&sfs->mtx);
 2287                 free(sfs, M_TEMP);
 2288         }
 2289 
              /* ERESTART is never returned from here; it becomes EINTR. */
 2290         if (error == ERESTART)
 2291                 error = EINTR;
 2292 
 2293         return (error);
 2294 }
 2295 
 2296 /*
 2297  * SCTP syscalls.
 2298  * Functionality only compiled in if SCTP is defined in the kernel Makefile,
 2299  * otherwise all return EOPNOTSUPP.
 2300  * XXX: We should make this loadable one day.
 2301  */
 2302 int
 2303 sys_sctp_peeloff(td, uap)
 2304         struct thread *td;
 2305         struct sctp_peeloff_args /* {
 2306                 int     sd;
 2307                 caddr_t name;
 2308         } */ *uap;
 2309 {
 2310 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2311         struct filedesc *fdp;
 2312         struct file *nfp = NULL;
 2313         int error;
 2314         struct socket *head, *so;
 2315         int fd;
 2316         u_int fflag;
 2317 
 2318         fdp = td->td_proc->p_fd;
 2319         AUDIT_ARG_FD(uap->sd);
 2320         error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag);
 2321         if (error)
 2322                 goto done2;
 2323         error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
 2324         if (error)
 2325                 goto done2;
 2326         /*
 2327          * At this point we know we do have a assoc to pull
 2328          * we proceed to get the fd setup. This may block
 2329          * but that is ok.
 2330          */
 2331 
 2332         error = falloc(td, &nfp, &fd, 0);
 2333         if (error)
 2334                 goto done;
 2335         td->td_retval[0] = fd;
 2336 
 2337         CURVNET_SET(head->so_vnet);
 2338         so = sonewconn(head, SS_ISCONNECTED);
 2339         if (so == NULL) 
 2340                 goto noconnection;
 2341         /*
 2342          * Before changing the flags on the socket, we have to bump the
 2343          * reference count.  Otherwise, if the protocol calls sofree(),
 2344          * the socket will be released due to a zero refcount.
 2345          */
 2346         SOCK_LOCK(so);
 2347         soref(so);                      /* file descriptor reference */
 2348         SOCK_UNLOCK(so);
 2349 
 2350         ACCEPT_LOCK();
 2351 
 2352         TAILQ_REMOVE(&head->so_comp, so, so_list);
 2353         head->so_qlen--;
 2354         so->so_state |= (head->so_state & SS_NBIO);
 2355         so->so_state &= ~SS_NOFDREF;
 2356         so->so_qstate &= ~SQ_COMP;
 2357         so->so_head = NULL;
 2358         ACCEPT_UNLOCK();
 2359         finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 2360         error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
 2361         if (error)
 2362                 goto noconnection;
 2363         if (head->so_sigio != NULL)
 2364                 fsetown(fgetown(&head->so_sigio), &so->so_sigio);
 2365 
 2366 noconnection:
 2367         /*
 2368          * close the new descriptor, assuming someone hasn't ripped it
 2369          * out from under us.
 2370          */
 2371         if (error)
 2372                 fdclose(fdp, nfp, fd, td);
 2373 
 2374         /*
 2375          * Release explicitly held references before returning.
 2376          */
 2377         CURVNET_RESTORE();
 2378 done:
 2379         if (nfp != NULL)
 2380                 fdrop(nfp, td);
 2381         fputsock(head);
 2382 done2:
 2383         return (error);
 2384 #else  /* SCTP */
 2385         return (EOPNOTSUPP);
 2386 #endif /* SCTP */
 2387 }
 2388 
 2389 int
 2390 sys_sctp_generic_sendmsg (td, uap)
 2391         struct thread *td;
 2392         struct sctp_generic_sendmsg_args /* {
 2393                 int sd, 
 2394                 caddr_t msg, 
 2395                 int mlen, 
 2396                 caddr_t to, 
 2397                 __socklen_t tolen, 
 2398                 struct sctp_sndrcvinfo *sinfo, 
 2399                 int flags
 2400         } */ *uap;
 2401 {
 2402 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2403         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2404         struct socket *so;
 2405         struct file *fp = NULL;
 2406         int error = 0, len;
 2407         struct sockaddr *to = NULL;
 2408 #ifdef KTRACE
 2409         struct uio *ktruio = NULL;
 2410 #endif
 2411         struct uio auio;
 2412         struct iovec iov[1];
 2413         cap_rights_t rights;
 2414 
 2415         if (uap->sinfo) {
 2416                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2417                 if (error)
 2418                         return (error);
 2419                 u_sinfo = &sinfo;
 2420         }
 2421 
 2422         rights = CAP_WRITE;
 2423         if (uap->tolen) {
 2424                 error = getsockaddr(&to, uap->to, uap->tolen);
 2425                 if (error) {
 2426                         to = NULL;
 2427                         goto sctp_bad2;
 2428                 }
 2429                 rights |= CAP_CONNECT;
 2430         }
 2431 
 2432         AUDIT_ARG_FD(uap->sd);
 2433         error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
 2434         if (error)
 2435                 goto sctp_bad;
 2436 #ifdef KTRACE
 2437         if (to && (KTRPOINT(td, KTR_STRUCT)))
 2438                 ktrsockaddr(to);
 2439 #endif
 2440 
 2441         iov[0].iov_base = uap->msg;
 2442         iov[0].iov_len = uap->mlen;
 2443 
 2444         so = (struct socket *)fp->f_data;
 2445 #ifdef MAC
 2446         error = mac_socket_check_send(td->td_ucred, so);
 2447         if (error)
 2448                 goto sctp_bad;
 2449 #endif /* MAC */
 2450 
 2451         auio.uio_iov =  iov;
 2452         auio.uio_iovcnt = 1;
 2453         auio.uio_segflg = UIO_USERSPACE;
 2454         auio.uio_rw = UIO_WRITE;
 2455         auio.uio_td = td;
 2456         auio.uio_offset = 0;                    /* XXX */
 2457         auio.uio_resid = 0;
 2458         len = auio.uio_resid = uap->mlen;
 2459         CURVNET_SET(so->so_vnet);
 2460         error = sctp_lower_sosend(so, to, &auio,
 2461                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2462                     uap->flags, u_sinfo, td);
 2463         CURVNET_RESTORE();
 2464         if (error) {
 2465                 if (auio.uio_resid != len && (error == ERESTART ||
 2466                     error == EINTR || error == EWOULDBLOCK))
 2467                         error = 0;
 2468                 /* Generation of SIGPIPE can be controlled per socket. */
 2469                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2470                     !(uap->flags & MSG_NOSIGNAL)) {
 2471                         PROC_LOCK(td->td_proc);
 2472                         tdsignal(td, SIGPIPE);
 2473                         PROC_UNLOCK(td->td_proc);
 2474                 }
 2475         }
 2476         if (error == 0)
 2477                 td->td_retval[0] = len - auio.uio_resid;
 2478 #ifdef KTRACE
 2479         if (ktruio != NULL) {
 2480                 ktruio->uio_resid = td->td_retval[0];
 2481                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2482         }
 2483 #endif /* KTRACE */
 2484 sctp_bad:
 2485         if (fp)
 2486                 fdrop(fp, td);
 2487 sctp_bad2:
 2488         if (to)
 2489                 free(to, M_SONAME);
 2490         return (error);
 2491 #else  /* SCTP */
 2492         return (EOPNOTSUPP);
 2493 #endif /* SCTP */
 2494 }
 2495 
 2496 int
 2497 sys_sctp_generic_sendmsg_iov(td, uap)
 2498         struct thread *td;
 2499         struct sctp_generic_sendmsg_iov_args /* {
 2500                 int sd, 
 2501                 struct iovec *iov, 
 2502                 int iovlen, 
 2503                 caddr_t to, 
 2504                 __socklen_t tolen, 
 2505                 struct sctp_sndrcvinfo *sinfo, 
 2506                 int flags
 2507         } */ *uap;
 2508 {
 2509 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2510         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2511         struct socket *so;
 2512         struct file *fp = NULL;
 2513         int error=0, len, i;
 2514         struct sockaddr *to = NULL;
 2515 #ifdef KTRACE
 2516         struct uio *ktruio = NULL;
 2517 #endif
 2518         struct uio auio;
 2519         struct iovec *iov, *tiov;
 2520         cap_rights_t rights;
 2521 
 2522         if (uap->sinfo) {
 2523                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2524                 if (error)
 2525                         return (error);
 2526                 u_sinfo = &sinfo;
 2527         }
 2528         rights = CAP_WRITE;
 2529         if (uap->tolen) {
 2530                 error = getsockaddr(&to, uap->to, uap->tolen);
 2531                 if (error) {
 2532                         to = NULL;
 2533                         goto sctp_bad2;
 2534                 }
 2535                 rights |= CAP_CONNECT;
 2536         }
 2537 
 2538         AUDIT_ARG_FD(uap->sd);
 2539         error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
 2540         if (error)
 2541                 goto sctp_bad1;
 2542 
 2543 #ifdef COMPAT_FREEBSD32
 2544         if (SV_CURPROC_FLAG(SV_ILP32))
 2545                 error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 2546                     uap->iovlen, &iov, EMSGSIZE);
 2547         else
 2548 #endif
 2549                 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2550         if (error)
 2551                 goto sctp_bad1;
 2552 #ifdef KTRACE
 2553         if (to && (KTRPOINT(td, KTR_STRUCT)))
 2554                 ktrsockaddr(to);
 2555 #endif
 2556 
 2557         so = (struct socket *)fp->f_data;
 2558 #ifdef MAC
 2559         error = mac_socket_check_send(td->td_ucred, so);
 2560         if (error)
 2561                 goto sctp_bad;
 2562 #endif /* MAC */
 2563 
 2564         auio.uio_iov = iov;
 2565         auio.uio_iovcnt = uap->iovlen;
 2566         auio.uio_segflg = UIO_USERSPACE;
 2567         auio.uio_rw = UIO_WRITE;
 2568         auio.uio_td = td;
 2569         auio.uio_offset = 0;                    /* XXX */
 2570         auio.uio_resid = 0;
 2571         tiov = iov;
 2572         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2573                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2574                         error = EINVAL;
 2575                         goto sctp_bad;
 2576                 }
 2577         }
 2578         len = auio.uio_resid;
 2579         CURVNET_SET(so->so_vnet);
 2580         error = sctp_lower_sosend(so, to, &auio,
 2581                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2582                     uap->flags, u_sinfo, td);
 2583         CURVNET_RESTORE();
 2584         if (error) {
 2585                 if (auio.uio_resid != len && (error == ERESTART ||
 2586                     error == EINTR || error == EWOULDBLOCK))
 2587                         error = 0;
 2588                 /* Generation of SIGPIPE can be controlled per socket */
 2589                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2590                     !(uap->flags & MSG_NOSIGNAL)) {
 2591                         PROC_LOCK(td->td_proc);
 2592                         tdsignal(td, SIGPIPE);
 2593                         PROC_UNLOCK(td->td_proc);
 2594                 }
 2595         }
 2596         if (error == 0)
 2597                 td->td_retval[0] = len - auio.uio_resid;
 2598 #ifdef KTRACE
 2599         if (ktruio != NULL) {
 2600                 ktruio->uio_resid = td->td_retval[0];
 2601                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2602         }
 2603 #endif /* KTRACE */
 2604 sctp_bad:
 2605         free(iov, M_IOV);
 2606 sctp_bad1:
 2607         if (fp)
 2608                 fdrop(fp, td);
 2609 sctp_bad2:
 2610         if (to)
 2611                 free(to, M_SONAME);
 2612         return (error);
 2613 #else  /* SCTP */
 2614         return (EOPNOTSUPP);
 2615 #endif /* SCTP */
 2616 }
 2617 
 2618 int
 2619 sys_sctp_generic_recvmsg(td, uap)
 2620         struct thread *td;
 2621         struct sctp_generic_recvmsg_args /* {
 2622                 int sd, 
 2623                 struct iovec *iov, 
 2624                 int iovlen,
 2625                 struct sockaddr *from, 
 2626                 __socklen_t *fromlenaddr,
 2627                 struct sctp_sndrcvinfo *sinfo, 
 2628                 int *msg_flags
 2629         } */ *uap;
 2630 {
 2631 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2632         uint8_t sockbufstore[256];
 2633         struct uio auio;
 2634         struct iovec *iov, *tiov;
 2635         struct sctp_sndrcvinfo sinfo;
 2636         struct socket *so;
 2637         struct file *fp = NULL;
 2638         struct sockaddr *fromsa;
 2639         int fromlen;
 2640         int len, i, msg_flags;
 2641         int error = 0;
 2642 #ifdef KTRACE
 2643         struct uio *ktruio = NULL;
 2644 #endif
 2645 
 2646         AUDIT_ARG_FD(uap->sd);
 2647         error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL);
 2648         if (error) {
 2649                 return (error);
 2650         }
 2651 #ifdef COMPAT_FREEBSD32
 2652         if (SV_CURPROC_FLAG(SV_ILP32))
 2653                 error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 2654                     uap->iovlen, &iov, EMSGSIZE);
 2655         else
 2656 #endif
 2657                 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2658         if (error)
 2659                 goto out1;
 2660 
 2661         so = fp->f_data;
 2662 #ifdef MAC
 2663         error = mac_socket_check_receive(td->td_ucred, so);
 2664         if (error) {
 2665                 goto out;
 2666         }
 2667 #endif /* MAC */
 2668 
 2669         if (uap->fromlenaddr) {
 2670                 error = copyin(uap->fromlenaddr,
 2671                     &fromlen, sizeof (fromlen));
 2672                 if (error) {
 2673                         goto out;
 2674                 }
 2675         } else {
 2676                 fromlen = 0;
 2677         }
 2678         if (uap->msg_flags) {
 2679                 error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
 2680                 if (error) {
 2681                         goto out;
 2682                 }
 2683         } else {
 2684                 msg_flags = 0;
 2685         }
 2686         auio.uio_iov = iov;
 2687         auio.uio_iovcnt = uap->iovlen;
 2688         auio.uio_segflg = UIO_USERSPACE;
 2689         auio.uio_rw = UIO_READ;
 2690         auio.uio_td = td;
 2691         auio.uio_offset = 0;                    /* XXX */
 2692         auio.uio_resid = 0;
 2693         tiov = iov;
 2694         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2695                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2696                         error = EINVAL;
 2697                         goto out;
 2698                 }
 2699         }
 2700         len = auio.uio_resid;
 2701         fromsa = (struct sockaddr *)sockbufstore;
 2702 
 2703 #ifdef KTRACE
 2704         if (KTRPOINT(td, KTR_GENIO))
 2705                 ktruio = cloneuio(&auio);
 2706 #endif /* KTRACE */
 2707         memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
 2708         CURVNET_SET(so->so_vnet);
 2709         error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
 2710                     fromsa, fromlen, &msg_flags,
 2711                     (struct sctp_sndrcvinfo *)&sinfo, 1);
 2712         CURVNET_RESTORE();
 2713         if (error) {
 2714                 if (auio.uio_resid != (int)len && (error == ERESTART ||
 2715                     error == EINTR || error == EWOULDBLOCK))
 2716                         error = 0;
 2717         } else {
 2718                 if (uap->sinfo)
 2719                         error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
 2720         }
 2721 #ifdef KTRACE
 2722         if (ktruio != NULL) {
 2723                 ktruio->uio_resid = (int)len - auio.uio_resid;
 2724                 ktrgenio(uap->sd, UIO_READ, ktruio, error);
 2725         }
 2726 #endif /* KTRACE */
 2727         if (error)
 2728                 goto out;
 2729         td->td_retval[0] = (int)len - auio.uio_resid;
 2730 
 2731         if (fromlen && uap->from) {
 2732                 len = fromlen;
 2733                 if (len <= 0 || fromsa == 0)
 2734                         len = 0;
 2735                 else {
 2736                         len = MIN(len, fromsa->sa_len);
 2737                         error = copyout(fromsa, uap->from, (unsigned)len);
 2738                         if (error)
 2739                                 goto out;
 2740                 }
 2741                 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
 2742                 if (error) {
 2743                         goto out;
 2744                 }
 2745         }
 2746 #ifdef KTRACE
 2747         if (KTRPOINT(td, KTR_STRUCT))
 2748                 ktrsockaddr(fromsa);
 2749 #endif
 2750         if (uap->msg_flags) {
 2751                 error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
 2752                 if (error) {
 2753                         goto out;
 2754                 }
 2755         }
 2756 out:
 2757         free(iov, M_IOV);
 2758 out1:
 2759         if (fp) 
 2760                 fdrop(fp, td);
 2761 
 2762         return (error);
 2763 #else  /* SCTP */
 2764         return (EOPNOTSUPP);
 2765 #endif /* SCTP */
 2766 }

Cache object: 49367cf2bc6ef10b913834880aa8cc97


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.