The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * sendfile(2) and related extensions:
    6  * Copyright (c) 1998, David Greenman. All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD$");
   37 
   38 #include "opt_capsicum.h"
   39 #include "opt_inet.h"
   40 #include "opt_inet6.h"
   41 #include "opt_sctp.h"
   42 #include "opt_compat.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/capability.h>
   48 #include <sys/kernel.h>
   49 #include <sys/lock.h>
   50 #include <sys/mutex.h>
   51 #include <sys/sysproto.h>
   52 #include <sys/malloc.h>
   53 #include <sys/filedesc.h>
   54 #include <sys/event.h>
   55 #include <sys/proc.h>
   56 #include <sys/fcntl.h>
   57 #include <sys/file.h>
   58 #include <sys/filio.h>
   59 #include <sys/jail.h>
   60 #include <sys/mount.h>
   61 #include <sys/mbuf.h>
   62 #include <sys/protosw.h>
   63 #include <sys/sf_buf.h>
   64 #include <sys/sysent.h>
   65 #include <sys/socket.h>
   66 #include <sys/socketvar.h>
   67 #include <sys/signalvar.h>
   68 #include <sys/syscallsubr.h>
   69 #include <sys/sysctl.h>
   70 #include <sys/uio.h>
   71 #include <sys/vnode.h>
   72 #ifdef KTRACE
   73 #include <sys/ktrace.h>
   74 #endif
   75 #ifdef COMPAT_FREEBSD32
   76 #include <compat/freebsd32/freebsd32_util.h>
   77 #endif
   78 
   79 #include <net/vnet.h>
   80 
   81 #include <security/audit/audit.h>
   82 #include <security/mac/mac_framework.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_param.h>
   86 #include <vm/vm_object.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/vm_pageout.h>
   89 #include <vm/vm_kern.h>
   90 #include <vm/vm_extern.h>
   91 
   92 #if defined(INET) || defined(INET6)
   93 #ifdef SCTP
   94 #include <netinet/sctp.h>
   95 #include <netinet/sctp_peeloff.h>
   96 #endif /* SCTP */
   97 #endif /* INET || INET6 */
   98 
/*
 * Creation flags, OR'ed into socket() and socketpair() type argument.
 * For stable/9, these are supported but not exposed in the header file.
 *
 * sys_socket() and kern_socketpair() strip these bits from the type before
 * creating the socket: SOCK_CLOEXEC is translated into O_CLOEXEC for
 * falloc(), and SOCK_NONBLOCK into FNONBLOCK on the new file.
 */
#define SOCK_CLOEXEC    0x10000000
#define SOCK_NONBLOCK   0x20000000
  105 
/*
 * Forward declarations of file-local helpers.
 *
 * sendit() copies in the user-supplied address and control data and then
 * hands off to kern_sendit().  recvit() is defined outside this view.
 */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

/*
 * For accept1(), a nonzero `compat' selects the old sockaddr layout (see
 * oaccept()).  The remaining helpers are defined outside this view;
 * presumably their `compat' argument has the same meaning -- TODO confirm.
 */
static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
                        int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
                        int compat);
  115 
/*
 * NSFBUFS-related variables and associated sysctls
 *
 * The three counters below are exported under the _kern_ipc sysctl node.
 * They are only declared and exported here; the code that maintains them
 * is not part of this section of the file.
 */
int nsfbufs;            /* maximum number of sf_bufs available */
int nsfbufspeak;        /* sf_bufs in use at peak usage */
int nsfbufsused;        /* sf_bufs currently in use */

SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
  129 
  130 /*
  131  * Convert a user file descriptor to a kernel file entry and check that, if
  132  * it is a capability, the right rights are present. A reference on the file
  133  * entry is held upon returning.
  134  */
  135 static int
  136 getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
  137     struct file **fpp, u_int *fflagp)
  138 {
  139         struct file *fp;
  140 #ifdef CAPABILITIES
  141         struct file *fp_fromcap;
  142         int error;
  143 #endif
  144 
  145         fp = NULL;
  146         if ((fdp == NULL) || ((fp = fget_unlocked(fdp, fd)) == NULL))
  147                 return (EBADF);
  148 #ifdef CAPABILITIES
  149         /*
  150          * If the file descriptor is for a capability, test rights and use
  151          * the file descriptor referenced by the capability.
  152          */
  153         error = cap_funwrap(fp, rights, &fp_fromcap);
  154         if (error) {
  155                 fdrop(fp, curthread);
  156                 return (error);
  157         }
  158         if (fp != fp_fromcap) {
  159                 fhold(fp_fromcap);
  160                 fdrop(fp, curthread);
  161                 fp = fp_fromcap;
  162         }
  163 #endif /* CAPABILITIES */
  164         if (fp->f_type != DTYPE_SOCKET) {
  165                 fdrop(fp, curthread);
  166                 return (ENOTSOCK);
  167         }
  168         if (fflagp != NULL)
  169                 *fflagp = fp->f_flag;
  170         *fpp = fp;
  171         return (0);
  172 }
  173 
/*
 * System call interface to the socket abstraction.
 *
 * COMPAT_OLDSOCK is enabled whenever COMPAT_43 is configured.  In this
 * file it gates the old-style entry points (oaccept(), osend(),
 * osendmsg()) and the compatibility paths inside accept1() and sendit().
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif
  180 
  181 int
  182 sys_socket(td, uap)
  183         struct thread *td;
  184         struct socket_args /* {
  185                 int     domain;
  186                 int     type;
  187                 int     protocol;
  188         } */ *uap;
  189 {
  190         struct filedesc *fdp;
  191         struct socket *so;
  192         struct file *fp;
  193         int fd, error, type, oflag, fflag;
  194 
  195         AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
  196 
  197         type = uap->type;
  198         oflag = 0;
  199         fflag = 0;
  200         if ((type & SOCK_CLOEXEC) != 0) {
  201                 type &= ~SOCK_CLOEXEC;
  202                 oflag |= O_CLOEXEC;
  203         }
  204         if ((type & SOCK_NONBLOCK) != 0) {
  205                 type &= ~SOCK_NONBLOCK;
  206                 fflag |= FNONBLOCK;
  207         }
  208 
  209 #ifdef MAC
  210         error = mac_socket_check_create(td->td_ucred, uap->domain, type,
  211             uap->protocol);
  212         if (error)
  213                 return (error);
  214 #endif
  215         fdp = td->td_proc->p_fd;
  216         error = falloc(td, &fp, &fd, oflag);
  217         if (error)
  218                 return (error);
  219         /* An extra reference on `fp' has been held for us by falloc(). */
  220         error = socreate(uap->domain, &so, type, uap->protocol,
  221             td->td_ucred, td);
  222         if (error) {
  223                 fdclose(fdp, fp, fd, td);
  224         } else {
  225                 finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops);
  226                 if ((fflag & FNONBLOCK) != 0)
  227                         (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td);
  228                 td->td_retval[0] = fd;
  229         }
  230         fdrop(fp, td);
  231         return (error);
  232 }
  233 
  234 /* ARGSUSED */
  235 int
  236 sys_bind(td, uap)
  237         struct thread *td;
  238         struct bind_args /* {
  239                 int     s;
  240                 caddr_t name;
  241                 int     namelen;
  242         } */ *uap;
  243 {
  244         struct sockaddr *sa;
  245         int error;
  246 
  247         if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
  248                 return (error);
  249 
  250         error = kern_bind(td, uap->s, sa);
  251         free(sa, M_SONAME);
  252         return (error);
  253 }
  254 
  255 int
  256 kern_bind(td, fd, sa)
  257         struct thread *td;
  258         int fd;
  259         struct sockaddr *sa;
  260 {
  261         struct socket *so;
  262         struct file *fp;
  263         int error;
  264 
  265         AUDIT_ARG_FD(fd);
  266         error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL);
  267         if (error)
  268                 return (error);
  269         so = fp->f_data;
  270 #ifdef KTRACE
  271         if (KTRPOINT(td, KTR_STRUCT))
  272                 ktrsockaddr(sa);
  273 #endif
  274 #ifdef MAC
  275         error = mac_socket_check_bind(td->td_ucred, so, sa);
  276         if (error == 0)
  277 #endif
  278                 error = sobind(so, sa, td);
  279         fdrop(fp, td);
  280         return (error);
  281 }
  282 
  283 /* ARGSUSED */
  284 int
  285 sys_listen(td, uap)
  286         struct thread *td;
  287         struct listen_args /* {
  288                 int     s;
  289                 int     backlog;
  290         } */ *uap;
  291 {
  292         struct socket *so;
  293         struct file *fp;
  294         int error;
  295 
  296         AUDIT_ARG_FD(uap->s);
  297         error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL);
  298         if (error == 0) {
  299                 so = fp->f_data;
  300 #ifdef MAC
  301                 error = mac_socket_check_listen(td->td_ucred, so);
  302                 if (error == 0)
  303 #endif
  304                         error = solisten(so, uap->backlog, td);
  305                 fdrop(fp, td);
  306         }
  307         return(error);
  308 }
  309 
  310 /*
  311  * accept1()
  312  */
  313 static int
  314 accept1(td, uap, compat)
  315         struct thread *td;
  316         struct accept_args /* {
  317                 int     s;
  318                 struct sockaddr * __restrict name;
  319                 socklen_t       * __restrict anamelen;
  320         } */ *uap;
  321         int compat;
  322 {
  323         struct sockaddr *name;
  324         socklen_t namelen;
  325         struct file *fp;
  326         int error;
  327 
  328         if (uap->name == NULL)
  329                 return (kern_accept(td, uap->s, NULL, NULL, NULL));
  330 
  331         error = copyin(uap->anamelen, &namelen, sizeof (namelen));
  332         if (error)
  333                 return (error);
  334 
  335         error = kern_accept(td, uap->s, &name, &namelen, &fp);
  336 
  337         /*
  338          * return a namelen of zero for older code which might
  339          * ignore the return value from accept.
  340          */
  341         if (error) {
  342                 (void) copyout(&namelen,
  343                     uap->anamelen, sizeof(*uap->anamelen));
  344                 return (error);
  345         }
  346 
  347         if (error == 0 && name != NULL) {
  348 #ifdef COMPAT_OLDSOCK
  349                 if (compat)
  350                         ((struct osockaddr *)name)->sa_family =
  351                             name->sa_family;
  352 #endif
  353                 error = copyout(name, uap->name, namelen);
  354         }
  355         if (error == 0)
  356                 error = copyout(&namelen, uap->anamelen,
  357                     sizeof(namelen));
  358         if (error)
  359                 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
  360         fdrop(fp, td);
  361         free(name, M_SONAME);
  362         return (error);
  363 }
  364 
/*
 * Accept one completed connection from the listening socket behind
 * descriptor `s' and install it in a newly allocated descriptor.
 *
 * td       calling thread; on success td->td_retval[0] is the new fd.
 * s        descriptor of the listening socket (needs CAP_ACCEPT).
 * name     if not NULL, *name receives the peer address returned by
 *          soaccept(), or NULL if there was none; accept1() releases it
 *          with free(..., M_SONAME).
 * namelen  used only when `name' is not NULL: on entry the caller's
 *          buffer size, on return clamped to the address length, or set
 *          to 0 when soaccept() fails or yields no address.
 * fp       if not NULL, *fp receives a referenced pointer to the new file
 *          on success (the caller must fdrop() it) and NULL on failure.
 *
 * Returns 0 on success.  Visible error returns: EINVAL when the socket is
 * not listening, EWOULDBLOCK when it is non-blocking and nothing is
 * queued, the pending so_error (ECONNABORTED once the socket can receive
 * no more), an msleep() error, or an error from getsock_cap(), the MAC
 * check, falloc() or soaccept().
 */
int
kern_accept(struct thread *td, int s, struct sockaddr **name,
    socklen_t *namelen, struct file **fp)
{
        struct filedesc *fdp;
        struct file *headfp, *nfp = NULL;
        struct sockaddr *sa = NULL;
        int error;
        struct socket *head, *so;
        int fd;
        u_int fflag;
        pid_t pgid;
        int tmp;

        if (name) {
                *name = NULL;
                /*
                 * NOTE(review): if socklen_t is an unsigned type this test
                 * can never be true -- confirm against the type definition.
                 */
                if (*namelen < 0)
                        return (EINVAL);
        }

        AUDIT_ARG_FD(s);
        fdp = td->td_proc->p_fd;
        error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag);
        if (error)
                return (error);
        head = headfp->f_data;
        if ((head->so_options & SO_ACCEPTCONN) == 0) {
                error = EINVAL;
                goto done;
        }
#ifdef MAC
        error = mac_socket_check_accept(td->td_ucred, head);
        if (error != 0)
                goto done;
#endif
        /* Reserve the descriptor first so that failure here is cheap. */
        error = falloc(td, &nfp, &fd, 0);
        if (error)
                goto done;
        ACCEPT_LOCK();
        if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
                ACCEPT_UNLOCK();
                error = EWOULDBLOCK;
                goto noconnection;
        }
        /* Sleep until a connection completes or an error is posted. */
        while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
                if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
                        head->so_error = ECONNABORTED;
                        break;
                }
                error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
                    "accept", 0);
                if (error) {
                        ACCEPT_UNLOCK();
                        goto noconnection;
                }
        }
        if (head->so_error) {
                /* Report the pending error once and clear it. */
                error = head->so_error;
                head->so_error = 0;
                ACCEPT_UNLOCK();
                goto noconnection;
        }
        so = TAILQ_FIRST(&head->so_comp);
        KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
        KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

        /*
         * Before changing the flags on the socket, we have to bump the
         * reference count.  Otherwise, if the protocol calls sofree(),
         * the socket will be released due to a zero refcount.
         */
        SOCK_LOCK(so);                  /* soref() and so_state update */
        soref(so);                      /* file descriptor reference */

        /* Detach the connection from the listener's completed queue. */
        TAILQ_REMOVE(&head->so_comp, so, so_list);
        head->so_qlen--;
        so->so_state |= (head->so_state & SS_NBIO);
        so->so_qstate &= ~SQ_COMP;
        so->so_head = NULL;

        SOCK_UNLOCK(so);
        ACCEPT_UNLOCK();

        /* An extra reference on `nfp' has been held for us by falloc(). */
        td->td_retval[0] = fd;

        /* connection has been removed from the listen queue */
        KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

        /* Carry the listener's SIGIO owner, if any, over to the new socket. */
        pgid = fgetown(&head->so_sigio);
        if (pgid != 0)
                fsetown(pgid, &so->so_sigio);

        /* The new file starts with the listening descriptor's flags. */
        finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
        /* Sync socket nonblocking/async state with file flags */
        tmp = fflag & FNONBLOCK;
        (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
        tmp = fflag & FASYNC;
        (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
        sa = 0;
        error = soaccept(so, &sa);
        if (error) {
                /*
                 * return a namelen of zero for older code which might
                 * ignore the return value from accept.
                 */
                if (name)
                        *namelen = 0;
                goto noconnection;
        }
        if (sa == NULL) {
                /* No peer address: succeed with a zero-length name. */
                if (name)
                        *namelen = 0;
                goto done;
        }
        if (name) {
                /* check sa_len before it is destroyed */
                if (*namelen > sa->sa_len)
                        *namelen = sa->sa_len;
#ifdef KTRACE
                if (KTRPOINT(td, KTR_STRUCT))
                        ktrsockaddr(sa);
#endif
                /* Ownership of `sa' passes to the caller. */
                *name = sa;
                sa = NULL;
        }
noconnection:
        if (sa)
                free(sa, M_SONAME);

        /*
         * close the new descriptor, assuming someone hasn't ripped it
         * out from under us.
         */
        if (error)
                fdclose(fdp, nfp, fd, td);

        /*
         * Release explicitly held references before returning.  We return
         * a reference on nfp to the caller on success if they request it.
         */
done:
        if (fp != NULL) {
                if (error == 0) {
                        *fp = nfp;
                        nfp = NULL;
                } else
                        *fp = NULL;
        }
        if (nfp != NULL)
                fdrop(nfp, td);
        fdrop(headfp, td);
        return (error);
}
  519 
  520 int
  521 sys_accept(td, uap)
  522         struct thread *td;
  523         struct accept_args *uap;
  524 {
  525 
  526         return (accept1(td, uap, 0));
  527 }
  528 
  529 #ifdef COMPAT_OLDSOCK
  530 int
  531 oaccept(td, uap)
  532         struct thread *td;
  533         struct accept_args *uap;
  534 {
  535 
  536         return (accept1(td, uap, 1));
  537 }
  538 #endif /* COMPAT_OLDSOCK */
  539 
  540 /* ARGSUSED */
  541 int
  542 sys_connect(td, uap)
  543         struct thread *td;
  544         struct connect_args /* {
  545                 int     s;
  546                 caddr_t name;
  547                 int     namelen;
  548         } */ *uap;
  549 {
  550         struct sockaddr *sa;
  551         int error;
  552 
  553         error = getsockaddr(&sa, uap->name, uap->namelen);
  554         if (error)
  555                 return (error);
  556 
  557         error = kern_connect(td, uap->s, sa);
  558         free(sa, M_SONAME);
  559         return (error);
  560 }
  561 
  562 
/*
 * Connect the socket behind descriptor `fd' to the kernel-space address
 * `sa'.  The descriptor must carry CAP_CONNECT.  The caller keeps
 * ownership of `sa'.
 *
 * Returns 0 once the connection is established.  Visible error returns:
 * EALREADY when a connect is already in progress, EINPROGRESS when the
 * socket is non-blocking and the connect has not finished, EINTR when the
 * wait is interrupted (ERESTART is mapped to EINTR), the socket's pending
 * so_error, or an error from getsock_cap(), the MAC check or soconnect().
 */
int
kern_connect(td, fd, sa)
        struct thread *td;
        int fd;
        struct sockaddr *sa;
{
        struct socket *so;
        struct file *fp;
        int error;
        int interrupted = 0;

        AUDIT_ARG_FD(fd);
        error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL);
        if (error)
                return (error);
        so = fp->f_data;
        if (so->so_state & SS_ISCONNECTING) {
                error = EALREADY;
                goto done1;
        }
#ifdef KTRACE
        if (KTRPOINT(td, KTR_STRUCT))
                ktrsockaddr(sa);
#endif
#ifdef MAC
        error = mac_socket_check_connect(td->td_ucred, so, sa);
        if (error)
                goto bad;
#endif
        error = soconnect(so, sa, td);
        if (error)
                goto bad;
        /* Non-blocking socket: report progress and leave the state alone. */
        if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
                error = EINPROGRESS;
                goto done1;
        }
        SOCK_LOCK(so);
        /* Wait for the connect to finish or for an error to be posted. */
        while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
                error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
                    "connec", 0);
                if (error) {
                        if (error == EINTR || error == ERESTART)
                                interrupted = 1;
                        break;
                }
        }
        if (error == 0) {
                /* Pick up and clear any error posted on the socket. */
                error = so->so_error;
                so->so_error = 0;
        }
        SOCK_UNLOCK(so);
bad:
        /*
         * An interrupted wait leaves SS_ISCONNECTING set, so a later call
         * takes the EALREADY path above.
         * NOTE(review): so_state is modified here without the socket lock
         * held -- confirm this is intentional.
         */
        if (!interrupted)
                so->so_state &= ~SS_ISCONNECTING;
        if (error == ERESTART)
                error = EINTR;
done1:
        fdrop(fp, td);
        return (error);
}
  623 
/*
 * Create a pair of connected sockets and install them in two new file
 * descriptors.
 *
 * `type' may have SOCK_CLOEXEC and/or SOCK_NONBLOCK OR'ed in; both are
 * stripped before the sockets are created.  On success the descriptors
 * are stored in rsv[0] and rsv[1] (a kernel-space array) and 0 is
 * returned.  On failure everything created so far is released in reverse
 * order through the free4..free1 labels and the error is returned.
 */
int
kern_socketpair(struct thread *td, int domain, int type, int protocol,
    int *rsv)
{
        struct filedesc *fdp = td->td_proc->p_fd;
        struct file *fp1, *fp2;
        struct socket *so1, *so2;
        int fd, error, oflag, fflag;

        AUDIT_ARG_SOCKET(domain, type, protocol);

        /* Separate the creation flags from the real socket type. */
        oflag = 0;
        fflag = 0;
        if ((type & SOCK_CLOEXEC) != 0) {
                type &= ~SOCK_CLOEXEC;
                oflag |= O_CLOEXEC;
        }
        if ((type & SOCK_NONBLOCK) != 0) {
                type &= ~SOCK_NONBLOCK;
                fflag |= FNONBLOCK;
        }
#ifdef MAC
        /* We might want to have a separate check for socket pairs. */
        error = mac_socket_check_create(td->td_ucred, domain, type,
            protocol);
        if (error)
                return (error);
#endif
        error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
        if (error)
                return (error);
        error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
        if (error)
                goto free1;
        /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
        error = falloc(td, &fp1, &fd, oflag);
        if (error)
                goto free2;
        rsv[0] = fd;
        fp1->f_data = so1;      /* so1 already has ref count */
        error = falloc(td, &fp2, &fd, oflag);
        if (error)
                goto free3;
        fp2->f_data = so2;      /* so2 already has ref count */
        rsv[1] = fd;
        error = soconnect2(so1, so2);
        if (error)
                goto free4;
        if (type == SOCK_DGRAM) {
                /*
                 * Datagram socket connection is asymmetric.
                 */
                 error = soconnect2(so2, so1);
                 if (error)
                        goto free4;
        }
        /* Only now, with both ends connected, turn the files into sockets. */
        finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data,
            &socketops);
        finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data,
            &socketops);
        if ((fflag & FNONBLOCK) != 0) {
                (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td);
                (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td);
        }
        fdrop(fp1, td);
        fdrop(fp2, td);
        return (0);
        /*
         * Error unwinding: each label releases one more resource than the
         * label below it.  finit() has not run on these paths, so the
         * sockets are closed explicitly with soclose() at free2/free1.
         */
free4:
        fdclose(fdp, fp2, rsv[1], td);
        fdrop(fp2, td);
free3:
        fdclose(fdp, fp1, rsv[0], td);
        fdrop(fp1, td);
free2:
        if (so2 != NULL)
                (void)soclose(so2);
free1:
        if (so1 != NULL)
                (void)soclose(so1);
        return (error);
}
  705 
  706 int
  707 sys_socketpair(struct thread *td, struct socketpair_args *uap)
  708 {
  709         int error, sv[2];
  710 
  711         error = kern_socketpair(td, uap->domain, uap->type,
  712             uap->protocol, sv);
  713         if (error)
  714                 return (error);
  715         error = copyout(sv, uap->rsv, 2 * sizeof(int));
  716         if (error) {
  717                 (void)kern_close(td, sv[0]);
  718                 (void)kern_close(td, sv[1]);
  719         }
  720         return (error);
  721 }
  722 
  723 static int
  724 sendit(td, s, mp, flags)
  725         struct thread *td;
  726         int s;
  727         struct msghdr *mp;
  728         int flags;
  729 {
  730         struct mbuf *control;
  731         struct sockaddr *to;
  732         int error;
  733 
  734 #ifdef CAPABILITY_MODE
  735         if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL))
  736                 return (ECAPMODE);
  737 #endif
  738 
  739         if (mp->msg_name != NULL) {
  740                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
  741                 if (error) {
  742                         to = NULL;
  743                         goto bad;
  744                 }
  745                 mp->msg_name = to;
  746         } else {
  747                 to = NULL;
  748         }
  749 
  750         if (mp->msg_control) {
  751                 if (mp->msg_controllen < sizeof(struct cmsghdr)
  752 #ifdef COMPAT_OLDSOCK
  753                     && mp->msg_flags != MSG_COMPAT
  754 #endif
  755                 ) {
  756                         error = EINVAL;
  757                         goto bad;
  758                 }
  759                 error = sockargs(&control, mp->msg_control,
  760                     mp->msg_controllen, MT_CONTROL);
  761                 if (error)
  762                         goto bad;
  763 #ifdef COMPAT_OLDSOCK
  764                 if (mp->msg_flags == MSG_COMPAT) {
  765                         struct cmsghdr *cm;
  766 
  767                         M_PREPEND(control, sizeof(*cm), M_WAIT);
  768                         cm = mtod(control, struct cmsghdr *);
  769                         cm->cmsg_len = control->m_len;
  770                         cm->cmsg_level = SOL_SOCKET;
  771                         cm->cmsg_type = SCM_RIGHTS;
  772                 }
  773 #endif
  774         } else {
  775                 control = NULL;
  776         }
  777 
  778         error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
  779 
  780 bad:
  781         if (to)
  782                 free(to, M_SONAME);
  783         return (error);
  784 }
  785 
  786 int
  787 kern_sendit(td, s, mp, flags, control, segflg)
  788         struct thread *td;
  789         int s;
  790         struct msghdr *mp;
  791         int flags;
  792         struct mbuf *control;
  793         enum uio_seg segflg;
  794 {
  795         struct file *fp;
  796         struct uio auio;
  797         struct iovec *iov;
  798         struct socket *so;
  799         int i, error;
  800         ssize_t len;
  801         cap_rights_t rights;
  802 #ifdef KTRACE
  803         struct uio *ktruio = NULL;
  804 #endif
  805 
  806         AUDIT_ARG_FD(s);
  807         rights = CAP_WRITE;
  808         if (mp->msg_name != NULL)
  809                 rights |= CAP_CONNECT;
  810         error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL);
  811         if (error)
  812                 return (error);
  813         so = (struct socket *)fp->f_data;
  814 
  815 #ifdef KTRACE
  816         if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
  817                 ktrsockaddr(mp->msg_name);
  818 #endif
  819 #ifdef MAC
  820         if (mp->msg_name != NULL) {
  821                 error = mac_socket_check_connect(td->td_ucred, so,
  822                     mp->msg_name);
  823                 if (error)
  824                         goto bad;
  825         }
  826         error = mac_socket_check_send(td->td_ucred, so);
  827         if (error)
  828                 goto bad;
  829 #endif
  830 
  831         auio.uio_iov = mp->msg_iov;
  832         auio.uio_iovcnt = mp->msg_iovlen;
  833         auio.uio_segflg = segflg;
  834         auio.uio_rw = UIO_WRITE;
  835         auio.uio_td = td;
  836         auio.uio_offset = 0;                    /* XXX */
  837         auio.uio_resid = 0;
  838         iov = mp->msg_iov;
  839         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  840                 if ((auio.uio_resid += iov->iov_len) < 0) {
  841                         error = EINVAL;
  842                         goto bad;
  843                 }
  844         }
  845 #ifdef KTRACE
  846         if (KTRPOINT(td, KTR_GENIO))
  847                 ktruio = cloneuio(&auio);
  848 #endif
  849         len = auio.uio_resid;
  850         error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
  851         if (error) {
  852                 if (auio.uio_resid != len && (error == ERESTART ||
  853                     error == EINTR || error == EWOULDBLOCK))
  854                         error = 0;
  855                 /* Generation of SIGPIPE can be controlled per socket */
  856                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
  857                     !(flags & MSG_NOSIGNAL)) {
  858                         PROC_LOCK(td->td_proc);
  859                         tdsignal(td, SIGPIPE);
  860                         PROC_UNLOCK(td->td_proc);
  861                 }
  862         }
  863         if (error == 0)
  864                 td->td_retval[0] = len - auio.uio_resid;
  865 #ifdef KTRACE
  866         if (ktruio != NULL) {
  867                 ktruio->uio_resid = td->td_retval[0];
  868                 ktrgenio(s, UIO_WRITE, ktruio, error);
  869         }
  870 #endif
  871 bad:
  872         fdrop(fp, td);
  873         return (error);
  874 }
  875 
  876 int
  877 sys_sendto(td, uap)
  878         struct thread *td;
  879         struct sendto_args /* {
  880                 int     s;
  881                 caddr_t buf;
  882                 size_t  len;
  883                 int     flags;
  884                 caddr_t to;
  885                 int     tolen;
  886         } */ *uap;
  887 {
  888         struct msghdr msg;
  889         struct iovec aiov;
  890         int error;
  891 
  892         msg.msg_name = uap->to;
  893         msg.msg_namelen = uap->tolen;
  894         msg.msg_iov = &aiov;
  895         msg.msg_iovlen = 1;
  896         msg.msg_control = 0;
  897 #ifdef COMPAT_OLDSOCK
  898         msg.msg_flags = 0;
  899 #endif
  900         aiov.iov_base = uap->buf;
  901         aiov.iov_len = uap->len;
  902         error = sendit(td, uap->s, &msg, uap->flags);
  903         return (error);
  904 }
  905 
  906 #ifdef COMPAT_OLDSOCK
  907 int
  908 osend(td, uap)
  909         struct thread *td;
  910         struct osend_args /* {
  911                 int     s;
  912                 caddr_t buf;
  913                 int     len;
  914                 int     flags;
  915         } */ *uap;
  916 {
  917         struct msghdr msg;
  918         struct iovec aiov;
  919         int error;
  920 
  921         msg.msg_name = 0;
  922         msg.msg_namelen = 0;
  923         msg.msg_iov = &aiov;
  924         msg.msg_iovlen = 1;
  925         aiov.iov_base = uap->buf;
  926         aiov.iov_len = uap->len;
  927         msg.msg_control = 0;
  928         msg.msg_flags = 0;
  929         error = sendit(td, uap->s, &msg, uap->flags);
  930         return (error);
  931 }
  932 
  933 int
  934 osendmsg(td, uap)
  935         struct thread *td;
  936         struct osendmsg_args /* {
  937                 int     s;
  938                 caddr_t msg;
  939                 int     flags;
  940         } */ *uap;
  941 {
  942         struct msghdr msg;
  943         struct iovec *iov;
  944         int error;
  945 
  946         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
  947         if (error)
  948                 return (error);
  949         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  950         if (error)
  951                 return (error);
  952         msg.msg_iov = iov;
  953         msg.msg_flags = MSG_COMPAT;
  954         error = sendit(td, uap->s, &msg, uap->flags);
  955         free(iov, M_IOV);
  956         return (error);
  957 }
  958 #endif
  959 
  960 int
  961 sys_sendmsg(td, uap)
  962         struct thread *td;
  963         struct sendmsg_args /* {
  964                 int     s;
  965                 caddr_t msg;
  966                 int     flags;
  967         } */ *uap;
  968 {
  969         struct msghdr msg;
  970         struct iovec *iov;
  971         int error;
  972 
  973         error = copyin(uap->msg, &msg, sizeof (msg));
  974         if (error)
  975                 return (error);
  976         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  977         if (error)
  978                 return (error);
  979         msg.msg_iov = iov;
  980 #ifdef COMPAT_OLDSOCK
  981         msg.msg_flags = 0;
  982 #endif
  983         error = sendit(td, uap->s, &msg, uap->flags);
  984         free(iov, M_IOV);
  985         return (error);
  986 }
  987 
  988 int
  989 kern_recvit(td, s, mp, fromseg, controlp)
  990         struct thread *td;
  991         int s;
  992         struct msghdr *mp;
  993         enum uio_seg fromseg;
  994         struct mbuf **controlp;
  995 {
  996         struct uio auio;
  997         struct iovec *iov;
  998         int i;
  999         ssize_t len;
 1000         int error;
 1001         struct mbuf *m, *control = 0;
 1002         caddr_t ctlbuf;
 1003         struct file *fp;
 1004         struct socket *so;
 1005         struct sockaddr *fromsa = 0;
 1006 #ifdef KTRACE
 1007         struct uio *ktruio = NULL;
 1008 #endif
 1009 
 1010         if (controlp != NULL)
 1011                 *controlp = NULL;
 1012 
 1013         AUDIT_ARG_FD(s);
 1014         error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL);
 1015         if (error)
 1016                 return (error);
 1017         so = fp->f_data;
 1018 
 1019 #ifdef MAC
 1020         error = mac_socket_check_receive(td->td_ucred, so);
 1021         if (error) {
 1022                 fdrop(fp, td);
 1023                 return (error);
 1024         }
 1025 #endif
 1026 
 1027         auio.uio_iov = mp->msg_iov;
 1028         auio.uio_iovcnt = mp->msg_iovlen;
 1029         auio.uio_segflg = UIO_USERSPACE;
 1030         auio.uio_rw = UIO_READ;
 1031         auio.uio_td = td;
 1032         auio.uio_offset = 0;                    /* XXX */
 1033         auio.uio_resid = 0;
 1034         iov = mp->msg_iov;
 1035         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
 1036                 if ((auio.uio_resid += iov->iov_len) < 0) {
 1037                         fdrop(fp, td);
 1038                         return (EINVAL);
 1039                 }
 1040         }
 1041 #ifdef KTRACE
 1042         if (KTRPOINT(td, KTR_GENIO))
 1043                 ktruio = cloneuio(&auio);
 1044 #endif
 1045         len = auio.uio_resid;
 1046         error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
 1047             (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
 1048             &mp->msg_flags);
 1049         if (error) {
 1050                 if (auio.uio_resid != len && (error == ERESTART ||
 1051                     error == EINTR || error == EWOULDBLOCK))
 1052                         error = 0;
 1053         }
 1054 #ifdef KTRACE
 1055         if (ktruio != NULL) {
 1056                 ktruio->uio_resid = len - auio.uio_resid;
 1057                 ktrgenio(s, UIO_READ, ktruio, error);
 1058         }
 1059 #endif
 1060         if (error)
 1061                 goto out;
 1062         td->td_retval[0] = len - auio.uio_resid;
 1063         if (mp->msg_name) {
 1064                 len = mp->msg_namelen;
 1065                 if (len <= 0 || fromsa == 0)
 1066                         len = 0;
 1067                 else {
 1068                         /* save sa_len before it is destroyed by MSG_COMPAT */
 1069                         len = MIN(len, fromsa->sa_len);
 1070 #ifdef COMPAT_OLDSOCK
 1071                         if (mp->msg_flags & MSG_COMPAT)
 1072                                 ((struct osockaddr *)fromsa)->sa_family =
 1073                                     fromsa->sa_family;
 1074 #endif
 1075                         if (fromseg == UIO_USERSPACE) {
 1076                                 error = copyout(fromsa, mp->msg_name,
 1077                                     (unsigned)len);
 1078                                 if (error)
 1079                                         goto out;
 1080                         } else
 1081                                 bcopy(fromsa, mp->msg_name, len);
 1082                 }
 1083                 mp->msg_namelen = len;
 1084         }
 1085         if (mp->msg_control && controlp == NULL) {
 1086 #ifdef COMPAT_OLDSOCK
 1087                 /*
 1088                  * We assume that old recvmsg calls won't receive access
 1089                  * rights and other control info, esp. as control info
 1090                  * is always optional and those options didn't exist in 4.3.
 1091                  * If we receive rights, trim the cmsghdr; anything else
 1092                  * is tossed.
 1093                  */
 1094                 if (control && mp->msg_flags & MSG_COMPAT) {
 1095                         if (mtod(control, struct cmsghdr *)->cmsg_level !=
 1096                             SOL_SOCKET ||
 1097                             mtod(control, struct cmsghdr *)->cmsg_type !=
 1098                             SCM_RIGHTS) {
 1099                                 mp->msg_controllen = 0;
 1100                                 goto out;
 1101                         }
 1102                         control->m_len -= sizeof (struct cmsghdr);
 1103                         control->m_data += sizeof (struct cmsghdr);
 1104                 }
 1105 #endif
 1106                 len = mp->msg_controllen;
 1107                 m = control;
 1108                 mp->msg_controllen = 0;
 1109                 ctlbuf = mp->msg_control;
 1110 
 1111                 while (m && len > 0) {
 1112                         unsigned int tocopy;
 1113 
 1114                         if (len >= m->m_len)
 1115                                 tocopy = m->m_len;
 1116                         else {
 1117                                 mp->msg_flags |= MSG_CTRUNC;
 1118                                 tocopy = len;
 1119                         }
 1120 
 1121                         if ((error = copyout(mtod(m, caddr_t),
 1122                                         ctlbuf, tocopy)) != 0)
 1123                                 goto out;
 1124 
 1125                         ctlbuf += tocopy;
 1126                         len -= tocopy;
 1127                         m = m->m_next;
 1128                 }
 1129                 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
 1130         }
 1131 out:
 1132         fdrop(fp, td);
 1133 #ifdef KTRACE
 1134         if (fromsa && KTRPOINT(td, KTR_STRUCT))
 1135                 ktrsockaddr(fromsa);
 1136 #endif
 1137         if (fromsa)
 1138                 free(fromsa, M_SONAME);
 1139 
 1140         if (error == 0 && controlp != NULL)  
 1141                 *controlp = control;
 1142         else  if (control)
 1143                 m_freem(control);
 1144 
 1145         return (error);
 1146 }
 1147 
 1148 static int
 1149 recvit(td, s, mp, namelenp)
 1150         struct thread *td;
 1151         int s;
 1152         struct msghdr *mp;
 1153         void *namelenp;
 1154 {
 1155         int error;
 1156 
 1157         error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
 1158         if (error)
 1159                 return (error);
 1160         if (namelenp) {
 1161                 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
 1162 #ifdef COMPAT_OLDSOCK
 1163                 if (mp->msg_flags & MSG_COMPAT)
 1164                         error = 0;      /* old recvfrom didn't check */
 1165 #endif
 1166         }
 1167         return (error);
 1168 }
 1169 
 1170 int
 1171 sys_recvfrom(td, uap)
 1172         struct thread *td;
 1173         struct recvfrom_args /* {
 1174                 int     s;
 1175                 caddr_t buf;
 1176                 size_t  len;
 1177                 int     flags;
 1178                 struct sockaddr * __restrict    from;
 1179                 socklen_t * __restrict fromlenaddr;
 1180         } */ *uap;
 1181 {
 1182         struct msghdr msg;
 1183         struct iovec aiov;
 1184         int error;
 1185 
 1186         if (uap->fromlenaddr) {
 1187                 error = copyin(uap->fromlenaddr,
 1188                     &msg.msg_namelen, sizeof (msg.msg_namelen));
 1189                 if (error)
 1190                         goto done2;
 1191         } else {
 1192                 msg.msg_namelen = 0;
 1193         }
 1194         msg.msg_name = uap->from;
 1195         msg.msg_iov = &aiov;
 1196         msg.msg_iovlen = 1;
 1197         aiov.iov_base = uap->buf;
 1198         aiov.iov_len = uap->len;
 1199         msg.msg_control = 0;
 1200         msg.msg_flags = uap->flags;
 1201         error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 1202 done2:
 1203         return(error);
 1204 }
 1205 
 1206 #ifdef COMPAT_OLDSOCK
 1207 int
 1208 orecvfrom(td, uap)
 1209         struct thread *td;
 1210         struct recvfrom_args *uap;
 1211 {
 1212 
 1213         uap->flags |= MSG_COMPAT;
 1214         return (sys_recvfrom(td, uap));
 1215 }
 1216 #endif
 1217 
 1218 #ifdef COMPAT_OLDSOCK
 1219 int
 1220 orecv(td, uap)
 1221         struct thread *td;
 1222         struct orecv_args /* {
 1223                 int     s;
 1224                 caddr_t buf;
 1225                 int     len;
 1226                 int     flags;
 1227         } */ *uap;
 1228 {
 1229         struct msghdr msg;
 1230         struct iovec aiov;
 1231         int error;
 1232 
 1233         msg.msg_name = 0;
 1234         msg.msg_namelen = 0;
 1235         msg.msg_iov = &aiov;
 1236         msg.msg_iovlen = 1;
 1237         aiov.iov_base = uap->buf;
 1238         aiov.iov_len = uap->len;
 1239         msg.msg_control = 0;
 1240         msg.msg_flags = uap->flags;
 1241         error = recvit(td, uap->s, &msg, NULL);
 1242         return (error);
 1243 }
 1244 
 1245 /*
 1246  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 1247  * overlays the new one, missing only the flags, and with the (old) access
 1248  * rights where the control fields are now.
 1249  */
 1250 int
 1251 orecvmsg(td, uap)
 1252         struct thread *td;
 1253         struct orecvmsg_args /* {
 1254                 int     s;
 1255                 struct  omsghdr *msg;
 1256                 int     flags;
 1257         } */ *uap;
 1258 {
 1259         struct msghdr msg;
 1260         struct iovec *iov;
 1261         int error;
 1262 
 1263         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 1264         if (error)
 1265                 return (error);
 1266         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1267         if (error)
 1268                 return (error);
 1269         msg.msg_flags = uap->flags | MSG_COMPAT;
 1270         msg.msg_iov = iov;
 1271         error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
 1272         if (msg.msg_controllen && error == 0)
 1273                 error = copyout(&msg.msg_controllen,
 1274                     &uap->msg->msg_accrightslen, sizeof (int));
 1275         free(iov, M_IOV);
 1276         return (error);
 1277 }
 1278 #endif
 1279 
 1280 int
 1281 sys_recvmsg(td, uap)
 1282         struct thread *td;
 1283         struct recvmsg_args /* {
 1284                 int     s;
 1285                 struct  msghdr *msg;
 1286                 int     flags;
 1287         } */ *uap;
 1288 {
 1289         struct msghdr msg;
 1290         struct iovec *uiov, *iov;
 1291         int error;
 1292 
 1293         error = copyin(uap->msg, &msg, sizeof (msg));
 1294         if (error)
 1295                 return (error);
 1296         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1297         if (error)
 1298                 return (error);
 1299         msg.msg_flags = uap->flags;
 1300 #ifdef COMPAT_OLDSOCK
 1301         msg.msg_flags &= ~MSG_COMPAT;
 1302 #endif
 1303         uiov = msg.msg_iov;
 1304         msg.msg_iov = iov;
 1305         error = recvit(td, uap->s, &msg, NULL);
 1306         if (error == 0) {
 1307                 msg.msg_iov = uiov;
 1308                 error = copyout(&msg, uap->msg, sizeof(msg));
 1309         }
 1310         free(iov, M_IOV);
 1311         return (error);
 1312 }
 1313 
 1314 /* ARGSUSED */
 1315 int
 1316 sys_shutdown(td, uap)
 1317         struct thread *td;
 1318         struct shutdown_args /* {
 1319                 int     s;
 1320                 int     how;
 1321         } */ *uap;
 1322 {
 1323         struct socket *so;
 1324         struct file *fp;
 1325         int error;
 1326 
 1327         AUDIT_ARG_FD(uap->s);
 1328         error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp,
 1329             NULL);
 1330         if (error == 0) {
 1331                 so = fp->f_data;
 1332                 error = soshutdown(so, uap->how);
 1333                 fdrop(fp, td);
 1334         }
 1335         return (error);
 1336 }
 1337 
 1338 /* ARGSUSED */
 1339 int
 1340 sys_setsockopt(td, uap)
 1341         struct thread *td;
 1342         struct setsockopt_args /* {
 1343                 int     s;
 1344                 int     level;
 1345                 int     name;
 1346                 caddr_t val;
 1347                 int     valsize;
 1348         } */ *uap;
 1349 {
 1350 
 1351         return (kern_setsockopt(td, uap->s, uap->level, uap->name,
 1352             uap->val, UIO_USERSPACE, uap->valsize));
 1353 }
 1354 
 1355 int
 1356 kern_setsockopt(td, s, level, name, val, valseg, valsize)
 1357         struct thread *td;
 1358         int s;
 1359         int level;
 1360         int name;
 1361         void *val;
 1362         enum uio_seg valseg;
 1363         socklen_t valsize;
 1364 {
 1365         int error;
 1366         struct socket *so;
 1367         struct file *fp;
 1368         struct sockopt sopt;
 1369 
 1370         if (val == NULL && valsize != 0)
 1371                 return (EFAULT);
 1372         if ((int)valsize < 0)
 1373                 return (EINVAL);
 1374 
 1375         sopt.sopt_dir = SOPT_SET;
 1376         sopt.sopt_level = level;
 1377         sopt.sopt_name = name;
 1378         sopt.sopt_val = val;
 1379         sopt.sopt_valsize = valsize;
 1380         switch (valseg) {
 1381         case UIO_USERSPACE:
 1382                 sopt.sopt_td = td;
 1383                 break;
 1384         case UIO_SYSSPACE:
 1385                 sopt.sopt_td = NULL;
 1386                 break;
 1387         default:
 1388                 panic("kern_setsockopt called with bad valseg");
 1389         }
 1390 
 1391         AUDIT_ARG_FD(s);
 1392         error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL);
 1393         if (error == 0) {
 1394                 so = fp->f_data;
 1395                 error = sosetopt(so, &sopt);
 1396                 fdrop(fp, td);
 1397         }
 1398         return(error);
 1399 }
 1400 
 1401 /* ARGSUSED */
 1402 int
 1403 sys_getsockopt(td, uap)
 1404         struct thread *td;
 1405         struct getsockopt_args /* {
 1406                 int     s;
 1407                 int     level;
 1408                 int     name;
 1409                 void * __restrict       val;
 1410                 socklen_t * __restrict avalsize;
 1411         } */ *uap;
 1412 {
 1413         socklen_t valsize;
 1414         int     error;
 1415 
 1416         if (uap->val) {
 1417                 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 1418                 if (error)
 1419                         return (error);
 1420         }
 1421 
 1422         error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 1423             uap->val, UIO_USERSPACE, &valsize);
 1424 
 1425         if (error == 0)
 1426                 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 1427         return (error);
 1428 }
 1429 
 1430 /*
 1431  * Kernel version of getsockopt.
 1432  * optval can be a userland or userspace. optlen is always a kernel pointer.
 1433  */
 1434 int
 1435 kern_getsockopt(td, s, level, name, val, valseg, valsize)
 1436         struct thread *td;
 1437         int s;
 1438         int level;
 1439         int name;
 1440         void *val;
 1441         enum uio_seg valseg;
 1442         socklen_t *valsize;
 1443 {
 1444         int error;
 1445         struct  socket *so;
 1446         struct file *fp;
 1447         struct  sockopt sopt;
 1448 
 1449         if (val == NULL)
 1450                 *valsize = 0;
 1451         if ((int)*valsize < 0)
 1452                 return (EINVAL);
 1453 
 1454         sopt.sopt_dir = SOPT_GET;
 1455         sopt.sopt_level = level;
 1456         sopt.sopt_name = name;
 1457         sopt.sopt_val = val;
 1458         sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
 1459         switch (valseg) {
 1460         case UIO_USERSPACE:
 1461                 sopt.sopt_td = td;
 1462                 break;
 1463         case UIO_SYSSPACE:
 1464                 sopt.sopt_td = NULL;
 1465                 break;
 1466         default:
 1467                 panic("kern_getsockopt called with bad valseg");
 1468         }
 1469 
 1470         AUDIT_ARG_FD(s);
 1471         error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL);
 1472         if (error == 0) {
 1473                 so = fp->f_data;
 1474                 error = sogetopt(so, &sopt);
 1475                 *valsize = sopt.sopt_valsize;
 1476                 fdrop(fp, td);
 1477         }
 1478         return (error);
 1479 }
 1480 
 1481 /*
 1482  * getsockname1() - Get socket name.
 1483  */
 1484 /* ARGSUSED */
 1485 static int
 1486 getsockname1(td, uap, compat)
 1487         struct thread *td;
 1488         struct getsockname_args /* {
 1489                 int     fdes;
 1490                 struct sockaddr * __restrict asa;
 1491                 socklen_t * __restrict alen;
 1492         } */ *uap;
 1493         int compat;
 1494 {
 1495         struct sockaddr *sa;
 1496         socklen_t len;
 1497         int error;
 1498 
 1499         error = copyin(uap->alen, &len, sizeof(len));
 1500         if (error)
 1501                 return (error);
 1502 
 1503         error = kern_getsockname(td, uap->fdes, &sa, &len);
 1504         if (error)
 1505                 return (error);
 1506 
 1507         if (len != 0) {
 1508 #ifdef COMPAT_OLDSOCK
 1509                 if (compat)
 1510                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1511 #endif
 1512                 error = copyout(sa, uap->asa, (u_int)len);
 1513         }
 1514         free(sa, M_SONAME);
 1515         if (error == 0)
 1516                 error = copyout(&len, uap->alen, sizeof(len));
 1517         return (error);
 1518 }
 1519 
 1520 int
 1521 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
 1522     socklen_t *alen)
 1523 {
 1524         struct socket *so;
 1525         struct file *fp;
 1526         socklen_t len;
 1527         int error;
 1528 
 1529         if (*alen < 0)
 1530                 return (EINVAL);
 1531 
 1532         AUDIT_ARG_FD(fd);
 1533         error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL);
 1534         if (error)
 1535                 return (error);
 1536         so = fp->f_data;
 1537         *sa = NULL;
 1538         CURVNET_SET(so->so_vnet);
 1539         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
 1540         CURVNET_RESTORE();
 1541         if (error)
 1542                 goto bad;
 1543         if (*sa == NULL)
 1544                 len = 0;
 1545         else
 1546                 len = MIN(*alen, (*sa)->sa_len);
 1547         *alen = len;
 1548 #ifdef KTRACE
 1549         if (KTRPOINT(td, KTR_STRUCT))
 1550                 ktrsockaddr(*sa);
 1551 #endif
 1552 bad:
 1553         fdrop(fp, td);
 1554         if (error && *sa) {
 1555                 free(*sa, M_SONAME);
 1556                 *sa = NULL;
 1557         }
 1558         return (error);
 1559 }
 1560 
 1561 int
 1562 sys_getsockname(td, uap)
 1563         struct thread *td;
 1564         struct getsockname_args *uap;
 1565 {
 1566 
 1567         return (getsockname1(td, uap, 0));
 1568 }
 1569 
 1570 #ifdef COMPAT_OLDSOCK
 1571 int
 1572 ogetsockname(td, uap)
 1573         struct thread *td;
 1574         struct getsockname_args *uap;
 1575 {
 1576 
 1577         return (getsockname1(td, uap, 1));
 1578 }
 1579 #endif /* COMPAT_OLDSOCK */
 1580 
 1581 /*
 1582  * getpeername1() - Get name of peer for connected socket.
 1583  */
 1584 /* ARGSUSED */
 1585 static int
 1586 getpeername1(td, uap, compat)
 1587         struct thread *td;
 1588         struct getpeername_args /* {
 1589                 int     fdes;
 1590                 struct sockaddr * __restrict    asa;
 1591                 socklen_t * __restrict  alen;
 1592         } */ *uap;
 1593         int compat;
 1594 {
 1595         struct sockaddr *sa;
 1596         socklen_t len;
 1597         int error;
 1598 
 1599         error = copyin(uap->alen, &len, sizeof (len));
 1600         if (error)
 1601                 return (error);
 1602 
 1603         error = kern_getpeername(td, uap->fdes, &sa, &len);
 1604         if (error)
 1605                 return (error);
 1606 
 1607         if (len != 0) {
 1608 #ifdef COMPAT_OLDSOCK
 1609                 if (compat)
 1610                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1611 #endif
 1612                 error = copyout(sa, uap->asa, (u_int)len);
 1613         }
 1614         free(sa, M_SONAME);
 1615         if (error == 0)
 1616                 error = copyout(&len, uap->alen, sizeof(len));
 1617         return (error);
 1618 }
 1619 
 1620 int
 1621 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
 1622     socklen_t *alen)
 1623 {
 1624         struct socket *so;
 1625         struct file *fp;
 1626         socklen_t len;
 1627         int error;
 1628 
 1629         if (*alen < 0)
 1630                 return (EINVAL);
 1631 
 1632         AUDIT_ARG_FD(fd);
 1633         error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL);
 1634         if (error)
 1635                 return (error);
 1636         so = fp->f_data;
 1637         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 1638                 error = ENOTCONN;
 1639                 goto done;
 1640         }
 1641         *sa = NULL;
 1642         CURVNET_SET(so->so_vnet);
 1643         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
 1644         CURVNET_RESTORE();
 1645         if (error)
 1646                 goto bad;
 1647         if (*sa == NULL)
 1648                 len = 0;
 1649         else
 1650                 len = MIN(*alen, (*sa)->sa_len);
 1651         *alen = len;
 1652 #ifdef KTRACE
 1653         if (KTRPOINT(td, KTR_STRUCT))
 1654                 ktrsockaddr(*sa);
 1655 #endif
 1656 bad:
 1657         if (error && *sa) {
 1658                 free(*sa, M_SONAME);
 1659                 *sa = NULL;
 1660         }
 1661 done:
 1662         fdrop(fp, td);
 1663         return (error);
 1664 }
 1665 
 1666 int
 1667 sys_getpeername(td, uap)
 1668         struct thread *td;
 1669         struct getpeername_args *uap;
 1670 {
 1671 
 1672         return (getpeername1(td, uap, 0));
 1673 }
 1674 
 1675 #ifdef COMPAT_OLDSOCK
 1676 int
 1677 ogetpeername(td, uap)
 1678         struct thread *td;
 1679         struct ogetpeername_args *uap;
 1680 {
 1681 
 1682         /* XXX uap should have type `getpeername_args *' to begin with. */
 1683         return (getpeername1(td, (struct getpeername_args *)uap, 1));
 1684 }
 1685 #endif /* COMPAT_OLDSOCK */
 1686 
 1687 int
 1688 sockargs(mp, buf, buflen, type)
 1689         struct mbuf **mp;
 1690         caddr_t buf;
 1691         int buflen, type;
 1692 {
 1693         struct sockaddr *sa;
 1694         struct mbuf *m;
 1695         int error;
 1696 
 1697         if ((u_int)buflen > MLEN) {
 1698 #ifdef COMPAT_OLDSOCK
 1699                 if (type == MT_SONAME && (u_int)buflen <= 112)
 1700                         buflen = MLEN;          /* unix domain compat. hack */
 1701                 else
 1702 #endif
 1703                         if ((u_int)buflen > MCLBYTES)
 1704                                 return (EINVAL);
 1705         }
 1706         m = m_get(M_WAIT, type);
 1707         if ((u_int)buflen > MLEN)
 1708                 MCLGET(m, M_WAIT);
 1709         m->m_len = buflen;
 1710         error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 1711         if (error)
 1712                 (void) m_free(m);
 1713         else {
 1714                 *mp = m;
 1715                 if (type == MT_SONAME) {
 1716                         sa = mtod(m, struct sockaddr *);
 1717 
 1718 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1719                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1720                                 sa->sa_family = sa->sa_len;
 1721 #endif
 1722                         sa->sa_len = buflen;
 1723                 }
 1724         }
 1725         return (error);
 1726 }
 1727 
 1728 int
 1729 getsockaddr(namp, uaddr, len)
 1730         struct sockaddr **namp;
 1731         caddr_t uaddr;
 1732         size_t len;
 1733 {
 1734         struct sockaddr *sa;
 1735         int error;
 1736 
 1737         if (len > SOCK_MAXADDRLEN)
 1738                 return (ENAMETOOLONG);
 1739         if (len < offsetof(struct sockaddr, sa_data[0]))
 1740                 return (EINVAL);
 1741         sa = malloc(len, M_SONAME, M_WAITOK);
 1742         error = copyin(uaddr, sa, len);
 1743         if (error) {
 1744                 free(sa, M_SONAME);
 1745         } else {
 1746 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1747                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1748                         sa->sa_family = sa->sa_len;
 1749 #endif
 1750                 sa->sa_len = len;
 1751                 *namp = sa;
 1752         }
 1753         return (error);
 1754 }
 1755 
 1756 #include <sys/condvar.h>
 1757 
 1758 struct sendfile_sync {
 1759         struct mtx      mtx;
 1760         struct cv       cv;
 1761         unsigned        count;
 1762 };
 1763 
 1764 /*
 1765  * Detach mapped page and release resources back to the system.
 1766  */
 1767 void
 1768 sf_buf_mext(void *addr, void *args)
 1769 {
 1770         vm_page_t m;
 1771         struct sendfile_sync *sfs;
 1772 
 1773         m = sf_buf_page(args);
 1774         sf_buf_free(args);
 1775         vm_page_lock(m);
 1776         vm_page_unwire(m, 0);
 1777         /*
 1778          * Check for the object going away on us. This can
 1779          * happen since we don't hold a reference to it.
 1780          * If so, we're responsible for freeing the page.
 1781          */
 1782         if (m->wire_count == 0 && m->object == NULL)
 1783                 vm_page_free(m);
 1784         vm_page_unlock(m);
 1785         if (addr == NULL)
 1786                 return;
 1787         sfs = addr;
 1788         mtx_lock(&sfs->mtx);
 1789         KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
 1790         if (--sfs->count == 0)
 1791                 cv_signal(&sfs->cv);
 1792         mtx_unlock(&sfs->mtx);
 1793 }
 1794 
 1795 /*
 1796  * sendfile(2)
 1797  *
 1798  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 1799  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 1800  *
 1801  * Send a file specified by 'fd' and starting at 'offset' to a socket
 1802  * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
 1803  * 0.  Optionally add a header and/or trailer to the socket output.  If
 1804  * specified, write the total number of bytes sent into *sbytes.
 1805  */
 1806 int
 1807 sys_sendfile(struct thread *td, struct sendfile_args *uap)
 1808 {
 1809 
 1810         return (do_sendfile(td, uap, 0));
 1811 }
 1812 
 1813 static int
 1814 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
 1815 {
 1816         struct sf_hdtr hdtr;
 1817         struct uio *hdr_uio, *trl_uio;
 1818         int error;
 1819 
 1820         hdr_uio = trl_uio = NULL;
 1821 
 1822         if (uap->hdtr != NULL) {
 1823                 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 1824                 if (error)
 1825                         goto out;
 1826                 if (hdtr.headers != NULL) {
 1827                         error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
 1828                         if (error)
 1829                                 goto out;
 1830                 }
 1831                 if (hdtr.trailers != NULL) {
 1832                         error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
 1833                         if (error)
 1834                                 goto out;
 1835 
 1836                 }
 1837         }
 1838 
 1839         error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
 1840 out:
 1841         if (hdr_uio)
 1842                 free(hdr_uio, M_IOV);
 1843         if (trl_uio)
 1844                 free(trl_uio, M_IOV);
 1845         return (error);
 1846 }
 1847 
 1848 #ifdef COMPAT_FREEBSD4
 1849 int
 1850 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
 1851 {
 1852         struct sendfile_args args;
 1853 
 1854         args.fd = uap->fd;
 1855         args.s = uap->s;
 1856         args.offset = uap->offset;
 1857         args.nbytes = uap->nbytes;
 1858         args.hdtr = uap->hdtr;
 1859         args.sbytes = uap->sbytes;
 1860         args.flags = uap->flags;
 1861 
 1862         return (do_sendfile(td, &args, 1));
 1863 }
 1864 #endif /* COMPAT_FREEBSD4 */
 1865 
 1866 int
 1867 kern_sendfile(struct thread *td, struct sendfile_args *uap,
 1868     struct uio *hdr_uio, struct uio *trl_uio, int compat)
 1869 {
 1870         struct file *sock_fp;
 1871         struct vnode *vp;
 1872         struct vm_object *obj = NULL;
 1873         struct socket *so = NULL;
 1874         struct mbuf *m = NULL;
 1875         struct sf_buf *sf;
 1876         struct vm_page *pg;
 1877         struct vattr va;
 1878         off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
 1879         int error, hdrlen = 0, mnw = 0;
 1880         int vfslocked;
 1881         int bsize;
 1882         struct sendfile_sync *sfs = NULL;
 1883 
              /*
               * Kernel implementation of sendfile(2).
               *
               * td      - calling thread.
               * uap     - system call arguments; fd, s, offset, nbytes, sbytes
               *           and flags are read, and nbytes may be adjusted in
               *           place when 'compat' is set.
               * hdr_uio - header data to send ahead of the file data, or NULL.
               * trl_uio - trailer data written after the file data, or NULL.
               * compat  - non-zero when nbytes also counts the header bytes
               *           (see the header handling below).
               *
               * Returns 0 or an errno value; ERESTART is reported as EINTR.
               * When uap->sbytes is non-NULL the running byte count is copied
               * out to it on every exit path, including failures.  Neither
               * uio is freed in this function.
               */
 1884         /*
 1885          * The file descriptor must be a regular file and have a
 1886          * backing VM object.
 1887          * File offset must be positive.  If it goes beyond EOF
 1888          * we send only the header/trailer and no payload data.
 1889          */
 1890         AUDIT_ARG_FD(uap->fd);
 1891         if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0)
 1892                 goto out;
              /* Hold the vnode lock shared while its type, size and object are read. */
 1893         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1894         vn_lock(vp, LK_SHARED | LK_RETRY);
 1895         if (vp->v_type == VREG) {
 1896                 bsize = vp->v_mount->mnt_stat.f_iosize;
 1897                 if (uap->nbytes == 0) {
 1898                         error = VOP_GETATTR(vp, &va, td->td_ucred);
 1899                         if (error != 0) {
 1900                                 VOP_UNLOCK(vp, 0);
 1901                                 VFS_UNLOCK_GIANT(vfslocked);
 1902                                 obj = NULL;
 1903                                 goto out;
 1904                         }
 1905                         rem = va.va_size;
 1906                 } else
 1907                         rem = uap->nbytes;
 1908                 obj = vp->v_object;
 1909                 if (obj != NULL) {
 1910                         /*
 1911                          * Temporarily increase the backing VM
 1912                          * object's reference count so that a forced
 1913                          * reclamation of its vnode does not
 1914                          * immediately destroy it.
 1915                          */
 1916                         VM_OBJECT_LOCK(obj);
 1917                         if ((obj->flags & OBJ_DEAD) == 0) {
 1918                                 vm_object_reference_locked(obj);
 1919                                 VM_OBJECT_UNLOCK(obj);
 1920                         } else {
 1921                                 VM_OBJECT_UNLOCK(obj);
 1922                                 obj = NULL;
 1923                         }
 1924                 }
 1925         } else
 1926                 bsize = 0;      /* silence gcc */
 1927         VOP_UNLOCK(vp, 0);
 1928         VFS_UNLOCK_GIANT(vfslocked);
              /*
               * obj is still NULL here if the vnode is not a regular file, has
               * no VM object, or its object was already marked OBJ_DEAD.
               */
 1929         if (obj == NULL) {
 1930                 error = EINVAL;
 1931                 goto out;
 1932         }
 1933         if (uap->offset < 0) {
 1934                 error = EINVAL;
 1935                 goto out;
 1936         }
 1937 
 1938         /*
 1939          * The socket must be a stream socket and connected.
 1940          * Remember if it is a blocking or non-blocking socket.
 1941          */
 1942         if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE,
 1943             &sock_fp, NULL)) != 0)
 1944                 goto out;
 1945         so = sock_fp->f_data;
 1946         if (so->so_type != SOCK_STREAM) {
 1947                 error = EINVAL;
 1948                 goto out;
 1949         }
 1950         if ((so->so_state & SS_ISCONNECTED) == 0) {
 1951                 error = ENOTCONN;
 1952                 goto out;
 1953         }
 1954         /*
 1955          * Do not wait on memory allocations but return ENOMEM for
 1956          * caller to retry later.
 1957          * XXX: Experimental.
 1958          */
 1959         if (uap->flags & SF_MNOWAIT)
 1960                 mnw = 1;
 1961 
              /*
               * With SF_SYNC, allocate a record whose count is incremented for
               * every sf_buf-backed mbuf queued below; the exit path sleeps on
               * its condition variable until the count is zero.
               * NOTE(review): the decrement is presumably done by sf_buf_mext(),
               * which receives sfs through MEXTADD -- confirm.
               */
 1962         if (uap->flags & SF_SYNC) {
 1963                 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
 1964                 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
 1965                 cv_init(&sfs->cv, "sendfile");
 1966         }
 1967 
 1968 #ifdef MAC
 1969         error = mac_socket_check_send(td->td_ucred, so);
 1970         if (error)
 1971                 goto out;
 1972 #endif
 1973 
 1974         /* If headers are specified copy them into mbufs. */
 1975         if (hdr_uio != NULL) {
 1976                 hdr_uio->uio_td = td;
 1977                 hdr_uio->uio_rw = UIO_WRITE;
 1978                 if (hdr_uio->uio_resid > 0) {
 1979                         /*
 1980                          * In FBSD < 5.0 the nbytes to send also included
 1981                          * the header.  If compat is specified subtract the
 1982                          * header size from nbytes.
 1983                          */
 1984                         if (compat) {
 1985                                 if (uap->nbytes > hdr_uio->uio_resid)
 1986                                         uap->nbytes -= hdr_uio->uio_resid;
 1987                                 else
 1988                                         uap->nbytes = 0;
 1989                         }
 1990                         m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
 1991                             0, 0, 0);
 1992                         if (m == NULL) {
 1993                                 error = mnw ? EAGAIN : ENOBUFS;
 1994                                 goto out;
 1995                         }
 1996                         hdrlen = m_length(m, NULL);
 1997                 }
 1998         }
 1999 
 2000         /*
 2001          * Protect against multiple writers to the socket.
 2002          *
 2003          * XXXRW: Historically this has assumed non-interruptibility, so now
 2004          * we implement that, but possibly shouldn't.
 2005          */
 2006         (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
 2007 
 2008         /*
 2009          * Loop through the pages of the file, starting with the requested
 2010          * offset. Get a file page (do I/O if necessary), map the file page
 2011          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 2012          * it on the socket.
 2013          * This is done in two loops.  The inner loop turns as many pages
 2014          * as it can, up to available socket buffer space, without blocking
 2015          * into mbufs to have it bulk delivered into the socket send buffer.
 2016          * The outer loop checks the state and available space of the socket
 2017          * and takes care of the overall progress.
 2018          */
 2019         for (off = uap->offset; ; ) {
 2020                 struct mbuf *mtail;
 2021                 int loopbytes;
 2022                 int space;
 2023                 int done;
 2024 
                      /* Stop once the requested amount of file data has been sent. */
 2025                 if ((uap->nbytes != 0 && uap->nbytes == fsbytes) ||
 2026                     (uap->nbytes == 0 && va.va_size == fsbytes))
 2027                         break;
 2028 
 2029                 mtail = NULL;
 2030                 loopbytes = 0;
 2031                 space = 0;
 2032                 done = 0;
 2033 
 2034                 /*
 2035                  * Check the socket state for ongoing connection,
 2036                  * no errors and space in socket buffer.
 2037                  * If space is low allow for the remainder of the
 2038                  * file to be processed if it fits the socket buffer.
 2039                  * Otherwise block in waiting for sufficient space
 2040                  * to proceed, or if the socket is nonblocking, return
 2041                  * to userland with EAGAIN while reporting how far
 2042                  * we've come.
 2043                  * We wait until the socket buffer has significant free
 2044                  * space to do bulk sends.  This makes good use of file
 2045                  * system read ahead and allows packet segmentation
 2046                  * offloading hardware to take over lots of work.  If
 2047                  * we were not careful here we would send off only one
 2048                  * sfbuf at a time.
 2049                  */
 2050                 SOCKBUF_LOCK(&so->so_snd);
 2051                 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
 2052                         so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
 2053 retry_space:
 2054                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 2055                         error = EPIPE;
 2056                         SOCKBUF_UNLOCK(&so->so_snd);
 2057                         goto done;
 2058                 } else if (so->so_error) {
 2059                         error = so->so_error;
 2060                         so->so_error = 0;
 2061                         SOCKBUF_UNLOCK(&so->so_snd);
 2062                         goto done;
 2063                 }
 2064                 space = sbspace(&so->so_snd);
 2065                 if (space < rem &&
 2066                     (space <= 0 ||
 2067                      space < so->so_snd.sb_lowat)) {
 2068                         if (so->so_state & SS_NBIO) {
 2069                                 SOCKBUF_UNLOCK(&so->so_snd);
 2070                                 error = EAGAIN;
 2071                                 goto done;
 2072                         }
 2073                         /*
 2074                          * sbwait drops the lock while sleeping.
 2075                          * When we loop back to retry_space the
 2076                          * state may have changed and we retest
 2077                          * for it.
 2078                          */
 2079                         error = sbwait(&so->so_snd);
 2080                         /*
 2081                          * An error from sbwait usually indicates that we've
 2082                          * been interrupted by a signal. If we've sent anything
 2083                          * then return bytes sent, otherwise return the error.
 2084                          */
 2085                         if (error) {
 2086                                 SOCKBUF_UNLOCK(&so->so_snd);
 2087                                 goto done;
 2088                         }
 2089                         goto retry_space;
 2090                 }
 2091                 SOCKBUF_UNLOCK(&so->so_snd);
 2092 
 2093                 /*
 2094                  * Reduce space in the socket buffer by the size of
 2095                  * the header mbuf chain.
 2096                  * hdrlen is set to 0 after the first loop.
 2097                  */
 2098                 space -= hdrlen;
 2099 
                      /*
                       * Unlike the initial lock above, this vn_lock() omits LK_RETRY,
                       * so a failure is possible and ends the transfer.  The
                       * attributes are re-read on every pass so that the current
                       * file size bounds the inner loop.
                       */
 2100                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2101                 error = vn_lock(vp, LK_SHARED);
 2102                 if (error != 0) {
 2103                         VFS_UNLOCK_GIANT(vfslocked);
 2104                         goto done;
 2105                 }
 2106                 error = VOP_GETATTR(vp, &va, td->td_ucred);
 2107                 if (error != 0 || off >= va.va_size) {
 2108                         VOP_UNLOCK(vp, 0);
 2109                         VFS_UNLOCK_GIANT(vfslocked);
 2110                         goto done;
 2111                 }
                      /* Giant is dropped here; the vnode lock is kept across the inner loop. */
 2112                 VFS_UNLOCK_GIANT(vfslocked);
 2113 
 2114                 /*
 2115                  * Loop and construct maximum sized mbuf chain to be bulk
 2116                  * dumped into socket buffer.
 2117                  */
 2118                 while (space > loopbytes) {
 2119                         vm_pindex_t pindex;
 2120                         vm_offset_t pgoff;
 2121                         struct mbuf *m0;
 2122 
 2123                         /*
 2124                          * Calculate the amount to transfer.
 2125                          * Not to exceed a page, the EOF,
 2126                          * or the passed in nbytes.
 2127                          */
 2128                         pgoff = (vm_offset_t)(off & PAGE_MASK);
 2129                         rem = va.va_size - uap->offset;
 2130                         if (uap->nbytes != 0)
 2131                                 rem = omin(rem, uap->nbytes);
 2132                         rem -= fsbytes + loopbytes;
 2133                         xfsize = omin(PAGE_SIZE - pgoff, rem);
 2134                         xfsize = omin(space - loopbytes, xfsize);
 2135                         if (xfsize <= 0) {
 2136                                 done = 1;               /* all data sent */
 2137                                 break;
 2138                         }
 2139 
 2140                         /*
 2141                          * Attempt to look up the page.  Allocate
 2142                          * if not found or wait and loop if busy.
 2143                          */
 2144                         pindex = OFF_TO_IDX(off);
 2145                         VM_OBJECT_LOCK(obj);
 2146                         pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
 2147                             VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
 2148 
 2149                         /*
 2150                          * Check if page is valid for what we need,
 2151                          * otherwise initiate I/O.
 2152                          * If we already turned some pages into mbufs,
 2153                          * send them off before we come here again and
 2154                          * block.
 2155                          */
                              /*
                               * Locking note: the EAGAIN and EBUSY branches leave
                               * the object lock held, and the read branch retakes
                               * it on failure, so the error block below always
                               * runs with the object locked and releases it.
                               */
 2156                         if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
 2157                                 VM_OBJECT_UNLOCK(obj);
 2158                         else if (m != NULL)
 2159                                 error = EAGAIN; /* send what we already got */
 2160                         else if (uap->flags & SF_NODISKIO)
 2161                                 error = EBUSY;
 2162                         else {
 2163                                 ssize_t resid;
 2164 
 2165                                 VM_OBJECT_UNLOCK(obj);
 2166 
 2167                                 /*
 2168                                  * Get the page from backing store.
 2169                                  * XXXMAC: Because we don't have fp->f_cred
 2170                                  * here, we pass in NOCRED.  This is probably
 2171                                  * wrong, but is consistent with our original
 2172                                  * implementation.
 2173                                  */
 2174                                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2175                                 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
 2176                                     trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
 2177                                     IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 2178                                     td->td_ucred, NOCRED, &resid, td);
 2179                                 VFS_UNLOCK_GIANT(vfslocked);
 2180                                 if (error)
 2181                                         VM_OBJECT_LOCK(obj);
 2182                                 mbstat.sf_iocnt++;
 2183                         }
 2184                         if (error) {
 2185                                 vm_page_lock(pg);
 2186                                 vm_page_unwire(pg, 0);
 2187                                 /*
 2188                                  * See if anyone else might know about
 2189                                  * this page.  If not and it is not valid,
 2190                                  * then free it.
 2191                                  */
 2192                                 if (pg->wire_count == 0 && pg->valid == 0 &&
 2193                                     pg->busy == 0 && !(pg->oflags & VPO_BUSY))
 2194                                         vm_page_free(pg);
 2195                                 vm_page_unlock(pg);
 2196                                 VM_OBJECT_UNLOCK(obj);
 2197                                 if (error == EAGAIN)
 2198                                         error = 0;      /* not a real error */
 2199                                 break;
 2200                         }
 2201 
 2202                         /*
 2203                          * Get a sendfile buf.  When allocating the
 2204                          * first buffer for mbuf chain, we usually
 2205                          * wait as long as necessary, but this wait
 2206                          * can be interrupted.  For subsequent
 2207                          * buffers, do not sleep, since several
 2208                          * threads might exhaust the buffers and then
 2209                          * deadlock.
 2210                          */
 2211                         sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
 2212                             SFB_CATCH);
 2213                         if (sf == NULL) {
 2214                                 mbstat.sf_allocfail++;
 2215                                 vm_page_lock(pg);
 2216                                 vm_page_unwire(pg, 0);
 2217                                 KASSERT(pg->object != NULL,
 2218                                     ("kern_sendfile: object disappeared"));
 2219                                 vm_page_unlock(pg);
                                      /* With a partial chain pending, send it instead of failing. */
 2220                                 if (m == NULL)
 2221                                         error = (mnw ? EAGAIN : EINTR);
 2222                                 break;
 2223                         }
 2224 
 2225                         /*
 2226                          * Get an mbuf and set it up as having
 2227                          * external storage.
 2228                          */
 2229                         m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
 2230                         if (m0 == NULL) {
 2231                                 error = (mnw ? EAGAIN : ENOBUFS);
                                      /*
                                       * Hand the sf_buf back through the external
                                       * free routine; presumably this also drops the
                                       * page wiring -- TODO confirm.
                                       */
 2232                                 sf_buf_mext(NULL, sf);
 2233                                 break;
 2234                         }
 2235                         MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
 2236                             sfs, sf, M_RDONLY, EXT_SFBUF);
 2237                         m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
 2238                         m0->m_len = xfsize;
 2239 
 2240                         /* Append to mbuf chain. */
 2241                         if (mtail != NULL)
 2242                                 mtail->m_next = m0;
 2243                         else if (m != NULL)
 2244                                 m_last(m)->m_next = m0;
 2245                         else
 2246                                 m = m0;
 2247                         mtail = m0;
 2248 
 2249                         /* Keep track of bits processed. */
 2250                         loopbytes += xfsize;
 2251                         off += xfsize;
 2252 
 2253                         if (sfs != NULL) {
 2254                                 mtx_lock(&sfs->mtx);
 2255                                 sfs->count++;
 2256                                 mtx_unlock(&sfs->mtx);
 2257                         }
 2258                 }
 2259 
 2260                 VOP_UNLOCK(vp, 0);
 2261 
 2262                 /* Add the buffer chain to the socket buffer. */
 2263                 if (m != NULL) {
 2264                         int mlen, err;
 2265 
 2266                         mlen = m_length(m, NULL);
 2267                         SOCKBUF_LOCK(&so->so_snd);
 2268                         if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 2269                                 error = EPIPE;
 2270                                 SOCKBUF_UNLOCK(&so->so_snd);
 2271                                 goto done;
 2272                         }
 2273                         SOCKBUF_UNLOCK(&so->so_snd);
 2274                         CURVNET_SET(so->so_vnet);
 2275                         /* Avoid error aliasing. */
 2276                         err = (*so->so_proto->pr_usrreqs->pru_send)
 2277                                     (so, 0, m, NULL, NULL, td);
 2278                         CURVNET_RESTORE();
 2279                         if (err == 0) {
 2280                                 /*
 2281                                  * We need two counters to get the
 2282                                  * file offset and nbytes to send
 2283                                  * right:
 2284                                  * - sbytes contains the total amount
 2285                                  *   of bytes sent, including headers.
 2286                                  * - fsbytes contains the total amount
 2287                                  *   of bytes sent from the file.
 2288                                  */
 2289                                 sbytes += mlen;
 2290                                 fsbytes += mlen;
 2291                                 if (hdrlen) {
 2292                                         fsbytes -= hdrlen;
 2293                                         hdrlen = 0;
 2294                                 }
                              /* An error from the inner loop takes precedence over err. */
 2295                         } else if (error == 0)
 2296                                 error = err;
 2297                         m = NULL;       /* pru_send always consumes */
 2298                 }
 2299 
 2300                 /* Quit outer loop on error or when we're done. */
 2301                 if (done) 
 2302                         break;
 2303                 if (error)
 2304                         goto done;
 2305         }
 2306 
 2307         /*
 2308          * Send trailers. Wimp out and use writev(2).
 2309          */
 2310         if (trl_uio != NULL) {
                      /*
                       * The send buffer lock is released before kern_writev(), and
                       * the jump to 'out' skips the second sbunlock() at 'done'.
                       */
 2311                 sbunlock(&so->so_snd);
 2312                 error = kern_writev(td, uap->s, trl_uio);
 2313                 if (error == 0)
 2314                         sbytes += td->td_retval[0];
 2315                 goto out;
 2316         }
 2317 
 2318 done:
 2319         sbunlock(&so->so_snd);
 2320 out:
 2321         /*
 2322          * If there was no error we have to clear td->td_retval[0]
 2323          * because it may have been set by writev.
 2324          */
 2325         if (error == 0) {
 2326                 td->td_retval[0] = 0;
 2327         }
              /* NOTE(review): the copyout() result is ignored, so a bad sbytes pointer is not reported. */
 2328         if (uap->sbytes != NULL) {
 2329                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
 2330         }
 2331         if (obj != NULL)
 2332                 vm_object_deallocate(obj);
 2333         if (vp != NULL) {
 2334                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2335                 vrele(vp);
 2336                 VFS_UNLOCK_GIANT(vfslocked);
 2337         }
 2338         if (so)
 2339                 fdrop(sock_fp, td);
              /* Free any chain that was built but never handed to pru_send. */
 2340         if (m)
 2341                 m_freem(m);
 2342 
              /* SF_SYNC: sleep until the outstanding count is zero, then tear down. */
 2343         if (sfs != NULL) {
 2344                 mtx_lock(&sfs->mtx);
 2345                 if (sfs->count != 0)
 2346                         cv_wait(&sfs->cv, &sfs->mtx);
 2347                 KASSERT(sfs->count == 0, ("sendfile sync still busy"));
 2348                 cv_destroy(&sfs->cv);
 2349                 mtx_destroy(&sfs->mtx);
 2350                 free(sfs, M_TEMP);
 2351         }
 2352 
 2353         if (error == ERESTART)
 2354                 error = EINTR;
 2355 
 2356         return (error);
 2357 }
 2358 
 2359 /*
 2360  * SCTP syscalls.
 2361  * Functionality only compiled in if SCTP is defined in the kernel Makefile,
 2362  * otherwise all return EOPNOTSUPP.
 2363  * XXX: We should make this loadable one day.
 2364  */
 2365 int
 2366 sys_sctp_peeloff(td, uap)
 2367         struct thread *td;
 2368         struct sctp_peeloff_args /* {
 2369                 int     sd;
 2370                 caddr_t name;
 2371         } */ *uap;
 2372 {
 2373 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2374         struct filedesc *fdp;
 2375         struct file *nfp = NULL;
 2376         int error;
 2377         struct socket *head, *so;
 2378         int fd;
 2379         u_int fflag;
 2380 
 2381         fdp = td->td_proc->p_fd;
 2382         AUDIT_ARG_FD(uap->sd);
 2383         error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag);
 2384         if (error)
 2385                 goto done2;
 2386         if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
 2387                 error = EOPNOTSUPP;
 2388                 goto done;
 2389         }
 2390         error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
 2391         if (error)
 2392                 goto done;
 2393         /*
 2394          * At this point we know we do have a assoc to pull
 2395          * we proceed to get the fd setup. This may block
 2396          * but that is ok.
 2397          */
 2398 
 2399         error = falloc(td, &nfp, &fd, 0);
 2400         if (error)
 2401                 goto done;
 2402         td->td_retval[0] = fd;
 2403 
 2404         CURVNET_SET(head->so_vnet);
 2405         so = sonewconn(head, SS_ISCONNECTED);
 2406         if (so == NULL) {
 2407                 error = ENOMEM;
 2408                 goto noconnection;
 2409         }
 2410         /*
 2411          * Before changing the flags on the socket, we have to bump the
 2412          * reference count.  Otherwise, if the protocol calls sofree(),
 2413          * the socket will be released due to a zero refcount.
 2414          */
 2415         SOCK_LOCK(so);
 2416         soref(so);                      /* file descriptor reference */
 2417         SOCK_UNLOCK(so);
 2418 
 2419         ACCEPT_LOCK();
 2420 
 2421         TAILQ_REMOVE(&head->so_comp, so, so_list);
 2422         head->so_qlen--;
 2423         so->so_state |= (head->so_state & SS_NBIO);
 2424         so->so_state &= ~SS_NOFDREF;
 2425         so->so_qstate &= ~SQ_COMP;
 2426         so->so_head = NULL;
 2427         ACCEPT_UNLOCK();
 2428         finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 2429         error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
 2430         if (error)
 2431                 goto noconnection;
 2432         if (head->so_sigio != NULL)
 2433                 fsetown(fgetown(&head->so_sigio), &so->so_sigio);
 2434 
 2435 noconnection:
 2436         /*
 2437          * close the new descriptor, assuming someone hasn't ripped it
 2438          * out from under us.
 2439          */
 2440         if (error)
 2441                 fdclose(fdp, nfp, fd, td);
 2442 
 2443         /*
 2444          * Release explicitly held references before returning.
 2445          */
 2446         CURVNET_RESTORE();
 2447 done:
 2448         if (nfp != NULL)
 2449                 fdrop(nfp, td);
 2450         fputsock(head);
 2451 done2:
 2452         return (error);
 2453 #else  /* SCTP */
 2454         return (EOPNOTSUPP);
 2455 #endif /* SCTP */
 2456 }
 2457 
 2458 int
 2459 sys_sctp_generic_sendmsg (td, uap)
 2460         struct thread *td;
 2461         struct sctp_generic_sendmsg_args /* {
 2462                 int sd, 
 2463                 caddr_t msg, 
 2464                 int mlen, 
 2465                 caddr_t to, 
 2466                 __socklen_t tolen, 
 2467                 struct sctp_sndrcvinfo *sinfo, 
 2468                 int flags
 2469         } */ *uap;
 2470 {
 2471 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2472         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2473         struct socket *so;
 2474         struct file *fp = NULL;
 2475         int error = 0, len;
 2476         struct sockaddr *to = NULL;
 2477 #ifdef KTRACE
 2478         struct uio *ktruio = NULL;
 2479 #endif
 2480         struct uio auio;
 2481         struct iovec iov[1];
 2482         cap_rights_t rights;
 2483 
 2484         if (uap->sinfo) {
 2485                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2486                 if (error)
 2487                         return (error);
 2488                 u_sinfo = &sinfo;
 2489         }
 2490 
 2491         rights = CAP_WRITE;
 2492         if (uap->tolen) {
 2493                 error = getsockaddr(&to, uap->to, uap->tolen);
 2494                 if (error) {
 2495                         to = NULL;
 2496                         goto sctp_bad2;
 2497                 }
 2498                 rights |= CAP_CONNECT;
 2499         }
 2500 
 2501         AUDIT_ARG_FD(uap->sd);
 2502         error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
 2503         if (error)
 2504                 goto sctp_bad;
 2505 #ifdef KTRACE
 2506         if (to && (KTRPOINT(td, KTR_STRUCT)))
 2507                 ktrsockaddr(to);
 2508 #endif
 2509 
 2510         iov[0].iov_base = uap->msg;
 2511         iov[0].iov_len = uap->mlen;
 2512 
 2513         so = (struct socket *)fp->f_data;
 2514         if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
 2515                 error = EOPNOTSUPP;
 2516                 goto sctp_bad;
 2517         }
 2518 #ifdef MAC
 2519         error = mac_socket_check_send(td->td_ucred, so);
 2520         if (error)
 2521                 goto sctp_bad;
 2522 #endif /* MAC */
 2523 
 2524         auio.uio_iov =  iov;
 2525         auio.uio_iovcnt = 1;
 2526         auio.uio_segflg = UIO_USERSPACE;
 2527         auio.uio_rw = UIO_WRITE;
 2528         auio.uio_td = td;
 2529         auio.uio_offset = 0;                    /* XXX */
 2530         auio.uio_resid = 0;
 2531         len = auio.uio_resid = uap->mlen;
 2532         CURVNET_SET(so->so_vnet);
 2533         error = sctp_lower_sosend(so, to, &auio,
 2534                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2535                     uap->flags, u_sinfo, td);
 2536         CURVNET_RESTORE();
 2537         if (error) {
 2538                 if (auio.uio_resid != len && (error == ERESTART ||
 2539                     error == EINTR || error == EWOULDBLOCK))
 2540                         error = 0;
 2541                 /* Generation of SIGPIPE can be controlled per socket. */
 2542                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2543                     !(uap->flags & MSG_NOSIGNAL)) {
 2544                         PROC_LOCK(td->td_proc);
 2545                         tdsignal(td, SIGPIPE);
 2546                         PROC_UNLOCK(td->td_proc);
 2547                 }
 2548         }
 2549         if (error == 0)
 2550                 td->td_retval[0] = len - auio.uio_resid;
 2551 #ifdef KTRACE
 2552         if (ktruio != NULL) {
 2553                 ktruio->uio_resid = td->td_retval[0];
 2554                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2555         }
 2556 #endif /* KTRACE */
 2557 sctp_bad:
 2558         if (fp)
 2559                 fdrop(fp, td);
 2560 sctp_bad2:
 2561         if (to)
 2562                 free(to, M_SONAME);
 2563         return (error);
 2564 #else  /* SCTP */
 2565         return (EOPNOTSUPP);
 2566 #endif /* SCTP */
 2567 }
 2568 
 2569 int
 2570 sys_sctp_generic_sendmsg_iov(td, uap)
 2571         struct thread *td;
 2572         struct sctp_generic_sendmsg_iov_args /* {
 2573                 int sd, 
 2574                 struct iovec *iov, 
 2575                 int iovlen, 
 2576                 caddr_t to, 
 2577                 __socklen_t tolen, 
 2578                 struct sctp_sndrcvinfo *sinfo, 
 2579                 int flags
 2580         } */ *uap;
 2581 {
 2582 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2583         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2584         struct socket *so;
 2585         struct file *fp = NULL;
 2586         int error=0, i;
 2587         ssize_t len;
 2588         struct sockaddr *to = NULL;
 2589 #ifdef KTRACE
 2590         struct uio *ktruio = NULL;
 2591 #endif
 2592         struct uio auio;
 2593         struct iovec *iov, *tiov;
 2594         cap_rights_t rights;
 2595 
 2596         if (uap->sinfo) {
 2597                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2598                 if (error)
 2599                         return (error);
 2600                 u_sinfo = &sinfo;
 2601         }
 2602         rights = CAP_WRITE;
 2603         if (uap->tolen) {
 2604                 error = getsockaddr(&to, uap->to, uap->tolen);
 2605                 if (error) {
 2606                         to = NULL;
 2607                         goto sctp_bad2;
 2608                 }
 2609                 rights |= CAP_CONNECT;
 2610         }
 2611 
 2612         AUDIT_ARG_FD(uap->sd);
 2613         error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
 2614         if (error)
 2615                 goto sctp_bad1;
 2616 
 2617 #ifdef COMPAT_FREEBSD32
 2618         if (SV_CURPROC_FLAG(SV_ILP32))
 2619                 error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 2620                     uap->iovlen, &iov, EMSGSIZE);
 2621         else
 2622 #endif
 2623                 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2624         if (error)
 2625                 goto sctp_bad1;
 2626 #ifdef KTRACE
 2627         if (to && (KTRPOINT(td, KTR_STRUCT)))
 2628                 ktrsockaddr(to);
 2629 #endif
 2630 
 2631         so = (struct socket *)fp->f_data;
 2632         if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
 2633                 error = EOPNOTSUPP;
 2634                 goto sctp_bad;
 2635         }
 2636 #ifdef MAC
 2637         error = mac_socket_check_send(td->td_ucred, so);
 2638         if (error)
 2639                 goto sctp_bad;
 2640 #endif /* MAC */
 2641 
 2642         auio.uio_iov = iov;
 2643         auio.uio_iovcnt = uap->iovlen;
 2644         auio.uio_segflg = UIO_USERSPACE;
 2645         auio.uio_rw = UIO_WRITE;
 2646         auio.uio_td = td;
 2647         auio.uio_offset = 0;                    /* XXX */
 2648         auio.uio_resid = 0;
 2649         tiov = iov;
 2650         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2651                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2652                         error = EINVAL;
 2653                         goto sctp_bad;
 2654                 }
 2655         }
 2656         len = auio.uio_resid;
 2657         CURVNET_SET(so->so_vnet);
 2658         error = sctp_lower_sosend(so, to, &auio,
 2659                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2660                     uap->flags, u_sinfo, td);
 2661         CURVNET_RESTORE();
 2662         if (error) {
 2663                 if (auio.uio_resid != len && (error == ERESTART ||
 2664                     error == EINTR || error == EWOULDBLOCK))
 2665                         error = 0;
 2666                 /* Generation of SIGPIPE can be controlled per socket */
 2667                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2668                     !(uap->flags & MSG_NOSIGNAL)) {
 2669                         PROC_LOCK(td->td_proc);
 2670                         tdsignal(td, SIGPIPE);
 2671                         PROC_UNLOCK(td->td_proc);
 2672                 }
 2673         }
 2674         if (error == 0)
 2675                 td->td_retval[0] = len - auio.uio_resid;
 2676 #ifdef KTRACE
 2677         if (ktruio != NULL) {
 2678                 ktruio->uio_resid = td->td_retval[0];
 2679                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2680         }
 2681 #endif /* KTRACE */
 2682 sctp_bad:
 2683         free(iov, M_IOV);
 2684 sctp_bad1:
 2685         if (fp)
 2686                 fdrop(fp, td);
 2687 sctp_bad2:
 2688         if (to)
 2689                 free(to, M_SONAME);
 2690         return (error);
 2691 #else  /* SCTP */
 2692         return (EOPNOTSUPP);
 2693 #endif /* SCTP */
 2694 }
 2695 
 2696 int
 2697 sys_sctp_generic_recvmsg(td, uap)
 2698         struct thread *td;
 2699         struct sctp_generic_recvmsg_args /* {
 2700                 int sd, 
 2701                 struct iovec *iov, 
 2702                 int iovlen,
 2703                 struct sockaddr *from, 
 2704                 __socklen_t *fromlenaddr,
 2705                 struct sctp_sndrcvinfo *sinfo, 
 2706                 int *msg_flags
 2707         } */ *uap;
 2708 {
 2709 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2710         uint8_t sockbufstore[256];
 2711         struct uio auio;
 2712         struct iovec *iov, *tiov;
 2713         struct sctp_sndrcvinfo sinfo;
 2714         struct socket *so;
 2715         struct file *fp = NULL;
 2716         struct sockaddr *fromsa;
 2717         int fromlen;
 2718         ssize_t len;
 2719         int i, msg_flags;
 2720         int error = 0;
 2721 #ifdef KTRACE
 2722         struct uio *ktruio = NULL;
 2723 #endif
 2724 
 2725         AUDIT_ARG_FD(uap->sd);
 2726         error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL);
 2727         if (error) {
 2728                 return (error);
 2729         }
 2730 #ifdef COMPAT_FREEBSD32
 2731         if (SV_CURPROC_FLAG(SV_ILP32))
 2732                 error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 2733                     uap->iovlen, &iov, EMSGSIZE);
 2734         else
 2735 #endif
 2736                 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2737         if (error)
 2738                 goto out1;
 2739 
 2740         so = fp->f_data;
 2741         if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
 2742                 error = EOPNOTSUPP;
 2743                 goto out;
 2744         }
 2745 #ifdef MAC
 2746         error = mac_socket_check_receive(td->td_ucred, so);
 2747         if (error) {
 2748                 goto out;
 2749         }
 2750 #endif /* MAC */
 2751 
 2752         if (uap->fromlenaddr) {
 2753                 error = copyin(uap->fromlenaddr,
 2754                     &fromlen, sizeof (fromlen));
 2755                 if (error) {
 2756                         goto out;
 2757                 }
 2758         } else {
 2759                 fromlen = 0;
 2760         }
 2761         if (uap->msg_flags) {
 2762                 error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
 2763                 if (error) {
 2764                         goto out;
 2765                 }
 2766         } else {
 2767                 msg_flags = 0;
 2768         }
 2769         auio.uio_iov = iov;
 2770         auio.uio_iovcnt = uap->iovlen;
 2771         auio.uio_segflg = UIO_USERSPACE;
 2772         auio.uio_rw = UIO_READ;
 2773         auio.uio_td = td;
 2774         auio.uio_offset = 0;                    /* XXX */
 2775         auio.uio_resid = 0;
 2776         tiov = iov;
 2777         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2778                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2779                         error = EINVAL;
 2780                         goto out;
 2781                 }
 2782         }
 2783         len = auio.uio_resid;
 2784         fromsa = (struct sockaddr *)sockbufstore;
 2785 
 2786 #ifdef KTRACE
 2787         if (KTRPOINT(td, KTR_GENIO))
 2788                 ktruio = cloneuio(&auio);
 2789 #endif /* KTRACE */
 2790         memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
 2791         CURVNET_SET(so->so_vnet);
 2792         error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
 2793                     fromsa, fromlen, &msg_flags,
 2794                     (struct sctp_sndrcvinfo *)&sinfo, 1);
 2795         CURVNET_RESTORE();
 2796         if (error) {
 2797                 if (auio.uio_resid != len && (error == ERESTART ||
 2798                     error == EINTR || error == EWOULDBLOCK))
 2799                         error = 0;
 2800         } else {
 2801                 if (uap->sinfo)
 2802                         error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
 2803         }
 2804 #ifdef KTRACE
 2805         if (ktruio != NULL) {
 2806                 ktruio->uio_resid = len - auio.uio_resid;
 2807                 ktrgenio(uap->sd, UIO_READ, ktruio, error);
 2808         }
 2809 #endif /* KTRACE */
 2810         if (error)
 2811                 goto out;
 2812         td->td_retval[0] = len - auio.uio_resid;
 2813 
 2814         if (fromlen && uap->from) {
 2815                 len = fromlen;
 2816                 if (len <= 0 || fromsa == 0)
 2817                         len = 0;
 2818                 else {
 2819                         len = MIN(len, fromsa->sa_len);
 2820                         error = copyout(fromsa, uap->from, (size_t)len);
 2821                         if (error)
 2822                                 goto out;
 2823                 }
 2824                 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
 2825                 if (error) {
 2826                         goto out;
 2827                 }
 2828         }
 2829 #ifdef KTRACE
 2830         if (KTRPOINT(td, KTR_STRUCT))
 2831                 ktrsockaddr(fromsa);
 2832 #endif
 2833         if (uap->msg_flags) {
 2834                 error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
 2835                 if (error) {
 2836                         goto out;
 2837                 }
 2838         }
 2839 out:
 2840         free(iov, M_IOV);
 2841 out1:
 2842         if (fp) 
 2843                 fdrop(fp, td);
 2844 
 2845         return (error);
 2846 #else  /* SCTP */
 2847         return (EOPNOTSUPP);
 2848 #endif /* SCTP */
 2849 }

Cache object: 026478926e8aec69d2cf25287c1699a8


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.