The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * sendfile(2) and related extensions:
    6  * Copyright (c) 1998, David Greenman. All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/9.2/sys/kern/uipc_syscalls.c 255456 2013-09-10 19:00:32Z gjb $");
   37 
   38 #include "opt_capsicum.h"
   39 #include "opt_inet.h"
   40 #include "opt_inet6.h"
   41 #include "opt_sctp.h"
   42 #include "opt_compat.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/capability.h>
   48 #include <sys/kernel.h>
   49 #include <sys/lock.h>
   50 #include <sys/mutex.h>
   51 #include <sys/sysproto.h>
   52 #include <sys/malloc.h>
   53 #include <sys/filedesc.h>
   54 #include <sys/event.h>
   55 #include <sys/proc.h>
   56 #include <sys/fcntl.h>
   57 #include <sys/file.h>
   58 #include <sys/filio.h>
   59 #include <sys/jail.h>
   60 #include <sys/mount.h>
   61 #include <sys/mbuf.h>
   62 #include <sys/protosw.h>
   63 #include <sys/sf_buf.h>
   64 #include <sys/sysent.h>
   65 #include <sys/socket.h>
   66 #include <sys/socketvar.h>
   67 #include <sys/signalvar.h>
   68 #include <sys/syscallsubr.h>
   69 #include <sys/sysctl.h>
   70 #include <sys/uio.h>
   71 #include <sys/vnode.h>
   72 #ifdef KTRACE
   73 #include <sys/ktrace.h>
   74 #endif
   75 #ifdef COMPAT_FREEBSD32
   76 #include <compat/freebsd32/freebsd32_util.h>
   77 #endif
   78 
   79 #include <net/vnet.h>
   80 
   81 #include <security/audit/audit.h>
   82 #include <security/mac/mac_framework.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_param.h>
   86 #include <vm/vm_object.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/vm_pageout.h>
   89 #include <vm/vm_kern.h>
   90 #include <vm/vm_extern.h>
   91 
   92 #if defined(INET) || defined(INET6)
   93 #ifdef SCTP
   94 #include <netinet/sctp.h>
   95 #include <netinet/sctp_peeloff.h>
   96 #endif /* SCTP */
   97 #endif /* INET || INET6 */
   98 
   99 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
  100 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
  101 
  102 static int accept1(struct thread *td, struct accept_args *uap, int compat);
  103 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
  104 static int getsockname1(struct thread *td, struct getsockname_args *uap,
  105                         int compat);
  106 static int getpeername1(struct thread *td, struct getpeername_args *uap,
  107                         int compat);
  108 
  109 /*
  110  * NSFBUFS-related variables and associated sysctls
  111  */
  112 int nsfbufs;
  113 int nsfbufspeak;
  114 int nsfbufsused;
  115 
  116 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
  117     "Maximum number of sendfile(2) sf_bufs available");
  118 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
  119     "Number of sendfile(2) sf_bufs at peak usage");
  120 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
  121     "Number of sendfile(2) sf_bufs in use");
  122 
  123 /*
  124  * Convert a user file descriptor to a kernel file entry and check that, if
  125  * it is a capability, the right rights are present. A reference on the file
  126  * entry is held upon returning.
  127  */
  128 static int
  129 getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
  130     struct file **fpp, u_int *fflagp)
  131 {
  132         struct file *fp;
  133 #ifdef CAPABILITIES
  134         struct file *fp_fromcap;
  135         int error;
  136 #endif
  137 
  138         fp = NULL;
  139         if ((fdp == NULL) || ((fp = fget_unlocked(fdp, fd)) == NULL))
  140                 return (EBADF);
  141 #ifdef CAPABILITIES
  142         /*
  143          * If the file descriptor is for a capability, test rights and use
  144          * the file descriptor referenced by the capability.
  145          */
  146         error = cap_funwrap(fp, rights, &fp_fromcap);
  147         if (error) {
  148                 fdrop(fp, curthread);
  149                 return (error);
  150         }
  151         if (fp != fp_fromcap) {
  152                 fhold(fp_fromcap);
  153                 fdrop(fp, curthread);
  154                 fp = fp_fromcap;
  155         }
  156 #endif /* CAPABILITIES */
  157         if (fp->f_type != DTYPE_SOCKET) {
  158                 fdrop(fp, curthread);
  159                 return (ENOTSOCK);
  160         }
  161         if (fflagp != NULL)
  162                 *fflagp = fp->f_flag;
  163         *fpp = fp;
  164         return (0);
  165 }
  166 
  167 /*
  168  * System call interface to the socket abstraction.
  169  */
  170 #if defined(COMPAT_43)
  171 #define COMPAT_OLDSOCK
  172 #endif
  173 
  174 int
  175 sys_socket(td, uap)
  176         struct thread *td;
  177         struct socket_args /* {
  178                 int     domain;
  179                 int     type;
  180                 int     protocol;
  181         } */ *uap;
  182 {
  183         struct filedesc *fdp;
  184         struct socket *so;
  185         struct file *fp;
  186         int fd, error;
  187 
  188         AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
  189 #ifdef MAC
  190         error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
  191             uap->protocol);
  192         if (error)
  193                 return (error);
  194 #endif
  195         fdp = td->td_proc->p_fd;
  196         error = falloc(td, &fp, &fd, 0);
  197         if (error)
  198                 return (error);
  199         /* An extra reference on `fp' has been held for us by falloc(). */
  200         error = socreate(uap->domain, &so, uap->type, uap->protocol,
  201             td->td_ucred, td);
  202         if (error) {
  203                 fdclose(fdp, fp, fd, td);
  204         } else {
  205                 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
  206                 td->td_retval[0] = fd;
  207         }
  208         fdrop(fp, td);
  209         return (error);
  210 }
  211 
  212 /* ARGSUSED */
  213 int
  214 sys_bind(td, uap)
  215         struct thread *td;
  216         struct bind_args /* {
  217                 int     s;
  218                 caddr_t name;
  219                 int     namelen;
  220         } */ *uap;
  221 {
  222         struct sockaddr *sa;
  223         int error;
  224 
  225         if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
  226                 return (error);
  227 
  228         error = kern_bind(td, uap->s, sa);
  229         free(sa, M_SONAME);
  230         return (error);
  231 }
  232 
  233 int
  234 kern_bind(td, fd, sa)
  235         struct thread *td;
  236         int fd;
  237         struct sockaddr *sa;
  238 {
  239         struct socket *so;
  240         struct file *fp;
  241         int error;
  242 
  243         AUDIT_ARG_FD(fd);
  244         error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL);
  245         if (error)
  246                 return (error);
  247         so = fp->f_data;
  248 #ifdef KTRACE
  249         if (KTRPOINT(td, KTR_STRUCT))
  250                 ktrsockaddr(sa);
  251 #endif
  252 #ifdef MAC
  253         error = mac_socket_check_bind(td->td_ucred, so, sa);
  254         if (error == 0)
  255 #endif
  256                 error = sobind(so, sa, td);
  257         fdrop(fp, td);
  258         return (error);
  259 }
  260 
  261 /* ARGSUSED */
  262 int
  263 sys_listen(td, uap)
  264         struct thread *td;
  265         struct listen_args /* {
  266                 int     s;
  267                 int     backlog;
  268         } */ *uap;
  269 {
  270         struct socket *so;
  271         struct file *fp;
  272         int error;
  273 
  274         AUDIT_ARG_FD(uap->s);
  275         error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL);
  276         if (error == 0) {
  277                 so = fp->f_data;
  278 #ifdef MAC
  279                 error = mac_socket_check_listen(td->td_ucred, so);
  280                 if (error == 0)
  281 #endif
  282                         error = solisten(so, uap->backlog, td);
  283                 fdrop(fp, td);
  284         }
  285         return(error);
  286 }
  287 
  288 /*
  289  * accept1()
  290  */
  291 static int
  292 accept1(td, uap, compat)
  293         struct thread *td;
  294         struct accept_args /* {
  295                 int     s;
  296                 struct sockaddr * __restrict name;
  297                 socklen_t       * __restrict anamelen;
  298         } */ *uap;
  299         int compat;
  300 {
  301         struct sockaddr *name;
  302         socklen_t namelen;
  303         struct file *fp;
  304         int error;
  305 
  306         if (uap->name == NULL)
  307                 return (kern_accept(td, uap->s, NULL, NULL, NULL));
  308 
  309         error = copyin(uap->anamelen, &namelen, sizeof (namelen));
  310         if (error)
  311                 return (error);
  312 
  313         error = kern_accept(td, uap->s, &name, &namelen, &fp);
  314 
  315         /*
  316          * return a namelen of zero for older code which might
  317          * ignore the return value from accept.
  318          */
  319         if (error) {
  320                 (void) copyout(&namelen,
  321                     uap->anamelen, sizeof(*uap->anamelen));
  322                 return (error);
  323         }
  324 
  325         if (error == 0 && name != NULL) {
  326 #ifdef COMPAT_OLDSOCK
  327                 if (compat)
  328                         ((struct osockaddr *)name)->sa_family =
  329                             name->sa_family;
  330 #endif
  331                 error = copyout(name, uap->name, namelen);
  332         }
  333         if (error == 0)
  334                 error = copyout(&namelen, uap->anamelen,
  335                     sizeof(namelen));
  336         if (error)
  337                 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
  338         fdrop(fp, td);
  339         free(name, M_SONAME);
  340         return (error);
  341 }
  342 
  343 int
  344 kern_accept(struct thread *td, int s, struct sockaddr **name,
  345     socklen_t *namelen, struct file **fp)
  346 {
  347         struct filedesc *fdp;
  348         struct file *headfp, *nfp = NULL;
  349         struct sockaddr *sa = NULL;
  350         int error;
  351         struct socket *head, *so;
  352         int fd;
  353         u_int fflag;
  354         pid_t pgid;
  355         int tmp;
  356 
  357         if (name) {
  358                 *name = NULL;
  359                 if (*namelen < 0)
  360                         return (EINVAL);
  361         }
  362 
  363         AUDIT_ARG_FD(s);
  364         fdp = td->td_proc->p_fd;
  365         error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag);
  366         if (error)
  367                 return (error);
  368         head = headfp->f_data;
  369         if ((head->so_options & SO_ACCEPTCONN) == 0) {
  370                 error = EINVAL;
  371                 goto done;
  372         }
  373 #ifdef MAC
  374         error = mac_socket_check_accept(td->td_ucred, head);
  375         if (error != 0)
  376                 goto done;
  377 #endif
  378         error = falloc(td, &nfp, &fd, 0);
  379         if (error)
  380                 goto done;
  381         ACCEPT_LOCK();
  382         if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
  383                 ACCEPT_UNLOCK();
  384                 error = EWOULDBLOCK;
  385                 goto noconnection;
  386         }
  387         while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
  388                 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
  389                         head->so_error = ECONNABORTED;
  390                         break;
  391                 }
  392                 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
  393                     "accept", 0);
  394                 if (error) {
  395                         ACCEPT_UNLOCK();
  396                         goto noconnection;
  397                 }
  398         }
  399         if (head->so_error) {
  400                 error = head->so_error;
  401                 head->so_error = 0;
  402                 ACCEPT_UNLOCK();
  403                 goto noconnection;
  404         }
  405         so = TAILQ_FIRST(&head->so_comp);
  406         KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
  407         KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
  408 
  409         /*
  410          * Before changing the flags on the socket, we have to bump the
  411          * reference count.  Otherwise, if the protocol calls sofree(),
  412          * the socket will be released due to a zero refcount.
  413          */
  414         SOCK_LOCK(so);                  /* soref() and so_state update */
  415         soref(so);                      /* file descriptor reference */
  416 
  417         TAILQ_REMOVE(&head->so_comp, so, so_list);
  418         head->so_qlen--;
  419         so->so_state |= (head->so_state & SS_NBIO);
  420         so->so_qstate &= ~SQ_COMP;
  421         so->so_head = NULL;
  422 
  423         SOCK_UNLOCK(so);
  424         ACCEPT_UNLOCK();
  425 
  426         /* An extra reference on `nfp' has been held for us by falloc(). */
  427         td->td_retval[0] = fd;
  428 
  429         /* connection has been removed from the listen queue */
  430         KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
  431 
  432         pgid = fgetown(&head->so_sigio);
  433         if (pgid != 0)
  434                 fsetown(pgid, &so->so_sigio);
  435 
  436         finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
  437         /* Sync socket nonblocking/async state with file flags */
  438         tmp = fflag & FNONBLOCK;
  439         (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
  440         tmp = fflag & FASYNC;
  441         (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
  442         sa = 0;
  443         error = soaccept(so, &sa);
  444         if (error) {
  445                 /*
  446                  * return a namelen of zero for older code which might
  447                  * ignore the return value from accept.
  448                  */
  449                 if (name)
  450                         *namelen = 0;
  451                 goto noconnection;
  452         }
  453         if (sa == NULL) {
  454                 if (name)
  455                         *namelen = 0;
  456                 goto done;
  457         }
  458         if (name) {
  459                 /* check sa_len before it is destroyed */
  460                 if (*namelen > sa->sa_len)
  461                         *namelen = sa->sa_len;
  462 #ifdef KTRACE
  463                 if (KTRPOINT(td, KTR_STRUCT))
  464                         ktrsockaddr(sa);
  465 #endif
  466                 *name = sa;
  467                 sa = NULL;
  468         }
  469 noconnection:
  470         if (sa)
  471                 free(sa, M_SONAME);
  472 
  473         /*
  474          * close the new descriptor, assuming someone hasn't ripped it
  475          * out from under us.
  476          */
  477         if (error)
  478                 fdclose(fdp, nfp, fd, td);
  479 
  480         /*
  481          * Release explicitly held references before returning.  We return
  482          * a reference on nfp to the caller on success if they request it.
  483          */
  484 done:
  485         if (fp != NULL) {
  486                 if (error == 0) {
  487                         *fp = nfp;
  488                         nfp = NULL;
  489                 } else
  490                         *fp = NULL;
  491         }
  492         if (nfp != NULL)
  493                 fdrop(nfp, td);
  494         fdrop(headfp, td);
  495         return (error);
  496 }
  497 
  498 int
  499 sys_accept(td, uap)
  500         struct thread *td;
  501         struct accept_args *uap;
  502 {
  503 
  504         return (accept1(td, uap, 0));
  505 }
  506 
  507 #ifdef COMPAT_OLDSOCK
  508 int
  509 oaccept(td, uap)
  510         struct thread *td;
  511         struct accept_args *uap;
  512 {
  513 
  514         return (accept1(td, uap, 1));
  515 }
  516 #endif /* COMPAT_OLDSOCK */
  517 
  518 /* ARGSUSED */
  519 int
  520 sys_connect(td, uap)
  521         struct thread *td;
  522         struct connect_args /* {
  523                 int     s;
  524                 caddr_t name;
  525                 int     namelen;
  526         } */ *uap;
  527 {
  528         struct sockaddr *sa;
  529         int error;
  530 
  531         error = getsockaddr(&sa, uap->name, uap->namelen);
  532         if (error)
  533                 return (error);
  534 
  535         error = kern_connect(td, uap->s, sa);
  536         free(sa, M_SONAME);
  537         return (error);
  538 }
  539 
  540 
  541 int
  542 kern_connect(td, fd, sa)
  543         struct thread *td;
  544         int fd;
  545         struct sockaddr *sa;
  546 {
  547         struct socket *so;
  548         struct file *fp;
  549         int error;
  550         int interrupted = 0;
  551 
  552         AUDIT_ARG_FD(fd);
  553         error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL);
  554         if (error)
  555                 return (error);
  556         so = fp->f_data;
  557         if (so->so_state & SS_ISCONNECTING) {
  558                 error = EALREADY;
  559                 goto done1;
  560         }
  561 #ifdef KTRACE
  562         if (KTRPOINT(td, KTR_STRUCT))
  563                 ktrsockaddr(sa);
  564 #endif
  565 #ifdef MAC
  566         error = mac_socket_check_connect(td->td_ucred, so, sa);
  567         if (error)
  568                 goto bad;
  569 #endif
  570         error = soconnect(so, sa, td);
  571         if (error)
  572                 goto bad;
  573         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
  574                 error = EINPROGRESS;
  575                 goto done1;
  576         }
  577         SOCK_LOCK(so);
  578         while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  579                 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
  580                     "connec", 0);
  581                 if (error) {
  582                         if (error == EINTR || error == ERESTART)
  583                                 interrupted = 1;
  584                         break;
  585                 }
  586         }
  587         if (error == 0) {
  588                 error = so->so_error;
  589                 so->so_error = 0;
  590         }
  591         SOCK_UNLOCK(so);
  592 bad:
  593         if (!interrupted)
  594                 so->so_state &= ~SS_ISCONNECTING;
  595         if (error == ERESTART)
  596                 error = EINTR;
  597 done1:
  598         fdrop(fp, td);
  599         return (error);
  600 }
  601 
  602 int
  603 kern_socketpair(struct thread *td, int domain, int type, int protocol,
  604     int *rsv)
  605 {
  606         struct filedesc *fdp = td->td_proc->p_fd;
  607         struct file *fp1, *fp2;
  608         struct socket *so1, *so2;
  609         int fd, error;
  610 
  611         AUDIT_ARG_SOCKET(domain, type, protocol);
  612 #ifdef MAC
  613         /* We might want to have a separate check for socket pairs. */
  614         error = mac_socket_check_create(td->td_ucred, domain, type,
  615             protocol);
  616         if (error)
  617                 return (error);
  618 #endif
  619         error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
  620         if (error)
  621                 return (error);
  622         error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
  623         if (error)
  624                 goto free1;
  625         /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
  626         error = falloc(td, &fp1, &fd, 0);
  627         if (error)
  628                 goto free2;
  629         rsv[0] = fd;
  630         fp1->f_data = so1;      /* so1 already has ref count */
  631         error = falloc(td, &fp2, &fd, 0);
  632         if (error)
  633                 goto free3;
  634         fp2->f_data = so2;      /* so2 already has ref count */
  635         rsv[1] = fd;
  636         error = soconnect2(so1, so2);
  637         if (error)
  638                 goto free4;
  639         if (type == SOCK_DGRAM) {
  640                 /*
  641                  * Datagram socket connection is asymmetric.
  642                  */
  643                  error = soconnect2(so2, so1);
  644                  if (error)
  645                         goto free4;
  646         }
  647         finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
  648         finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
  649         fdrop(fp1, td);
  650         fdrop(fp2, td);
  651         return (0);
  652 free4:
  653         fdclose(fdp, fp2, rsv[1], td);
  654         fdrop(fp2, td);
  655 free3:
  656         fdclose(fdp, fp1, rsv[0], td);
  657         fdrop(fp1, td);
  658 free2:
  659         if (so2 != NULL)
  660                 (void)soclose(so2);
  661 free1:
  662         if (so1 != NULL)
  663                 (void)soclose(so1);
  664         return (error);
  665 }
  666 
  667 int
  668 sys_socketpair(struct thread *td, struct socketpair_args *uap)
  669 {
  670         int error, sv[2];
  671 
  672         error = kern_socketpair(td, uap->domain, uap->type,
  673             uap->protocol, sv);
  674         if (error)
  675                 return (error);
  676         error = copyout(sv, uap->rsv, 2 * sizeof(int));
  677         if (error) {
  678                 (void)kern_close(td, sv[0]);
  679                 (void)kern_close(td, sv[1]);
  680         }
  681         return (error);
  682 }
  683 
  684 static int
  685 sendit(td, s, mp, flags)
  686         struct thread *td;
  687         int s;
  688         struct msghdr *mp;
  689         int flags;
  690 {
  691         struct mbuf *control;
  692         struct sockaddr *to;
  693         int error;
  694 
  695 #ifdef CAPABILITY_MODE
  696         if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL))
  697                 return (ECAPMODE);
  698 #endif
  699 
  700         if (mp->msg_name != NULL) {
  701                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
  702                 if (error) {
  703                         to = NULL;
  704                         goto bad;
  705                 }
  706                 mp->msg_name = to;
  707         } else {
  708                 to = NULL;
  709         }
  710 
  711         if (mp->msg_control) {
  712                 if (mp->msg_controllen < sizeof(struct cmsghdr)
  713 #ifdef COMPAT_OLDSOCK
  714                     && mp->msg_flags != MSG_COMPAT
  715 #endif
  716                 ) {
  717                         error = EINVAL;
  718                         goto bad;
  719                 }
  720                 error = sockargs(&control, mp->msg_control,
  721                     mp->msg_controllen, MT_CONTROL);
  722                 if (error)
  723                         goto bad;
  724 #ifdef COMPAT_OLDSOCK
  725                 if (mp->msg_flags == MSG_COMPAT) {
  726                         struct cmsghdr *cm;
  727 
  728                         M_PREPEND(control, sizeof(*cm), M_WAIT);
  729                         cm = mtod(control, struct cmsghdr *);
  730                         cm->cmsg_len = control->m_len;
  731                         cm->cmsg_level = SOL_SOCKET;
  732                         cm->cmsg_type = SCM_RIGHTS;
  733                 }
  734 #endif
  735         } else {
  736                 control = NULL;
  737         }
  738 
  739         error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
  740 
  741 bad:
  742         if (to)
  743                 free(to, M_SONAME);
  744         return (error);
  745 }
  746 
  747 int
  748 kern_sendit(td, s, mp, flags, control, segflg)
  749         struct thread *td;
  750         int s;
  751         struct msghdr *mp;
  752         int flags;
  753         struct mbuf *control;
  754         enum uio_seg segflg;
  755 {
  756         struct file *fp;
  757         struct uio auio;
  758         struct iovec *iov;
  759         struct socket *so;
  760         int i, error;
  761         ssize_t len;
  762         cap_rights_t rights;
  763 #ifdef KTRACE
  764         struct uio *ktruio = NULL;
  765 #endif
  766 
  767         AUDIT_ARG_FD(s);
  768         rights = CAP_WRITE;
  769         if (mp->msg_name != NULL)
  770                 rights |= CAP_CONNECT;
  771         error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL);
  772         if (error)
  773                 return (error);
  774         so = (struct socket *)fp->f_data;
  775 
  776 #ifdef KTRACE
  777         if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
  778                 ktrsockaddr(mp->msg_name);
  779 #endif
  780 #ifdef MAC
  781         if (mp->msg_name != NULL) {
  782                 error = mac_socket_check_connect(td->td_ucred, so,
  783                     mp->msg_name);
  784                 if (error)
  785                         goto bad;
  786         }
  787         error = mac_socket_check_send(td->td_ucred, so);
  788         if (error)
  789                 goto bad;
  790 #endif
  791 
  792         auio.uio_iov = mp->msg_iov;
  793         auio.uio_iovcnt = mp->msg_iovlen;
  794         auio.uio_segflg = segflg;
  795         auio.uio_rw = UIO_WRITE;
  796         auio.uio_td = td;
  797         auio.uio_offset = 0;                    /* XXX */
  798         auio.uio_resid = 0;
  799         iov = mp->msg_iov;
  800         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  801                 if ((auio.uio_resid += iov->iov_len) < 0) {
  802                         error = EINVAL;
  803                         goto bad;
  804                 }
  805         }
  806 #ifdef KTRACE
  807         if (KTRPOINT(td, KTR_GENIO))
  808                 ktruio = cloneuio(&auio);
  809 #endif
  810         len = auio.uio_resid;
  811         error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
  812         if (error) {
  813                 if (auio.uio_resid != len && (error == ERESTART ||
  814                     error == EINTR || error == EWOULDBLOCK))
  815                         error = 0;
  816                 /* Generation of SIGPIPE can be controlled per socket */
  817                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
  818                     !(flags & MSG_NOSIGNAL)) {
  819                         PROC_LOCK(td->td_proc);
  820                         tdsignal(td, SIGPIPE);
  821                         PROC_UNLOCK(td->td_proc);
  822                 }
  823         }
  824         if (error == 0)
  825                 td->td_retval[0] = len - auio.uio_resid;
  826 #ifdef KTRACE
  827         if (ktruio != NULL) {
  828                 ktruio->uio_resid = td->td_retval[0];
  829                 ktrgenio(s, UIO_WRITE, ktruio, error);
  830         }
  831 #endif
  832 bad:
  833         fdrop(fp, td);
  834         return (error);
  835 }
  836 
  837 int
  838 sys_sendto(td, uap)
  839         struct thread *td;
  840         struct sendto_args /* {
  841                 int     s;
  842                 caddr_t buf;
  843                 size_t  len;
  844                 int     flags;
  845                 caddr_t to;
  846                 int     tolen;
  847         } */ *uap;
  848 {
  849         struct msghdr msg;
  850         struct iovec aiov;
  851         int error;
  852 
  853         msg.msg_name = uap->to;
  854         msg.msg_namelen = uap->tolen;
  855         msg.msg_iov = &aiov;
  856         msg.msg_iovlen = 1;
  857         msg.msg_control = 0;
  858 #ifdef COMPAT_OLDSOCK
  859         msg.msg_flags = 0;
  860 #endif
  861         aiov.iov_base = uap->buf;
  862         aiov.iov_len = uap->len;
  863         error = sendit(td, uap->s, &msg, uap->flags);
  864         return (error);
  865 }
  866 
  867 #ifdef COMPAT_OLDSOCK
  868 int
  869 osend(td, uap)
  870         struct thread *td;
  871         struct osend_args /* {
  872                 int     s;
  873                 caddr_t buf;
  874                 int     len;
  875                 int     flags;
  876         } */ *uap;
  877 {
  878         struct msghdr msg;
  879         struct iovec aiov;
  880         int error;
  881 
  882         msg.msg_name = 0;
  883         msg.msg_namelen = 0;
  884         msg.msg_iov = &aiov;
  885         msg.msg_iovlen = 1;
  886         aiov.iov_base = uap->buf;
  887         aiov.iov_len = uap->len;
  888         msg.msg_control = 0;
  889         msg.msg_flags = 0;
  890         error = sendit(td, uap->s, &msg, uap->flags);
  891         return (error);
  892 }
  893 
  894 int
  895 osendmsg(td, uap)
  896         struct thread *td;
  897         struct osendmsg_args /* {
  898                 int     s;
  899                 caddr_t msg;
  900                 int     flags;
  901         } */ *uap;
  902 {
  903         struct msghdr msg;
  904         struct iovec *iov;
  905         int error;
  906 
  907         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
  908         if (error)
  909                 return (error);
  910         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  911         if (error)
  912                 return (error);
  913         msg.msg_iov = iov;
  914         msg.msg_flags = MSG_COMPAT;
  915         error = sendit(td, uap->s, &msg, uap->flags);
  916         free(iov, M_IOV);
  917         return (error);
  918 }
  919 #endif
  920 
  921 int
  922 sys_sendmsg(td, uap)
  923         struct thread *td;
  924         struct sendmsg_args /* {
  925                 int     s;
  926                 caddr_t msg;
  927                 int     flags;
  928         } */ *uap;
  929 {
  930         struct msghdr msg;
  931         struct iovec *iov;
  932         int error;
  933 
  934         error = copyin(uap->msg, &msg, sizeof (msg));
  935         if (error)
  936                 return (error);
  937         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  938         if (error)
  939                 return (error);
  940         msg.msg_iov = iov;
  941 #ifdef COMPAT_OLDSOCK
  942         msg.msg_flags = 0;
  943 #endif
  944         error = sendit(td, uap->s, &msg, uap->flags);
  945         free(iov, M_IOV);
  946         return (error);
  947 }
  948 
/*
 * Receive a message on the socket referenced by descriptor s.
 *
 *   td       - calling thread; on success td->td_retval[0] is set to the
 *              number of data bytes received.
 *   s        - socket file descriptor (looked up with CAP_READ).
 *   mp       - message header: msg_iov/msg_iovlen describe the data
 *              buffers, msg_name/msg_namelen the optional source-address
 *              buffer, msg_control/msg_controllen the optional control
 *              buffer; &mp->msg_flags is passed to soreceive().
 *   fromseg  - selects how the source address is stored into
 *              mp->msg_name: copyout() for UIO_USERSPACE, bcopy()
 *              otherwise.
 *   controlp - if non-NULL, the received control mbuf chain is handed to
 *              the caller through *controlp on success (and is not copied
 *              to mp->msg_control); it is set to NULL on failure.
 *
 * Returns 0 on success or an errno value.  The file reference, the source
 * address returned by soreceive() and, unless handed to the caller, the
 * control chain are all released before returning.
 */
int
kern_recvit(td, s, mp, fromseg, controlp)
        struct thread *td;
        int s;
        struct msghdr *mp;
        enum uio_seg fromseg;
        struct mbuf **controlp;
{
        struct uio auio;
        struct iovec *iov;
        int i;
        ssize_t len;
        int error;
        struct mbuf *m, *control = 0;
        caddr_t ctlbuf;
        struct file *fp;
        struct socket *so;
        struct sockaddr *fromsa = 0;
#ifdef KTRACE
        struct uio *ktruio = NULL;
#endif

        /* Early error returns must not leave a stale chain with the caller. */
        if (controlp != NULL)
                *controlp = NULL;

        AUDIT_ARG_FD(s);
        error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL);
        if (error)
                return (error);
        so = fp->f_data;

#ifdef MAC
        error = mac_socket_check_receive(td->td_ucred, so);
        if (error) {
                fdrop(fp, td);
                return (error);
        }
#endif

        /*
         * Describe the destination buffers.  Note that the data area is
         * always marked UIO_USERSPACE here, independent of fromseg.
         */
        auio.uio_iov = mp->msg_iov;
        auio.uio_iovcnt = mp->msg_iovlen;
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_rw = UIO_READ;
        auio.uio_td = td;
        auio.uio_offset = 0;                    /* XXX */
        auio.uio_resid = 0;
        iov = mp->msg_iov;
        /* Total the iovec lengths; fail if the running sum goes negative. */
        for (i = 0; i < mp->msg_iovlen; i++, iov++) {
                if ((auio.uio_resid += iov->iov_len) < 0) {
                        fdrop(fp, td);
                        return (EINVAL);
                }
        }
#ifdef KTRACE
        if (KTRPOINT(td, KTR_GENIO))
                ktruio = cloneuio(&auio);
#endif
        /* Remember the requested size so the transferred count can be derived. */
        len = auio.uio_resid;
        /* Control data is only requested when somebody can consume it. */
        error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
            (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
            &mp->msg_flags);
        if (error) {
                /*
                 * An interrupted or would-block receive that already moved
                 * some data is reported as a successful short read.
                 */
                if (auio.uio_resid != len && (error == ERESTART ||
                    error == EINTR || error == EWOULDBLOCK))
                        error = 0;
        }
#ifdef KTRACE
        if (ktruio != NULL) {
                /* Trace only the bytes that were actually transferred. */
                ktruio->uio_resid = len - auio.uio_resid;
                ktrgenio(s, UIO_READ, ktruio, error);
        }
#endif
        if (error)
                goto out;
        td->td_retval[0] = len - auio.uio_resid;
        if (mp->msg_name) {
                /* From here on len is reused as the address length. */
                len = mp->msg_namelen;
                if (len <= 0 || fromsa == 0)
                        len = 0;
                else {
                        /* save sa_len before it is destroyed by MSG_COMPAT */
                        len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
                        if (mp->msg_flags & MSG_COMPAT)
                                ((struct osockaddr *)fromsa)->sa_family =
                                    fromsa->sa_family;
#endif
                        if (fromseg == UIO_USERSPACE) {
                                error = copyout(fromsa, mp->msg_name,
                                    (unsigned)len);
                                if (error)
                                        goto out;
                        } else
                                bcopy(fromsa, mp->msg_name, len);
                }
                /* Report how many address bytes were stored (possibly 0). */
                mp->msg_namelen = len;
        }
        if (mp->msg_control && controlp == NULL) {
#ifdef COMPAT_OLDSOCK
                /*
                 * We assume that old recvmsg calls won't receive access
                 * rights and other control info, esp. as control info
                 * is always optional and those options didn't exist in 4.3.
                 * If we receive rights, trim the cmsghdr; anything else
                 * is tossed.
                 */
                if (control && mp->msg_flags & MSG_COMPAT) {
                        if (mtod(control, struct cmsghdr *)->cmsg_level !=
                            SOL_SOCKET ||
                            mtod(control, struct cmsghdr *)->cmsg_type !=
                            SCM_RIGHTS) {
                                mp->msg_controllen = 0;
                                goto out;
                        }
                        control->m_len -= sizeof (struct cmsghdr);
                        control->m_data += sizeof (struct cmsghdr);
                }
#endif
                /* len now tracks the space left in the control buffer. */
                len = mp->msg_controllen;
                m = control;
                mp->msg_controllen = 0;
                ctlbuf = mp->msg_control;

                /*
                 * Copy the chain out mbuf by mbuf; if an mbuf does not fit
                 * in the remaining space, copy what fits and set MSG_CTRUNC.
                 */
                while (m && len > 0) {
                        unsigned int tocopy;

                        if (len >= m->m_len)
                                tocopy = m->m_len;
                        else {
                                mp->msg_flags |= MSG_CTRUNC;
                                tocopy = len;
                        }

                        if ((error = copyout(mtod(m, caddr_t),
                                        ctlbuf, tocopy)) != 0)
                                goto out;

                        ctlbuf += tocopy;
                        len -= tocopy;
                        m = m->m_next;
                }
                /* Number of control bytes actually copied out. */
                mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
        }
out:
        fdrop(fp, td);
#ifdef KTRACE
        if (fromsa && KTRPOINT(td, KTR_STRUCT))
                ktrsockaddr(fromsa);
#endif
        if (fromsa)
                free(fromsa, M_SONAME);

        /*
         * On success with a caller-supplied controlp the chain changes
         * owner; in every other case it is freed here.
         */
        if (error == 0 && controlp != NULL)  
                *controlp = control;
        else  if (control)
                m_freem(control);

        return (error);
}
 1108 
 1109 static int
 1110 recvit(td, s, mp, namelenp)
 1111         struct thread *td;
 1112         int s;
 1113         struct msghdr *mp;
 1114         void *namelenp;
 1115 {
 1116         int error;
 1117 
 1118         error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
 1119         if (error)
 1120                 return (error);
 1121         if (namelenp) {
 1122                 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
 1123 #ifdef COMPAT_OLDSOCK
 1124                 if (mp->msg_flags & MSG_COMPAT)
 1125                         error = 0;      /* old recvfrom didn't check */
 1126 #endif
 1127         }
 1128         return (error);
 1129 }
 1130 
 1131 int
 1132 sys_recvfrom(td, uap)
 1133         struct thread *td;
 1134         struct recvfrom_args /* {
 1135                 int     s;
 1136                 caddr_t buf;
 1137                 size_t  len;
 1138                 int     flags;
 1139                 struct sockaddr * __restrict    from;
 1140                 socklen_t * __restrict fromlenaddr;
 1141         } */ *uap;
 1142 {
 1143         struct msghdr msg;
 1144         struct iovec aiov;
 1145         int error;
 1146 
 1147         if (uap->fromlenaddr) {
 1148                 error = copyin(uap->fromlenaddr,
 1149                     &msg.msg_namelen, sizeof (msg.msg_namelen));
 1150                 if (error)
 1151                         goto done2;
 1152         } else {
 1153                 msg.msg_namelen = 0;
 1154         }
 1155         msg.msg_name = uap->from;
 1156         msg.msg_iov = &aiov;
 1157         msg.msg_iovlen = 1;
 1158         aiov.iov_base = uap->buf;
 1159         aiov.iov_len = uap->len;
 1160         msg.msg_control = 0;
 1161         msg.msg_flags = uap->flags;
 1162         error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 1163 done2:
 1164         return(error);
 1165 }
 1166 
 1167 #ifdef COMPAT_OLDSOCK
 1168 int
 1169 orecvfrom(td, uap)
 1170         struct thread *td;
 1171         struct recvfrom_args *uap;
 1172 {
 1173 
 1174         uap->flags |= MSG_COMPAT;
 1175         return (sys_recvfrom(td, uap));
 1176 }
 1177 #endif
 1178 
 1179 #ifdef COMPAT_OLDSOCK
 1180 int
 1181 orecv(td, uap)
 1182         struct thread *td;
 1183         struct orecv_args /* {
 1184                 int     s;
 1185                 caddr_t buf;
 1186                 int     len;
 1187                 int     flags;
 1188         } */ *uap;
 1189 {
 1190         struct msghdr msg;
 1191         struct iovec aiov;
 1192         int error;
 1193 
 1194         msg.msg_name = 0;
 1195         msg.msg_namelen = 0;
 1196         msg.msg_iov = &aiov;
 1197         msg.msg_iovlen = 1;
 1198         aiov.iov_base = uap->buf;
 1199         aiov.iov_len = uap->len;
 1200         msg.msg_control = 0;
 1201         msg.msg_flags = uap->flags;
 1202         error = recvit(td, uap->s, &msg, NULL);
 1203         return (error);
 1204 }
 1205 
 1206 /*
 1207  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 1208  * overlays the new one, missing only the flags, and with the (old) access
 1209  * rights where the control fields are now.
 1210  */
 1211 int
 1212 orecvmsg(td, uap)
 1213         struct thread *td;
 1214         struct orecvmsg_args /* {
 1215                 int     s;
 1216                 struct  omsghdr *msg;
 1217                 int     flags;
 1218         } */ *uap;
 1219 {
 1220         struct msghdr msg;
 1221         struct iovec *iov;
 1222         int error;
 1223 
 1224         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 1225         if (error)
 1226                 return (error);
 1227         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1228         if (error)
 1229                 return (error);
 1230         msg.msg_flags = uap->flags | MSG_COMPAT;
 1231         msg.msg_iov = iov;
 1232         error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
 1233         if (msg.msg_controllen && error == 0)
 1234                 error = copyout(&msg.msg_controllen,
 1235                     &uap->msg->msg_accrightslen, sizeof (int));
 1236         free(iov, M_IOV);
 1237         return (error);
 1238 }
 1239 #endif
 1240 
 1241 int
 1242 sys_recvmsg(td, uap)
 1243         struct thread *td;
 1244         struct recvmsg_args /* {
 1245                 int     s;
 1246                 struct  msghdr *msg;
 1247                 int     flags;
 1248         } */ *uap;
 1249 {
 1250         struct msghdr msg;
 1251         struct iovec *uiov, *iov;
 1252         int error;
 1253 
 1254         error = copyin(uap->msg, &msg, sizeof (msg));
 1255         if (error)
 1256                 return (error);
 1257         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1258         if (error)
 1259                 return (error);
 1260         msg.msg_flags = uap->flags;
 1261 #ifdef COMPAT_OLDSOCK
 1262         msg.msg_flags &= ~MSG_COMPAT;
 1263 #endif
 1264         uiov = msg.msg_iov;
 1265         msg.msg_iov = iov;
 1266         error = recvit(td, uap->s, &msg, NULL);
 1267         if (error == 0) {
 1268                 msg.msg_iov = uiov;
 1269                 error = copyout(&msg, uap->msg, sizeof(msg));
 1270         }
 1271         free(iov, M_IOV);
 1272         return (error);
 1273 }
 1274 
 1275 /* ARGSUSED */
 1276 int
 1277 sys_shutdown(td, uap)
 1278         struct thread *td;
 1279         struct shutdown_args /* {
 1280                 int     s;
 1281                 int     how;
 1282         } */ *uap;
 1283 {
 1284         struct socket *so;
 1285         struct file *fp;
 1286         int error;
 1287 
 1288         AUDIT_ARG_FD(uap->s);
 1289         error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp,
 1290             NULL);
 1291         if (error == 0) {
 1292                 so = fp->f_data;
 1293                 error = soshutdown(so, uap->how);
 1294                 fdrop(fp, td);
 1295         }
 1296         return (error);
 1297 }
 1298 
 1299 /* ARGSUSED */
 1300 int
 1301 sys_setsockopt(td, uap)
 1302         struct thread *td;
 1303         struct setsockopt_args /* {
 1304                 int     s;
 1305                 int     level;
 1306                 int     name;
 1307                 caddr_t val;
 1308                 int     valsize;
 1309         } */ *uap;
 1310 {
 1311 
 1312         return (kern_setsockopt(td, uap->s, uap->level, uap->name,
 1313             uap->val, UIO_USERSPACE, uap->valsize));
 1314 }
 1315 
 1316 int
 1317 kern_setsockopt(td, s, level, name, val, valseg, valsize)
 1318         struct thread *td;
 1319         int s;
 1320         int level;
 1321         int name;
 1322         void *val;
 1323         enum uio_seg valseg;
 1324         socklen_t valsize;
 1325 {
 1326         int error;
 1327         struct socket *so;
 1328         struct file *fp;
 1329         struct sockopt sopt;
 1330 
 1331         if (val == NULL && valsize != 0)
 1332                 return (EFAULT);
 1333         if ((int)valsize < 0)
 1334                 return (EINVAL);
 1335 
 1336         sopt.sopt_dir = SOPT_SET;
 1337         sopt.sopt_level = level;
 1338         sopt.sopt_name = name;
 1339         sopt.sopt_val = val;
 1340         sopt.sopt_valsize = valsize;
 1341         switch (valseg) {
 1342         case UIO_USERSPACE:
 1343                 sopt.sopt_td = td;
 1344                 break;
 1345         case UIO_SYSSPACE:
 1346                 sopt.sopt_td = NULL;
 1347                 break;
 1348         default:
 1349                 panic("kern_setsockopt called with bad valseg");
 1350         }
 1351 
 1352         AUDIT_ARG_FD(s);
 1353         error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL);
 1354         if (error == 0) {
 1355                 so = fp->f_data;
 1356                 error = sosetopt(so, &sopt);
 1357                 fdrop(fp, td);
 1358         }
 1359         return(error);
 1360 }
 1361 
 1362 /* ARGSUSED */
 1363 int
 1364 sys_getsockopt(td, uap)
 1365         struct thread *td;
 1366         struct getsockopt_args /* {
 1367                 int     s;
 1368                 int     level;
 1369                 int     name;
 1370                 void * __restrict       val;
 1371                 socklen_t * __restrict avalsize;
 1372         } */ *uap;
 1373 {
 1374         socklen_t valsize;
 1375         int     error;
 1376 
 1377         if (uap->val) {
 1378                 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 1379                 if (error)
 1380                         return (error);
 1381         }
 1382 
 1383         error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 1384             uap->val, UIO_USERSPACE, &valsize);
 1385 
 1386         if (error == 0)
 1387                 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 1388         return (error);
 1389 }
 1390 
 1391 /*
 1392  * Kernel version of getsockopt.
 1393  * optval can be a userland or userspace. optlen is always a kernel pointer.
 1394  */
 1395 int
 1396 kern_getsockopt(td, s, level, name, val, valseg, valsize)
 1397         struct thread *td;
 1398         int s;
 1399         int level;
 1400         int name;
 1401         void *val;
 1402         enum uio_seg valseg;
 1403         socklen_t *valsize;
 1404 {
 1405         int error;
 1406         struct  socket *so;
 1407         struct file *fp;
 1408         struct  sockopt sopt;
 1409 
 1410         if (val == NULL)
 1411                 *valsize = 0;
 1412         if ((int)*valsize < 0)
 1413                 return (EINVAL);
 1414 
 1415         sopt.sopt_dir = SOPT_GET;
 1416         sopt.sopt_level = level;
 1417         sopt.sopt_name = name;
 1418         sopt.sopt_val = val;
 1419         sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
 1420         switch (valseg) {
 1421         case UIO_USERSPACE:
 1422                 sopt.sopt_td = td;
 1423                 break;
 1424         case UIO_SYSSPACE:
 1425                 sopt.sopt_td = NULL;
 1426                 break;
 1427         default:
 1428                 panic("kern_getsockopt called with bad valseg");
 1429         }
 1430 
 1431         AUDIT_ARG_FD(s);
 1432         error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL);
 1433         if (error == 0) {
 1434                 so = fp->f_data;
 1435                 error = sogetopt(so, &sopt);
 1436                 *valsize = sopt.sopt_valsize;
 1437                 fdrop(fp, td);
 1438         }
 1439         return (error);
 1440 }
 1441 
 1442 /*
 1443  * getsockname1() - Get socket name.
 1444  */
 1445 /* ARGSUSED */
 1446 static int
 1447 getsockname1(td, uap, compat)
 1448         struct thread *td;
 1449         struct getsockname_args /* {
 1450                 int     fdes;
 1451                 struct sockaddr * __restrict asa;
 1452                 socklen_t * __restrict alen;
 1453         } */ *uap;
 1454         int compat;
 1455 {
 1456         struct sockaddr *sa;
 1457         socklen_t len;
 1458         int error;
 1459 
 1460         error = copyin(uap->alen, &len, sizeof(len));
 1461         if (error)
 1462                 return (error);
 1463 
 1464         error = kern_getsockname(td, uap->fdes, &sa, &len);
 1465         if (error)
 1466                 return (error);
 1467 
 1468         if (len != 0) {
 1469 #ifdef COMPAT_OLDSOCK
 1470                 if (compat)
 1471                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1472 #endif
 1473                 error = copyout(sa, uap->asa, (u_int)len);
 1474         }
 1475         free(sa, M_SONAME);
 1476         if (error == 0)
 1477                 error = copyout(&len, uap->alen, sizeof(len));
 1478         return (error);
 1479 }
 1480 
 1481 int
 1482 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
 1483     socklen_t *alen)
 1484 {
 1485         struct socket *so;
 1486         struct file *fp;
 1487         socklen_t len;
 1488         int error;
 1489 
 1490         if (*alen < 0)
 1491                 return (EINVAL);
 1492 
 1493         AUDIT_ARG_FD(fd);
 1494         error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL);
 1495         if (error)
 1496                 return (error);
 1497         so = fp->f_data;
 1498         *sa = NULL;
 1499         CURVNET_SET(so->so_vnet);
 1500         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
 1501         CURVNET_RESTORE();
 1502         if (error)
 1503                 goto bad;
 1504         if (*sa == NULL)
 1505                 len = 0;
 1506         else
 1507                 len = MIN(*alen, (*sa)->sa_len);
 1508         *alen = len;
 1509 #ifdef KTRACE
 1510         if (KTRPOINT(td, KTR_STRUCT))
 1511                 ktrsockaddr(*sa);
 1512 #endif
 1513 bad:
 1514         fdrop(fp, td);
 1515         if (error && *sa) {
 1516                 free(*sa, M_SONAME);
 1517                 *sa = NULL;
 1518         }
 1519         return (error);
 1520 }
 1521 
 1522 int
 1523 sys_getsockname(td, uap)
 1524         struct thread *td;
 1525         struct getsockname_args *uap;
 1526 {
 1527 
 1528         return (getsockname1(td, uap, 0));
 1529 }
 1530 
 1531 #ifdef COMPAT_OLDSOCK
 1532 int
 1533 ogetsockname(td, uap)
 1534         struct thread *td;
 1535         struct getsockname_args *uap;
 1536 {
 1537 
 1538         return (getsockname1(td, uap, 1));
 1539 }
 1540 #endif /* COMPAT_OLDSOCK */
 1541 
 1542 /*
 1543  * getpeername1() - Get name of peer for connected socket.
 1544  */
 1545 /* ARGSUSED */
 1546 static int
 1547 getpeername1(td, uap, compat)
 1548         struct thread *td;
 1549         struct getpeername_args /* {
 1550                 int     fdes;
 1551                 struct sockaddr * __restrict    asa;
 1552                 socklen_t * __restrict  alen;
 1553         } */ *uap;
 1554         int compat;
 1555 {
 1556         struct sockaddr *sa;
 1557         socklen_t len;
 1558         int error;
 1559 
 1560         error = copyin(uap->alen, &len, sizeof (len));
 1561         if (error)
 1562                 return (error);
 1563 
 1564         error = kern_getpeername(td, uap->fdes, &sa, &len);
 1565         if (error)
 1566                 return (error);
 1567 
 1568         if (len != 0) {
 1569 #ifdef COMPAT_OLDSOCK
 1570                 if (compat)
 1571                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1572 #endif
 1573                 error = copyout(sa, uap->asa, (u_int)len);
 1574         }
 1575         free(sa, M_SONAME);
 1576         if (error == 0)
 1577                 error = copyout(&len, uap->alen, sizeof(len));
 1578         return (error);
 1579 }
 1580 
 1581 int
 1582 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
 1583     socklen_t *alen)
 1584 {
 1585         struct socket *so;
 1586         struct file *fp;
 1587         socklen_t len;
 1588         int error;
 1589 
 1590         if (*alen < 0)
 1591                 return (EINVAL);
 1592 
 1593         AUDIT_ARG_FD(fd);
 1594         error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL);
 1595         if (error)
 1596                 return (error);
 1597         so = fp->f_data;
 1598         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 1599                 error = ENOTCONN;
 1600                 goto done;
 1601         }
 1602         *sa = NULL;
 1603         CURVNET_SET(so->so_vnet);
 1604         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
 1605         CURVNET_RESTORE();
 1606         if (error)
 1607                 goto bad;
 1608         if (*sa == NULL)
 1609                 len = 0;
 1610         else
 1611                 len = MIN(*alen, (*sa)->sa_len);
 1612         *alen = len;
 1613 #ifdef KTRACE
 1614         if (KTRPOINT(td, KTR_STRUCT))
 1615                 ktrsockaddr(*sa);
 1616 #endif
 1617 bad:
 1618         if (error && *sa) {
 1619                 free(*sa, M_SONAME);
 1620                 *sa = NULL;
 1621         }
 1622 done:
 1623         fdrop(fp, td);
 1624         return (error);
 1625 }
 1626 
 1627 int
 1628 sys_getpeername(td, uap)
 1629         struct thread *td;
 1630         struct getpeername_args *uap;
 1631 {
 1632 
 1633         return (getpeername1(td, uap, 0));
 1634 }
 1635 
 1636 #ifdef COMPAT_OLDSOCK
 1637 int
 1638 ogetpeername(td, uap)
 1639         struct thread *td;
 1640         struct ogetpeername_args *uap;
 1641 {
 1642 
 1643         /* XXX uap should have type `getpeername_args *' to begin with. */
 1644         return (getpeername1(td, (struct getpeername_args *)uap, 1));
 1645 }
 1646 #endif /* COMPAT_OLDSOCK */
 1647 
 1648 int
 1649 sockargs(mp, buf, buflen, type)
 1650         struct mbuf **mp;
 1651         caddr_t buf;
 1652         int buflen, type;
 1653 {
 1654         struct sockaddr *sa;
 1655         struct mbuf *m;
 1656         int error;
 1657 
 1658         if ((u_int)buflen > MLEN) {
 1659 #ifdef COMPAT_OLDSOCK
 1660                 if (type == MT_SONAME && (u_int)buflen <= 112)
 1661                         buflen = MLEN;          /* unix domain compat. hack */
 1662                 else
 1663 #endif
 1664                         if ((u_int)buflen > MCLBYTES)
 1665                                 return (EINVAL);
 1666         }
 1667         m = m_get(M_WAIT, type);
 1668         if ((u_int)buflen > MLEN)
 1669                 MCLGET(m, M_WAIT);
 1670         m->m_len = buflen;
 1671         error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 1672         if (error)
 1673                 (void) m_free(m);
 1674         else {
 1675                 *mp = m;
 1676                 if (type == MT_SONAME) {
 1677                         sa = mtod(m, struct sockaddr *);
 1678 
 1679 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1680                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1681                                 sa->sa_family = sa->sa_len;
 1682 #endif
 1683                         sa->sa_len = buflen;
 1684                 }
 1685         }
 1686         return (error);
 1687 }
 1688 
 1689 int
 1690 getsockaddr(namp, uaddr, len)
 1691         struct sockaddr **namp;
 1692         caddr_t uaddr;
 1693         size_t len;
 1694 {
 1695         struct sockaddr *sa;
 1696         int error;
 1697 
 1698         if (len > SOCK_MAXADDRLEN)
 1699                 return (ENAMETOOLONG);
 1700         if (len < offsetof(struct sockaddr, sa_data[0]))
 1701                 return (EINVAL);
 1702         sa = malloc(len, M_SONAME, M_WAITOK);
 1703         error = copyin(uaddr, sa, len);
 1704         if (error) {
 1705                 free(sa, M_SONAME);
 1706         } else {
 1707 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1708                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1709                         sa->sa_family = sa->sa_len;
 1710 #endif
 1711                 sa->sa_len = len;
 1712                 *namp = sa;
 1713         }
 1714         return (error);
 1715 }
 1716 
 1717 #include <sys/condvar.h>
 1718 
/*
 * Synchronization state shared between a sendfile request and
 * sf_buf_mext().  sf_buf_mext() decrements count under mtx and signals cv
 * when it reaches zero.
 */
struct sendfile_sync {
        struct mtx      mtx;    /* held while count is examined or changed */
        struct cv       cv;     /* signalled when count drops to zero */
        unsigned        count;  /* presumably outstanding sf_buf references - confirm against kern_sendfile() */
};
 1724 
 1725 /*
 1726  * Detach mapped page and release resources back to the system.
 1727  */
 1728 void
 1729 sf_buf_mext(void *addr, void *args)
 1730 {
 1731         vm_page_t m;
 1732         struct sendfile_sync *sfs;
 1733 
 1734         m = sf_buf_page(args);
 1735         sf_buf_free(args);
 1736         vm_page_lock(m);
 1737         vm_page_unwire(m, 0);
 1738         /*
 1739          * Check for the object going away on us. This can
 1740          * happen since we don't hold a reference to it.
 1741          * If so, we're responsible for freeing the page.
 1742          */
 1743         if (m->wire_count == 0 && m->object == NULL)
 1744                 vm_page_free(m);
 1745         vm_page_unlock(m);
 1746         if (addr == NULL)
 1747                 return;
 1748         sfs = addr;
 1749         mtx_lock(&sfs->mtx);
 1750         KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
 1751         if (--sfs->count == 0)
 1752                 cv_signal(&sfs->cv);
 1753         mtx_unlock(&sfs->mtx);
 1754 }
 1755 
 1756 /*
 1757  * sendfile(2)
 1758  *
 1759  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 1760  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 1761  *
 1762  * Send a file specified by 'fd' and starting at 'offset' to a socket
 1763  * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
 1764  * 0.  Optionally add a header and/or trailer to the socket output.  If
 1765  * specified, write the total number of bytes sent into *sbytes.
 1766  */
 1767 int
 1768 sys_sendfile(struct thread *td, struct sendfile_args *uap)
 1769 {
 1770 
 1771         return (do_sendfile(td, uap, 0));
 1772 }
 1773 
 1774 static int
 1775 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
 1776 {
 1777         struct sf_hdtr hdtr;
 1778         struct uio *hdr_uio, *trl_uio;
 1779         int error;
 1780 
 1781         hdr_uio = trl_uio = NULL;
 1782 
 1783         if (uap->hdtr != NULL) {
 1784                 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 1785                 if (error)
 1786                         goto out;
 1787                 if (hdtr.headers != NULL) {
 1788                         error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
 1789                         if (error)
 1790                                 goto out;
 1791                 }
 1792                 if (hdtr.trailers != NULL) {
 1793                         error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
 1794                         if (error)
 1795                                 goto out;
 1796 
 1797                 }
 1798         }
 1799 
 1800         error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
 1801 out:
 1802         if (hdr_uio)
 1803                 free(hdr_uio, M_IOV);
 1804         if (trl_uio)
 1805                 free(trl_uio, M_IOV);
 1806         return (error);
 1807 }
 1808 
 1809 #ifdef COMPAT_FREEBSD4
 1810 int
 1811 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
 1812 {
 1813         struct sendfile_args args;
 1814 
 1815         args.fd = uap->fd;
 1816         args.s = uap->s;
 1817         args.offset = uap->offset;
 1818         args.nbytes = uap->nbytes;
 1819         args.hdtr = uap->hdtr;
 1820         args.sbytes = uap->sbytes;
 1821         args.flags = uap->flags;
 1822 
 1823         return (do_sendfile(td, &args, 1));
 1824 }
 1825 #endif /* COMPAT_FREEBSD4 */
 1826 
 /*
  * kern_sendfile: send data from the regular file uap->fd over the
  * connected stream socket uap->s, starting at file offset uap->offset.
  *
  * td       - calling thread.
  * uap      - sendfile arguments.  uap->nbytes == 0 means "up to end of
  *            file".  If uap->sbytes is not NULL the number of bytes sent
  *            (sbytes below) is copied out to it on every return path.
  * hdr_uio  - optional header data (may be NULL); it is copied into an mbuf
  *            chain and the first batch of file data is appended to it.
  * trl_uio  - optional trailer data (may be NULL); written with
  *            kern_writev() once the file loop has finished normally.
  * compat   - when non-zero, uap->nbytes is taken to include the header
  *            length and the header size is subtracted from it.
  *
  * Returns 0 or an errno value.  ERESTART is reported as EINTR.
  */
 1827 int
 1828 kern_sendfile(struct thread *td, struct sendfile_args *uap,
 1829     struct uio *hdr_uio, struct uio *trl_uio, int compat)
 1830 {
 1831         struct file *sock_fp;
 1832         struct vnode *vp;
 1833         struct vm_object *obj = NULL;
 1834         struct socket *so = NULL;
 1835         struct mbuf *m = NULL;
 1836         struct sf_buf *sf;
 1837         struct vm_page *pg;
 1838         struct vattr va;
 1839         off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
 1840         int error, hdrlen = 0, mnw = 0;
 1841         int vfslocked;
 1842         int bsize;
 1843         struct sendfile_sync *sfs = NULL;
 1844 
 1845         /*
 1846          * The file descriptor must be a regular file and have a
 1847          * backing VM object.
 1848          * File offset must be positive.  If it goes beyond EOF
 1849          * we send only the header/trailer and no payload data.
 1850          */
 1851         AUDIT_ARG_FD(uap->fd);
 1852         if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0)
 1853                 goto out;
              /*
               * Inspect the vnode type, size and VM object with the vnode
               * locked shared.
               */
 1854         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1855         vn_lock(vp, LK_SHARED | LK_RETRY);
 1856         if (vp->v_type == VREG) {
 1857                 bsize = vp->v_mount->mnt_stat.f_iosize;
 1858                 if (uap->nbytes == 0) {
                              /* No length given: use the current file size. */
 1859                         error = VOP_GETATTR(vp, &va, td->td_ucred);
 1860                         if (error != 0) {
 1861                                 VOP_UNLOCK(vp, 0);
 1862                                 VFS_UNLOCK_GIANT(vfslocked);
 1863                                 obj = NULL;
 1864                                 goto out;
 1865                         }
 1866                         rem = va.va_size;
 1867                 } else
 1868                         rem = uap->nbytes;
 1869                 obj = vp->v_object;
 1870                 if (obj != NULL) {
 1871                         /*
 1872                          * Temporarily increase the backing VM
 1873                          * object's reference count so that a forced
 1874                          * reclamation of its vnode does not
 1875                          * immediately destroy it.
 1876                          */
 1877                         VM_OBJECT_LOCK(obj);
 1878                         if ((obj->flags & OBJ_DEAD) == 0) {
 1879                                 vm_object_reference_locked(obj);
 1880                                 VM_OBJECT_UNLOCK(obj);
 1881                         } else {
 1882                                 VM_OBJECT_UNLOCK(obj);
 1883                                 obj = NULL;
 1884                         }
 1885                 }
 1886         } else
 1887                 bsize = 0;      /* silence gcc */
 1888         VOP_UNLOCK(vp, 0);
 1889         VFS_UNLOCK_GIANT(vfslocked);
              /*
               * obj is still NULL if the vnode is not a regular file, has no
               * VM object, or the object was already marked OBJ_DEAD.
               */
 1890         if (obj == NULL) {
 1891                 error = EINVAL;
 1892                 goto out;
 1893         }
 1894         if (uap->offset < 0) {
 1895                 error = EINVAL;
 1896                 goto out;
 1897         }
 1898 
 1899         /*
 1900          * The socket must be a stream socket and connected.
 1901          * Remember if it is a blocking or non-blocking socket.
 1902          */
 1903         if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE,
 1904             &sock_fp, NULL)) != 0)
 1905                 goto out;
              /*
               * From here on so is non-NULL, which is what the cleanup code at
               * "out" uses to decide whether sock_fp must be dropped.
               */
 1906         so = sock_fp->f_data;
 1907         if (so->so_type != SOCK_STREAM) {
 1908                 error = EINVAL;
 1909                 goto out;
 1910         }
 1911         if ((so->so_state & SS_ISCONNECTED) == 0) {
 1912                 error = ENOTCONN;
 1913                 goto out;
 1914         }
 1915         /*
 1916          * Do not wait on memory allocations but return ENOMEM for
 1917          * caller to retry later.
 1918          * XXX: Experimental.
              *
              * NOTE(review): the allocation failures below that are affected
              * by mnw set EAGAIN, not ENOMEM.
 1919          */
 1920         if (uap->flags & SF_MNOWAIT)
 1921                 mnw = 1;
 1922 
              /*
               * SF_SYNC: allocate the tracking structure whose count is bumped
               * for every sf_buf-backed mbuf created below and waited on
               * before returning.
               */
 1923         if (uap->flags & SF_SYNC) {
 1924                 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
 1925                 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
 1926                 cv_init(&sfs->cv, "sendfile");
 1927         }
 1928 
 1929 #ifdef MAC
 1930         error = mac_socket_check_send(td->td_ucred, so);
 1931         if (error)
 1932                 goto out;
 1933 #endif
 1934 
 1935         /* If headers are specified copy them into mbufs. */
 1936         if (hdr_uio != NULL) {
 1937                 hdr_uio->uio_td = td;
 1938                 hdr_uio->uio_rw = UIO_WRITE;
 1939                 if (hdr_uio->uio_resid > 0) {
 1940                         /*
 1941                          * In FBSD < 5.0 the nbytes to send also included
 1942                          * the header.  If compat is specified subtract the
 1943                          * header size from nbytes.
 1944                          */
 1945                         if (compat) {
 1946                                 if (uap->nbytes > hdr_uio->uio_resid)
 1947                                         uap->nbytes -= hdr_uio->uio_resid;
 1948                                 else
 1949                                         uap->nbytes = 0;
 1950                         }
 1951                         m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
 1952                             0, 0, 0);
 1953                         if (m == NULL) {
 1954                                 error = mnw ? EAGAIN : ENOBUFS;
 1955                                 goto out;
 1956                         }
 1957                         hdrlen = m_length(m, NULL);
 1958                 }
 1959         }
 1960 
 1961         /*
 1962          * Protect against multiple writers to the socket.
 1963          *
 1964          * XXXRW: Historically this has assumed non-interruptibility, so now
 1965          * we implement that, but possibly shouldn't.
 1966          */
 1967         (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
 1968 
 1969         /*
 1970          * Loop through the pages of the file, starting with the requested
 1971          * offset. Get a file page (do I/O if necessary), map the file page
 1972          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 1973          * it on the socket.
 1974          * This is done in two loops.  The inner loop turns as many pages
 1975          * as it can, up to available socket buffer space, without blocking
 1976          * into mbufs to have it bulk delivered into the socket send buffer.
 1977          * The outer loop checks the state and available space of the socket
 1978          * and takes care of the overall progress.
 1979          */
 1980         for (off = uap->offset; ; ) {
 1981                 struct mbuf *mtail;
 1982                 int loopbytes;
 1983                 int space;
 1984                 int done;
 1985 
                      /*
                       * Stop once the requested byte count (or, for nbytes == 0,
                       * the file size) has been sent from the file.
                       *
                       * NOTE(review): va is only filled in before this loop when
                       * uap->nbytes was 0 on entry.  If the compat adjustment
                       * above reduced a non-zero nbytes to 0, the first pass
                       * through this test appears to read va.va_size before any
                       * VOP_GETATTR() has set it -- confirm.
                       */
 1986                 if ((uap->nbytes != 0 && uap->nbytes == fsbytes) ||
 1987                     (uap->nbytes == 0 && va.va_size == fsbytes))
 1988                         break;
 1989 
 1990                 mtail = NULL;
 1991                 loopbytes = 0;
 1992                 space = 0;
 1993                 done = 0;
 1994 
 1995                 /*
 1996                  * Check the socket state for ongoing connection,
 1997                  * no errors and space in socket buffer.
 1998                  * If space is low allow for the remainder of the
 1999                  * file to be processed if it fits the socket buffer.
 2000                  * Otherwise block in waiting for sufficient space
 2001                  * to proceed, or if the socket is nonblocking, return
 2002                  * to userland with EAGAIN while reporting how far
 2003                  * we've come.
 2004                  * We wait until the socket buffer has significant free
 2005                  * space to do bulk sends.  This makes good use of file
 2006                  * system read ahead and allows packet segmentation
 2007                  * offloading hardware to take over lots of work.  If
 2008                  * we were not careful here we would send off only one
 2009                  * sfbuf at a time.
 2010                  */
 2011                 SOCKBUF_LOCK(&so->so_snd);
 2012                 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
 2013                         so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
 2014 retry_space:
 2015                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 2016                         error = EPIPE;
 2017                         SOCKBUF_UNLOCK(&so->so_snd);
 2018                         goto done;
 2019                 } else if (so->so_error) {
                              /* Report the pending socket error once and clear it. */
 2020                         error = so->so_error;
 2021                         so->so_error = 0;
 2022                         SOCKBUF_UNLOCK(&so->so_snd);
 2023                         goto done;
 2024                 }
 2025                 space = sbspace(&so->so_snd);
 2026                 if (space < rem &&
 2027                     (space <= 0 ||
 2028                      space < so->so_snd.sb_lowat)) {
 2029                         if (so->so_state & SS_NBIO) {
 2030                                 SOCKBUF_UNLOCK(&so->so_snd);
 2031                                 error = EAGAIN;
 2032                                 goto done;
 2033                         }
 2034                         /*
 2035                          * sbwait drops the lock while sleeping.
 2036                          * When we loop back to retry_space the
 2037                          * state may have changed and we retest
 2038                          * for it.
 2039                          */
 2040                         error = sbwait(&so->so_snd);
 2041                         /*
 2042                          * An error from sbwait usually indicates that we've
 2043                          * been interrupted by a signal. If we've sent anything
 2044                          * then return bytes sent, otherwise return the error.
 2045                          */
 2046                         if (error) {
 2047                                 SOCKBUF_UNLOCK(&so->so_snd);
 2048                                 goto done;
 2049                         }
 2050                         goto retry_space;
 2051                 }
 2052                 SOCKBUF_UNLOCK(&so->so_snd);
 2053 
 2054                 /*
 2055                  * Reduce space in the socket buffer by the size of
 2056                  * the header mbuf chain.
 2057                  * hdrlen is set to 0 after the first loop.
 2058                  */
 2059                 space -= hdrlen;
 2060 
                      /*
                       * Re-read the file attributes on every pass of the outer
                       * loop and stop if the current offset is at or past the
                       * file size.
                       */
 2061                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2062                 error = vn_lock(vp, LK_SHARED);
 2063                 if (error != 0) {
 2064                         VFS_UNLOCK_GIANT(vfslocked);
 2065                         goto done;
 2066                 }
 2067                 error = VOP_GETATTR(vp, &va, td->td_ucred);
 2068                 if (error != 0 || off >= va.va_size) {
 2069                         VOP_UNLOCK(vp, 0);
 2070                         VFS_UNLOCK_GIANT(vfslocked);
 2071                         goto done;
 2072                 }
                      /*
                       * Giant is dropped here, but the vnode stays locked (shared)
                       * for the whole inner loop; it is unlocked right after it.
                       */
 2073                 VFS_UNLOCK_GIANT(vfslocked);
 2074 
 2075                 /*
 2076                  * Loop and construct maximum sized mbuf chain to be bulk
 2077                  * dumped into socket buffer.
 2078                  */
 2079                 while (space > loopbytes) {
 2080                         vm_pindex_t pindex;
 2081                         vm_offset_t pgoff;
 2082                         struct mbuf *m0;
 2083 
 2084                         /*
 2085                          * Calculate the amount to transfer.
 2086                          * Not to exceed a page, the EOF,
 2087                          * or the passed in nbytes.
 2088                          */
 2089                         pgoff = (vm_offset_t)(off & PAGE_MASK);
 2090                         rem = va.va_size - uap->offset;
 2091                         if (uap->nbytes != 0)
 2092                                 rem = omin(rem, uap->nbytes);
 2093                         rem -= fsbytes + loopbytes;
 2094                         xfsize = omin(PAGE_SIZE - pgoff, rem);
 2095                         xfsize = omin(space - loopbytes, xfsize);
 2096                         if (xfsize <= 0) {
 2097                                 done = 1;               /* all data sent */
 2098                                 break;
 2099                         }
 2100 
 2101                         /*
 2102                          * Attempt to look up the page.  Allocate
 2103                          * if not found or wait and loop if busy.
 2104                          */
 2105                         pindex = OFF_TO_IDX(off);
 2106                         VM_OBJECT_LOCK(obj);
 2107                         pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
 2108                             VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
 2109 
 2110                         /*
 2111                          * Check if page is valid for what we need,
 2112                          * otherwise initiate I/O.
 2113                          * If we already turned some pages into mbufs,
 2114                          * send them off before we come here again and
 2115                          * block.
                               *
                               * Lock state: the two branches that only set
                               * error leave the object lock held; the valid-page
                               * branch and the I/O branch drop it.
 2116                          */
 2117                         if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
 2118                                 VM_OBJECT_UNLOCK(obj);
 2119                         else if (m != NULL)
 2120                                 error = EAGAIN; /* send what we already got */
 2121                         else if (uap->flags & SF_NODISKIO)
 2122                                 error = EBUSY;
 2123                         else {
 2124                                 ssize_t resid;
 2125 
 2126                                 VM_OBJECT_UNLOCK(obj);
 2127 
 2128                                 /*
 2129                                  * Get the page from backing store.
 2130                                  * XXXMAC: Because we don't have fp->f_cred
 2131                                  * here, we pass in NOCRED.  This is probably
 2132                                  * wrong, but is consistent with our original
 2133                                  * implementation.
 2134                                  */
 2135                                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2136                                 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
 2137                                     trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
 2138                                     IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 2139                                     td->td_ucred, NOCRED, &resid, td);
 2140                                 VFS_UNLOCK_GIANT(vfslocked);
                                      /*
                                       * Re-take the object lock on failure so that
                                       * the common error path below, which unlocks
                                       * it, sees the same lock state as the other
                                       * error branches.
                                       */
 2141                                 if (error)
 2142                                         VM_OBJECT_LOCK(obj);
 2143                                 mbstat.sf_iocnt++;
 2144                         }
                              /* Common error path; the object lock is held here. */
 2145                         if (error) {
 2146                                 vm_page_lock(pg);
 2147                                 vm_page_unwire(pg, 0);
 2148                                 /*
 2149                                  * See if anyone else might know about
 2150                                  * this page.  If not and it is not valid,
 2151                                  * then free it.
 2152                                  */
 2153                                 if (pg->wire_count == 0 && pg->valid == 0 &&
 2154                                     pg->busy == 0 && !(pg->oflags & VPO_BUSY))
 2155                                         vm_page_free(pg);
 2156                                 vm_page_unlock(pg);
 2157                                 VM_OBJECT_UNLOCK(obj);
 2158                                 if (error == EAGAIN)
 2159                                         error = 0;      /* not a real error */
 2160                                 break;
 2161                         }
 2162 
 2163                         /*
 2164                          * Get a sendfile buf.  When allocating the
 2165                          * first buffer for mbuf chain, we usually
 2166                          * wait as long as necessary, but this wait
 2167                          * can be interrupted.  For subsequent
 2168                          * buffers, do not sleep, since several
 2169                          * threads might exhaust the buffers and then
 2170                          * deadlock.
 2171                          */
 2172                         sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
 2173                             SFB_CATCH);
 2174                         if (sf == NULL) {
 2175                                 mbstat.sf_allocfail++;
 2176                                 vm_page_lock(pg);
 2177                                 vm_page_unwire(pg, 0);
 2178                                 KASSERT(pg->object != NULL,
 2179                                     ("kern_sendfile: object disappeared"));
 2180                                 vm_page_unlock(pg);
                                      /*
                                       * Only report an error when nothing has been
                                       * chained yet; otherwise the chain built so
                                       * far is sent below.
                                       */
 2181                                 if (m == NULL)
 2182                                         error = (mnw ? EAGAIN : EINTR);
 2183                                 break;
 2184                         }
 2185 
 2186                         /*
 2187                          * Get an mbuf and set it up as having
 2188                          * external storage.
 2189                          */
 2190                         m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
 2191                         if (m0 == NULL) {
 2192                                 error = (mnw ? EAGAIN : ENOBUFS);
                                      /*
                                       * Release the sf_buf through the same routine
                                       * that is registered as the external-storage
                                       * free function below; NULL is passed in place
                                       * of sfs because this buffer was never counted.
                                       */
 2193                                 sf_buf_mext(NULL, sf);
 2194                                 break;
 2195                         }
 2196                         MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
 2197                             sfs, sf, M_RDONLY, EXT_SFBUF);
 2198                         m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
 2199                         m0->m_len = xfsize;
 2200 
 2201                         /* Append to mbuf chain. */
 2202                         if (mtail != NULL)
 2203                                 mtail->m_next = m0;
 2204                         else if (m != NULL)
 2205                                 m_last(m)->m_next = m0;
 2206                         else
 2207                                 m = m0;
 2208                         mtail = m0;
 2209 
 2210                         /* Keep track of bits processed. */
 2211                         loopbytes += xfsize;
 2212                         off += xfsize;
 2213 
                              /* SF_SYNC: one more outstanding sf_buf mbuf. */
 2214                         if (sfs != NULL) {
 2215                                 mtx_lock(&sfs->mtx);
 2216                                 sfs->count++;
 2217                                 mtx_unlock(&sfs->mtx);
 2218                         }
 2219                 }
 2220 
 2221                 VOP_UNLOCK(vp, 0);
 2222 
 2223                 /* Add the buffer chain to the socket buffer. */
 2224                 if (m != NULL) {
 2225                         int mlen, err;
 2226 
 2227                         mlen = m_length(m, NULL);
 2228                         SOCKBUF_LOCK(&so->so_snd);
 2229                         if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 2230                                 error = EPIPE;
 2231                                 SOCKBUF_UNLOCK(&so->so_snd);
                                      /* m is still owned here; it is freed at "out". */
 2232                                 goto done;
 2233                         }
 2234                         SOCKBUF_UNLOCK(&so->so_snd);
 2235                         CURVNET_SET(so->so_vnet);
 2236                         /* Avoid error aliasing. */
 2237                         err = (*so->so_proto->pr_usrreqs->pru_send)
 2238                                     (so, 0, m, NULL, NULL, td);
 2239                         CURVNET_RESTORE();
 2240                         if (err == 0) {
 2241                                 /*
 2242                                  * We need two counters to get the
 2243                                  * file offset and nbytes to send
 2244                                  * right:
 2245                                  * - sbytes contains the total amount
 2246                                  *   of bytes sent, including headers.
 2247                                  * - fsbytes contains the total amount
 2248                                  *   of bytes sent from the file.
 2249                                  */
 2250                                 sbytes += mlen;
 2251                                 fsbytes += mlen;
                                      /*
                                       * The first chain also carried the header,
                                       * which does not count as file data.
                                       */
 2252                                 if (hdrlen) {
 2253                                         fsbytes -= hdrlen;
 2254                                         hdrlen = 0;
 2255                                 }
 2256                         } else if (error == 0)
 2257                                 error = err;
 2258                         m = NULL;       /* pru_send always consumes */
 2259                 }
 2260 
 2261                 /* Quit outer loop on error or when we're done. */
 2262                 if (done) 
 2263                         break;
 2264                 if (error)
 2265                         goto done;
 2266         }
 2267 
 2268         /*
 2269          * Send trailers. Wimp out and use writev(2).
              *
              * The send buffer lock is released before kern_writev() is
              * called, so this path jumps straight to "out" and skips the
              * sbunlock() at "done".
 2270          */
 2271         if (trl_uio != NULL) {
 2272                 sbunlock(&so->so_snd);
 2273                 error = kern_writev(td, uap->s, trl_uio);
 2274                 if (error == 0)
 2275                         sbytes += td->td_retval[0];
 2276                 goto out;
 2277         }
 2278 
              /* done: reached with the send buffer lock (sblock) held. */
 2279 done:
 2280         sbunlock(&so->so_snd);
              /* out: common cleanup; reached with the send buffer unlocked. */
 2281 out:
 2282         /*
 2283          * If there was no error we have to clear td->td_retval[0]
 2284          * because it may have been set by writev.
 2285          */
 2286         if (error == 0) {
 2287                 td->td_retval[0] = 0;
 2288         }
              /*
               * Report the byte count even on failure.  The copyout() result
               * is not checked.
               */
 2289         if (uap->sbytes != NULL) {
 2290                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
 2291         }
 2292         if (obj != NULL)
 2293                 vm_object_deallocate(obj);
 2294         if (vp != NULL) {
 2295                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2296                 vrele(vp);
 2297                 VFS_UNLOCK_GIANT(vfslocked);
 2298         }
 2299         if (so)
 2300                 fdrop(sock_fp, td);
              /* Free a chain that was built but never handed to pru_send. */
 2301         if (m)
 2302                 m_freem(m);
 2303 
              /*
               * SF_SYNC: wait until the count of outstanding sf_buf mbufs is
               * back to zero, then tear the tracking structure down.
               * NOTE(review): presumably sf_buf_mext(), defined elsewhere,
               * decrements sfs->count and signals sfs->cv as the mbufs are
               * freed -- confirm.
               */
 2304         if (sfs != NULL) {
 2305                 mtx_lock(&sfs->mtx);
 2306                 if (sfs->count != 0)
 2307                         cv_wait(&sfs->cv, &sfs->mtx);
 2308                 KASSERT(sfs->count == 0, ("sendfile sync still busy"));
 2309                 cv_destroy(&sfs->cv);
 2310                 mtx_destroy(&sfs->mtx);
 2311                 free(sfs, M_TEMP);
 2312         }
 2313 
              /* Report an interrupted call as EINTR rather than ERESTART. */
 2314         if (error == ERESTART)
 2315                 error = EINTR;
 2316 
 2317         return (error);
 2318 }
 2319 
 2320 /*
 2321  * SCTP syscalls.
 2322  * Functionality only compiled in if SCTP is defined in the kernel Makefile,
 2323  * otherwise all return EOPNOTSUPP.
 2324  * XXX: We should make this loadable one day.
 2325  */
 2326 int
 2327 sys_sctp_peeloff(td, uap)
 2328         struct thread *td;
 2329         struct sctp_peeloff_args /* {
 2330                 int     sd;
 2331                 caddr_t name;
 2332         } */ *uap;
 2333 {
 2334 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2335         struct filedesc *fdp;
 2336         struct file *nfp = NULL;
 2337         int error;
 2338         struct socket *head, *so;
 2339         int fd;
 2340         u_int fflag;
 2341 
 2342         fdp = td->td_proc->p_fd;
 2343         AUDIT_ARG_FD(uap->sd);
 2344         error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag);
 2345         if (error)
 2346                 goto done2;
 2347         if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
 2348                 error = EOPNOTSUPP;
 2349                 goto done;
 2350         }
 2351         error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
 2352         if (error)
 2353                 goto done;
 2354         /*
 2355          * At this point we know we do have a assoc to pull
 2356          * we proceed to get the fd setup. This may block
 2357          * but that is ok.
 2358          */
 2359 
 2360         error = falloc(td, &nfp, &fd, 0);
 2361         if (error)
 2362                 goto done;
 2363         td->td_retval[0] = fd;
 2364 
 2365         CURVNET_SET(head->so_vnet);
 2366         so = sonewconn(head, SS_ISCONNECTED);
 2367         if (so == NULL) {
 2368                 error = ENOMEM;
 2369                 goto noconnection;
 2370         }
 2371         /*
 2372          * Before changing the flags on the socket, we have to bump the
 2373          * reference count.  Otherwise, if the protocol calls sofree(),
 2374          * the socket will be released due to a zero refcount.
 2375          */
 2376         SOCK_LOCK(so);
 2377         soref(so);                      /* file descriptor reference */
 2378         SOCK_UNLOCK(so);
 2379 
 2380         ACCEPT_LOCK();
 2381 
 2382         TAILQ_REMOVE(&head->so_comp, so, so_list);
 2383         head->so_qlen--;
 2384         so->so_state |= (head->so_state & SS_NBIO);
 2385         so->so_state &= ~SS_NOFDREF;
 2386         so->so_qstate &= ~SQ_COMP;
 2387         so->so_head = NULL;
 2388         ACCEPT_UNLOCK();
 2389         finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 2390         error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
 2391         if (error)
 2392                 goto noconnection;
 2393         if (head->so_sigio != NULL)
 2394                 fsetown(fgetown(&head->so_sigio), &so->so_sigio);
 2395 
 2396 noconnection:
 2397         /*
 2398          * close the new descriptor, assuming someone hasn't ripped it
 2399          * out from under us.
 2400          */
 2401         if (error)
 2402                 fdclose(fdp, nfp, fd, td);
 2403 
 2404         /*
 2405          * Release explicitly held references before returning.
 2406          */
 2407         CURVNET_RESTORE();
 2408 done:
 2409         if (nfp != NULL)
 2410                 fdrop(nfp, td);
 2411         fputsock(head);
 2412 done2:
 2413         return (error);
 2414 #else  /* SCTP */
 2415         return (EOPNOTSUPP);
 2416 #endif /* SCTP */
 2417 }
 2418 
 2419 int
 2420 sys_sctp_generic_sendmsg (td, uap)
 2421         struct thread *td;
 2422         struct sctp_generic_sendmsg_args /* {
 2423                 int sd, 
 2424                 caddr_t msg, 
 2425                 int mlen, 
 2426                 caddr_t to, 
 2427                 __socklen_t tolen, 
 2428                 struct sctp_sndrcvinfo *sinfo, 
 2429                 int flags
 2430         } */ *uap;
 2431 {
 2432 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2433         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2434         struct socket *so;
 2435         struct file *fp = NULL;
 2436         int error = 0, len;
 2437         struct sockaddr *to = NULL;
 2438 #ifdef KTRACE
 2439         struct uio *ktruio = NULL;
 2440 #endif
 2441         struct uio auio;
 2442         struct iovec iov[1];
 2443         cap_rights_t rights;
 2444 
 2445         if (uap->sinfo) {
 2446                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2447                 if (error)
 2448                         return (error);
 2449                 u_sinfo = &sinfo;
 2450         }
 2451 
 2452         rights = CAP_WRITE;
 2453         if (uap->tolen) {
 2454                 error = getsockaddr(&to, uap->to, uap->tolen);
 2455                 if (error) {
 2456                         to = NULL;
 2457                         goto sctp_bad2;
 2458                 }
 2459                 rights |= CAP_CONNECT;
 2460         }
 2461 
 2462         AUDIT_ARG_FD(uap->sd);
 2463         error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
 2464         if (error)
 2465                 goto sctp_bad;
 2466 #ifdef KTRACE
 2467         if (to && (KTRPOINT(td, KTR_STRUCT)))
 2468                 ktrsockaddr(to);
 2469 #endif
 2470 
 2471         iov[0].iov_base = uap->msg;
 2472         iov[0].iov_len = uap->mlen;
 2473 
 2474         so = (struct socket *)fp->f_data;
 2475         if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
 2476                 error = EOPNOTSUPP;
 2477                 goto sctp_bad;
 2478         }
 2479 #ifdef MAC
 2480         error = mac_socket_check_send(td->td_ucred, so);
 2481         if (error)
 2482                 goto sctp_bad;
 2483 #endif /* MAC */
 2484 
 2485         auio.uio_iov =  iov;
 2486         auio.uio_iovcnt = 1;
 2487         auio.uio_segflg = UIO_USERSPACE;
 2488         auio.uio_rw = UIO_WRITE;
 2489         auio.uio_td = td;
 2490         auio.uio_offset = 0;                    /* XXX */
 2491         auio.uio_resid = 0;
 2492         len = auio.uio_resid = uap->mlen;
 2493         CURVNET_SET(so->so_vnet);
 2494         error = sctp_lower_sosend(so, to, &auio,
 2495                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2496                     uap->flags, u_sinfo, td);
 2497         CURVNET_RESTORE();
 2498         if (error) {
 2499                 if (auio.uio_resid != len && (error == ERESTART ||
 2500                     error == EINTR || error == EWOULDBLOCK))
 2501                         error = 0;
 2502                 /* Generation of SIGPIPE can be controlled per socket. */
 2503                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2504                     !(uap->flags & MSG_NOSIGNAL)) {
 2505                         PROC_LOCK(td->td_proc);
 2506                         tdsignal(td, SIGPIPE);
 2507                         PROC_UNLOCK(td->td_proc);
 2508                 }
 2509         }
 2510         if (error == 0)
 2511                 td->td_retval[0] = len - auio.uio_resid;
 2512 #ifdef KTRACE
 2513         if (ktruio != NULL) {
 2514                 ktruio->uio_resid = td->td_retval[0];
 2515                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2516         }
 2517 #endif /* KTRACE */
 2518 sctp_bad:
 2519         if (fp)
 2520                 fdrop(fp, td);
 2521 sctp_bad2:
 2522         if (to)
 2523                 free(to, M_SONAME);
 2524         return (error);
 2525 #else  /* SCTP */
 2526         return (EOPNOTSUPP);
 2527 #endif /* SCTP */
 2528 }
 2529 
 2530 int
 2531 sys_sctp_generic_sendmsg_iov(td, uap)
 2532         struct thread *td;
 2533         struct sctp_generic_sendmsg_iov_args /* {
 2534                 int sd, 
 2535                 struct iovec *iov, 
 2536                 int iovlen, 
 2537                 caddr_t to, 
 2538                 __socklen_t tolen, 
 2539                 struct sctp_sndrcvinfo *sinfo, 
 2540                 int flags
 2541         } */ *uap;
 2542 {
 2543 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2544         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2545         struct socket *so;
 2546         struct file *fp = NULL;
 2547         int error=0, i;
 2548         ssize_t len;
 2549         struct sockaddr *to = NULL;
 2550 #ifdef KTRACE
 2551         struct uio *ktruio = NULL;
 2552 #endif
 2553         struct uio auio;
 2554         struct iovec *iov, *tiov;
 2555         cap_rights_t rights;
 2556 
 2557         if (uap->sinfo) {
 2558                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2559                 if (error)
 2560                         return (error);
 2561                 u_sinfo = &sinfo;
 2562         }
 2563         rights = CAP_WRITE;
 2564         if (uap->tolen) {
 2565                 error = getsockaddr(&to, uap->to, uap->tolen);
 2566                 if (error) {
 2567                         to = NULL;
 2568                         goto sctp_bad2;
 2569                 }
 2570                 rights |= CAP_CONNECT;
 2571         }
 2572 
 2573         AUDIT_ARG_FD(uap->sd);
 2574         error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
 2575         if (error)
 2576                 goto sctp_bad1;
 2577 
 2578 #ifdef COMPAT_FREEBSD32
 2579         if (SV_CURPROC_FLAG(SV_ILP32))
 2580                 error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 2581                     uap->iovlen, &iov, EMSGSIZE);
 2582         else
 2583 #endif
 2584                 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2585         if (error)
 2586                 goto sctp_bad1;
 2587 #ifdef KTRACE
 2588         if (to && (KTRPOINT(td, KTR_STRUCT)))
 2589                 ktrsockaddr(to);
 2590 #endif
 2591 
 2592         so = (struct socket *)fp->f_data;
 2593         if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
 2594                 error = EOPNOTSUPP;
 2595                 goto sctp_bad;
 2596         }
 2597 #ifdef MAC
 2598         error = mac_socket_check_send(td->td_ucred, so);
 2599         if (error)
 2600                 goto sctp_bad;
 2601 #endif /* MAC */
 2602 
 2603         auio.uio_iov = iov;
 2604         auio.uio_iovcnt = uap->iovlen;
 2605         auio.uio_segflg = UIO_USERSPACE;
 2606         auio.uio_rw = UIO_WRITE;
 2607         auio.uio_td = td;
 2608         auio.uio_offset = 0;                    /* XXX */
 2609         auio.uio_resid = 0;
 2610         tiov = iov;
 2611         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2612                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2613                         error = EINVAL;
 2614                         goto sctp_bad;
 2615                 }
 2616         }
 2617         len = auio.uio_resid;
 2618         CURVNET_SET(so->so_vnet);
 2619         error = sctp_lower_sosend(so, to, &auio,
 2620                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2621                     uap->flags, u_sinfo, td);
 2622         CURVNET_RESTORE();
 2623         if (error) {
 2624                 if (auio.uio_resid != len && (error == ERESTART ||
 2625                     error == EINTR || error == EWOULDBLOCK))
 2626                         error = 0;
 2627                 /* Generation of SIGPIPE can be controlled per socket */
 2628                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2629                     !(uap->flags & MSG_NOSIGNAL)) {
 2630                         PROC_LOCK(td->td_proc);
 2631                         tdsignal(td, SIGPIPE);
 2632                         PROC_UNLOCK(td->td_proc);
 2633                 }
 2634         }
 2635         if (error == 0)
 2636                 td->td_retval[0] = len - auio.uio_resid;
 2637 #ifdef KTRACE
 2638         if (ktruio != NULL) {
 2639                 ktruio->uio_resid = td->td_retval[0];
 2640                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2641         }
 2642 #endif /* KTRACE */
 2643 sctp_bad:
 2644         free(iov, M_IOV);
 2645 sctp_bad1:
 2646         if (fp)
 2647                 fdrop(fp, td);
 2648 sctp_bad2:
 2649         if (to)
 2650                 free(to, M_SONAME);
 2651         return (error);
 2652 #else  /* SCTP */
 2653         return (EOPNOTSUPP);
 2654 #endif /* SCTP */
 2655 }
 2656 
 2657 int
 2658 sys_sctp_generic_recvmsg(td, uap)
 2659         struct thread *td;
 2660         struct sctp_generic_recvmsg_args /* {
 2661                 int sd, 
 2662                 struct iovec *iov, 
 2663                 int iovlen,
 2664                 struct sockaddr *from, 
 2665                 __socklen_t *fromlenaddr,
 2666                 struct sctp_sndrcvinfo *sinfo, 
 2667                 int *msg_flags
 2668         } */ *uap;
 2669 {
 2670 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2671         uint8_t sockbufstore[256];
 2672         struct uio auio;
 2673         struct iovec *iov, *tiov;
 2674         struct sctp_sndrcvinfo sinfo;
 2675         struct socket *so;
 2676         struct file *fp = NULL;
 2677         struct sockaddr *fromsa;
 2678         int fromlen;
 2679         ssize_t len;
 2680         int i, msg_flags;
 2681         int error = 0;
 2682 #ifdef KTRACE
 2683         struct uio *ktruio = NULL;
 2684 #endif
 2685 
 2686         AUDIT_ARG_FD(uap->sd);
 2687         error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL);
 2688         if (error) {
 2689                 return (error);
 2690         }
 2691 #ifdef COMPAT_FREEBSD32
 2692         if (SV_CURPROC_FLAG(SV_ILP32))
 2693                 error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 2694                     uap->iovlen, &iov, EMSGSIZE);
 2695         else
 2696 #endif
 2697                 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2698         if (error)
 2699                 goto out1;
 2700 
 2701         so = fp->f_data;
 2702         if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
 2703                 error = EOPNOTSUPP;
 2704                 goto out;
 2705         }
 2706 #ifdef MAC
 2707         error = mac_socket_check_receive(td->td_ucred, so);
 2708         if (error) {
 2709                 goto out;
 2710         }
 2711 #endif /* MAC */
 2712 
 2713         if (uap->fromlenaddr) {
 2714                 error = copyin(uap->fromlenaddr,
 2715                     &fromlen, sizeof (fromlen));
 2716                 if (error) {
 2717                         goto out;
 2718                 }
 2719         } else {
 2720                 fromlen = 0;
 2721         }
 2722         if (uap->msg_flags) {
 2723                 error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
 2724                 if (error) {
 2725                         goto out;
 2726                 }
 2727         } else {
 2728                 msg_flags = 0;
 2729         }
 2730         auio.uio_iov = iov;
 2731         auio.uio_iovcnt = uap->iovlen;
 2732         auio.uio_segflg = UIO_USERSPACE;
 2733         auio.uio_rw = UIO_READ;
 2734         auio.uio_td = td;
 2735         auio.uio_offset = 0;                    /* XXX */
 2736         auio.uio_resid = 0;
 2737         tiov = iov;
 2738         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2739                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2740                         error = EINVAL;
 2741                         goto out;
 2742                 }
 2743         }
 2744         len = auio.uio_resid;
 2745         fromsa = (struct sockaddr *)sockbufstore;
 2746 
 2747 #ifdef KTRACE
 2748         if (KTRPOINT(td, KTR_GENIO))
 2749                 ktruio = cloneuio(&auio);
 2750 #endif /* KTRACE */
 2751         memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
 2752         CURVNET_SET(so->so_vnet);
 2753         error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
 2754                     fromsa, fromlen, &msg_flags,
 2755                     (struct sctp_sndrcvinfo *)&sinfo, 1);
 2756         CURVNET_RESTORE();
 2757         if (error) {
 2758                 if (auio.uio_resid != len && (error == ERESTART ||
 2759                     error == EINTR || error == EWOULDBLOCK))
 2760                         error = 0;
 2761         } else {
 2762                 if (uap->sinfo)
 2763                         error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
 2764         }
 2765 #ifdef KTRACE
 2766         if (ktruio != NULL) {
 2767                 ktruio->uio_resid = len - auio.uio_resid;
 2768                 ktrgenio(uap->sd, UIO_READ, ktruio, error);
 2769         }
 2770 #endif /* KTRACE */
 2771         if (error)
 2772                 goto out;
 2773         td->td_retval[0] = len - auio.uio_resid;
 2774 
 2775         if (fromlen && uap->from) {
 2776                 len = fromlen;
 2777                 if (len <= 0 || fromsa == 0)
 2778                         len = 0;
 2779                 else {
 2780                         len = MIN(len, fromsa->sa_len);
 2781                         error = copyout(fromsa, uap->from, (size_t)len);
 2782                         if (error)
 2783                                 goto out;
 2784                 }
 2785                 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
 2786                 if (error) {
 2787                         goto out;
 2788                 }
 2789         }
 2790 #ifdef KTRACE
 2791         if (KTRPOINT(td, KTR_STRUCT))
 2792                 ktrsockaddr(fromsa);
 2793 #endif
 2794         if (uap->msg_flags) {
 2795                 error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
 2796                 if (error) {
 2797                         goto out;
 2798                 }
 2799         }
 2800 out:
 2801         free(iov, M_IOV);
 2802 out1:
 2803         if (fp) 
 2804                 fdrop(fp, td);
 2805 
 2806         return (error);
 2807 #else  /* SCTP */
 2808         return (EOPNOTSUPP);
 2809 #endif /* SCTP */
 2810 }

Cache object: e0ebf852e70c16556b6ae3a32c79ea61


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.