The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * sendfile(2) and related extensions:
    6  * Copyright (c) 1998, David Greenman. All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/9.1/sys/kern/uipc_syscalls.c 237054 2012-06-14 07:51:37Z glebius $");
   37 
   38 #include "opt_capsicum.h"
   39 #include "opt_inet.h"
   40 #include "opt_inet6.h"
   41 #include "opt_sctp.h"
   42 #include "opt_compat.h"
   43 #include "opt_ktrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/capability.h>
   48 #include <sys/kernel.h>
   49 #include <sys/lock.h>
   50 #include <sys/mutex.h>
   51 #include <sys/sysproto.h>
   52 #include <sys/malloc.h>
   53 #include <sys/filedesc.h>
   54 #include <sys/event.h>
   55 #include <sys/proc.h>
   56 #include <sys/fcntl.h>
   57 #include <sys/file.h>
   58 #include <sys/filio.h>
   59 #include <sys/jail.h>
   60 #include <sys/mount.h>
   61 #include <sys/mbuf.h>
   62 #include <sys/protosw.h>
   63 #include <sys/sf_buf.h>
   64 #include <sys/sysent.h>
   65 #include <sys/socket.h>
   66 #include <sys/socketvar.h>
   67 #include <sys/signalvar.h>
   68 #include <sys/syscallsubr.h>
   69 #include <sys/sysctl.h>
   70 #include <sys/uio.h>
   71 #include <sys/vnode.h>
   72 #ifdef KTRACE
   73 #include <sys/ktrace.h>
   74 #endif
   75 #ifdef COMPAT_FREEBSD32
   76 #include <compat/freebsd32/freebsd32_util.h>
   77 #endif
   78 
   79 #include <net/vnet.h>
   80 
   81 #include <security/audit/audit.h>
   82 #include <security/mac/mac_framework.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_object.h>
   86 #include <vm/vm_page.h>
   87 #include <vm/vm_pageout.h>
   88 #include <vm/vm_kern.h>
   89 #include <vm/vm_extern.h>
   90 
   91 #if defined(INET) || defined(INET6)
   92 #ifdef SCTP
   93 #include <netinet/sctp.h>
   94 #include <netinet/sctp_peeloff.h>
   95 #endif /* SCTP */
   96 #endif /* INET || INET6 */
   97 
      /*
       * Forward declarations for file-local helpers.  sendit() and accept1()
       * are defined below; the other helpers declared static here are expected
       * to be defined later in this file.  For accept1(), a non-zero `compat'
       * selects the old (COMPAT_OLDSOCK) sockaddr layout.
       */
   98 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
   99 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
  100 
  101 static int accept1(struct thread *td, struct accept_args *uap, int compat);
  102 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
  103 static int getsockname1(struct thread *td, struct getsockname_args *uap,
  104                         int compat);
  105 static int getpeername1(struct thread *td, struct getpeername_args *uap,
  106                         int compat);
  107 
  108 /*
  109  * NSFBUFS-related variables and associated sysctls
       *
       * The three counters are exported under kern.ipc: nsfbufs (the maximum)
       * is registered with CTLFLAG_RDTUN, while nsfbufspeak and nsfbufsused
       * are registered with CTLFLAG_RD.
  110  */
  111 int nsfbufs;
  112 int nsfbufspeak;
  113 int nsfbufsused;
  114 
  115 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
  116     "Maximum number of sendfile(2) sf_bufs available");
  117 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
  118     "Number of sendfile(2) sf_bufs at peak usage");
  119 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
  120     "Number of sendfile(2) sf_bufs in use");
  121 
  122 /*
  123  * Convert a user file descriptor to a kernel file entry and check that, if
  124  * it is a capability, the required rights are present. A reference on the
  125  * file entry is held upon returning successfully; callers in this file
       * release it with fdrop().
       *
       * Returns 0 and stores the file in *fpp (and its f_flag in *fflagp when
       * fflagp is non-NULL).  Returns EBADF if fdp is NULL or the descriptor
       * cannot be looked up, the cap_funwrap() error if the capability check
       * fails (CAPABILITIES kernels only), or ENOTSOCK if the file is not of
       * type DTYPE_SOCKET.  On every error path the reference taken here is
       * dropped again and *fpp is not written.
  126  */
  127 static int
  128 getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
  129     struct file **fpp, u_int *fflagp)
  130 {
  131         struct file *fp;
  132 #ifdef CAPABILITIES
  133         struct file *fp_fromcap;
  134         int error;
  135 #endif
  136 
  137         fp = NULL;
  138         if ((fdp == NULL) || ((fp = fget_unlocked(fdp, fd)) == NULL))
  139                 return (EBADF);
  140 #ifdef CAPABILITIES
  141         /*
  142          * If the file descriptor is for a capability, test rights and use
  143          * the file descriptor referenced by the capability.
  144          */
  145         error = cap_funwrap(fp, rights, &fp_fromcap);
  146         if (error) {
  147                 fdrop(fp, curthread);
  148                 return (error);
  149         }
              /*
               * cap_funwrap() handed back a different file: move our reference
               * from the original entry to the one it returned.
               */
  150         if (fp != fp_fromcap) {
  151                 fhold(fp_fromcap);
  152                 fdrop(fp, curthread);
  153                 fp = fp_fromcap;
  154         }
  155 #endif /* CAPABILITIES */
  156         if (fp->f_type != DTYPE_SOCKET) {
  157                 fdrop(fp, curthread);
  158                 return (ENOTSOCK);
  159         }
  160         if (fflagp != NULL)
  161                 *fflagp = fp->f_flag;
  162         *fpp = fp;
  163         return (0);
  164 }
  165 
  166 /*
  167  * System call interface to the socket abstraction.
       *
       * COMPAT_OLDSOCK is defined whenever the kernel is built with COMPAT_43.
       * It gates the old-sockaddr handling in accept1() and sendit() and the
       * oaccept(), osend() and osendmsg() entry points below.
  168  */
  169 #if defined(COMPAT_43)
  170 #define COMPAT_OLDSOCK
  171 #endif
  172 
      /*
       * socket(2): create a socket and install it in a new file descriptor.
       *
       * The request is audited and, on MAC kernels, checked with
       * mac_socket_check_create() before a descriptor is reserved with
       * falloc().  On success the new descriptor number is returned in
       * td->td_retval[0].  On failure the error of the failing step is
       * returned; if socreate() failed, the reserved descriptor is closed.
       */
  173 int
  174 sys_socket(td, uap)
  175         struct thread *td;
  176         struct socket_args /* {
  177                 int     domain;
  178                 int     type;
  179                 int     protocol;
  180         } */ *uap;
  181 {
  182         struct filedesc *fdp;
  183         struct socket *so;
  184         struct file *fp;
  185         int fd, error;
  186 
  187         AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
  188 #ifdef MAC
  189         error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
  190             uap->protocol);
  191         if (error)
  192                 return (error);
  193 #endif
  194         fdp = td->td_proc->p_fd;
  195         error = falloc(td, &fp, &fd, 0);
  196         if (error)
  197                 return (error);
  198         /* An extra reference on `fp' has been held for us by falloc(). */
  199         error = socreate(uap->domain, &so, uap->type, uap->protocol,
  200             td->td_ucred, td);
  201         if (error) {
                      /* No socket was created: give the descriptor slot back. */
  202                 fdclose(fdp, fp, fd, td);
  203         } else {
                      /* Attach the socket to the file as a read/write socket. */
  204                 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
  205                 td->td_retval[0] = fd;
  206         }
              /* Release the extra falloc() reference on both paths. */
  207         fdrop(fp, td);
  208         return (error);
  209 }
  210 
  211 /* ARGSUSED */
      /*
       * bind(2) system call entry point.
       *
       * Obtains a kernel sockaddr for the user-supplied uap->name/uap->namelen
       * through getsockaddr(), passes it to kern_bind(), and frees it
       * (M_SONAME) afterwards.  Returns the getsockaddr() error if that step
       * fails, otherwise the result of kern_bind().
       */
  212 int
  213 sys_bind(td, uap)
  214         struct thread *td;
  215         struct bind_args /* {
  216                 int     s;
  217                 caddr_t name;
  218                 int     namelen;
  219         } */ *uap;
  220 {
  221         struct sockaddr *sa;
  222         int error;
  223 
  224         if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
  225                 return (error);
  226 
  227         error = kern_bind(td, uap->s, sa);
  228         free(sa, M_SONAME);
  229         return (error);
  230 }
  231 
      /*
       * Bind the socket behind descriptor `fd' to the kernel-space address
       * `sa'.
       *
       * The descriptor is resolved with getsock_cap() requiring CAP_BIND; its
       * error is returned unchanged if that fails.  On MAC kernels sobind() is
       * only called when mac_socket_check_bind() allows it.  The file
       * reference is dropped before returning.  `sa' is not freed here; it
       * remains owned by the caller.
       */
  232 int
  233 kern_bind(td, fd, sa)
  234         struct thread *td;
  235         int fd;
  236         struct sockaddr *sa;
  237 {
  238         struct socket *so;
  239         struct file *fp;
  240         int error;
  241 
  242         AUDIT_ARG_FD(fd);
  243         error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL);
  244         if (error)
  245                 return (error);
  246         so = fp->f_data;
  247 #ifdef KTRACE
  248         if (KTRPOINT(td, KTR_STRUCT))
  249                 ktrsockaddr(sa);
  250 #endif
  251 #ifdef MAC
  252         error = mac_socket_check_bind(td->td_ucred, so, sa);
              /* The `if' below guards the sobind() call that follows #endif. */
  253         if (error == 0)
  254 #endif
  255                 error = sobind(so, sa, td);
  256         fdrop(fp, td);
  257         return (error);
  258 }
  259 
  260 /* ARGSUSED */
      /*
       * listen(2) system call entry point.
       *
       * Resolves uap->s with getsock_cap() requiring CAP_LISTEN and calls
       * solisten() with uap->backlog.  On MAC kernels solisten() is only
       * called when mac_socket_check_listen() allows it.  Returns the first
       * error encountered, or the result of solisten().
       */
  261 int
  262 sys_listen(td, uap)
  263         struct thread *td;
  264         struct listen_args /* {
  265                 int     s;
  266                 int     backlog;
  267         } */ *uap;
  268 {
  269         struct socket *so;
  270         struct file *fp;
  271         int error;
  272 
  273         AUDIT_ARG_FD(uap->s);
  274         error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL);
  275         if (error == 0) {
  276                 so = fp->f_data;
  277 #ifdef MAC
  278                 error = mac_socket_check_listen(td->td_ucred, so);
                      /* The `if' below guards the solisten() call after #endif. */
  279                 if (error == 0)
  280 #endif
  281                         error = solisten(so, uap->backlog, td);
                      /* Release the reference taken by getsock_cap(). */
  282                 fdrop(fp, td);
  283         }
  284         return(error);
  285 }
  286 
  287 /*
  288  * accept1()
       *
       * Common implementation of accept(2) and, when COMPAT_OLDSOCK is defined,
       * oaccept().  `compat' is non-zero for the old entry point, in which case
       * the address family is stored through the osockaddr layout before the
       * address is copied out.
       *
       * If uap->name is NULL the peer address is not requested and the call is
       * passed straight to kern_accept().  Otherwise the caller's buffer length
       * is read from uap->anamelen, the connection is accepted, and the address
       * and the length reported by kern_accept() are copied back out.  If
       * either copyout fails, the descriptor that kern_accept() left in
       * td->td_retval[0] is closed again before the error is returned.
  289  */
  290 static int
  291 accept1(td, uap, compat)
  292         struct thread *td;
  293         struct accept_args /* {
  294                 int     s;
  295                 struct sockaddr * __restrict name;
  296                 socklen_t       * __restrict anamelen;
  297         } */ *uap;
  298         int compat;
  299 {
  300         struct sockaddr *name;
  301         socklen_t namelen;
  302         struct file *fp;
  303         int error;
  304 
  305         if (uap->name == NULL)
  306                 return (kern_accept(td, uap->s, NULL, NULL, NULL));
  307 
  308         error = copyin(uap->anamelen, &namelen, sizeof (namelen));
  309         if (error)
  310                 return (error);
  311 
  312         error = kern_accept(td, uap->s, &name, &namelen, &fp);
  313 
  314         /*
  315          * return a namelen of zero for older code which might
  316          * ignore the return value from accept.
               *
               * NOTE(review): kern_accept() only zeroes namelen when soaccept()
               * fails; on its other error paths the value read from userspace
               * is written back unchanged.  The copyout result is deliberately
               * ignored.
  317          */
  318         if (error) {
  319                 (void) copyout(&namelen,
  320                     uap->anamelen, sizeof(*uap->anamelen));
  321                 return (error);
  322         }
  323 
              /* error is always 0 here; name is NULL if no address was returned. */
  324         if (error == 0 && name != NULL) {
  325 #ifdef COMPAT_OLDSOCK
  326                 if (compat)
  327                         ((struct osockaddr *)name)->sa_family =
  328                             name->sa_family;
  329 #endif
  330                 error = copyout(name, uap->name, namelen);
  331         }
  332         if (error == 0)
  333                 error = copyout(&namelen, uap->anamelen,
  334                     sizeof(namelen));
              /* Could not report the result: undo the descriptor installation. */
  335         if (error)
  336                 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
              /* Drop the reference kern_accept() returned through `fp'. */
  337         fdrop(fp, td);
  338         free(name, M_SONAME);
  339         return (error);
  340 }
  341 
      /*
       * Accept a connection on the listening socket behind descriptor `s'.
       *
       * name/namelen: when `name' is non-NULL, *name receives the peer address
       *     (M_SONAME, owned by the caller) or NULL, and *namelen is lowered to
       *     the address length if it was larger, or set to 0 when soaccept()
       *     fails or returns no address.  `namelen' is only dereferenced when
       *     `name' is non-NULL.
       * fp: when non-NULL, *fp receives the new file with a reference held for
       *     the caller on success and NULL on failure.  When `fp' is NULL that
       *     reference is dropped here.
       *
       * On success the new descriptor number is stored in td->td_retval[0] and
       * 0 is returned.  Errors visible here include EINVAL (socket not
       * listening), EWOULDBLOCK (non-blocking socket with an empty queue), a
       * pending so_error on the listening socket, and whatever getsock_cap(),
       * falloc(), msleep() or soaccept() report.
       */
  342 int
  343 kern_accept(struct thread *td, int s, struct sockaddr **name,
  344     socklen_t *namelen, struct file **fp)
  345 {
  346         struct filedesc *fdp;
  347         struct file *headfp, *nfp = NULL;
  348         struct sockaddr *sa = NULL;
  349         int error;
  350         struct socket *head, *so;
  351         int fd;
  352         u_int fflag;
  353         pid_t pgid;
  354         int tmp;
  355 
  356         if (name) {
  357                 *name = NULL;
                      /*
                       * NOTE(review): this test can only fire if socklen_t is a
                       * signed type -- confirm against its definition.
                       */
  358                 if (*namelen < 0)
  359                         return (EINVAL);
  360         }
  361 
  362         AUDIT_ARG_FD(s);
  363         fdp = td->td_proc->p_fd;
  364         error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag);
  365         if (error)
  366                 return (error);
  367         head = headfp->f_data;
  368         if ((head->so_options & SO_ACCEPTCONN) == 0) {
  369                 error = EINVAL;
  370                 goto done;
  371         }
  372 #ifdef MAC
  373         error = mac_socket_check_accept(td->td_ucred, head);
  374         if (error != 0)
  375                 goto done;
  376 #endif
              /* Reserve the new descriptor before taking the accept lock. */
  377         error = falloc(td, &nfp, &fd, 0);
  378         if (error)
  379                 goto done;
  380         ACCEPT_LOCK();
              /* Non-blocking listener with nothing queued: do not sleep. */
  381         if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
  382                 ACCEPT_UNLOCK();
  383                 error = EWOULDBLOCK;
  384                 goto noconnection;
  385         }
              /*
               * Sleep until a completed connection is queued or an error is
               * posted on the listening socket.  msleep() is handed accept_mtx
               * and may be interrupted by a signal (PCATCH).
               */
  386         while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
  387                 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
  388                         head->so_error = ECONNABORTED;
  389                         break;
  390                 }
  391                 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
  392                     "accept", 0);
  393                 if (error) {
  394                         ACCEPT_UNLOCK();
  395                         goto noconnection;
  396                 }
  397         }
              /* Report a pending error on the listener once, then clear it. */
  398         if (head->so_error) {
  399                 error = head->so_error;
  400                 head->so_error = 0;
  401                 ACCEPT_UNLOCK();
  402                 goto noconnection;
  403         }
  404         so = TAILQ_FIRST(&head->so_comp);
  405         KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
  406         KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
  407 
  408         /*
  409          * Before changing the flags on the socket, we have to bump the
  410          * reference count.  Otherwise, if the protocol calls sofree(),
  411          * the socket will be released due to a zero refcount.
  412          */
  413         SOCK_LOCK(so);                  /* soref() and so_state update */
  414         soref(so);                      /* file descriptor reference */
  415 
              /*
               * Detach the connection from the listener's completed queue.  It
               * inherits the listener's SS_NBIO bit.
               */
  416         TAILQ_REMOVE(&head->so_comp, so, so_list);
  417         head->so_qlen--;
  418         so->so_state |= (head->so_state & SS_NBIO);
  419         so->so_qstate &= ~SQ_COMP;
  420         so->so_head = NULL;
  421 
  422         SOCK_UNLOCK(so);
  423         ACCEPT_UNLOCK();
  424 
  425         /* An extra reference on `nfp' has been held for us by falloc(). */
  426         td->td_retval[0] = fd;
  427 
  428         /* connection has been removed from the listen queue */
  429         KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
  430 
              /* Propagate the listener's SIGIO owner, if any, to the new socket. */
  431         pgid = fgetown(&head->so_sigio);
  432         if (pgid != 0)
  433                 fsetown(pgid, &so->so_sigio);
  434 
              /* The new file takes the flags of the listening descriptor. */
  435         finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
  436         /* Sync socket nonblocking/async state with file flags */
  437         tmp = fflag & FNONBLOCK;
  438         (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
  439         tmp = fflag & FASYNC;
  440         (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
  441         sa = 0;
  442         error = soaccept(so, &sa);
  443         if (error) {
  444                 /*
  445                  * return a namelen of zero for older code which might
  446                  * ignore the return value from accept.
  447                  */
  448                 if (name)
  449                         *namelen = 0;
  450                 goto noconnection;
  451         }
              /* Accepted, but the protocol supplied no peer address. */
  452         if (sa == NULL) {
  453                 if (name)
  454                         *namelen = 0;
  455                 goto done;
  456         }
  457         if (name) {
  458                 /* check sa_len before it is destroyed */
  459                 if (*namelen > sa->sa_len)
  460                         *namelen = sa->sa_len;
  461 #ifdef KTRACE
  462                 if (KTRPOINT(td, KTR_STRUCT))
  463                         ktrsockaddr(sa);
  464 #endif
                      /* Ownership of the address moves to the caller. */
  465                 *name = sa;
  466                 sa = NULL;
  467         }
      /*
       * Reached on the error paths above and by falling through on success;
       * the fdclose() below only runs when error is non-zero.
       */
  468 noconnection:
  469         if (sa)
  470                 free(sa, M_SONAME);
  471 
  472         /*
  473          * close the new descriptor, assuming someone hasn't ripped it
  474          * out from under us.
  475          */
  476         if (error)
  477                 fdclose(fdp, nfp, fd, td);
  478 
  479         /*
  480          * Release explicitly held references before returning.  We return
  481          * a reference on nfp to the caller on success if they request it.
  482          */
  483 done:
  484         if (fp != NULL) {
  485                 if (error == 0) {
  486                         *fp = nfp;
  487                         nfp = NULL;
  488                 } else
  489                         *fp = NULL;
  490         }
  491         if (nfp != NULL)
  492                 fdrop(nfp, td);
  493         fdrop(headfp, td);
  494         return (error);
  495 }
  496 
      /*
       * accept(2) system call entry point: accept1() with compat == 0, so the
       * returned address is copied out without the COMPAT_OLDSOCK adjustment.
       */
  497 int
  498 sys_accept(td, uap)
  499         struct thread *td;
  500         struct accept_args *uap;
  501 {
  502 
  503         return (accept1(td, uap, 0));
  504 }
  505 
  506 #ifdef COMPAT_OLDSOCK
      /*
       * Old-style accept entry point, built only with COMPAT_OLDSOCK:
       * accept1() with compat == 1, which makes it store the address family
       * through the osockaddr layout before copying the address out.
       */
  507 int
  508 oaccept(td, uap)
  509         struct thread *td;
  510         struct accept_args *uap;
  511 {
  512 
  513         return (accept1(td, uap, 1));
  514 }
  515 #endif /* COMPAT_OLDSOCK */
  516 
  517 /* ARGSUSED */
      /*
       * connect(2) system call entry point.
       *
       * Obtains a kernel sockaddr for the user-supplied uap->name/uap->namelen
       * through getsockaddr(), passes it to kern_connect(), and frees it
       * (M_SONAME) afterwards.  Returns the getsockaddr() error if that step
       * fails, otherwise the result of kern_connect().
       */
  518 int
  519 sys_connect(td, uap)
  520         struct thread *td;
  521         struct connect_args /* {
  522                 int     s;
  523                 caddr_t name;
  524                 int     namelen;
  525         } */ *uap;
  526 {
  527         struct sockaddr *sa;
  528         int error;
  529 
  530         error = getsockaddr(&sa, uap->name, uap->namelen);
  531         if (error)
  532                 return (error);
  533 
  534         error = kern_connect(td, uap->s, sa);
  535         free(sa, M_SONAME);
  536         return (error);
  537 }
  538 
  539 
      /*
       * Connect the socket behind descriptor `fd' to the kernel-space address
       * `sa'.  `sa' is not freed here.
       *
       * Returns EALREADY if the socket is already marked SS_ISCONNECTING, and
       * EINPROGRESS if soconnect() succeeded on a non-blocking (SS_NBIO) socket
       * that is still connecting.  Otherwise the thread sleeps until
       * SS_ISCONNECTING clears or so_error is set, and the socket's so_error
       * (cleared on read) is returned.  If the sleep fails with EINTR or
       * ERESTART, SS_ISCONNECTING is left set; ERESTART is reported to the
       * caller as EINTR.
       */
  540 int
  541 kern_connect(td, fd, sa)
  542         struct thread *td;
  543         int fd;
  544         struct sockaddr *sa;
  545 {
  546         struct socket *so;
  547         struct file *fp;
  548         int error;
  549         int interrupted = 0;
  550 
  551         AUDIT_ARG_FD(fd);
  552         error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL);
  553         if (error)
  554                 return (error);
  555         so = fp->f_data;
  556         if (so->so_state & SS_ISCONNECTING) {
  557                 error = EALREADY;
  558                 goto done1;
  559         }
  560 #ifdef KTRACE
  561         if (KTRPOINT(td, KTR_STRUCT))
  562                 ktrsockaddr(sa);
  563 #endif
  564 #ifdef MAC
  565         error = mac_socket_check_connect(td->td_ucred, so, sa);
  566         if (error)
  567                 goto bad;
  568 #endif
  569         error = soconnect(so, sa, td);
  570         if (error)
  571                 goto bad;
              /* Non-blocking socket still connecting: report and do not wait. */
  572         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
  573                 error = EINPROGRESS;
  574                 goto done1;
  575         }
  576         SOCK_LOCK(so);
              /* Wait, interruptibly, for the connection attempt to finish. */
  577         while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  578                 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
  579                     "connec", 0);
  580                 if (error) {
  581                         if (error == EINTR || error == ERESTART)
  582                                 interrupted = 1;
  583                         break;
  584                 }
  585         }
              /* Not interrupted: hand the socket's pending error to the caller. */
  586         if (error == 0) {
  587                 error = so->so_error;
  588                 so->so_error = 0;
  589         }
  590         SOCK_UNLOCK(so);
  591 bad:
              /* Keep SS_ISCONNECTING set when the wait was cut short by a signal. */
  592         if (!interrupted)
  593                 so->so_state &= ~SS_ISCONNECTING;
  594         if (error == ERESTART)
  595                 error = EINTR;
  596 done1:
  597         fdrop(fp, td);
  598         return (error);
  599 }
  600 
      /*
       * Create a pair of connected sockets and install them in two new file
       * descriptors, whose numbers are stored in rsv[0] and rsv[1] (a
       * kernel-space array of two ints).
       *
       * Both sockets are created first, then one descriptor is allocated for
       * each, then the sockets are connected with soconnect2().  For
       * SOCK_DGRAM the connection is made in both directions.  Returns 0 on
       * success; on failure everything acquired so far is released through
       * the free4..free1 labels, in reverse order of acquisition, and the
       * error is returned.
       */
  601 int
  602 kern_socketpair(struct thread *td, int domain, int type, int protocol,
  603     int *rsv)
  604 {
  605         struct filedesc *fdp = td->td_proc->p_fd;
  606         struct file *fp1, *fp2;
  607         struct socket *so1, *so2;
  608         int fd, error;
  609 
  610         AUDIT_ARG_SOCKET(domain, type, protocol);
  611 #ifdef MAC
  612         /* We might want to have a separate check for socket pairs. */
  613         error = mac_socket_check_create(td->td_ucred, domain, type,
  614             protocol);
  615         if (error)
  616                 return (error);
  617 #endif
  618         error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
  619         if (error)
  620                 return (error);
  621         error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
  622         if (error)
  623                 goto free1;
  624         /* On success an extra reference to `fp1' and `fp2' is held by falloc(). */
  625         error = falloc(td, &fp1, &fd, 0);
  626         if (error)
  627                 goto free2;
  628         rsv[0] = fd;
  629         fp1->f_data = so1;      /* so1 already has ref count */
  630         error = falloc(td, &fp2, &fd, 0);
  631         if (error)
  632                 goto free3;
  633         fp2->f_data = so2;      /* so2 already has ref count */
  634         rsv[1] = fd;
  635         error = soconnect2(so1, so2);
  636         if (error)
  637                 goto free4;
  638         if (type == SOCK_DGRAM) {
  639                 /*
  640                  * Datagram socket connection is asymmetric.
  641                  */
  642                  error = soconnect2(so2, so1);
  643                  if (error)
  644                         goto free4;
  645         }
              /* Only now are the files initialized as read/write sockets. */
  646         finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
  647         finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
              /* Drop the extra falloc() references. */
  648         fdrop(fp1, td);
  649         fdrop(fp2, td);
  650         return (0);
      /* Error unwinding: each label undoes one acquisition and falls through. */
  651 free4:
  652         fdclose(fdp, fp2, rsv[1], td);
  653         fdrop(fp2, td);
  654 free3:
  655         fdclose(fdp, fp1, rsv[0], td);
  656         fdrop(fp1, td);
  657 free2:
  658         if (so2 != NULL)
  659                 (void)soclose(so2);
  660 free1:
  661         if (so1 != NULL)
  662                 (void)soclose(so1);
  663         return (error);
  664 }
  665 
      /*
       * socketpair(2) system call entry point.
       *
       * Creates the pair with kern_socketpair() and copies the two descriptor
       * numbers out to uap->rsv.  If the copyout fails, both descriptors are
       * closed again (close errors ignored) and the copyout error is returned.
       */
  666 int
  667 sys_socketpair(struct thread *td, struct socketpair_args *uap)
  668 {
  669         int error, sv[2];
  670 
  671         error = kern_socketpair(td, uap->domain, uap->type,
  672             uap->protocol, sv);
  673         if (error)
  674                 return (error);
  675         error = copyout(sv, uap->rsv, 2 * sizeof(int));
  676         if (error) {
                      /* The caller never learned the numbers: do not leak them. */
  677                 (void)kern_close(td, sv[0]);
  678                 (void)kern_close(td, sv[1]);
  679         }
  680         return (error);
  681 }
  682 
      /*
       * Common front end for the send-family system calls in this file.
       *
       * `mp' is a kernel copy of the message header.  Its msg_name and
       * msg_control members are still the caller-supplied values on entry.
       * This function obtains a kernel sockaddr for msg_name (when set) via
       * getsockaddr() and an mbuf chain for msg_control (when set) via
       * sockargs(), then calls kern_sendit() with UIO_USERSPACE data.
       *
       * Side effect: mp->msg_name is overwritten with the kernel sockaddr,
       * which is freed before returning, so it must not be used afterwards.
       *
       * Returns ECAPMODE (CAPABILITY_MODE kernels) if a destination address is
       * given while in capability mode, EINVAL if msg_controllen is smaller
       * than a cmsghdr (not enforced for MSG_COMPAT messages), any error from
       * getsockaddr()/sockargs(), or the result of kern_sendit().
       */
  683 static int
  684 sendit(td, s, mp, flags)
  685         struct thread *td;
  686         int s;
  687         struct msghdr *mp;
  688         int flags;
  689 {
  690         struct mbuf *control;
  691         struct sockaddr *to;
  692         int error;
  693 
  694 #ifdef CAPABILITY_MODE
  695         if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL))
  696                 return (ECAPMODE);
  697 #endif
  698 
  699         if (mp->msg_name != NULL) {
  700                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
  701                 if (error) {
                              /* Make sure the cleanup at `bad' frees nothing. */
  702                         to = NULL;
  703                         goto bad;
  704                 }
  705                 mp->msg_name = to;
  706         } else {
  707                 to = NULL;
  708         }
  709 
  710         if (mp->msg_control) {
  711                 if (mp->msg_controllen < sizeof(struct cmsghdr)
  712 #ifdef COMPAT_OLDSOCK
  713                     && mp->msg_flags != MSG_COMPAT
  714 #endif
  715                 ) {
  716                         error = EINVAL;
  717                         goto bad;
  718                 }
  719                 error = sockargs(&control, mp->msg_control,
  720                     mp->msg_controllen, MT_CONTROL);
  721                 if (error)
  722                         goto bad;
  723 #ifdef COMPAT_OLDSOCK
                      /*
                       * Old-style messages carry no cmsghdr: prepend one that
                       * labels the data as SOL_SOCKET/SCM_RIGHTS.
                       */
  724                 if (mp->msg_flags == MSG_COMPAT) {
  725                         struct cmsghdr *cm;
  726 
  727                         M_PREPEND(control, sizeof(*cm), M_WAIT);
  728                         cm = mtod(control, struct cmsghdr *);
  729                         cm->cmsg_len = control->m_len;
  730                         cm->cmsg_level = SOL_SOCKET;
  731                         cm->cmsg_type = SCM_RIGHTS;
  732                 }
  733 #endif
  734         } else {
  735                 control = NULL;
  736         }
  737 
              /* `control' is handed to kern_sendit() and not touched again here. */
  738         error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
  739 
  740 bad:
  741         if (to)
  742                 free(to, M_SONAME);
  743         return (error);
  744 }
  745 
      /*
       * Send the message described by `mp' on the socket behind descriptor `s'.
       *
       * mp:      message header; msg_name (if non-NULL) must already be a
       *          kernel sockaddr, msg_iov/msg_iovlen describe the data.
       * flags:   send flags, passed on to sosend(); MSG_NOSIGNAL suppresses the
       *          SIGPIPE raised on EPIPE.
       * control: optional control mbuf chain, may be NULL.  Ownership passes
       *          to this function: the chain is given to sosend() once that
       *          call is reached (sosend() is expected to dispose of it), and
       *          is freed here with m_freem() on every earlier failure exit.
       *          Callers must not free it after this function returns.
       * segflg:  address space of the iovec data (e.g. UIO_USERSPACE).
       *
       * The descriptor needs CAP_WRITE, plus CAP_CONNECT when a destination
       * address is supplied.  On success the number of bytes sent is stored in
       * td->td_retval[0].  A transfer that moved some data before failing with
       * ERESTART, EINTR or EWOULDBLOCK is reported as success.  Returns EINVAL
       * if the summed iovec lengths go negative, or the error from
       * getsock_cap(), the MAC checks or sosend().
       */
  746 int
  747 kern_sendit(td, s, mp, flags, control, segflg)
  748         struct thread *td;
  749         int s;
  750         struct msghdr *mp;
  751         int flags;
  752         struct mbuf *control;
  753         enum uio_seg segflg;
  754 {
  755         struct file *fp;
  756         struct uio auio;
  757         struct iovec *iov;
  758         struct socket *so;
  759         int i, error;
  760         ssize_t len;
  761         cap_rights_t rights;
  762 #ifdef KTRACE
  763         struct uio *ktruio = NULL;
  764 #endif
  765 
  766         AUDIT_ARG_FD(s);
  767         rights = CAP_WRITE;
  768         if (mp->msg_name != NULL)
  769                 rights |= CAP_CONNECT;
  770         error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL);
  771         if (error) {
                      /* sosend() is never reached: free the control chain. */
                      m_freem(control);
  772                 return (error);
              }
  773         so = (struct socket *)fp->f_data;
  774 
  775 #ifdef KTRACE
  776         if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
  777                 ktrsockaddr(mp->msg_name);
  778 #endif
  779 #ifdef MAC
  780         if (mp->msg_name != NULL) {
  781                 error = mac_socket_check_connect(td->td_ucred, so,
  782                     mp->msg_name);
  783                 if (error) {
                              m_freem(control);
  784                         goto bad;
                      }
  785         }
  786         error = mac_socket_check_send(td->td_ucred, so);
  787         if (error) {
                      m_freem(control);
  788                 goto bad;
              }
  789 #endif
  790 
  791         auio.uio_iov = mp->msg_iov;
  792         auio.uio_iovcnt = mp->msg_iovlen;
  793         auio.uio_segflg = segflg;
  794         auio.uio_rw = UIO_WRITE;
  795         auio.uio_td = td;
  796         auio.uio_offset = 0;                    /* XXX */
  797         auio.uio_resid = 0;
  798         iov = mp->msg_iov;
              /* Total the request size, rejecting a sum that turns negative. */
  799         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  800                 if ((auio.uio_resid += iov->iov_len) < 0) {
  801                         error = EINVAL;
                              m_freem(control);
  802                         goto bad;
  803                 }
  804         }
  805 #ifdef KTRACE
  806         if (KTRPOINT(td, KTR_GENIO))
  807                 ktruio = cloneuio(&auio);
  808 #endif
  809         len = auio.uio_resid;
              /* From here on `control' belongs to sosend(); do not free it. */
  810         error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
  811         if (error) {
                      /* Some data went out before the interruption: success. */
  812                 if (auio.uio_resid != len && (error == ERESTART ||
  813                     error == EINTR || error == EWOULDBLOCK))
  814                         error = 0;
  815                 /* Generation of SIGPIPE can be controlled per socket */
  816                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
  817                     !(flags & MSG_NOSIGNAL)) {
  818                         PROC_LOCK(td->td_proc);
  819                         tdsignal(td, SIGPIPE);
  820                         PROC_UNLOCK(td->td_proc);
  821                 }
  822         }
  823         if (error == 0)
  824                 td->td_retval[0] = len - auio.uio_resid;
  825 #ifdef KTRACE
  826         if (ktruio != NULL) {
  827                 ktruio->uio_resid = td->td_retval[0];
  828                 ktrgenio(s, UIO_WRITE, ktruio, error);
  829         }
  830 #endif
  831 bad:
  832         fdrop(fp, td);
  833         return (error);
  834 }
  835 
      /*
       * sendto(2) system call entry point.
       *
       * Builds a one-element iovec and a msghdr on the stack from the
       * arguments (destination uap->to/uap->tolen, no control data) and hands
       * them to sendit().  msg_flags is only initialized under COMPAT_OLDSOCK,
       * which is also the only configuration in which sendit() reads it.
       * msg_controllen is left unset; sendit() only reads it when msg_control
       * is non-NULL.
       */
  836 int
  837 sys_sendto(td, uap)
  838         struct thread *td;
  839         struct sendto_args /* {
  840                 int     s;
  841                 caddr_t buf;
  842                 size_t  len;
  843                 int     flags;
  844                 caddr_t to;
  845                 int     tolen;
  846         } */ *uap;
  847 {
  848         struct msghdr msg;
  849         struct iovec aiov;
  850         int error;
  851 
  852         msg.msg_name = uap->to;
  853         msg.msg_namelen = uap->tolen;
  854         msg.msg_iov = &aiov;
  855         msg.msg_iovlen = 1;
  856         msg.msg_control = 0;
  857 #ifdef COMPAT_OLDSOCK
  858         msg.msg_flags = 0;
  859 #endif
  860         aiov.iov_base = uap->buf;
  861         aiov.iov_len = uap->len;
  862         error = sendit(td, uap->s, &msg, uap->flags);
  863         return (error);
  864 }
  865 
  866 #ifdef COMPAT_OLDSOCK
      /*
       * Old-style send entry point (COMPAT_OLDSOCK only).
       *
       * Sends uap->len bytes from uap->buf on uap->s with no destination
       * address and no control data, by building a one-element iovec and a
       * msghdr on the stack and calling sendit().  msg_flags is 0, so the
       * message is not treated as MSG_COMPAT.
       */
  867 int
  868 osend(td, uap)
  869         struct thread *td;
  870         struct osend_args /* {
  871                 int     s;
  872                 caddr_t buf;
  873                 int     len;
  874                 int     flags;
  875         } */ *uap;
  876 {
  877         struct msghdr msg;
  878         struct iovec aiov;
  879         int error;
  880 
  881         msg.msg_name = 0;
  882         msg.msg_namelen = 0;
  883         msg.msg_iov = &aiov;
  884         msg.msg_iovlen = 1;
  885         aiov.iov_base = uap->buf;
  886         aiov.iov_len = uap->len;
  887         msg.msg_control = 0;
  888         msg.msg_flags = 0;
  889         error = sendit(td, uap->s, &msg, uap->flags);
  890         return (error);
  891 }
  892 
      /*
       * Old-style sendmsg entry point (COMPAT_OLDSOCK only).
       *
       * Copies in sizeof(struct omsghdr) bytes of header from uap->msg into a
       * struct msghdr, copies in the iovec array with copyiniov() (which is
       * passed EMSGSIZE as its error argument), marks the message MSG_COMPAT so
       * that sendit() applies the old control-data handling, and sends it.
       * The kernel iovec copy is freed (M_IOV) before returning.
       *
       * NOTE(review): presumably struct omsghdr lays out its leading fields
       * like struct msghdr -- confirm against the structure definitions.
       */
  893 int
  894 osendmsg(td, uap)
  895         struct thread *td;
  896         struct osendmsg_args /* {
  897                 int     s;
  898                 caddr_t msg;
  899                 int     flags;
  900         } */ *uap;
  901 {
  902         struct msghdr msg;
  903         struct iovec *iov;
  904         int error;
  905 
  906         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
  907         if (error)
  908                 return (error);
  909         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  910         if (error)
  911                 return (error);
              /* Point the header at the kernel copy of the iovec array. */
  912         msg.msg_iov = iov;
  913         msg.msg_flags = MSG_COMPAT;
  914         error = sendit(td, uap->s, &msg, uap->flags);
  915         free(iov, M_IOV);
  916         return (error);
  917 }
  918 #endif
  919 
      /*
       * sendmsg(2) entry point.  Copies in the user's msghdr and its iovec
       * array, then passes the message to sendit().
       * Returns 0 or the first error from copyin(), copyiniov() or sendit().
       */
  920 int
  921 sys_sendmsg(td, uap)
  922         struct thread *td;
  923         struct sendmsg_args /* {
  924                 int     s;
  925                 caddr_t msg;
  926                 int     flags;
  927         } */ *uap;
  928 {
  929         struct msghdr msg;
  930         struct iovec *iov;
  931         int error;
  932 
  933         error = copyin(uap->msg, &msg, sizeof (msg));
  934         if (error)
  935                 return (error);
  936         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  937         if (error)
  938                 return (error);
              /* From here on msg_iov refers to the kernel copy of the iovec array. */
  939         msg.msg_iov = iov;
  940 #ifdef COMPAT_OLDSOCK
  941         msg.msg_flags = 0;              /* not a MSG_COMPAT request */
  942 #endif
  943         error = sendit(td, uap->s, &msg, uap->flags);
  944         free(iov, M_IOV);
  945         return (error);
  946 }
  947 
      /*
       * Common receive path: read from socket descriptor s into the buffers
       * described by mp, and report the sender's address and any control data.
       *
       * td       - calling thread; td_retval[0] is set to the number of bytes
       *            received.
       * s        - socket descriptor.
       * mp       - msghdr; msg_iov/msg_iovlen give the destination buffers.
       *            msg_flags is passed to soreceive() by reference, and
       *            msg_namelen / msg_controllen are rewritten when msg_name /
       *            msg_control are set.
       * fromseg  - UIO_USERSPACE copies the source address with copyout();
       *            any other value uses bcopy().
       * controlp - if non-NULL, receives the control mbuf chain on success
       *            instead of the chain being copied to mp->msg_control.
       *
       * Returns 0 or an errno value.
       */
  948 int
  949 kern_recvit(td, s, mp, fromseg, controlp)
  950         struct thread *td;
  951         int s;
  952         struct msghdr *mp;
  953         enum uio_seg fromseg;
  954         struct mbuf **controlp;
  955 {
  956         struct uio auio;
  957         struct iovec *iov;
  958         int i;
  959         ssize_t len;
  960         int error;
  961         struct mbuf *m, *control = 0;
  962         caddr_t ctlbuf;
  963         struct file *fp;
  964         struct socket *so;
  965         struct sockaddr *fromsa = 0;
  966 #ifdef KTRACE
  967         struct uio *ktruio = NULL;
  968 #endif
  969 
              /* Clear the caller's pointer up front so early returns leave it NULL. */
  970         if (controlp != NULL)
  971                 *controlp = NULL;
  972 
  973         AUDIT_ARG_FD(s);
  974         error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL);
  975         if (error)
  976                 return (error);
  977         so = fp->f_data;
  978 
  979 #ifdef MAC
  980         error = mac_socket_check_receive(td->td_ucred, so);
  981         if (error) {
  982                 fdrop(fp, td);
  983                 return (error);
  984         }
  985 #endif
  986 
  987         auio.uio_iov = mp->msg_iov;
  988         auio.uio_iovcnt = mp->msg_iovlen;
  989         auio.uio_segflg = UIO_USERSPACE;
  990         auio.uio_rw = UIO_READ;
  991         auio.uio_td = td;
  992         auio.uio_offset = 0;                    /* XXX */
  993         auio.uio_resid = 0;
  994         iov = mp->msg_iov;
              /*
               * Total up the iovec lengths; a negative running total is
               * rejected with EINVAL.
               */
  995         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  996                 if ((auio.uio_resid += iov->iov_len) < 0) {
  997                         fdrop(fp, td);
  998                         return (EINVAL);
  999                 }
 1000         }
 1001 #ifdef KTRACE
 1002         if (KTRPOINT(td, KTR_GENIO))
 1003                 ktruio = cloneuio(&auio);
 1004 #endif
              /* Remember the requested size so the transferred count can be derived. */
 1005         len = auio.uio_resid;
 1006         error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
 1007             (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
 1008             &mp->msg_flags);
              /*
               * If some data was transferred before ERESTART, EINTR or
               * EWOULDBLOCK was raised, report the short count as success.
               */
 1009         if (error) {
 1010                 if (auio.uio_resid != len && (error == ERESTART ||
 1011                     error == EINTR || error == EWOULDBLOCK))
 1012                         error = 0;
 1013         }
 1014 #ifdef KTRACE
 1015         if (ktruio != NULL) {
                      /* Trace only the bytes actually transferred. */
 1016                 ktruio->uio_resid = len - auio.uio_resid;
 1017                 ktrgenio(s, UIO_READ, ktruio, error);
 1018         }
 1019 #endif
 1020         if (error)
 1021                 goto out;
 1022         td->td_retval[0] = len - auio.uio_resid;
              /* Return the source address, truncated to the caller's buffer size. */
 1023         if (mp->msg_name) {
 1024                 len = mp->msg_namelen;
 1025                 if (len <= 0 || fromsa == 0)
 1026                         len = 0;
 1027                 else {
 1028                         /* save sa_len before it is destroyed by MSG_COMPAT */
 1029                         len = MIN(len, fromsa->sa_len);
 1030 #ifdef COMPAT_OLDSOCK
 1031                         if (mp->msg_flags & MSG_COMPAT)
 1032                                 ((struct osockaddr *)fromsa)->sa_family =
 1033                                     fromsa->sa_family;
 1034 #endif
 1035                         if (fromseg == UIO_USERSPACE) {
 1036                                 error = copyout(fromsa, mp->msg_name,
 1037                                     (unsigned)len);
 1038                                 if (error)
 1039                                         goto out;
 1040                         } else
 1041                                 bcopy(fromsa, mp->msg_name, len);
 1042                 }
 1043                 mp->msg_namelen = len;
 1044         }
              /*
               * Copy control data out to mp->msg_control only when the caller
               * did not ask for the mbuf chain itself via controlp.
               */
 1045         if (mp->msg_control && controlp == NULL) {
 1046 #ifdef COMPAT_OLDSOCK
 1047                 /*
 1048                  * We assume that old recvmsg calls won't receive access
 1049                  * rights and other control info, esp. as control info
 1050                  * is always optional and those options didn't exist in 4.3.
 1051                  * If we receive rights, trim the cmsghdr; anything else
 1052                  * is tossed.
 1053                  */
 1054                 if (control && mp->msg_flags & MSG_COMPAT) {
 1055                         if (mtod(control, struct cmsghdr *)->cmsg_level !=
 1056                             SOL_SOCKET ||
 1057                             mtod(control, struct cmsghdr *)->cmsg_type !=
 1058                             SCM_RIGHTS) {
 1059                                 mp->msg_controllen = 0;
 1060                                 goto out;
 1061                         }
 1062                         control->m_len -= sizeof (struct cmsghdr);
 1063                         control->m_data += sizeof (struct cmsghdr);
 1064                 }
 1065 #endif
 1066                 len = mp->msg_controllen;
 1067                 m = control;
 1068                 mp->msg_controllen = 0;
 1069                 ctlbuf = mp->msg_control;
 1070 
                      /*
                       * Walk the control chain, copying each mbuf until the
                       * caller's buffer is exhausted.  MSG_CTRUNC is set when
                       * an mbuf has to be cut short.
                       */
 1071                 while (m && len > 0) {
 1072                         unsigned int tocopy;
 1073 
 1074                         if (len >= m->m_len)
 1075                                 tocopy = m->m_len;
 1076                         else {
 1077                                 mp->msg_flags |= MSG_CTRUNC;
 1078                                 tocopy = len;
 1079                         }
 1080 
 1081                         if ((error = copyout(mtod(m, caddr_t),
 1082                                         ctlbuf, tocopy)) != 0)
 1083                                 goto out;
 1084 
 1085                         ctlbuf += tocopy;
 1086                         len -= tocopy;
 1087                         m = m->m_next;
 1088                 }
                      /* Report how many control bytes were actually written. */
 1089                 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
 1090         }
 1091 out:
 1092         fdrop(fp, td);
 1093 #ifdef KTRACE
 1094         if (fromsa && KTRPOINT(td, KTR_STRUCT))
 1095                 ktrsockaddr(fromsa);
 1096 #endif
 1097         if (fromsa)
 1098                 free(fromsa, M_SONAME);
 1099 
              /*
               * On success with controlp set, the control chain is handed to the
               * caller; in every other case it is freed here.
               */
 1100         if (error == 0 && controlp != NULL)  
 1101                 *controlp = control;
 1102         else  if (control)
 1103                 m_freem(control);
 1104 
 1105         return (error);
 1106 }
 1107 
      /*
       * Receive wrapper for user-space callers: runs kern_recvit() with
       * UIO_USERSPACE and no controlp, then, if namelenp is non-NULL, copies
       * the resulting mp->msg_namelen out to it.
       * Returns 0 or an errno value.
       */
 1108 static int
 1109 recvit(td, s, mp, namelenp)
 1110         struct thread *td;
 1111         int s;
 1112         struct msghdr *mp;
 1113         void *namelenp;
 1114 {
 1115         int error;
 1116 
 1117         error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
 1118         if (error)
 1119                 return (error);
 1120         if (namelenp) {
 1121                 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
 1122 #ifdef COMPAT_OLDSOCK
 1123                 if (mp->msg_flags & MSG_COMPAT)
 1124                         error = 0;      /* old recvfrom didn't check */
 1125 #endif
 1126         }
 1127         return (error);
 1128 }
 1129 
      /*
       * recvfrom(2) entry point.  Reads the caller's address-buffer length from
       * uap->fromlenaddr (0 if that pointer is NULL), wraps the single receive
       * buffer in a one-element iovec and calls recvit(), which writes the
       * resulting address length back through uap->fromlenaddr.
       * Returns 0 or an errno value.
       */
 1130 int
 1131 sys_recvfrom(td, uap)
 1132         struct thread *td;
 1133         struct recvfrom_args /* {
 1134                 int     s;
 1135                 caddr_t buf;
 1136                 size_t  len;
 1137                 int     flags;
 1138                 struct sockaddr * __restrict    from;
 1139                 socklen_t * __restrict fromlenaddr;
 1140         } */ *uap;
 1141 {
 1142         struct msghdr msg;
 1143         struct iovec aiov;
 1144         int error;
 1145 
 1146         if (uap->fromlenaddr) {
 1147                 error = copyin(uap->fromlenaddr,
 1148                     &msg.msg_namelen, sizeof (msg.msg_namelen));
 1149                 if (error)
 1150                         goto done2;
 1151         } else {
 1152                 msg.msg_namelen = 0;
 1153         }
 1154         msg.msg_name = uap->from;
 1155         msg.msg_iov = &aiov;
 1156         msg.msg_iovlen = 1;
 1157         aiov.iov_base = uap->buf;
 1158         aiov.iov_len = uap->len;
 1159         msg.msg_control = 0;            /* no control data requested */
 1160         msg.msg_flags = uap->flags;
 1161         error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 1162 done2:
 1163         return(error);
 1164 }
 1165 
 1166 #ifdef COMPAT_OLDSOCK
      /*
       * Old-style recvfrom: sets MSG_COMPAT in uap->flags (modifying the
       * caller's argument structure in place) and defers to sys_recvfrom().
       */
 1167 int
 1168 orecvfrom(td, uap)
 1169         struct thread *td;
 1170         struct recvfrom_args *uap;
 1171 {
 1172 
 1173         uap->flags |= MSG_COMPAT;
 1174         return (sys_recvfrom(td, uap));
 1175 }
 1176 #endif
 1177 
 1178 #ifdef COMPAT_OLDSOCK
      /*
       * Old-style recv: receive into one user buffer from socket uap->s with
       * no source address and no control data, via recvit().
       * Returns recvit()'s error code.
       */
 1179 int
 1180 orecv(td, uap)
 1181         struct thread *td;
 1182         struct orecv_args /* {
 1183                 int     s;
 1184                 caddr_t buf;
 1185                 int     len;
 1186                 int     flags;
 1187         } */ *uap;
 1188 {
 1189         struct msghdr msg;
 1190         struct iovec aiov;
 1191         int error;
 1192 
 1193         msg.msg_name = 0;               /* source address not wanted */
 1194         msg.msg_namelen = 0;
 1195         msg.msg_iov = &aiov;
 1196         msg.msg_iovlen = 1;
 1197         aiov.iov_base = uap->buf;
 1198         aiov.iov_len = uap->len;
 1199         msg.msg_control = 0;            /* no control data requested */
 1200         msg.msg_flags = uap->flags;
 1201         error = recvit(td, uap->s, &msg, NULL);
 1202         return (error);
 1203 }
 1204 
 1205 /*
 1206  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 1207  * overlays the new one, missing only the flags, and with the (old) access
 1208  * rights where the control fields are now.
       *
       * Copies in the caller's struct omsghdr and iovec array, receives with
       * MSG_COMPAT set, and lets recvit() write the address length back to
       * uap->msg->msg_namelen.  If the receive succeeded and control data was
       * returned, its length is copied out to uap->msg->msg_accrightslen.
       * Returns 0 or an errno value.
 1209  */
 1210 int
 1211 orecvmsg(td, uap)
 1212         struct thread *td;
 1213         struct orecvmsg_args /* {
 1214                 int     s;
 1215                 struct  omsghdr *msg;
 1216                 int     flags;
 1217         } */ *uap;
 1218 {
 1219         struct msghdr msg;
 1220         struct iovec *iov;
 1221         int error;
 1222 
              /* Only sizeof(struct omsghdr) bytes are read; msg_flags is set below. */
 1223         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 1224         if (error)
 1225                 return (error);
 1226         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1227         if (error)
 1228                 return (error);
 1229         msg.msg_flags = uap->flags | MSG_COMPAT;
 1230         msg.msg_iov = iov;
 1231         error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
 1232         if (msg.msg_controllen && error == 0)
 1233                 error = copyout(&msg.msg_controllen,
 1234                     &uap->msg->msg_accrightslen, sizeof (int));
 1235         free(iov, M_IOV);
 1236         return (error);
 1237 }
 1238 #endif
 1239 
      /*
       * recvmsg(2) entry point.  Copies in the user's msghdr and iovec array,
       * receives via recvit(), and on success copies the updated msghdr back
       * to uap->msg.
       * Returns 0 or an errno value.
       */
 1240 int
 1241 sys_recvmsg(td, uap)
 1242         struct thread *td;
 1243         struct recvmsg_args /* {
 1244                 int     s;
 1245                 struct  msghdr *msg;
 1246                 int     flags;
 1247         } */ *uap;
 1248 {
 1249         struct msghdr msg;
 1250         struct iovec *uiov, *iov;
 1251         int error;
 1252 
 1253         error = copyin(uap->msg, &msg, sizeof (msg));
 1254         if (error)
 1255                 return (error);
 1256         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1257         if (error)
 1258                 return (error);
 1259         msg.msg_flags = uap->flags;
 1260 #ifdef COMPAT_OLDSOCK
              /* Strip MSG_COMPAT from the user-supplied flags. */
 1261         msg.msg_flags &= ~MSG_COMPAT;
 1262 #endif
              /*
               * Save the user's iovec pointer so it can be restored before the
               * msghdr is copied back; the kernel copy is used for the receive.
               */
 1263         uiov = msg.msg_iov;
 1264         msg.msg_iov = iov;
 1265         error = recvit(td, uap->s, &msg, NULL);
 1266         if (error == 0) {
 1267                 msg.msg_iov = uiov;
 1268                 error = copyout(&msg, uap->msg, sizeof(msg));
 1269         }
 1270         free(iov, M_IOV);
 1271         return (error);
 1272 }
 1273 
 1274 /* ARGSUSED */
      /*
       * shutdown(2) entry point.  Looks up socket descriptor uap->s with the
       * CAP_SHUTDOWN right, calls soshutdown() with uap->how, and drops the
       * file reference.  Returns the lookup error or soshutdown()'s result.
       */
 1275 int
 1276 sys_shutdown(td, uap)
 1277         struct thread *td;
 1278         struct shutdown_args /* {
 1279                 int     s;
 1280                 int     how;
 1281         } */ *uap;
 1282 {
 1283         struct socket *so;
 1284         struct file *fp;
 1285         int error;
 1286 
 1287         AUDIT_ARG_FD(uap->s);
 1288         error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp,
 1289             NULL);
 1290         if (error == 0) {
 1291                 so = fp->f_data;
 1292                 error = soshutdown(so, uap->how);
 1293                 fdrop(fp, td);
 1294         }
 1295         return (error);
 1296 }
 1297 
 1298 /* ARGSUSED */
      /*
       * setsockopt(2) entry point: forwards the arguments to kern_setsockopt()
       * with the option value marked as a user-space pointer (UIO_USERSPACE).
       */
 1299 int
 1300 sys_setsockopt(td, uap)
 1301         struct thread *td;
 1302         struct setsockopt_args /* {
 1303                 int     s;
 1304                 int     level;
 1305                 int     name;
 1306                 caddr_t val;
 1307                 int     valsize;
 1308         } */ *uap;
 1309 {
 1310 
 1311         return (kern_setsockopt(td, uap->s, uap->level, uap->name,
 1312             uap->val, UIO_USERSPACE, uap->valsize));
 1313 }
 1314 
      /*
       * Kernel version of setsockopt.
       *
       * val may be a user or kernel pointer, selected by valseg
       * (UIO_USERSPACE or UIO_SYSSPACE; any other value panics).
       * Returns EFAULT for a NULL val with non-zero valsize, EINVAL if valsize
       * is negative when viewed as an int, otherwise the result of the socket
       * lookup or of sosetopt().
       */
 1315 int
 1316 kern_setsockopt(td, s, level, name, val, valseg, valsize)
 1317         struct thread *td;
 1318         int s;
 1319         int level;
 1320         int name;
 1321         void *val;
 1322         enum uio_seg valseg;
 1323         socklen_t valsize;
 1324 {
 1325         int error;
 1326         struct socket *so;
 1327         struct file *fp;
 1328         struct sockopt sopt;
 1329 
 1330         if (val == NULL && valsize != 0)
 1331                 return (EFAULT);
 1332         if ((int)valsize < 0)
 1333                 return (EINVAL);
 1334 
 1335         sopt.sopt_dir = SOPT_SET;
 1336         sopt.sopt_level = level;
 1337         sopt.sopt_name = name;
 1338         sopt.sopt_val = val;
 1339         sopt.sopt_valsize = valsize;
              /* sopt_td is the thread for a user-space value, NULL for a kernel one. */
 1340         switch (valseg) {
 1341         case UIO_USERSPACE:
 1342                 sopt.sopt_td = td;
 1343                 break;
 1344         case UIO_SYSSPACE:
 1345                 sopt.sopt_td = NULL;
 1346                 break;
 1347         default:
 1348                 panic("kern_setsockopt called with bad valseg");
 1349         }
 1350 
 1351         AUDIT_ARG_FD(s);
 1352         error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL);
 1353         if (error == 0) {
 1354                 so = fp->f_data;
 1355                 error = sosetopt(so, &sopt);
 1356                 fdrop(fp, td);
 1357         }
 1358         return(error);
 1359 }
 1360 
 1361 /* ARGSUSED */
      /*
       * getsockopt(2) entry point.  When uap->val is non-NULL the caller's
       * buffer size is read from uap->avalsize; kern_getsockopt() then does
       * the work and, on success, the resulting size is written back to
       * uap->avalsize.
       * Returns 0 or an errno value.
       */
 1362 int
 1363 sys_getsockopt(td, uap)
 1364         struct thread *td;
 1365         struct getsockopt_args /* {
 1366                 int     s;
 1367                 int     level;
 1368                 int     name;
 1369                 void * __restrict       val;
 1370                 socklen_t * __restrict avalsize;
 1371         } */ *uap;
 1372 {
 1373         socklen_t valsize;
 1374         int     error;
 1375 
              /*
               * valsize is left unset when uap->val is NULL; kern_getsockopt()
               * stores 0 through the pointer in that case before reading it.
               */
 1376         if (uap->val) {
 1377                 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 1378                 if (error)
 1379                         return (error);
 1380         }
 1381 
 1382         error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 1383             uap->val, UIO_USERSPACE, &valsize);
 1384 
 1385         if (error == 0)
 1386                 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 1387         return (error);
 1388 }
 1389 
 1390 /*
 1391  * Kernel version of getsockopt.
 1392  * val can be a userland or kernel pointer (selected by valseg); valsize is always a kernel pointer.
 1393  */
 1394 int
 1395 kern_getsockopt(td, s, level, name, val, valseg, valsize)
 1396         struct thread *td;
 1397         int s;
 1398         int level;
 1399         int name;
 1400         void *val;
 1401         enum uio_seg valseg;
 1402         socklen_t *valsize;
 1403 {
 1404         int error;
 1405         struct  socket *so;
 1406         struct file *fp;
 1407         struct  sockopt sopt;
 1408 
              /* With no value buffer the size is forced to 0, whatever the caller passed. */
 1409         if (val == NULL)
 1410                 *valsize = 0;
 1411         if ((int)*valsize < 0)
 1412                 return (EINVAL);
 1413 
 1414         sopt.sopt_dir = SOPT_GET;
 1415         sopt.sopt_level = level;
 1416         sopt.sopt_name = name;
 1417         sopt.sopt_val = val;
 1418         sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
              /* sopt_td is the thread for a user-space value, NULL for a kernel one. */
 1419         switch (valseg) {
 1420         case UIO_USERSPACE:
 1421                 sopt.sopt_td = td;
 1422                 break;
 1423         case UIO_SYSSPACE:
 1424                 sopt.sopt_td = NULL;
 1425                 break;
 1426         default:
 1427                 panic("kern_getsockopt called with bad valseg");
 1428         }
 1429 
 1430         AUDIT_ARG_FD(s);
 1431         error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL);
 1432         if (error == 0) {
 1433                 so = fp->f_data;
 1434                 error = sogetopt(so, &sopt);
                      /* The size is written back whether or not sogetopt() succeeded. */
 1435                 *valsize = sopt.sopt_valsize;
 1436                 fdrop(fp, td);
 1437         }
 1438         return (error);
 1439 }
 1440 
 1441 /*
 1442  * getsockname1() - Get socket name.
       *
       * Shared body of sys_getsockname() and ogetsockname().  Reads the caller's
       * buffer length from uap->alen, obtains the address with
       * kern_getsockname(), copies up to that many bytes to uap->asa and writes
       * the resulting length back to uap->alen.  When compat is non-zero (and
       * COMPAT_OLDSOCK is defined) sa_family is first rewritten through the
       * struct osockaddr layout.
       * Returns 0 or an errno value.
 1443  */
 1444 /* ARGSUSED */
 1445 static int
 1446 getsockname1(td, uap, compat)
 1447         struct thread *td;
 1448         struct getsockname_args /* {
 1449                 int     fdes;
 1450                 struct sockaddr * __restrict asa;
 1451                 socklen_t * __restrict alen;
 1452         } */ *uap;
 1453         int compat;
 1454 {
 1455         struct sockaddr *sa;
 1456         socklen_t len;
 1457         int error;
 1458 
 1459         error = copyin(uap->alen, &len, sizeof(len));
 1460         if (error)
 1461                 return (error);
 1462 
 1463         error = kern_getsockname(td, uap->fdes, &sa, &len);
 1464         if (error)
 1465                 return (error);
 1466 
 1467         if (len != 0) {
 1468 #ifdef COMPAT_OLDSOCK
 1469                 if (compat)
 1470                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1471 #endif
 1472                 error = copyout(sa, uap->asa, (u_int)len);
 1473         }
              /* sa is released here whether or not the copyout succeeded. */
 1474         free(sa, M_SONAME);
 1475         if (error == 0)
 1476                 error = copyout(&len, uap->alen, sizeof(len));
 1477         return (error);
 1478 }
 1479 
      /*
       * Kernel version of getsockname.
       *
       * td   - calling thread.
       * fd   - socket descriptor, looked up with the CAP_GETSOCKNAME right.
       * sa   - on success receives the address produced by the protocol's
       *        pru_sockaddr method (may be NULL); the caller frees it with
       *        free(..., M_SONAME).  On a pru_sockaddr error it is freed here
       *        and set to NULL.
       * alen - in: size of the caller's buffer; out: MIN(*alen, sa_len), or 0
       *        when no address was returned.
       *
       * Returns 0 or an errno value.
       */
 1480 int
 1481 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
 1482     socklen_t *alen)
 1483 {
 1484         struct socket *so;
 1485         struct file *fp;
 1486         socklen_t len;
 1487         int error;
 1488 
              /*
               * NOTE(review): socklen_t appears to be unsigned here
               * (kern_setsockopt() casts it to int before its sign check), in
               * which case this test can never be true -- confirm.
               */
 1489         if (*alen < 0)
 1490                 return (EINVAL);
 1491 
 1492         AUDIT_ARG_FD(fd);
 1493         error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL);
 1494         if (error)
 1495                 return (error);
 1496         so = fp->f_data;
 1497         *sa = NULL;
 1498         CURVNET_SET(so->so_vnet);
 1499         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
 1500         CURVNET_RESTORE();
 1501         if (error)
 1502                 goto bad;
 1503         if (*sa == NULL)
 1504                 len = 0;
 1505         else
 1506                 len = MIN(*alen, (*sa)->sa_len);
 1507         *alen = len;
 1508 #ifdef KTRACE
              /*
               * *sa may legitimately be NULL here (handled above), so do not
               * hand it to ktrsockaddr() unchecked; kern_recvit() guards its
               * ktrsockaddr() call the same way.
               */
 1509         if (*sa != NULL && KTRPOINT(td, KTR_STRUCT))
 1510                 ktrsockaddr(*sa);
 1511 #endif
 1512 bad:
 1513         fdrop(fp, td);
 1514         if (error && *sa) {
 1515                 free(*sa, M_SONAME);
 1516                 *sa = NULL;
 1517         }
 1518         return (error);
 1519 }
 1520 
      /*
       * getsockname(2) entry point: getsockname1() with compat disabled.
       */
 1521 int
 1522 sys_getsockname(td, uap)
 1523         struct thread *td;
 1524         struct getsockname_args *uap;
 1525 {
 1526 
 1527         return (getsockname1(td, uap, 0));
 1528 }
 1529 
 1530 #ifdef COMPAT_OLDSOCK
      /*
       * Old-style getsockname: getsockname1() with compat enabled, so the
       * address is returned in the struct osockaddr layout.
       */
 1531 int
 1532 ogetsockname(td, uap)
 1533         struct thread *td;
 1534         struct getsockname_args *uap;
 1535 {
 1536 
 1537         return (getsockname1(td, uap, 1));
 1538 }
 1539 #endif /* COMPAT_OLDSOCK */
 1540 
 1541 /*
 1542  * getpeername1() - Get name of peer for connected socket.
       *
       * Shared body of sys_getpeername() and ogetpeername().  Reads the caller's
       * buffer length from uap->alen, obtains the peer address with
       * kern_getpeername(), copies up to that many bytes to uap->asa and writes
       * the resulting length back to uap->alen.  When compat is non-zero (and
       * COMPAT_OLDSOCK is defined) sa_family is first rewritten through the
       * struct osockaddr layout.
       * Returns 0 or an errno value.
 1543  */
 1544 /* ARGSUSED */
 1545 static int
 1546 getpeername1(td, uap, compat)
 1547         struct thread *td;
 1548         struct getpeername_args /* {
 1549                 int     fdes;
 1550                 struct sockaddr * __restrict    asa;
 1551                 socklen_t * __restrict  alen;
 1552         } */ *uap;
 1553         int compat;
 1554 {
 1555         struct sockaddr *sa;
 1556         socklen_t len;
 1557         int error;
 1558 
 1559         error = copyin(uap->alen, &len, sizeof (len));
 1560         if (error)
 1561                 return (error);
 1562 
 1563         error = kern_getpeername(td, uap->fdes, &sa, &len);
 1564         if (error)
 1565                 return (error);
 1566 
 1567         if (len != 0) {
 1568 #ifdef COMPAT_OLDSOCK
 1569                 if (compat)
 1570                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1571 #endif
 1572                 error = copyout(sa, uap->asa, (u_int)len);
 1573         }
              /* sa is released here whether or not the copyout succeeded. */
 1574         free(sa, M_SONAME);
 1575         if (error == 0)
 1576                 error = copyout(&len, uap->alen, sizeof(len));
 1577         return (error);
 1578 }
 1579 
      /*
       * Kernel version of getpeername.
       *
       * td   - calling thread.
       * fd   - socket descriptor, looked up with the CAP_GETPEERNAME right.
       * sa   - on success receives the address produced by the protocol's
       *        pru_peeraddr method (may be NULL); the caller frees it with
       *        free(..., M_SONAME).  On a pru_peeraddr error it is freed here
       *        and set to NULL.  It is not written at all on the EINVAL,
       *        lookup-failure and ENOTCONN paths.
       * alen - in: size of the caller's buffer; out: MIN(*alen, sa_len), or 0
       *        when no address was returned.
       *
       * Returns ENOTCONN if the socket is neither SS_ISCONNECTED nor
       * SS_ISCONFIRMING, otherwise 0 or an errno value.
       */
 1580 int
 1581 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
 1582     socklen_t *alen)
 1583 {
 1584         struct socket *so;
 1585         struct file *fp;
 1586         socklen_t len;
 1587         int error;
 1588 
              /*
               * NOTE(review): socklen_t appears to be unsigned here
               * (kern_setsockopt() casts it to int before its sign check), in
               * which case this test can never be true -- confirm.
               */
 1589         if (*alen < 0)
 1590                 return (EINVAL);
 1591 
 1592         AUDIT_ARG_FD(fd);
 1593         error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL);
 1594         if (error)
 1595                 return (error);
 1596         so = fp->f_data;
 1597         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 1598                 error = ENOTCONN;
 1599                 goto done;
 1600         }
 1601         *sa = NULL;
 1602         CURVNET_SET(so->so_vnet);
 1603         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
 1604         CURVNET_RESTORE();
 1605         if (error)
 1606                 goto bad;
 1607         if (*sa == NULL)
 1608                 len = 0;
 1609         else
 1610                 len = MIN(*alen, (*sa)->sa_len);
 1611         *alen = len;
 1612 #ifdef KTRACE
              /*
               * *sa may legitimately be NULL here (handled above), so do not
               * hand it to ktrsockaddr() unchecked; kern_recvit() guards its
               * ktrsockaddr() call the same way.
               */
 1613         if (*sa != NULL && KTRPOINT(td, KTR_STRUCT))
 1614                 ktrsockaddr(*sa);
 1615 #endif
 1616 bad:
 1617         if (error && *sa) {
 1618                 free(*sa, M_SONAME);
 1619                 *sa = NULL;
 1620         }
 1621 done:
 1622         fdrop(fp, td);
 1623         return (error);
 1624 }
 1625 
      /*
       * getpeername(2) entry point: getpeername1() with compat disabled.
       */
 1626 int
 1627 sys_getpeername(td, uap)
 1628         struct thread *td;
 1629         struct getpeername_args *uap;
 1630 {
 1631 
 1632         return (getpeername1(td, uap, 0));
 1633 }
 1634 
 1635 #ifdef COMPAT_OLDSOCK
      /*
       * Old-style getpeername: getpeername1() with compat enabled, so the
       * address is returned in the struct osockaddr layout.
       */
 1636 int
 1637 ogetpeername(td, uap)
 1638         struct thread *td;
 1639         struct ogetpeername_args *uap;
 1640 {
 1641 
 1642         /* XXX uap should have type `getpeername_args *' to begin with. */
 1643         return (getpeername1(td, (struct getpeername_args *)uap, 1));
 1644 }
 1645 #endif /* COMPAT_OLDSOCK */
 1646 
      /*
       * Copy buflen bytes from user address buf into a newly allocated mbuf of
       * the given type and return it through *mp.
       *
       * A cluster is attached when buflen exceeds MLEN; lengths above MCLBYTES
       * are rejected with EINVAL.  For MT_SONAME mbufs the copied data is
       * treated as a struct sockaddr and its sa_len is set to buflen.
       * On a copyin() failure the mbuf is freed and *mp is left untouched.
       * Returns 0, EINVAL or the copyin() error.
       */
 1647 int
 1648 sockargs(mp, buf, buflen, type)
 1649         struct mbuf **mp;
 1650         caddr_t buf;
 1651         int buflen, type;
 1652 {
 1653         struct sockaddr *sa;
 1654         struct mbuf *m;
 1655         int error;
 1656 
              /* The u_int casts make a negative buflen compare as a huge length. */
 1657         if ((u_int)buflen > MLEN) {
 1658 #ifdef COMPAT_OLDSOCK
 1659                 if (type == MT_SONAME && (u_int)buflen <= 112)
 1660                         buflen = MLEN;          /* unix domain compat. hack */
 1661                 else
 1662 #endif
 1663                         if ((u_int)buflen > MCLBYTES)
 1664                                 return (EINVAL);
 1665         }
 1666         m = m_get(M_WAIT, type);
 1667         if ((u_int)buflen > MLEN)
 1668                 MCLGET(m, M_WAIT);
 1669         m->m_len = buflen;
 1670         error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 1671         if (error)
 1672                 (void) m_free(m);
 1673         else {
 1674                 *mp = m;
 1675                 if (type == MT_SONAME) {
 1676                         sa = mtod(m, struct sockaddr *);
 1677 
 1678 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
                              /*
                               * A zero sa_family with a small sa_len is taken
                               * to be an old-layout address whose family landed
                               * in the sa_len byte; move it into sa_family.
                               */
 1679                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1680                                 sa->sa_family = sa->sa_len;
 1681 #endif
 1682                         sa->sa_len = buflen;
 1683                 }
 1684         }
 1685         return (error);
 1686 }
 1687 
      /*
       * Copy a socket address of len bytes from user address uaddr into a
       * freshly malloc'ed (M_SONAME) buffer and return it through *namp.
       *
       * Returns ENAMETOOLONG if len exceeds SOCK_MAXADDRLEN, EINVAL if len is
       * too short to reach sa_data, or the copyin() error (the buffer is freed
       * and *namp is left untouched in that case).  On success sa_len is
       * overwritten with len; the caller then owns the buffer.
       */
 1688 int
 1689 getsockaddr(namp, uaddr, len)
 1690         struct sockaddr **namp;
 1691         caddr_t uaddr;
 1692         size_t len;
 1693 {
 1694         struct sockaddr *sa;
 1695         int error;
 1696 
 1697         if (len > SOCK_MAXADDRLEN)
 1698                 return (ENAMETOOLONG);
 1699         if (len < offsetof(struct sockaddr, sa_data[0]))
 1700                 return (EINVAL);
 1701         sa = malloc(len, M_SONAME, M_WAITOK);
 1702         error = copyin(uaddr, sa, len);
 1703         if (error) {
 1704                 free(sa, M_SONAME);
 1705         } else {
 1706 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
                      /*
                       * A zero sa_family with a small sa_len is taken to be an
                       * old-layout address whose family landed in the sa_len
                       * byte; move it into sa_family.
                       */
 1707                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1708                         sa->sa_family = sa->sa_len;
 1709 #endif
 1710                 sa->sa_len = len;
 1711                 *namp = sa;
 1712         }
 1713         return (error);
 1714 }
 1715 
 1716 #include <sys/condvar.h>
 1717 
      /*
       * Synchronisation state shared with sf_buf_mext(): count is decremented
       * under mtx each time sf_buf_mext() is called with this structure, and
       * cv is signalled when it reaches zero.
       */
 1718 struct sendfile_sync {
 1719         struct mtx      mtx;            /* protects count */
 1720         struct cv       cv;             /* signalled when count drops to 0 */
 1721         unsigned        count;          /* outstanding sf_buf_mext() calls */
 1722 };
 1723 
 1724 /*
 1725  * Detach mapped page and release resources back to the system.
       *
       * args is the sf_buf being released; its page is unwired and, if the page
       * is no longer wired and no longer belongs to an object, freed.
       * addr is an optional struct sendfile_sync: when non-NULL its count is
       * decremented and its condition variable signalled once it reaches zero.
 1726  */
 1727 void
 1728 sf_buf_mext(void *addr, void *args)
 1729 {
 1730         vm_page_t m;
 1731         struct sendfile_sync *sfs;
 1732 
              /* Fetch the page before the sf_buf is released. */
 1733         m = sf_buf_page(args);
 1734         sf_buf_free(args);
 1735         vm_page_lock(m);
 1736         vm_page_unwire(m, 0);
 1737         /*
 1738          * Check for the object going away on us. This can
 1739          * happen since we don't hold a reference to it.
 1740          * If so, we're responsible for freeing the page.
 1741          */
 1742         if (m->wire_count == 0 && m->object == NULL)
 1743                 vm_page_free(m);
 1744         vm_page_unlock(m);
 1745         if (addr == NULL)
 1746                 return;
 1747         sfs = addr;
 1748         mtx_lock(&sfs->mtx);
 1749         KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
 1750         if (--sfs->count == 0)
 1751                 cv_signal(&sfs->cv);
 1752         mtx_unlock(&sfs->mtx);
 1753 }
 1754 
 1755 /*
 1756  * sendfile(2)
 1757  *
 1758  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 1759  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 1760  *
 1761  * Send a file specified by 'fd' and starting at 'offset' to a socket
 1762  * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
 1763  * 0.  Optionally add a header and/or trailer to the socket output.  If
 1764  * specified, write the total number of bytes sent into *sbytes.
       *
       * This entry point simply calls do_sendfile() with compat set to 0.
 1765  */
 1766 int
 1767 sys_sendfile(struct thread *td, struct sendfile_args *uap)
 1768 {
 1769 
 1770         return (do_sendfile(td, uap, 0));
 1771 }
 1772 
      /*
       * Shared body of sys_sendfile() and freebsd4_sendfile().  If uap->hdtr is
       * set, copies in the struct sf_hdtr and builds kernel uio structures for
       * its header and trailer iovec arrays, then calls kern_sendfile() and
       * frees the uio structures.  compat is passed through unchanged.
       * Returns 0 or the first error from copyin(), copyinuio() or
       * kern_sendfile().
       */
 1773 static int
 1774 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
 1775 {
 1776         struct sf_hdtr hdtr;
 1777         struct uio *hdr_uio, *trl_uio;
 1778         int error;
 1779 
              /* Start as NULL so the cleanup at "out" frees only what was built. */
 1780         hdr_uio = trl_uio = NULL;
 1781 
 1782         if (uap->hdtr != NULL) {
 1783                 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 1784                 if (error)
 1785                         goto out;
 1786                 if (hdtr.headers != NULL) {
 1787                         error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
 1788                         if (error)
 1789                                 goto out;
 1790                 }
 1791                 if (hdtr.trailers != NULL) {
 1792                         error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
 1793                         if (error)
 1794                                 goto out;
 1795 
 1796                 }
 1797         }
 1798 
 1799         error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
 1800 out:
 1801         if (hdr_uio)
 1802                 free(hdr_uio, M_IOV);
 1803         if (trl_uio)
 1804                 free(trl_uio, M_IOV);
 1805         return (error);
 1806 }
 1807 
 1808 #ifdef COMPAT_FREEBSD4
      /*
       * FreeBSD 4 compatible sendfile: repacks the old argument structure into
       * a struct sendfile_args field by field and calls do_sendfile() with
       * compat set to 1.
       */
 1809 int
 1810 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
 1811 {
 1812         struct sendfile_args args;
 1813 
 1814         args.fd = uap->fd;
 1815         args.s = uap->s;
 1816         args.offset = uap->offset;
 1817         args.nbytes = uap->nbytes;
 1818         args.hdtr = uap->hdtr;
 1819         args.sbytes = uap->sbytes;
 1820         args.flags = uap->flags;
 1821 
 1822         return (do_sendfile(td, &args, 1));
 1823 }
 1824 #endif /* COMPAT_FREEBSD4 */
 1825 
 1826 int
 1827 kern_sendfile(struct thread *td, struct sendfile_args *uap,
 1828     struct uio *hdr_uio, struct uio *trl_uio, int compat)
 1829 {
              /*
               * Send the file behind uap->fd over the socket uap->s.  File
               * pages are wrapped in sf_buf-backed, read-only mbufs and handed
               * to the protocol's pru_send; the optional header uio is copied
               * into mbufs and sent ahead of the first file data, and the
               * optional trailer uio is written with kern_writev() afterwards.
               *
               * uap->offset is the starting file offset and must not be
               * negative.  uap->nbytes is the number of file bytes to send,
               * with 0 meaning "up to the size of the backing VM object".  A
               * non-zero compat makes nbytes include the header length, which
               * is then subtracted.  SF_MNOWAIT, SF_SYNC and SF_NODISKIO in
               * uap->flags are honoured.
               *
               * The byte count accumulated in sbytes is copied out to
               * uap->sbytes (when non-NULL) on every exit path.  Returns 0 or
               * an errno value; ERESTART is reported as EINTR.
               */
 1830         struct file *sock_fp;
 1831         struct vnode *vp;
 1832         struct vm_object *obj = NULL;
 1833         struct socket *so = NULL;
 1834         struct mbuf *m = NULL;
 1835         struct sf_buf *sf;
 1836         struct vm_page *pg;
 1837         off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
 1838         int error, hdrlen = 0, mnw = 0;
 1839         int vfslocked;
 1840         struct sendfile_sync *sfs = NULL;
 1841 
 1842         /*
 1843          * The file descriptor must be a regular file and have a
 1844          * backing VM object.
 1845          * File offset must be positive.  If it goes beyond EOF
 1846          * we send only the header/trailer and no payload data.
 1847          */
 1848         AUDIT_ARG_FD(uap->fd);
 1849         if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0)
 1850                 goto out;
 1851         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1852         vn_lock(vp, LK_SHARED | LK_RETRY);
 1853         if (vp->v_type == VREG) {
 1854                 obj = vp->v_object;
 1855                 if (obj != NULL) {
 1856                         /*
 1857                          * Temporarily increase the backing VM
 1858                          * object's reference count so that a forced
 1859                          * reclamation of its vnode does not
 1860                          * immediately destroy it.
 1861                          */
 1862                         VM_OBJECT_LOCK(obj);
 1863                         if ((obj->flags & OBJ_DEAD) == 0) {
 1864                                 vm_object_reference_locked(obj);
 1865                                 VM_OBJECT_UNLOCK(obj);
 1866                         } else {
 1867                                 VM_OBJECT_UNLOCK(obj);
 1868                                 obj = NULL;
 1869                         }
 1870                 }
 1871         }
 1872         VOP_UNLOCK(vp, 0);
 1873         VFS_UNLOCK_GIANT(vfslocked);
 1874         if (obj == NULL) {
 1875                 error = EINVAL;
 1876                 goto out;
 1877         }
 1878         if (uap->offset < 0) {
 1879                 error = EINVAL;
 1880                 goto out;
 1881         }
 1882 
 1883         /*
 1884          * The socket must be a stream socket and connected.
 1885          * Remember if it is a blocking or non-blocking socket.
 1886          */
 1887         if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE,
 1888             &sock_fp, NULL)) != 0)
 1889                 goto out;
 1890         so = sock_fp->f_data;
 1891         if (so->so_type != SOCK_STREAM) {
 1892                 error = EINVAL;
 1893                 goto out;
 1894         }
 1895         if ((so->so_state & SS_ISCONNECTED) == 0) {
 1896                 error = ENOTCONN;
 1897                 goto out;
 1898         }
 1899         /*
 1900          * Do not wait on memory allocations but return ENOMEM for
 1901          * caller to retry later.
 1902          * XXX: Experimental.
 1903          */
 1904         if (uap->flags & SF_MNOWAIT)
 1905                 mnw = 1;
 1906 
              /*
               * SF_SYNC: set up a counter/condvar pair.  The counter is bumped
               * for every sf_buf mbuf built below and the exit path sleeps on
               * the condvar while it is still non-zero.
               */
 1907         if (uap->flags & SF_SYNC) {
 1908                 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
 1909                 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
 1910                 cv_init(&sfs->cv, "sendfile");
 1911         }
 1912 
 1913 #ifdef MAC
 1914         error = mac_socket_check_send(td->td_ucred, so);
 1915         if (error)
 1916                 goto out;
 1917 #endif
 1918 
 1919         /* If headers are specified copy them into mbufs. */
 1920         if (hdr_uio != NULL) {
 1921                 hdr_uio->uio_td = td;
 1922                 hdr_uio->uio_rw = UIO_WRITE;
 1923                 if (hdr_uio->uio_resid > 0) {
 1924                         /*
 1925                          * In FBSD < 5.0 the nbytes to send also included
 1926                          * the header.  If compat is specified subtract the
 1927                          * header size from nbytes.
 1928                          */
 1929                         if (compat) {
 1930                                 if (uap->nbytes > hdr_uio->uio_resid)
 1931                                         uap->nbytes -= hdr_uio->uio_resid;
 1932                                 else
 1933                                         uap->nbytes = 0;
 1934                         }
 1935                         m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
 1936                             0, 0, 0);
 1937                         if (m == NULL) {
 1938                                 error = mnw ? EAGAIN : ENOBUFS;
 1939                                 goto out;
 1940                         }
 1941                         hdrlen = m_length(m, NULL);
 1942                 }
 1943         }
 1944 
 1945         /*
 1946          * Protect against multiple writers to the socket.
 1947          *
 1948          * XXXRW: Historically this has assumed non-interruptibility, so now
 1949          * we implement that, but possibly shouldn't.
 1950          */
 1951         (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
 1952 
 1953         /*
 1954          * Loop through the pages of the file, starting with the requested
 1955          * offset. Get a file page (do I/O if necessary), map the file page
 1956          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 1957          * it on the socket.
 1958          * This is done in two loops.  The inner loop turns as many pages
 1959          * as it can, up to available socket buffer space, without blocking
 1960          * into mbufs to have it bulk delivered into the socket send buffer.
 1961          * The outer loop checks the state and available space of the socket
 1962          * and takes care of the overall progress.
 1963          */
 1964         for (off = uap->offset, rem = uap->nbytes; ; ) {
 1965                 struct mbuf *mtail = NULL;
 1966                 int loopbytes = 0;
 1967                 int space = 0;
 1968                 int done = 0;
 1969 
 1970                 /*
 1971                  * Check the socket state for ongoing connection,
 1972                  * no errors and space in socket buffer.
 1973                  * If space is low allow for the remainder of the
 1974                  * file to be processed if it fits the socket buffer.
 1975                  * Otherwise block in waiting for sufficient space
 1976                  * to proceed, or if the socket is nonblocking, return
 1977                  * to userland with EAGAIN while reporting how far
 1978                  * we've come.
 1979                  * We wait until the socket buffer has significant free
 1980                  * space to do bulk sends.  This makes good use of file
 1981                  * system read ahead and allows packet segmentation
 1982                  * offloading hardware to take over lots of work.  If
 1983                  * we were not careful here we would send off only one
 1984                  * sfbuf at a time.
 1985                  */
 1986                 SOCKBUF_LOCK(&so->so_snd);
 1987                 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
 1988                         so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
 1989 retry_space:
 1990                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 1991                         error = EPIPE;
 1992                         SOCKBUF_UNLOCK(&so->so_snd);
 1993                         goto done;
 1994                 } else if (so->so_error) {
 1995                         error = so->so_error;
 1996                         so->so_error = 0;
 1997                         SOCKBUF_UNLOCK(&so->so_snd);
 1998                         goto done;
 1999                 }
 2000                 space = sbspace(&so->so_snd);
 2001                 if (space < rem &&
 2002                     (space <= 0 ||
 2003                      space < so->so_snd.sb_lowat)) {
 2004                         if (so->so_state & SS_NBIO) {
 2005                                 SOCKBUF_UNLOCK(&so->so_snd);
 2006                                 error = EAGAIN;
 2007                                 goto done;
 2008                         }
 2009                         /*
 2010                          * sbwait drops the lock while sleeping.
 2011                          * When we loop back to retry_space the
 2012                          * state may have changed and we retest
 2013                          * for it.
 2014                          */
 2015                         error = sbwait(&so->so_snd);
 2016                         /*
 2017                          * An error from sbwait usually indicates that we've
 2018                          * been interrupted by a signal. If we've sent anything
 2019                          * then return bytes sent, otherwise return the error.
 2020                          */
 2021                         if (error) {
 2022                                 SOCKBUF_UNLOCK(&so->so_snd);
 2023                                 goto done;
 2024                         }
 2025                         goto retry_space;
 2026                 }
 2027                 SOCKBUF_UNLOCK(&so->so_snd);
 2028 
 2029                 /*
 2030                  * Reduce space in the socket buffer by the size of
 2031                  * the header mbuf chain.
 2032                  * hdrlen is set to 0 after the first loop.
 2033                  */
 2034                 space -= hdrlen;
 2035 
 2036                 /*
 2037                  * Loop and construct maximum sized mbuf chain to be bulk
 2038                  * dumped into socket buffer.
 2039                  */
 2040                 while (space > loopbytes) {
 2041                         vm_pindex_t pindex;
 2042                         vm_offset_t pgoff;
 2043                         struct mbuf *m0;
 2044 
 2045                         VM_OBJECT_LOCK(obj);
 2046                         /*
 2047                          * Calculate the amount to transfer.
 2048                          * Not to exceed a page, the EOF,
 2049                          * or the passed in nbytes.
 2050                          */
 2051                         pgoff = (vm_offset_t)(off & PAGE_MASK);
 2052                         xfsize = omin(PAGE_SIZE - pgoff,
 2053                             obj->un_pager.vnp.vnp_size - uap->offset -
 2054                             fsbytes - loopbytes);
 2055                         if (uap->nbytes)
 2056                                 rem = (uap->nbytes - fsbytes - loopbytes);
 2057                         else
 2058                                 rem = obj->un_pager.vnp.vnp_size -
 2059                                     uap->offset - fsbytes - loopbytes;
 2060                         xfsize = omin(rem, xfsize);
 2061                         xfsize = omin(space - loopbytes, xfsize);
 2062                         if (xfsize <= 0) {
 2063                                 VM_OBJECT_UNLOCK(obj);
 2064                                 done = 1;               /* all data sent */
 2065                                 break;
 2066                         }
 2067 
 2068                         /*
 2069                          * Attempt to look up the page.  Allocate
 2070                          * if not found or wait and loop if busy.
 2071                          */
 2072                         pindex = OFF_TO_IDX(off);
 2073                         pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
 2074                             VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
 2075 
 2076                         /*
 2077                          * Check if page is valid for what we need,
 2078                          * otherwise initiate I/O.
 2079                          * If we already turned some pages into mbufs,
 2080                          * send them off before we come here again and
 2081                          * block.
                               * Only the "page is valid" branch drops the
                               * object lock right here; the EAGAIN and EBUSY
                               * branches leave it held for the error block
                               * further down to release.
 2082                          */
 2083                         if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
 2084                                 VM_OBJECT_UNLOCK(obj);
 2085                         else if (m != NULL)
 2086                                 error = EAGAIN; /* send what we already got */
 2087                         else if (uap->flags & SF_NODISKIO)
 2088                                 error = EBUSY;
 2089                         else {
 2090                                 int bsize;
 2091                                 ssize_t resid;
 2092 
 2093                                 /*
 2094                                  * Ensure that our page is still around
 2095                                  * when the I/O completes.
 2096                                  */
 2097                                 vm_page_io_start(pg);
 2098                                 VM_OBJECT_UNLOCK(obj);
 2099 
 2100                                 /*
 2101                                  * Get the page from backing store.
 2102                                  */
 2103                                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2104                                 error = vn_lock(vp, LK_SHARED);
 2105                                 if (error != 0)
 2106                                         goto after_read;
 2107                                 bsize = vp->v_mount->mnt_stat.f_iosize;
 2108 
 2109                                 /*
 2110                                  * XXXMAC: Because we don't have fp->f_cred
 2111                                  * here, we pass in NOCRED.  This is probably
 2112                                  * wrong, but is consistent with our original
 2113                                  * implementation.
 2114                                  */
 2115                                 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
 2116                                     trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
 2117                                     IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 2118                                     td->td_ucred, NOCRED, &resid, td);
 2119                                 VOP_UNLOCK(vp, 0);
 2120                         after_read:
 2121                                 VFS_UNLOCK_GIANT(vfslocked);
 2122                                 VM_OBJECT_LOCK(obj);
 2123                                 vm_page_io_finish(pg);
                                      /*
                                       * On failure the object lock stays
                                       * held; the error block below drops it
                                       * after unwiring the page.
                                       */
 2124                                 if (!error)
 2125                                         VM_OBJECT_UNLOCK(obj);
 2126                                 mbstat.sf_iocnt++;
 2127                         }
 2128                         if (error) {
 2129                                 vm_page_lock(pg);
 2130                                 vm_page_unwire(pg, 0);
 2131                                 /*
 2132                                  * See if anyone else might know about
 2133                                  * this page.  If not and it is not valid,
 2134                                  * then free it.
 2135                                  */
 2136                                 if (pg->wire_count == 0 && pg->valid == 0 &&
 2137                                     pg->busy == 0 && !(pg->oflags & VPO_BUSY))
 2138                                         vm_page_free(pg);
 2139                                 vm_page_unlock(pg);
 2140                                 VM_OBJECT_UNLOCK(obj);
 2141                                 if (error == EAGAIN)
 2142                                         error = 0;      /* not a real error */
 2143                                 break;
 2144                         }
 2145 
 2146                         /*
 2147                          * Get a sendfile buf.  When allocating the
 2148                          * first buffer for mbuf chain, we usually
 2149                          * wait as long as necessary, but this wait
 2150                          * can be interrupted.  For subsequent
 2151                          * buffers, do not sleep, since several
 2152                          * threads might exhaust the buffers and then
 2153                          * deadlock.
 2154                          */
 2155                         sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
 2156                             SFB_CATCH);
 2157                         if (sf == NULL) {
 2158                                 mbstat.sf_allocfail++;
 2159                                 vm_page_lock(pg);
 2160                                 vm_page_unwire(pg, 0);
 2161                                 KASSERT(pg->object != NULL,
 2162                                     ("kern_sendfile: object disappeared"));
 2163                                 vm_page_unlock(pg);
 2164                                 if (m == NULL)
 2165                                         error = (mnw ? EAGAIN : EINTR);
 2166                                 break;
 2167                         }
 2168 
 2169                         /*
 2170                          * Get an mbuf and set it up as having
 2171                          * external storage.
 2172                          */
 2173                         m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
 2174                         if (m0 == NULL) {
 2175                                 error = (mnw ? EAGAIN : ENOBUFS);
                                      /*
                                       * No mbuf to own the sf_buf: release it
                                       * through the same routine that MEXTADD
                                       * below installs as the external-storage
                                       * free callback.
                                       */
 2176                                 sf_buf_mext((void *)sf_buf_kva(sf), sf);
 2177                                 break;
 2178                         }
 2179                         MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
 2180                             sfs, sf, M_RDONLY, EXT_SFBUF);
 2181                         m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
 2182                         m0->m_len = xfsize;
 2183 
 2184                         /* Append to mbuf chain. */
 2185                         if (mtail != NULL)
 2186                                 mtail->m_next = m0;
 2187                         else if (m != NULL)
 2188                                 m_last(m)->m_next = m0;
 2189                         else
 2190                                 m = m0;
 2191                         mtail = m0;
 2192 
 2193                         /* Keep track of bits processed. */
 2194                         loopbytes += xfsize;
 2195                         off += xfsize;
 2196 
 2197                         if (sfs != NULL) {
 2198                                 mtx_lock(&sfs->mtx);
 2199                                 sfs->count++;
 2200                                 mtx_unlock(&sfs->mtx);
 2201                         }
 2202                 }
 2203 
 2204                 /* Add the buffer chain to the socket buffer. */
 2205                 if (m != NULL) {
 2206                         int mlen, err;
 2207 
 2208                         mlen = m_length(m, NULL);
 2209                         SOCKBUF_LOCK(&so->so_snd);
 2210                         if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 2211                                 error = EPIPE;
 2212                                 SOCKBUF_UNLOCK(&so->so_snd);
 2213                                 goto done;
 2214                         }
 2215                         SOCKBUF_UNLOCK(&so->so_snd);
 2216                         CURVNET_SET(so->so_vnet);
 2217                         /* Avoid error aliasing. */
 2218                         err = (*so->so_proto->pr_usrreqs->pru_send)
 2219                                     (so, 0, m, NULL, NULL, td);
 2220                         CURVNET_RESTORE();
 2221                         if (err == 0) {
 2222                                 /*
 2223                                  * We need two counters to get the
 2224                                  * file offset and nbytes to send
 2225                                  * right:
 2226                                  * - sbytes contains the total amount
 2227                                  *   of bytes sent, including headers.
 2228                                  * - fsbytes contains the total amount
 2229                                  *   of bytes sent from the file.
                                       * The first chain sent also carried the
                                       * header mbufs, so their length is taken
                                       * back out of fsbytes exactly once.
 2230                                  */
 2231                                 sbytes += mlen;
 2232                                 fsbytes += mlen;
 2233                                 if (hdrlen) {
 2234                                         fsbytes -= hdrlen;
 2235                                         hdrlen = 0;
 2236                                 }
 2237                         } else if (error == 0)
 2238                                 error = err;
 2239                         m = NULL;       /* pru_send always consumes */
 2240                 }
 2241 
 2242                 /* Quit outer loop on error or when we're done. */
                      /*
                       * NOTE(review): "done" is tested before "error", so a
                       * pru_send failure on the final chunk still falls
                       * through to the trailer code below; when trl_uio is
                       * set, kern_writev() then overwrites error.  Confirm
                       * this ordering is intended.
                       */
 2243                 if (done) 
 2244                         break;
 2245                 if (error)
 2246                         goto done;
 2247         }
 2248 
 2249         /*
 2250          * Send trailers. Wimp out and use writev(2).
              * The send buffer lock is dropped before the write, and the
              * jump to "out" skips "done" so it is not unlocked twice.
 2251          */
 2252         if (trl_uio != NULL) {
 2253                 sbunlock(&so->so_snd);
 2254                 error = kern_writev(td, uap->s, trl_uio);
 2255                 if (error == 0)
 2256                         sbytes += td->td_retval[0];
 2257                 goto out;
 2258         }
 2259 
 2260 done:
 2261         sbunlock(&so->so_snd);
 2262 out:
 2263         /*
 2264          * If there was no error we have to clear td->td_retval[0]
 2265          * because it may have been set by writev.
 2266          */
 2267         if (error == 0) {
 2268                 td->td_retval[0] = 0;
 2269         }
              /* NOTE(review): the result of this copyout is not checked. */
 2270         if (uap->sbytes != NULL) {
 2271                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
 2272         }
 2273         if (obj != NULL)
 2274                 vm_object_deallocate(obj);
              /*
               * NOTE(review): vp has no initializer in this function, so this
               * test presumably relies on fgetvp_read() storing NULL through
               * its last argument when it fails -- confirm.
               */
 2275         if (vp != NULL) {
 2276                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2277                 vrele(vp);
 2278                 VFS_UNLOCK_GIANT(vfslocked);
 2279         }
              /*
               * so is assigned only after getsock_cap() succeeded, so it also
               * tells us that sock_fp holds a reference to drop.
               */
 2280         if (so)
 2281                 fdrop(sock_fp, td);
 2282         if (m)
 2283                 m_freem(m);
 2284 
              /*
               * SF_SYNC: sleep while the counter is still non-zero, then tear
               * the tracker down.
               * NOTE(review): cv_wait() is guarded by a single "if" rather
               * than a loop; the KASSERT below assumes one wakeup suffices.
               */
 2285         if (sfs != NULL) {
 2286                 mtx_lock(&sfs->mtx);
 2287                 if (sfs->count != 0)
 2288                         cv_wait(&sfs->cv, &sfs->mtx);
 2289                 KASSERT(sfs->count == 0, ("sendfile sync still busy"));
 2290                 cv_destroy(&sfs->cv);
 2291                 mtx_destroy(&sfs->mtx);
 2292                 free(sfs, M_TEMP);
 2293         }
 2294 
 2295         if (error == ERESTART)
 2296                 error = EINTR;
 2297 
 2298         return (error);
 2299 }
 2300 
 2301 /*
 2302  * SCTP syscalls.
 2303  * Functionality only compiled in if SCTP is defined in the kernel Makefile,
 2304  * otherwise all return EOPNOTSUPP.
 2305  * XXX: We should make this loadable one day.
 2306  */
 2307 int
 2308 sys_sctp_peeloff(td, uap)
 2309         struct thread *td;
 2310         struct sctp_peeloff_args /* {
 2311                 int     sd;
 2312                 caddr_t name;
 2313         } */ *uap;
 2314 {
      /*
       * Peel the association identified by uap->name off the SCTP socket
       * uap->sd onto a newly created socket, and return the descriptor of
       * that new socket in td->td_retval[0].
       *
       * Returns 0 on success.  EOPNOTSUPP is returned when the kernel was
       * built without SCTP or when uap->sd is not an SCTP socket, ENOMEM
       * when sonewconn() cannot create the new socket, and otherwise the
       * error from fgetsock(), sctp_can_peel_off(), falloc() or
       * sctp_do_peeloff().  On any failure after the descriptor has been
       * allocated it is closed again before returning.
       */
 2315 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2316         struct filedesc *fdp;
 2317         struct file *nfp = NULL;
 2318         int error;
 2319         struct socket *head, *so;
 2320         int fd;
 2321         u_int fflag;
 2322 
 2323         fdp = td->td_proc->p_fd;
 2324         AUDIT_ARG_FD(uap->sd);
 2325         error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag);
 2326         if (error)
 2327                 goto done2;
 2328         if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
 2329                 error = EOPNOTSUPP;
 2330                 goto done;
 2331         }
 2332         error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
 2333         if (error)
 2334                 goto done;
 2335         /*
 2336          * At this point we know we do have a assoc to pull
 2337          * we proceed to get the fd setup. This may block
 2338          * but that is ok.
 2339          */
 2340 
 2341         error = falloc(td, &nfp, &fd, 0);
 2342         if (error)
 2343                 goto done;
 2344         td->td_retval[0] = fd;
 2345 
 2346         CURVNET_SET(head->so_vnet);
 2347         so = sonewconn(head, SS_ISCONNECTED);
              /*
               * error is still 0 here, so it must be set explicitly: otherwise
               * the noconnection path would skip fdclose() and the syscall
               * would report success with a descriptor that was never
               * finit()ed.
               */
              if (so == NULL) {
                      error = ENOMEM;
 2349                 goto noconnection;
              }
 2350         /*
 2351          * Before changing the flags on the socket, we have to bump the
 2352          * reference count.  Otherwise, if the protocol calls sofree(),
 2353          * the socket will be released due to a zero refcount.
 2354          */
 2355         SOCK_LOCK(so);
 2356         soref(so);                      /* file descriptor reference */
 2357         SOCK_UNLOCK(so);
 2358 
 2359         ACCEPT_LOCK();
 2360 
 2361         TAILQ_REMOVE(&head->so_comp, so, so_list);
 2362         head->so_qlen--;
 2363         so->so_state |= (head->so_state & SS_NBIO);
 2364         so->so_state &= ~SS_NOFDREF;
 2365         so->so_qstate &= ~SQ_COMP;
 2366         so->so_head = NULL;
 2367         ACCEPT_UNLOCK();
 2368         finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 2369         error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
 2370         if (error)
 2371                 goto noconnection;
 2372         if (head->so_sigio != NULL)
 2373                 fsetown(fgetown(&head->so_sigio), &so->so_sigio);
 2374 
 2375 noconnection:
 2376         /*
 2377          * close the new descriptor, assuming someone hasn't ripped it
 2378          * out from under us.
 2379          */
 2380         if (error)
 2381                 fdclose(fdp, nfp, fd, td);
 2382 
 2383         /*
 2384          * Release explicitly held references before returning.
 2385          */
 2386         CURVNET_RESTORE();
 2387 done:
 2388         if (nfp != NULL)
 2389                 fdrop(nfp, td);
 2390         fputsock(head);
 2391 done2:
 2392         return (error);
 2393 #else  /* SCTP */
 2394         return (EOPNOTSUPP);
 2395 #endif /* SCTP */
 2396 }
 2397 
 2398 int
 2399 sys_sctp_generic_sendmsg (td, uap)
 2400         struct thread *td;
 2401         struct sctp_generic_sendmsg_args /* {
 2402                 int sd, 
 2403                 caddr_t msg, 
 2404                 int mlen, 
 2405                 caddr_t to, 
 2406                 __socklen_t tolen, 
 2407                 struct sctp_sndrcvinfo *sinfo, 
 2408                 int flags
 2409         } */ *uap;
 2410 {
      /*
       * Send the single user buffer uap->msg (uap->mlen bytes) on the SCTP
       * socket uap->sd through sctp_lower_sosend(), optionally to the address
       * uap->to and with the send parameters copied in from uap->sinfo.  On
       * success the number of bytes consumed from the buffer is stored in
       * td->td_retval[0].  Kernels built without SCTP return EOPNOTSUPP.
       */
 2411 #if (defined(INET) || defined(INET6)) && defined(SCTP)
              struct sctp_sndrcvinfo sndinfo, *sndinfop = NULL;
              struct sockaddr *dst = NULL;
              struct file *sockfp = NULL;
              struct socket *sock;
              struct iovec msgiov[1];
              struct uio msguio;
              cap_rights_t need;
              int error = 0, msglen;
 2417 #ifdef KTRACE
              struct uio *traceuio = NULL;
 2419 #endif
 2423 
              /* Optional per-message send parameters supplied by the caller. */
              if (uap->sinfo != NULL) {
                      error = copyin(uap->sinfo, &sndinfo, sizeof (sndinfo));
                      if (error != 0)
                              return (error);
                      sndinfop = &sndinfo;
              }
 2430 
              /*
               * CAP_WRITE is always requested on the descriptor; CAP_CONNECT
               * is added when a destination address is given.
               */
              need = CAP_WRITE;
              if (uap->tolen != 0) {
                      error = getsockaddr(&dst, uap->to, uap->tolen);
                      if (error != 0) {
                              dst = NULL;
                              goto release_addr;
                      }
                      need |= CAP_CONNECT;
              }
 2440 
 2441         AUDIT_ARG_FD(uap->sd);
              error = getsock_cap(td->td_proc->p_fd, uap->sd, need, &sockfp,
                  NULL);
              if (error != 0)
                      goto release_fp;
 2445 #ifdef KTRACE
              if (dst != NULL && (KTRPOINT(td, KTR_STRUCT)))
                      ktrsockaddr(dst);
 2448 #endif
 2449 
              /* Only SCTP sockets are accepted. */
              sock = (struct socket *)sockfp->f_data;
              if (sock->so_proto->pr_protocol != IPPROTO_SCTP) {
                      error = EOPNOTSUPP;
                      goto release_fp;
              }
 2458 #ifdef MAC
              error = mac_socket_check_send(td->td_ucred, sock);
              if (error != 0)
                      goto release_fp;
 2462 #endif /* MAC */
 2463 
              /* Describe the user buffer as a one-element write uio. */
              msglen = uap->mlen;
              msgiov[0].iov_base = uap->msg;
              msgiov[0].iov_len = uap->mlen;
              msguio.uio_iov = msgiov;
              msguio.uio_iovcnt = 1;
              msguio.uio_segflg = UIO_USERSPACE;
              msguio.uio_rw = UIO_WRITE;
              msguio.uio_td = td;
              msguio.uio_offset = 0;                  /* XXX */
              msguio.uio_resid = uap->mlen;

              CURVNET_SET(sock->so_vnet);
              error = sctp_lower_sosend(sock, dst, &msguio,
                  (struct mbuf *)NULL, (struct mbuf *)NULL,
                  uap->flags, sndinfop, td);
 2476         CURVNET_RESTORE();

              if (error == ERESTART || error == EINTR ||
                  error == EWOULDBLOCK) {
                      /*
                       * Interrupted or would block after part of the buffer
                       * was consumed: report the partial send as success.
                       */
                      if (msguio.uio_resid != msglen)
                              error = 0;
              } else if (error == EPIPE &&
                  !(sock->so_options & SO_NOSIGPIPE) &&
                  !(uap->flags & MSG_NOSIGNAL)) {
                      /* Generation of SIGPIPE can be controlled per socket. */
                      PROC_LOCK(td->td_proc);
                      tdsignal(td, SIGPIPE);
                      PROC_UNLOCK(td->td_proc);
              }
              if (error == 0)
                      td->td_retval[0] = msglen - msguio.uio_resid;
 2491 #ifdef KTRACE
              /*
               * NOTE(review): traceuio is never assigned in this function, so
               * this branch cannot currently be taken.
               */
              if (traceuio != NULL) {
                      traceuio->uio_resid = td->td_retval[0];
                      ktrgenio(uap->sd, UIO_WRITE, traceuio, error);
              }
 2496 #endif /* KTRACE */
      release_fp:
              if (sockfp != NULL)
                      fdrop(sockfp, td);
      release_addr:
              if (dst != NULL)
                      free(dst, M_SONAME);
 2503         return (error);
 2504 #else  /* SCTP */
 2505         return (EOPNOTSUPP);
 2506 #endif /* SCTP */
 2507 }
 2508 
 2509 int
 2510 sys_sctp_generic_sendmsg_iov(td, uap)
 2511         struct thread *td;
 2512         struct sctp_generic_sendmsg_iov_args /* {
 2513                 int sd, 
 2514                 struct iovec *iov, 
 2515                 int iovlen, 
 2516                 caddr_t to, 
 2517                 __socklen_t tolen, 
 2518                 struct sctp_sndrcvinfo *sinfo, 
 2519                 int flags
 2520         } */ *uap;
 2521 {
 2522 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2523         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2524         struct socket *so;
 2525         struct file *fp = NULL;
 2526         int error=0, i;
 2527         ssize_t len;
 2528         struct sockaddr *to = NULL;
 2529 #ifdef KTRACE
 2530         struct uio *ktruio = NULL;
 2531 #endif
 2532         struct uio auio;
 2533         struct iovec *iov, *tiov;
 2534         cap_rights_t rights;
 2535 
 2536         if (uap->sinfo) {
 2537                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2538                 if (error)
 2539                         return (error);
 2540                 u_sinfo = &sinfo;
 2541         }
 2542         rights = CAP_WRITE;
 2543         if (uap->tolen) {
 2544                 error = getsockaddr(&to, uap->to, uap->tolen);
 2545                 if (error) {
 2546                         to = NULL;
 2547                         goto sctp_bad2;
 2548                 }
 2549                 rights |= CAP_CONNECT;
 2550         }
 2551 
 2552         AUDIT_ARG_FD(uap->sd);
 2553         error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL);
 2554         if (error)
 2555                 goto sctp_bad1;
 2556 
 2557 #ifdef COMPAT_FREEBSD32
 2558         if (SV_CURPROC_FLAG(SV_ILP32))
 2559                 error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 2560                     uap->iovlen, &iov, EMSGSIZE);
 2561         else
 2562 #endif
 2563                 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2564         if (error)
 2565                 goto sctp_bad1;
 2566 #ifdef KTRACE
 2567         if (to && (KTRPOINT(td, KTR_STRUCT)))
 2568                 ktrsockaddr(to);
 2569 #endif
 2570 
 2571         so = (struct socket *)fp->f_data;
 2572         if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
 2573                 error = EOPNOTSUPP;
 2574                 goto sctp_bad;
 2575         }
 2576 #ifdef MAC
 2577         error = mac_socket_check_send(td->td_ucred, so);
 2578         if (error)
 2579                 goto sctp_bad;
 2580 #endif /* MAC */
 2581 
 2582         auio.uio_iov = iov;
 2583         auio.uio_iovcnt = uap->iovlen;
 2584         auio.uio_segflg = UIO_USERSPACE;
 2585         auio.uio_rw = UIO_WRITE;
 2586         auio.uio_td = td;
 2587         auio.uio_offset = 0;                    /* XXX */
 2588         auio.uio_resid = 0;
 2589         tiov = iov;
 2590         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2591                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2592                         error = EINVAL;
 2593                         goto sctp_bad;
 2594                 }
 2595         }
 2596         len = auio.uio_resid;
 2597         CURVNET_SET(so->so_vnet);
 2598         error = sctp_lower_sosend(so, to, &auio,
 2599                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2600                     uap->flags, u_sinfo, td);
 2601         CURVNET_RESTORE();
 2602         if (error) {
 2603                 if (auio.uio_resid != len && (error == ERESTART ||
 2604                     error == EINTR || error == EWOULDBLOCK))
 2605                         error = 0;
 2606                 /* Generation of SIGPIPE can be controlled per socket */
 2607                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2608                     !(uap->flags & MSG_NOSIGNAL)) {
 2609                         PROC_LOCK(td->td_proc);
 2610                         tdsignal(td, SIGPIPE);
 2611                         PROC_UNLOCK(td->td_proc);
 2612                 }
 2613         }
 2614         if (error == 0)
 2615                 td->td_retval[0] = len - auio.uio_resid;
 2616 #ifdef KTRACE
 2617         if (ktruio != NULL) {
 2618                 ktruio->uio_resid = td->td_retval[0];
 2619                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2620         }
 2621 #endif /* KTRACE */
 2622 sctp_bad:
 2623         free(iov, M_IOV);
 2624 sctp_bad1:
 2625         if (fp)
 2626                 fdrop(fp, td);
 2627 sctp_bad2:
 2628         if (to)
 2629                 free(to, M_SONAME);
 2630         return (error);
 2631 #else  /* SCTP */
 2632         return (EOPNOTSUPP);
 2633 #endif /* SCTP */
 2634 }
 2635 
 2636 int
 2637 sys_sctp_generic_recvmsg(td, uap)
 2638         struct thread *td;
 2639         struct sctp_generic_recvmsg_args /* {
 2640                 int sd, 
 2641                 struct iovec *iov, 
 2642                 int iovlen,
 2643                 struct sockaddr *from, 
 2644                 __socklen_t *fromlenaddr,
 2645                 struct sctp_sndrcvinfo *sinfo, 
 2646                 int *msg_flags
 2647         } */ *uap;
 2648 {
 2649 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2650         uint8_t sockbufstore[256];
 2651         struct uio auio;
 2652         struct iovec *iov, *tiov;
 2653         struct sctp_sndrcvinfo sinfo;
 2654         struct socket *so;
 2655         struct file *fp = NULL;
 2656         struct sockaddr *fromsa;
 2657         int fromlen;
 2658         ssize_t len;
 2659         int i, msg_flags;
 2660         int error = 0;
 2661 #ifdef KTRACE
 2662         struct uio *ktruio = NULL;
 2663 #endif
 2664 
 2665         AUDIT_ARG_FD(uap->sd);
 2666         error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL);
 2667         if (error) {
 2668                 return (error);
 2669         }
 2670 #ifdef COMPAT_FREEBSD32
 2671         if (SV_CURPROC_FLAG(SV_ILP32))
 2672                 error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 2673                     uap->iovlen, &iov, EMSGSIZE);
 2674         else
 2675 #endif
 2676                 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2677         if (error)
 2678                 goto out1;
 2679 
 2680         so = fp->f_data;
 2681         if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
 2682                 error = EOPNOTSUPP;
 2683                 goto out;
 2684         }
 2685 #ifdef MAC
 2686         error = mac_socket_check_receive(td->td_ucred, so);
 2687         if (error) {
 2688                 goto out;
 2689         }
 2690 #endif /* MAC */
 2691 
 2692         if (uap->fromlenaddr) {
 2693                 error = copyin(uap->fromlenaddr,
 2694                     &fromlen, sizeof (fromlen));
 2695                 if (error) {
 2696                         goto out;
 2697                 }
 2698         } else {
 2699                 fromlen = 0;
 2700         }
 2701         if (uap->msg_flags) {
 2702                 error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
 2703                 if (error) {
 2704                         goto out;
 2705                 }
 2706         } else {
 2707                 msg_flags = 0;
 2708         }
 2709         auio.uio_iov = iov;
 2710         auio.uio_iovcnt = uap->iovlen;
 2711         auio.uio_segflg = UIO_USERSPACE;
 2712         auio.uio_rw = UIO_READ;
 2713         auio.uio_td = td;
 2714         auio.uio_offset = 0;                    /* XXX */
 2715         auio.uio_resid = 0;
 2716         tiov = iov;
 2717         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2718                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2719                         error = EINVAL;
 2720                         goto out;
 2721                 }
 2722         }
 2723         len = auio.uio_resid;
 2724         fromsa = (struct sockaddr *)sockbufstore;
 2725 
 2726 #ifdef KTRACE
 2727         if (KTRPOINT(td, KTR_GENIO))
 2728                 ktruio = cloneuio(&auio);
 2729 #endif /* KTRACE */
 2730         memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
 2731         CURVNET_SET(so->so_vnet);
 2732         error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
 2733                     fromsa, fromlen, &msg_flags,
 2734                     (struct sctp_sndrcvinfo *)&sinfo, 1);
 2735         CURVNET_RESTORE();
 2736         if (error) {
 2737                 if (auio.uio_resid != len && (error == ERESTART ||
 2738                     error == EINTR || error == EWOULDBLOCK))
 2739                         error = 0;
 2740         } else {
 2741                 if (uap->sinfo)
 2742                         error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
 2743         }
 2744 #ifdef KTRACE
 2745         if (ktruio != NULL) {
 2746                 ktruio->uio_resid = len - auio.uio_resid;
 2747                 ktrgenio(uap->sd, UIO_READ, ktruio, error);
 2748         }
 2749 #endif /* KTRACE */
 2750         if (error)
 2751                 goto out;
 2752         td->td_retval[0] = len - auio.uio_resid;
 2753 
 2754         if (fromlen && uap->from) {
 2755                 len = fromlen;
 2756                 if (len <= 0 || fromsa == 0)
 2757                         len = 0;
 2758                 else {
 2759                         len = MIN(len, fromsa->sa_len);
 2760                         error = copyout(fromsa, uap->from, (size_t)len);
 2761                         if (error)
 2762                                 goto out;
 2763                 }
 2764                 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
 2765                 if (error) {
 2766                         goto out;
 2767                 }
 2768         }
 2769 #ifdef KTRACE
 2770         if (KTRPOINT(td, KTR_STRUCT))
 2771                 ktrsockaddr(fromsa);
 2772 #endif
 2773         if (uap->msg_flags) {
 2774                 error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
 2775                 if (error) {
 2776                         goto out;
 2777                 }
 2778         }
 2779 out:
 2780         free(iov, M_IOV);
 2781 out1:
 2782         if (fp) 
 2783                 fdrop(fp, td);
 2784 
 2785         return (error);
 2786 #else  /* SCTP */
 2787         return (EOPNOTSUPP);
 2788 #endif /* SCTP */
 2789 }

Cache object: 17d5ef129135c56201861632efca11b3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.