[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_syscalls.c

Version: -  FREEBSD  -  FREEBSD8  -  FREEBSD7  -  FREEBSD72  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  OPENSOLARIS  -  minix-3-1-1  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * sendfile(2) and related extensions:
    6  * Copyright (c) 1998, David Greenman. All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 198853 2009-11-03 12:52:35Z kib $");
   37 
   38 #include "opt_inet.h"
   39 #include "opt_inet6.h"
   40 #include "opt_sctp.h"
   41 #include "opt_compat.h"
   42 #include "opt_ktrace.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/kernel.h>
   47 #include <sys/lock.h>
   48 #include <sys/mutex.h>
   49 #include <sys/sysproto.h>
   50 #include <sys/malloc.h>
   51 #include <sys/filedesc.h>
   52 #include <sys/event.h>
   53 #include <sys/proc.h>
   54 #include <sys/fcntl.h>
   55 #include <sys/file.h>
   56 #include <sys/filio.h>
   57 #include <sys/jail.h>
   58 #include <sys/mount.h>
   59 #include <sys/mbuf.h>
   60 #include <sys/protosw.h>
   61 #include <sys/sf_buf.h>
   62 #include <sys/socket.h>
   63 #include <sys/socketvar.h>
   64 #include <sys/signalvar.h>
   65 #include <sys/syscallsubr.h>
   66 #include <sys/sysctl.h>
   67 #include <sys/uio.h>
   68 #include <sys/vnode.h>
   69 #ifdef KTRACE
   70 #include <sys/ktrace.h>
   71 #endif
   72 
   73 #include <net/vnet.h>
   74 
   75 #include <security/audit/audit.h>
   76 #include <security/mac/mac_framework.h>
   77 
   78 #include <vm/vm.h>
   79 #include <vm/vm_object.h>
   80 #include <vm/vm_page.h>
   81 #include <vm/vm_pageout.h>
   82 #include <vm/vm_kern.h>
   83 #include <vm/vm_extern.h>
   84 
   85 #if defined(INET) || defined(INET6)
   86 #ifdef SCTP
   87 #include <netinet/sctp.h>
   88 #include <netinet/sctp_peeloff.h>
   89 #endif /* SCTP */
   90 #endif /* INET || INET6 */
   91 
      /*
       * Forward declarations for the file-local (static) helpers.  sendit() and
       * accept1() are defined further down; recvit(), do_sendfile(),
       * getsockname1() and getpeername1() are not defined in the portion of the
       * file reviewed here (presumably they follow later -- verify).
       */
   92 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
   93 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
   94 
   95 static int accept1(struct thread *td, struct accept_args *uap, int compat);
   96 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
   97 static int getsockname1(struct thread *td, struct getsockname_args *uap,
   98                         int compat);
   99 static int getpeername1(struct thread *td, struct getpeername_args *uap,
  100                         int compat);
  101 
  102 /*
  103  * NSFBUFS-related variables and associated sysctls
       *
       * The three counters below are plain ints registered beneath the _kern_ipc
       * sysctl node.  nsfbufs is registered with CTLFLAG_RDTUN, the other two
       * with CTLFLAG_RD.  Nothing in this part of the file updates them.
  104  */
  105 int nsfbufs;
  106 int nsfbufspeak;
  107 int nsfbufsused;
  108 
  109 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
  110     "Maximum number of sendfile(2) sf_bufs available");
  111 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
  112     "Number of sendfile(2) sf_bufs at peak usage");
  113 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
  114     "Number of sendfile(2) sf_bufs in use");
  115 
  116 /*
  117  * Convert a user file descriptor to a kernel file entry.  On success a
  118  * reference on the file entry is held upon returning; the callers in this
  119  * file release it with fdrop().  This is lighter weight than fgetsock(),
  120  * which bumps the socket reference count and drops the file reference:
  121  * holding the file reference instead avoids several additional mutex
  122  * operations associated with the additional reference count.
       *
       * fdp:    descriptor table to search; a NULL table yields EBADF.
       * fd:     descriptor number to look up.
       * fpp:    always written: the referenced file on success, NULL on error.
       * fflagp: if not NULL, receives the file's f_flag on success only.
       *
       * Returns 0 on success, EBADF if the descriptor cannot be resolved, or
       * ENOTSOCK if it resolves to a file whose f_type is not DTYPE_SOCKET (the
       * reference obtained for the lookup is dropped again in that case).
  123  */
  124 static int
  125 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
  126 {
  127         struct file *fp;
  128         int error;
  129 
  130         fp = NULL;
  131         if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) {
  132                 error = EBADF;
  133         } else if (fp->f_type != DTYPE_SOCKET) {
                      /* Not a socket: give the reference back before failing. */
  134                 fdrop(fp, curthread);
  135                 fp = NULL;
  136                 error = ENOTSOCK;
  137         } else {
  138                 if (fflagp != NULL)
  139                         *fflagp = fp->f_flag;
  140                 error = 0;
  141         }
              /* fp is NULL on both error paths, so *fpp never dangles. */
  142         *fpp = fp;
  143         return (error);
  144 }
  145 
  146 /*
  147  * System call interface to the socket abstraction.
       *
       * COMPAT_OLDSOCK is defined whenever COMPAT_43 is.  In this file it
       * enables the old-style entry points (oaccept, osend, osendmsg) and the
       * osockaddr / MSG_COMPAT handling inside accept1() and sendit().
  148  */
  149 #if defined(COMPAT_43)
  150 #define COMPAT_OLDSOCK
  151 #endif
  152 
      /*
       * socket() system call: create a socket and bind it to a new descriptor.
       *
       * uap->domain, uap->type and uap->protocol are passed unchanged to
       * socreate().  On success the new descriptor number is returned to the
       * caller through td->td_retval[0].
       *
       * Returns 0 on success, otherwise the error from the MAC check (when MAC
       * is configured), falloc() or socreate().
       */
  153 int
  154 socket(td, uap)
  155         struct thread *td;
  156         struct socket_args /* {
  157                 int     domain;
  158                 int     type;
  159                 int     protocol;
  160         } */ *uap;
  161 {
  162         struct filedesc *fdp;
  163         struct socket *so;
  164         struct file *fp;
  165         int fd, error;
  166 
  167         AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
  168 #ifdef MAC
  169         error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
  170             uap->protocol);
  171         if (error)
  172                 return (error);
  173 #endif
  174         fdp = td->td_proc->p_fd;
  175         error = falloc(td, &fp, &fd);
  176         if (error)
  177                 return (error);
  178         /* An extra reference on `fp' has been held for us by falloc(). */
  179         error = socreate(uap->domain, &so, uap->type, uap->protocol,
  180             td->td_ucred, td);
  181         if (error) {
                      /* No socket was created: take the descriptor back out. */
  182                 fdclose(fdp, fp, fd, td);
  183         } else {
  184                 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
  185                 td->td_retval[0] = fd;
  186         }
              /* Drop the extra falloc() reference on both paths. */
  187         fdrop(fp, td);
  188         return (error);
  189 }
  190 
  191 /* ARGSUSED */
      /*
       * bind() system call: fetch the address described by uap->name and
       * uap->namelen with getsockaddr(), hand it to kern_bind(), then free it.
       *
       * Returns the getsockaddr() error if the address cannot be fetched,
       * otherwise the result of kern_bind().
       */
  192 int
  193 bind(td, uap)
  194         struct thread *td;
  195         struct bind_args /* {
  196                 int     s;
  197                 caddr_t name;
  198                 int     namelen;
  199         } */ *uap;
  200 {
  201         struct sockaddr *sa;
  202         int error;
  203 
  204         if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
  205                 return (error);
  206 
  207         error = kern_bind(td, uap->s, sa);
              /* sa is released here whether or not kern_bind() succeeded. */
  208         free(sa, M_SONAME);
  209         return (error);
  210 }
  211 
      /*
       * kern_bind(): bind the socket behind descriptor `fd' to address `sa'.
       *
       * sa is used as given and is not freed here; bind() above frees it after
       * the call.
       *
       * Returns the getsock() error if fd is not a usable socket descriptor,
       * the MAC check error when MAC is configured and denies the request,
       * otherwise the result of sobind().
       */
  212 int
  213 kern_bind(td, fd, sa)
  214         struct thread *td;
  215         int fd;
  216         struct sockaddr *sa;
  217 {
  218         struct socket *so;
  219         struct file *fp;
  220         int error;
  221 
  222         AUDIT_ARG_FD(fd);
  223         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
  224         if (error)
  225                 return (error);
  226         so = fp->f_data;
  227 #ifdef KTRACE
  228         if (KTRPOINT(td, KTR_STRUCT))
  229                 ktrsockaddr(sa);
  230 #endif
              /*
               * With MAC configured, sobind() below is the body of the
               * `if (error == 0)'; without MAC it runs unconditionally.
               */
  231 #ifdef MAC
  232         error = mac_socket_check_bind(td->td_ucred, so, sa);
  233         if (error == 0)
  234 #endif
  235                 error = sobind(so, sa, td);
              /* Release the file reference taken by getsock(). */
  236         fdrop(fp, td);
  237         return (error);
  238 }
  239 
  240 /* ARGSUSED */
      /*
       * listen() system call: call solisten() on the socket behind uap->s with
       * backlog uap->backlog.
       *
       * Returns the getsock() error if uap->s is not a usable socket
       * descriptor, the MAC check error when MAC is configured and denies the
       * request, otherwise the result of solisten().
       */
  241 int
  242 listen(td, uap)
  243         struct thread *td;
  244         struct listen_args /* {
  245                 int     s;
  246                 int     backlog;
  247         } */ *uap;
  248 {
  249         struct socket *so;
  250         struct file *fp;
  251         int error;
  252 
  253         AUDIT_ARG_FD(uap->s);
  254         error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
  255         if (error == 0) {
  256                 so = fp->f_data;
  257 #ifdef MAC
  258                 error = mac_socket_check_listen(td->td_ucred, so);
  259                 if (error == 0) {
  260 #endif
                              /* solisten() runs with the socket's vnet current. */
  261                         CURVNET_SET(so->so_vnet);
  262                         error = solisten(so, uap->backlog, td);
  263                         CURVNET_RESTORE();
  264 #ifdef MAC
  265                 }
  266 #endif
                      /* Release the file reference taken by getsock(). */
  267                 fdrop(fp, td);
  268         }
  269         return(error);
  270 }
  271 
  272 /*
  273  * accept1()
       *
       * Shared body of accept() and oaccept(): wraps kern_accept() with the
       * copyin/copyout of the peer-address arguments.
       *
       * uap->name:     destination for the peer address; if NULL, no address is
       *                requested and uap->anamelen is never touched.
       * uap->anamelen: in: size of the caller's buffer; out: length produced
       *                by kern_accept().
       * compat:        non-zero for oaccept(); when COMPAT_OLDSOCK is defined
       *                the returned address is then patched through a
       *                struct osockaddr view before being copied out.
       *
       * Returns 0 on success (the new descriptor is in td->td_retval[0], set by
       * kern_accept()), or the error from copyin(), kern_accept() or copyout().
       * If a copyout fails after the connection was accepted, the new
       * descriptor is closed again.
  274  */
  275 static int
  276 accept1(td, uap, compat)
  277         struct thread *td;
  278         struct accept_args /* {
  279                 int     s;
  280                 struct sockaddr * __restrict name;
  281                 socklen_t       * __restrict anamelen;
  282         } */ *uap;
  283         int compat;
  284 {
  285         struct sockaddr *name;
  286         socklen_t namelen;
  287         struct file *fp;
  288         int error;
  289 
              /* No address wanted: nothing to copy in or out. */
  290         if (uap->name == NULL)
  291                 return (kern_accept(td, uap->s, NULL, NULL, NULL));
  292 
  293         error = copyin(uap->anamelen, &namelen, sizeof (namelen));
  294         if (error)
  295                 return (error);
  296 
  297         error = kern_accept(td, uap->s, &name, &namelen, &fp);
  298 
  299         /*
  300          * return a namelen of zero for older code which might
  301          * ignore the return value from accept.
               *
               * Note: kern_accept() zeroes namelen only when soaccept() fails;
               * for its other errors the value copied back is the one read in.
  302          */
  303         if (error) {
  304                 (void) copyout(&namelen,
  305                     uap->anamelen, sizeof(*uap->anamelen));
  306                 return (error);
  307         }
  308 
              /* error is known to be 0 here; name is NULL if no address came back. */
  309         if (error == 0 && name != NULL) {
  310 #ifdef COMPAT_OLDSOCK
  311                 if (compat)
  312                         ((struct osockaddr *)name)->sa_family =
  313                             name->sa_family;
  314 #endif
  315                 error = copyout(name, uap->name, namelen);
  316         }
  317         if (error == 0)
  318                 error = copyout(&namelen, uap->anamelen,
  319                     sizeof(namelen));
              /* Could not report the result: undo the descriptor installation. */
  320         if (error)
  321                 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
              /* Drop the file reference and the address handed back by kern_accept(). */
  322         fdrop(fp, td);
  323         free(name, M_SONAME);
  324         return (error);
  325 }
  326 
      /*
       * kern_accept(): take one completed connection off the listening socket
       * behind descriptor `s' and attach it to a newly allocated descriptor.
       *
       * td:      calling thread; the new descriptor number is stored in
       *          td->td_retval[0].
       * s:       descriptor of the listening socket.
       * name:    if not NULL, *name receives the address produced by soaccept()
       *          (NULL if there is none).  accept1() frees it with M_SONAME.
       * namelen: only dereferenced when name is not NULL.  In: caller's buffer
       *          size.  Out: clamped to sa_len, or 0 when soaccept() fails or
       *          returns no address.
       * fp:      if not NULL, on success *fp receives the new file together
       *          with the extra falloc() reference (accept1() releases it with
       *          fdrop()); on errors that reach the `done' label *fp is NULL.
       *          The two early returns below leave *fp unwritten.
       *
       * Returns 0 on success, otherwise: EINVAL for a negative *namelen or a
       * socket without SO_ACCEPTCONN, the getsock()/MAC/falloc() error,
       * EWOULDBLOCK for a non-blocking socket with an empty completed queue,
       * the msleep() error, the listening socket's pending so_error, or the
       * soaccept() error.
       */
  327 int
  328 kern_accept(struct thread *td, int s, struct sockaddr **name,
  329     socklen_t *namelen, struct file **fp)
  330 {
  331         struct filedesc *fdp;
  332         struct file *headfp, *nfp = NULL;
  333         struct sockaddr *sa = NULL;
  334         int error;
  335         struct socket *head, *so;
  336         int fd;
  337         u_int fflag;
  338         pid_t pgid;
  339         int tmp;
  340 
  341         if (name) {
  342                 *name = NULL;
  343                 if (*namelen < 0)
  344                         return (EINVAL);
  345         }
  346 
  347         AUDIT_ARG_FD(s);
  348         fdp = td->td_proc->p_fd;
  349         error = getsock(fdp, s, &headfp, &fflag);
  350         if (error)
  351                 return (error);
  352         head = headfp->f_data;
  353         if ((head->so_options & SO_ACCEPTCONN) == 0) {
  354                 error = EINVAL;
  355                 goto done;
  356         }
  357 #ifdef MAC
  358         error = mac_socket_check_accept(td->td_ucred, head);
  359         if (error != 0)
  360                 goto done;
  361 #endif
  362         error = falloc(td, &nfp, &fd);
  363         if (error)
  364                 goto done;
  365         ACCEPT_LOCK();
              /* Non-blocking listener with nothing queued: fail immediately. */
  366         if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
  367                 ACCEPT_UNLOCK();
  368                 error = EWOULDBLOCK;
  369                 goto noconnection;
  370         }
              /*
               * Sleep on so_timeo (msleep() is passed accept_mtx) until a
               * completed connection is queued or an error is posted on the
               * listening socket.
               */
  371         while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
  372                 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
  373                         head->so_error = ECONNABORTED;
  374                         break;
  375                 }
  376                 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
  377                     "accept", 0);
  378                 if (error) {
  379                         ACCEPT_UNLOCK();
  380                         goto noconnection;
  381                 }
  382         }
              /* Report a pending listener error once and clear it. */
  383         if (head->so_error) {
  384                 error = head->so_error;
  385                 head->so_error = 0;
  386                 ACCEPT_UNLOCK();
  387                 goto noconnection;
  388         }
  389         so = TAILQ_FIRST(&head->so_comp);
  390         KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
  391         KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
  392 
  393         /*
  394          * Before changing the flags on the socket, we have to bump the
  395          * reference count.  Otherwise, if the protocol calls sofree(),
  396          * the socket will be released due to a zero refcount.
  397          */
  398         SOCK_LOCK(so);                  /* soref() and so_state update */
  399         soref(so);                      /* file descriptor reference */
  400 
              /* Unlink from the completed queue; the new socket inherits SS_NBIO. */
  401         TAILQ_REMOVE(&head->so_comp, so, so_list);
  402         head->so_qlen--;
  403         so->so_state |= (head->so_state & SS_NBIO);
  404         so->so_qstate &= ~SQ_COMP;
  405         so->so_head = NULL;
  406 
  407         SOCK_UNLOCK(so);
  408         ACCEPT_UNLOCK();
  409 
  410         /* An extra reference on `nfp' has been held for us by falloc(). */
  411         td->td_retval[0] = fd;
  412 
  413         /* connection has been removed from the listen queue */
  414         KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
  415 
              /* Carry the listener's so_sigio owner, if any, over to the new socket. */
  416         pgid = fgetown(&head->so_sigio);
  417         if (pgid != 0)
  418                 fsetown(pgid, &so->so_sigio);
  419 
              /* The new file is set up with the listening file's flags (fflag). */
  420         finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
  421         /* Sync socket nonblocking/async state with file flags */
  422         tmp = fflag & FNONBLOCK;
  423         (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
  424         tmp = fflag & FASYNC;
  425         (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
  426         sa = 0;
  427         CURVNET_SET(so->so_vnet);
  428         error = soaccept(so, &sa);
  429         CURVNET_RESTORE();
  430         if (error) {
  431                 /*
  432                  * return a namelen of zero for older code which might
  433                  * ignore the return value from accept.
  434                  */
  435                 if (name)
  436                         *namelen = 0;
  437                 goto noconnection;
  438         }
              /* Accepted, but no address was returned: report a zero length. */
  439         if (sa == NULL) {
  440                 if (name)
  441                         *namelen = 0;
  442                 goto done;
  443         }
  444         if (name) {
  445                 /* check sa_len before it is destroyed */
  446                 if (*namelen > sa->sa_len)
  447                         *namelen = sa->sa_len;
  448 #ifdef KTRACE
  449                 if (KTRPOINT(td, KTR_STRUCT))
  450                         ktrsockaddr(sa);
  451 #endif
                      /* Ownership of sa moves to the caller; do not free it below. */
  452                 *name = sa;
  453                 sa = NULL;
  454         }
              /* The success path falls through here too, with error == 0. */
  455 noconnection:
  456         if (sa)
  457                 free(sa, M_SONAME);
  458 
  459         /*
  460          * close the new descriptor, assuming someone hasn't ripped it
  461          * out from under us.
  462          */
  463         if (error)
  464                 fdclose(fdp, nfp, fd, td);
  465 
  466         /*
  467          * Release explicitly held references before returning.  We return
  468          * a reference on nfp to the caller on success if they request it.
  469          */
  470 done:
  471         if (fp != NULL) {
  472                 if (error == 0) {
  473                         *fp = nfp;
  474                         nfp = NULL;
  475                 } else
  476                         *fp = NULL;
  477         }
              /* nfp is still set if the caller did not take it or an error occurred. */
  478         if (nfp != NULL)
  479                 fdrop(nfp, td);
  480         fdrop(headfp, td);
  481         return (error);
  482 }
  483 
      /*
       * accept() system call: accept1() with compat == 0, i.e. without the
       * struct osockaddr conversion.
       */
  484 int
  485 accept(td, uap)
  486         struct thread *td;
  487         struct accept_args *uap;
  488 {
  489 
  490         return (accept1(td, uap, 0));
  491 }
  492 
  493 #ifdef COMPAT_OLDSOCK
      /*
       * oaccept(): old-style accept, only built with COMPAT_OLDSOCK.  Calls
       * accept1() with compat == 1 so the returned address is patched through
       * a struct osockaddr view before being copied out.
       */
  494 int
  495 oaccept(td, uap)
  496         struct thread *td;
  497         struct accept_args *uap;
  498 {
  499 
  500         return (accept1(td, uap, 1));
  501 }
  502 #endif /* COMPAT_OLDSOCK */
  503 
  504 /* ARGSUSED */
      /*
       * connect() system call: fetch the address described by uap->name and
       * uap->namelen with getsockaddr(), hand it to kern_connect(), then free
       * it.
       *
       * Returns the getsockaddr() error if the address cannot be fetched,
       * otherwise the result of kern_connect().
       */
  505 int
  506 connect(td, uap)
  507         struct thread *td;
  508         struct connect_args /* {
  509                 int     s;
  510                 caddr_t name;
  511                 int     namelen;
  512         } */ *uap;
  513 {
  514         struct sockaddr *sa;
  515         int error;
  516 
  517         error = getsockaddr(&sa, uap->name, uap->namelen);
  518         if (error)
  519                 return (error);
  520 
  521         error = kern_connect(td, uap->s, sa);
              /* sa is released here whether or not kern_connect() succeeded. */
  522         free(sa, M_SONAME);
  523         return (error);
  524 }
  525 
  526 
      /*
       * kern_connect(): connect the socket behind descriptor `fd' to `sa' and,
       * unless the socket is non-blocking, wait for the attempt to finish.
       *
       * sa is used as given and is not freed here; connect() above frees it.
       *
       * Returns 0 on success, otherwise:
       *   - the getsock() error if fd is not a usable socket descriptor;
       *   - EALREADY if SS_ISCONNECTING is already set on the socket;
       *   - the MAC check error (when MAC is configured) or soconnect() error;
       *   - EINPROGRESS for a non-blocking socket that is still connecting;
       *   - the msleep() error, with ERESTART reported as EINTR;
       *   - the socket's so_error once the wait is over.
       */
  527 int
  528 kern_connect(td, fd, sa)
  529         struct thread *td;
  530         int fd;
  531         struct sockaddr *sa;
  532 {
  533         struct socket *so;
  534         struct file *fp;
  535         int error;
  536         int interrupted = 0;
  537 
  538         AUDIT_ARG_FD(fd);
  539         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
  540         if (error)
  541                 return (error);
  542         so = fp->f_data;
              /* An attempt is already in flight; done1 leaves its state alone. */
  543         if (so->so_state & SS_ISCONNECTING) {
  544                 error = EALREADY;
  545                 goto done1;
  546         }
  547 #ifdef KTRACE
  548         if (KTRPOINT(td, KTR_STRUCT))
  549                 ktrsockaddr(sa);
  550 #endif
  551 #ifdef MAC
  552         error = mac_socket_check_connect(td->td_ucred, so, sa);
  553         if (error)
  554                 goto bad;
  555 #endif
  556         error = soconnect(so, sa, td);
  557         if (error)
  558                 goto bad;
              /* Non-blocking: return without waiting and keep SS_ISCONNECTING. */
  559         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
  560                 error = EINPROGRESS;
  561                 goto done1;
  562         }
  563         SOCK_LOCK(so);
              /* Sleep on so_timeo until the attempt ends or an error is posted. */
  564         while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  565                 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
  566                     "connec", 0);
  567                 if (error) {
                              /* Remember EINTR/ERESTART so the flag is kept below. */
  568                         if (error == EINTR || error == ERESTART)
  569                                 interrupted = 1;
  570                         break;
  571                 }
  572         }
              /* The wait ended normally: fetch and clear the socket's error. */
  573         if (error == 0) {
  574                 error = so->so_error;
  575                 so->so_error = 0;
  576         }
  577         SOCK_UNLOCK(so);
  578 bad:
              /* Note: so_state is modified here after SOCK_UNLOCK(). */
  579         if (!interrupted)
  580                 so->so_state &= ~SS_ISCONNECTING;
  581         if (error == ERESTART)
  582                 error = EINTR;
  583 done1:
              /* Release the file reference taken by getsock(). */
  584         fdrop(fp, td);
  585         return (error);
  586 }
  587 
      /*
       * kern_socketpair(): create two sockets of the given domain, type and
       * protocol, connect them to each other with soconnect2(), and attach each
       * to a newly allocated descriptor.
       *
       * rsv: caller-supplied array of two ints; rsv[0] and rsv[1] receive the
       *      two descriptor numbers.  They are written as soon as each
       *      descriptor is allocated, so they may hold already-closed numbers
       *      when an error is returned.
       *
       * Returns 0 on success, otherwise the error from the MAC check (when MAC
       * is configured), socreate(), falloc() or soconnect2().  The free4..free1
       * labels unwind in reverse order of acquisition.
       */
  588 int
  589 kern_socketpair(struct thread *td, int domain, int type, int protocol,
  590     int *rsv)
  591 {
  592         struct filedesc *fdp = td->td_proc->p_fd;
  593         struct file *fp1, *fp2;
  594         struct socket *so1, *so2;
  595         int fd, error;
  596 
  597         AUDIT_ARG_SOCKET(domain, type, protocol);
  598 #ifdef MAC
  599         /* We might want to have a separate check for socket pairs. */
  600         error = mac_socket_check_create(td->td_ucred, domain, type,
  601             protocol);
  602         if (error)
  603                 return (error);
  604 #endif
  605         error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
  606         if (error)
  607                 return (error);
  608         error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
  609         if (error)
  610                 goto free1;
  611         /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
  612         error = falloc(td, &fp1, &fd);
  613         if (error)
  614                 goto free2;
  615         rsv[0] = fd;
  616         fp1->f_data = so1;      /* so1 already has ref count */
  617         error = falloc(td, &fp2, &fd);
  618         if (error)
  619                 goto free3;
  620         fp2->f_data = so2;      /* so2 already has ref count */
  621         rsv[1] = fd;
  622         error = soconnect2(so1, so2);
  623         if (error)
  624                 goto free4;
  625         if (type == SOCK_DGRAM) {
  626                 /*
  627                  * Datagram socket connection is asymmetric.
  628                  */
  629                  error = soconnect2(so2, so1);
  630                  if (error)
  631                         goto free4;
  632         }
              /* finit() is deferred until both sockets are connected. */
  633         finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
  634         finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
              /* Drop the extra falloc() references. */
  635         fdrop(fp1, td);
  636         fdrop(fp2, td);
  637         return (0);
              /* Error unwinding: each label undoes one step, then falls through. */
  638 free4:
  639         fdclose(fdp, fp2, rsv[1], td);
  640         fdrop(fp2, td);
  641 free3:
  642         fdclose(fdp, fp1, rsv[0], td);
  643         fdrop(fp1, td);
  644 free2:
  645         if (so2 != NULL)
  646                 (void)soclose(so2);
  647 free1:
  648         if (so1 != NULL)
  649                 (void)soclose(so1);
  650         return (error);
  651 }
  652 
      /*
       * socketpair() system call: create the pair with kern_socketpair() and
       * copy the two descriptor numbers out to uap->rsv.
       *
       * Returns 0 on success, or the error from kern_socketpair() or copyout().
       * If the copyout fails, both new descriptors are closed again.
       */
  653 int
  654 socketpair(struct thread *td, struct socketpair_args *uap)
  655 {
  656         int error, sv[2];
  657 
  658         error = kern_socketpair(td, uap->domain, uap->type,
  659             uap->protocol, sv);
  660         if (error)
  661                 return (error);
  662         error = copyout(sv, uap->rsv, 2 * sizeof(int));
  663         if (error) {
                      /* The caller never learned the numbers: close both. */
  664                 (void)kern_close(td, sv[0]);
  665                 (void)kern_close(td, sv[1]);
  666         }
  667         return (error);
  668 }
  669 
      /*
       * sendit(): common front end for sendto(), sendmsg() and the old-style
       * send calls.  Fetches the optional destination address and control data
       * described by *mp, then calls kern_sendit() with UIO_USERSPACE.
       *
       * mp: message header prepared by the caller.  mp->msg_name is overwritten
       *     with the address obtained from getsockaddr(); that address is freed
       *     before returning, so mp->msg_name must not be used afterwards.
       *
       * With COMPAT_OLDSOCK, mp->msg_flags == MSG_COMPAT marks an old-style
       * call: the minimum control length check is skipped and a struct cmsghdr
       * (SOL_SOCKET / SCM_RIGHTS) is prepended to the control data.
       *
       * Returns the getsockaddr() or sockargs() error, EINVAL for control data
       * shorter than a struct cmsghdr, otherwise the result of kern_sendit().
       */
  670 static int
  671 sendit(td, s, mp, flags)
  672         struct thread *td;
  673         int s;
  674         struct msghdr *mp;
  675         int flags;
  676 {
  677         struct mbuf *control;
  678         struct sockaddr *to;
  679         int error;
  680 
  681         if (mp->msg_name != NULL) {
  682                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
  683                 if (error) {
                              /* Keep the cleanup at `bad' from freeing an unset pointer. */
  684                         to = NULL;
  685                         goto bad;
  686                 }
  687                 mp->msg_name = to;
  688         } else {
  689                 to = NULL;
  690         }
  691 
  692         if (mp->msg_control) {
  693                 if (mp->msg_controllen < sizeof(struct cmsghdr)
  694 #ifdef COMPAT_OLDSOCK
  695                     && mp->msg_flags != MSG_COMPAT
  696 #endif
  697                 ) {
  698                         error = EINVAL;
  699                         goto bad;
  700                 }
                      /* Fetch the control data into an MT_CONTROL mbuf. */
  701                 error = sockargs(&control, mp->msg_control,
  702                     mp->msg_controllen, MT_CONTROL);
  703                 if (error)
  704                         goto bad;
  705 #ifdef COMPAT_OLDSOCK
  706                 if (mp->msg_flags == MSG_COMPAT) {
  707                         struct cmsghdr *cm;
  708 
                              /* Old-style data has no header: prepend one. */
  709                         M_PREPEND(control, sizeof(*cm), M_WAIT);
  710                         cm = mtod(control, struct cmsghdr *);
  711                         cm->cmsg_len = control->m_len;
  712                         cm->cmsg_level = SOL_SOCKET;
  713                         cm->cmsg_type = SCM_RIGHTS;
  714                 }
  715 #endif
  716         } else {
  717                 control = NULL;
  718         }
  719 
              /* control is handed over to kern_sendit(); it is not freed here. */
  720         error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
  721 
  722 bad:
  723         if (to)
  724                 free(to, M_SONAME);
  725         return (error);
  726 }
  727 
      /*
       * kern_sendit(): send the data described by *mp on the socket behind
       * descriptor `s'.
       *
       * td:      calling thread; on success the number of bytes sent is stored
       *          in td->td_retval[0].
       * s:       socket descriptor.
       * mp:      message header; msg_iov/msg_iovlen describe the data and
       *          msg_name, if not NULL, is passed to sosend() as the
       *          destination address.
       * flags:   passed to sosend(); MSG_NOSIGNAL also suppresses SIGPIPE here.
       * control: optional control mbuf chain.  Ownership always passes to this
       *          function: it is handed to sosend() (which is expected to
       *          dispose of it), and on every exit taken before sosend() is
       *          reached it is freed here with m_freem().  Callers must not
       *          free it themselves.
       * segflg:  address space the iovecs point into.
       *
       * Returns 0 on success, otherwise the getsock() error, the MAC check
       * error (when MAC is configured), EINVAL if the summed iovec lengths go
       * negative, or the sosend() error.  An ERESTART, EINTR or EWOULDBLOCK
       * from sosend() is turned into success when some data was already
       * transferred.  EPIPE raises SIGPIPE on the process unless SO_NOSIGPIPE
       * is set on the socket or MSG_NOSIGNAL was passed.
       */
  728 int
  729 kern_sendit(td, s, mp, flags, control, segflg)
  730         struct thread *td;
  731         int s;
  732         struct msghdr *mp;
  733         int flags;
  734         struct mbuf *control;
  735         enum uio_seg segflg;
  736 {
  737         struct file *fp;
  738         struct uio auio;
  739         struct iovec *iov;
  740         struct socket *so;
  741         int i;
  742         int len, error;
  743 #ifdef KTRACE
  744         struct uio *ktruio = NULL;
  745 #endif
  746 
  747         AUDIT_ARG_FD(s);
  748         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
  749         if (error) {
                      /* sosend() is never reached: free the control chain. */
                      m_freem(control);
  750                 return (error);
              }
  751         so = (struct socket *)fp->f_data;
  752 
  753 #ifdef MAC
  754         if (mp->msg_name != NULL) {
  755                 error = mac_socket_check_connect(td->td_ucred, so,
  756                     mp->msg_name);
  757                 if (error) {
                              m_freem(control);
  758                         goto bad;
                      }
  759         }
  760         error = mac_socket_check_send(td->td_ucred, so);
  761         if (error) {
                      m_freem(control);
  762                 goto bad;
              }
  763 #endif
  764 
  765         auio.uio_iov = mp->msg_iov;
  766         auio.uio_iovcnt = mp->msg_iovlen;
  767         auio.uio_segflg = segflg;
  768         auio.uio_rw = UIO_WRITE;
  769         auio.uio_td = td;
  770         auio.uio_offset = 0;                    /* XXX */
  771         auio.uio_resid = 0;
  772         iov = mp->msg_iov;
              /* Total the request size, rejecting a sum that goes negative. */
  773         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  774                 if ((auio.uio_resid += iov->iov_len) < 0) {
  775                         error = EINVAL;
                              m_freem(control);
  776                         goto bad;
  777                 }
  778         }
  779 #ifdef KTRACE
  780         if (KTRPOINT(td, KTR_GENIO))
  781                 ktruio = cloneuio(&auio);
  782 #endif
  783         len = auio.uio_resid;
              /* From here on the control chain belongs to sosend(). */
  784         error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
  785         if (error) {
                      /* A partial transfer that was interrupted counts as success. */
  786                 if (auio.uio_resid != len && (error == ERESTART ||
  787                     error == EINTR || error == EWOULDBLOCK))
  788                         error = 0;
  789                 /* Generation of SIGPIPE can be controlled per socket */
  790                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
  791                     !(flags & MSG_NOSIGNAL)) {
  792                         PROC_LOCK(td->td_proc);
  793                         psignal(td->td_proc, SIGPIPE);
  794                         PROC_UNLOCK(td->td_proc);
  795                 }
  796         }
  797         if (error == 0)
  798                 td->td_retval[0] = len - auio.uio_resid;
  799 #ifdef KTRACE
  800         if (ktruio != NULL) {
  801                 ktruio->uio_resid = td->td_retval[0];
  802                 ktrgenio(s, UIO_WRITE, ktruio, error);
  803         }
  804 #endif
  805 bad:
              /* Release the file reference taken by getsock(). */
  806         fdrop(fp, td);
  807         return (error);
  808 }
  809 
      /*
       * sendto() system call: wrap the caller's buffer in a one-element iovec,
       * build a message header around it with uap->to / uap->tolen as the
       * destination, and pass it to sendit().
       *
       * Returns the result of sendit().
       */
  810 int
  811 sendto(td, uap)
  812         struct thread *td;
  813         struct sendto_args /* {
  814                 int     s;
  815                 caddr_t buf;
  816                 size_t  len;
  817                 int     flags;
  818                 caddr_t to;
  819                 int     tolen;
  820         } */ *uap;
  821 {
  822         struct msghdr msg;
  823         struct iovec aiov;
  824         int error;
  825 
  826         msg.msg_name = uap->to;
  827         msg.msg_namelen = uap->tolen;
  828         msg.msg_iov = &aiov;
  829         msg.msg_iovlen = 1;
              /* No control data; msg_controllen is left unset. */
  830         msg.msg_control = 0;
  831 #ifdef COMPAT_OLDSOCK
              /* sendit() compares msg_flags with MSG_COMPAT in this configuration. */
  832         msg.msg_flags = 0;
  833 #endif
  834         aiov.iov_base = uap->buf;
  835         aiov.iov_len = uap->len;
  836         error = sendit(td, uap->s, &msg, uap->flags);
  837         return (error);
  838 }
  839 
  840 #ifdef COMPAT_OLDSOCK
      /*
       * osend(): old-style send, only built with COMPAT_OLDSOCK.  Wraps the
       * caller's buffer in a one-element iovec with no destination address and
       * no control data, and passes it to sendit().
       *
       * Returns the result of sendit().
       */
  841 int
  842 osend(td, uap)
  843         struct thread *td;
  844         struct osend_args /* {
  845                 int     s;
  846                 caddr_t buf;
  847                 int     len;
  848                 int     flags;
  849         } */ *uap;
  850 {
  851         struct msghdr msg;
  852         struct iovec aiov;
  853         int error;
  854 
  855         msg.msg_name = 0;
  856         msg.msg_namelen = 0;
  857         msg.msg_iov = &aiov;
  858         msg.msg_iovlen = 1;
  859         aiov.iov_base = uap->buf;
  860         aiov.iov_len = uap->len;
  861         msg.msg_control = 0;
              /* Not MSG_COMPAT: sendit() applies its normal handling. */
  862         msg.msg_flags = 0;
  863         error = sendit(td, uap->s, &msg, uap->flags);
  864         return (error);
  865 }
  866 
      /*
       * osendmsg(): old-style sendmsg, only built with COMPAT_OLDSOCK (this
       * function sits inside the #ifdef opened before osend()).
       *
       * Copies in sizeof(struct omsghdr) bytes of the caller's header into a
       * struct msghdr, copies in the iovec array, marks the message MSG_COMPAT
       * so sendit() treats the control data as old-style, and sends it.
       *
       * Returns the copyin() or copyiniov() error, otherwise the result of
       * sendit().
       */
  867 int
  868 osendmsg(td, uap)
  869         struct thread *td;
  870         struct osendmsg_args /* {
  871                 int     s;
  872                 caddr_t msg;
  873                 int     flags;
  874         } */ *uap;
  875 {
  876         struct msghdr msg;
  877         struct iovec *iov;
  878         int error;
  879 
  880         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
  881         if (error)
  882                 return (error);
  883         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  884         if (error)
  885                 return (error);
              /* Point the header at the array returned by copyiniov(). */
  886         msg.msg_iov = iov;
  887         msg.msg_flags = MSG_COMPAT;
  888         error = sendit(td, uap->s, &msg, uap->flags);
  889         free(iov, M_IOV);
  890         return (error);
  891 }
  892 #endif
  893 
      /*
       * sendmsg() system call: copy in the caller's struct msghdr and its iovec
       * array, then pass the message to sendit().
       *
       * Returns the copyin() or copyiniov() error, otherwise the result of
       * sendit().  The iovec array obtained from copyiniov() is freed before
       * returning.
       */
  894 int
  895 sendmsg(td, uap)
  896         struct thread *td;
  897         struct sendmsg_args /* {
  898                 int     s;
  899                 caddr_t msg;
  900                 int     flags;
  901         } */ *uap;
  902 {
  903         struct msghdr msg;
  904         struct iovec *iov;
  905         int error;
  906 
  907         error = copyin(uap->msg, &msg, sizeof (msg));
  908         if (error)
  909                 return (error);
  910         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  911         if (error)
  912                 return (error);
              /* Point the header at the array returned by copyiniov(). */
  913         msg.msg_iov = iov;
  914 #ifdef COMPAT_OLDSOCK
              /* Overwrite the copied-in msg_flags so sendit() never sees MSG_COMPAT. */
  915         msg.msg_flags = 0;
  916 #endif
  917         error = sendit(td, uap->s, &msg, uap->flags);
  918         free(iov, M_IOV);
  919         return (error);
  920 }
  921 
  922 int
  923 kern_recvit(td, s, mp, fromseg, controlp)
  924         struct thread *td;
  925         int s;
  926         struct msghdr *mp;
  927         enum uio_seg fromseg;
  928         struct mbuf **controlp;
  929 {
  930         struct uio auio;
  931         struct iovec *iov;
  932         int i;
  933         socklen_t len;
  934         int error;
  935         struct mbuf *m, *control = 0;
  936         caddr_t ctlbuf;
  937         struct file *fp;
  938         struct socket *so;
  939         struct sockaddr *fromsa = 0;
  940 #ifdef KTRACE
  941         struct uio *ktruio = NULL;
  942 #endif
  943 
  944         if(controlp != NULL)
  945                 *controlp = 0;
  946 
  947         AUDIT_ARG_FD(s);
  948         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
  949         if (error)
  950                 return (error);
  951         so = fp->f_data;
  952 
  953 #ifdef MAC
  954         error = mac_socket_check_receive(td->td_ucred, so);
  955         if (error) {
  956                 fdrop(fp, td);
  957                 return (error);
  958         }
  959 #endif
  960 
  961         auio.uio_iov = mp->msg_iov;
  962         auio.uio_iovcnt = mp->msg_iovlen;
  963         auio.uio_segflg = UIO_USERSPACE;
  964         auio.uio_rw = UIO_READ;
  965         auio.uio_td = td;
  966         auio.uio_offset = 0;                    /* XXX */
  967         auio.uio_resid = 0;
  968         iov = mp->msg_iov;
  969         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  970                 if ((auio.uio_resid += iov->iov_len) < 0) {
  971                         fdrop(fp, td);
  972                         return (EINVAL);
  973                 }
  974         }
  975 #ifdef KTRACE
  976         if (KTRPOINT(td, KTR_GENIO))
  977                 ktruio = cloneuio(&auio);
  978 #endif
  979         len = auio.uio_resid;
  980         CURVNET_SET(so->so_vnet);
  981         error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
  982             (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
  983             &mp->msg_flags);
  984         CURVNET_RESTORE();
  985         if (error) {
  986                 if (auio.uio_resid != (int)len && (error == ERESTART ||
  987                     error == EINTR || error == EWOULDBLOCK))
  988                         error = 0;
  989         }
  990 #ifdef KTRACE
  991         if (ktruio != NULL) {
  992                 ktruio->uio_resid = (int)len - auio.uio_resid;
  993                 ktrgenio(s, UIO_READ, ktruio, error);
  994         }
  995 #endif
  996         if (error)
  997                 goto out;
  998         td->td_retval[0] = (int)len - auio.uio_resid;
  999         if (mp->msg_name) {
 1000                 len = mp->msg_namelen;
 1001                 if (len <= 0 || fromsa == 0)
 1002                         len = 0;
 1003                 else {
 1004                         /* save sa_len before it is destroyed by MSG_COMPAT */
 1005                         len = MIN(len, fromsa->sa_len);
 1006 #ifdef COMPAT_OLDSOCK
 1007                         if (mp->msg_flags & MSG_COMPAT)
 1008                                 ((struct osockaddr *)fromsa)->sa_family =
 1009                                     fromsa->sa_family;
 1010 #endif
 1011                         if (fromseg == UIO_USERSPACE) {
 1012                                 error = copyout(fromsa, mp->msg_name,
 1013                                     (unsigned)len);
 1014                                 if (error)
 1015                                         goto out;
 1016                         } else
 1017                                 bcopy(fromsa, mp->msg_name, len);
 1018                 }
 1019                 mp->msg_namelen = len;
 1020         }
 1021         if (mp->msg_control && controlp == NULL) {
 1022 #ifdef COMPAT_OLDSOCK
 1023                 /*
 1024                  * We assume that old recvmsg calls won't receive access
 1025                  * rights and other control info, esp. as control info
 1026                  * is always optional and those options didn't exist in 4.3.
 1027                  * If we receive rights, trim the cmsghdr; anything else
 1028                  * is tossed.
 1029                  */
 1030                 if (control && mp->msg_flags & MSG_COMPAT) {
 1031                         if (mtod(control, struct cmsghdr *)->cmsg_level !=
 1032                             SOL_SOCKET ||
 1033                             mtod(control, struct cmsghdr *)->cmsg_type !=
 1034                             SCM_RIGHTS) {
 1035                                 mp->msg_controllen = 0;
 1036                                 goto out;
 1037                         }
 1038                         control->m_len -= sizeof (struct cmsghdr);
 1039                         control->m_data += sizeof (struct cmsghdr);
 1040                 }
 1041 #endif
 1042                 len = mp->msg_controllen;
 1043                 m = control;
 1044                 mp->msg_controllen = 0;
 1045                 ctlbuf = mp->msg_control;
 1046 
 1047                 while (m && len > 0) {
 1048                         unsigned int tocopy;
 1049 
 1050                         if (len >= m->m_len)
 1051                                 tocopy = m->m_len;
 1052                         else {
 1053                                 mp->msg_flags |= MSG_CTRUNC;
 1054                                 tocopy = len;
 1055                         }
 1056 
 1057                         if ((error = copyout(mtod(m, caddr_t),
 1058                                         ctlbuf, tocopy)) != 0)
 1059                                 goto out;
 1060 
 1061                         ctlbuf += tocopy;
 1062                         len -= tocopy;
 1063                         m = m->m_next;
 1064                 }
 1065                 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
 1066         }
 1067 out:
 1068         fdrop(fp, td);
 1069 #ifdef KTRACE
 1070         if (fromsa && KTRPOINT(td, KTR_STRUCT))
 1071                 ktrsockaddr(fromsa);
 1072 #endif
 1073         if (fromsa)
 1074                 free(fromsa, M_SONAME);
 1075 
 1076         if (error == 0 && controlp != NULL)  
 1077                 *controlp = control;
 1078         else  if (control)
 1079                 m_freem(control);
 1080 
 1081         return (error);
 1082 }
 1083 
 1084 static int
 1085 recvit(td, s, mp, namelenp)
 1086         struct thread *td;
 1087         int s;
 1088         struct msghdr *mp;
 1089         void *namelenp;
 1090 {
 1091         int error;
 1092 
 1093         error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
 1094         if (error)
 1095                 return (error);
 1096         if (namelenp) {
 1097                 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
 1098 #ifdef COMPAT_OLDSOCK
 1099                 if (mp->msg_flags & MSG_COMPAT)
 1100                         error = 0;      /* old recvfrom didn't check */
 1101 #endif
 1102         }
 1103         return (error);
 1104 }
 1105 
 1106 int
 1107 recvfrom(td, uap)
 1108         struct thread *td;
 1109         struct recvfrom_args /* {
 1110                 int     s;
 1111                 caddr_t buf;
 1112                 size_t  len;
 1113                 int     flags;
 1114                 struct sockaddr * __restrict    from;
 1115                 socklen_t * __restrict fromlenaddr;
 1116         } */ *uap;
 1117 {
 1118         struct msghdr msg;
 1119         struct iovec aiov;
 1120         int error;
 1121 
 1122         if (uap->fromlenaddr) {
 1123                 error = copyin(uap->fromlenaddr,
 1124                     &msg.msg_namelen, sizeof (msg.msg_namelen));
 1125                 if (error)
 1126                         goto done2;
 1127         } else {
 1128                 msg.msg_namelen = 0;
 1129         }
 1130         msg.msg_name = uap->from;
 1131         msg.msg_iov = &aiov;
 1132         msg.msg_iovlen = 1;
 1133         aiov.iov_base = uap->buf;
 1134         aiov.iov_len = uap->len;
 1135         msg.msg_control = 0;
 1136         msg.msg_flags = uap->flags;
 1137         error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 1138 done2:
 1139         return(error);
 1140 }
 1141 
 1142 #ifdef COMPAT_OLDSOCK
 1143 int
 1144 orecvfrom(td, uap)
 1145         struct thread *td;
 1146         struct recvfrom_args *uap;
 1147 {
 1148 
 1149         uap->flags |= MSG_COMPAT;
 1150         return (recvfrom(td, uap));
 1151 }
 1152 #endif
 1153 
 1154 #ifdef COMPAT_OLDSOCK
 1155 int
 1156 orecv(td, uap)
 1157         struct thread *td;
 1158         struct orecv_args /* {
 1159                 int     s;
 1160                 caddr_t buf;
 1161                 int     len;
 1162                 int     flags;
 1163         } */ *uap;
 1164 {
 1165         struct msghdr msg;
 1166         struct iovec aiov;
 1167         int error;
 1168 
 1169         msg.msg_name = 0;
 1170         msg.msg_namelen = 0;
 1171         msg.msg_iov = &aiov;
 1172         msg.msg_iovlen = 1;
 1173         aiov.iov_base = uap->buf;
 1174         aiov.iov_len = uap->len;
 1175         msg.msg_control = 0;
 1176         msg.msg_flags = uap->flags;
 1177         error = recvit(td, uap->s, &msg, NULL);
 1178         return (error);
 1179 }
 1180 
 1181 /*
 1182  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 1183  * overlays the new one, missing only the flags, and with the (old) access
 1184  * rights where the control fields are now.
 1185  */
 1186 int
 1187 orecvmsg(td, uap)
 1188         struct thread *td;
 1189         struct orecvmsg_args /* {
 1190                 int     s;
 1191                 struct  omsghdr *msg;
 1192                 int     flags;
 1193         } */ *uap;
 1194 {
 1195         struct msghdr msg;
 1196         struct iovec *iov;
 1197         int error;
 1198 
 1199         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 1200         if (error)
 1201                 return (error);
 1202         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1203         if (error)
 1204                 return (error);
 1205         msg.msg_flags = uap->flags | MSG_COMPAT;
 1206         msg.msg_iov = iov;
 1207         error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
 1208         if (msg.msg_controllen && error == 0)
 1209                 error = copyout(&msg.msg_controllen,
 1210                     &uap->msg->msg_accrightslen, sizeof (int));
 1211         free(iov, M_IOV);
 1212         return (error);
 1213 }
 1214 #endif
 1215 
 1216 int
 1217 recvmsg(td, uap)
 1218         struct thread *td;
 1219         struct recvmsg_args /* {
 1220                 int     s;
 1221                 struct  msghdr *msg;
 1222                 int     flags;
 1223         } */ *uap;
 1224 {
 1225         struct msghdr msg;
 1226         struct iovec *uiov, *iov;
 1227         int error;
 1228 
 1229         error = copyin(uap->msg, &msg, sizeof (msg));
 1230         if (error)
 1231                 return (error);
 1232         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1233         if (error)
 1234                 return (error);
 1235         msg.msg_flags = uap->flags;
 1236 #ifdef COMPAT_OLDSOCK
 1237         msg.msg_flags &= ~MSG_COMPAT;
 1238 #endif
 1239         uiov = msg.msg_iov;
 1240         msg.msg_iov = iov;
 1241         error = recvit(td, uap->s, &msg, NULL);
 1242         if (error == 0) {
 1243                 msg.msg_iov = uiov;
 1244                 error = copyout(&msg, uap->msg, sizeof(msg));
 1245         }
 1246         free(iov, M_IOV);
 1247         return (error);
 1248 }
 1249 
 1250 /* ARGSUSED */
 1251 int
 1252 shutdown(td, uap)
 1253         struct thread *td;
 1254         struct shutdown_args /* {
 1255                 int     s;
 1256                 int     how;
 1257         } */ *uap;
 1258 {
 1259         struct socket *so;
 1260         struct file *fp;
 1261         int error;
 1262 
 1263         AUDIT_ARG_FD(uap->s);
 1264         error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
 1265         if (error == 0) {
 1266                 so = fp->f_data;
 1267                 error = soshutdown(so, uap->how);
 1268                 fdrop(fp, td);
 1269         }
 1270         return (error);
 1271 }
 1272 
 1273 /* ARGSUSED */
 1274 int
 1275 setsockopt(td, uap)
 1276         struct thread *td;
 1277         struct setsockopt_args /* {
 1278                 int     s;
 1279                 int     level;
 1280                 int     name;
 1281                 caddr_t val;
 1282                 int     valsize;
 1283         } */ *uap;
 1284 {
 1285 
 1286         return (kern_setsockopt(td, uap->s, uap->level, uap->name,
 1287             uap->val, UIO_USERSPACE, uap->valsize));
 1288 }
 1289 
 1290 int
 1291 kern_setsockopt(td, s, level, name, val, valseg, valsize)
 1292         struct thread *td;
 1293         int s;
 1294         int level;
 1295         int name;
 1296         void *val;
 1297         enum uio_seg valseg;
 1298         socklen_t valsize;
 1299 {
 1300         int error;
 1301         struct socket *so;
 1302         struct file *fp;
 1303         struct sockopt sopt;
 1304 
 1305         if (val == NULL && valsize != 0)
 1306                 return (EFAULT);
 1307         if ((int)valsize < 0)
 1308                 return (EINVAL);
 1309 
 1310         sopt.sopt_dir = SOPT_SET;
 1311         sopt.sopt_level = level;
 1312         sopt.sopt_name = name;
 1313         sopt.sopt_val = val;
 1314         sopt.sopt_valsize = valsize;
 1315         switch (valseg) {
 1316         case UIO_USERSPACE:
 1317                 sopt.sopt_td = td;
 1318                 break;
 1319         case UIO_SYSSPACE:
 1320                 sopt.sopt_td = NULL;
 1321                 break;
 1322         default:
 1323                 panic("kern_setsockopt called with bad valseg");
 1324         }
 1325 
 1326         AUDIT_ARG_FD(s);
 1327         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
 1328         if (error == 0) {
 1329                 so = fp->f_data;
 1330                 CURVNET_SET(so->so_vnet);
 1331                 error = sosetopt(so, &sopt);
 1332                 CURVNET_RESTORE();
 1333                 fdrop(fp, td);
 1334         }
 1335         return(error);
 1336 }
 1337 
 1338 /* ARGSUSED */
 1339 int
 1340 getsockopt(td, uap)
 1341         struct thread *td;
 1342         struct getsockopt_args /* {
 1343                 int     s;
 1344                 int     level;
 1345                 int     name;
 1346                 void * __restrict       val;
 1347                 socklen_t * __restrict avalsize;
 1348         } */ *uap;
 1349 {
 1350         socklen_t valsize;
 1351         int     error;
 1352 
 1353         if (uap->val) {
 1354                 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 1355                 if (error)
 1356                         return (error);
 1357         }
 1358 
 1359         error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 1360             uap->val, UIO_USERSPACE, &valsize);
 1361 
 1362         if (error == 0)
 1363                 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 1364         return (error);
 1365 }
 1366 
 1367 /*
 1368  * Kernel version of getsockopt.
 1369  * optval can be a userland or userspace. optlen is always a kernel pointer.
 1370  */
 1371 int
 1372 kern_getsockopt(td, s, level, name, val, valseg, valsize)
 1373         struct thread *td;
 1374         int s;
 1375         int level;
 1376         int name;
 1377         void *val;
 1378         enum uio_seg valseg;
 1379         socklen_t *valsize;
 1380 {
 1381         int error;
 1382         struct  socket *so;
 1383         struct file *fp;
 1384         struct  sockopt sopt;
 1385 
 1386         if (val == NULL)
 1387                 *valsize = 0;
 1388         if ((int)*valsize < 0)
 1389                 return (EINVAL);
 1390 
 1391         sopt.sopt_dir = SOPT_GET;
 1392         sopt.sopt_level = level;
 1393         sopt.sopt_name = name;
 1394         sopt.sopt_val = val;
 1395         sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
 1396         switch (valseg) {
 1397         case UIO_USERSPACE:
 1398                 sopt.sopt_td = td;
 1399                 break;
 1400         case UIO_SYSSPACE:
 1401                 sopt.sopt_td = NULL;
 1402                 break;
 1403         default:
 1404                 panic("kern_getsockopt called with bad valseg");
 1405         }
 1406 
 1407         AUDIT_ARG_FD(s);
 1408         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
 1409         if (error == 0) {
 1410                 so = fp->f_data;
 1411                 CURVNET_SET(so->so_vnet);
 1412                 error = sogetopt(so, &sopt);
 1413                 CURVNET_RESTORE();
 1414                 *valsize = sopt.sopt_valsize;
 1415                 fdrop(fp, td);
 1416         }
 1417         return (error);
 1418 }
 1419 
 1420 /*
 1421  * getsockname1() - Get socket name.
 1422  */
 1423 /* ARGSUSED */
 1424 static int
 1425 getsockname1(td, uap, compat)
 1426         struct thread *td;
 1427         struct getsockname_args /* {
 1428                 int     fdes;
 1429                 struct sockaddr * __restrict asa;
 1430                 socklen_t * __restrict alen;
 1431         } */ *uap;
 1432         int compat;
 1433 {
 1434         struct sockaddr *sa;
 1435         socklen_t len;
 1436         int error;
 1437 
 1438         error = copyin(uap->alen, &len, sizeof(len));
 1439         if (error)
 1440                 return (error);
 1441 
 1442         error = kern_getsockname(td, uap->fdes, &sa, &len);
 1443         if (error)
 1444                 return (error);
 1445 
 1446         if (len != 0) {
 1447 #ifdef COMPAT_OLDSOCK
 1448                 if (compat)
 1449                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1450 #endif
 1451                 error = copyout(sa, uap->asa, (u_int)len);
 1452         }
 1453         free(sa, M_SONAME);
 1454         if (error == 0)
 1455                 error = copyout(&len, uap->alen, sizeof(len));
 1456         return (error);
 1457 }
 1458 
 1459 int
 1460 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
 1461     socklen_t *alen)
 1462 {
 1463         struct socket *so;
 1464         struct file *fp;
 1465         socklen_t len;
 1466         int error;
 1467 
 1468         if (*alen < 0)
 1469                 return (EINVAL);
 1470 
 1471         AUDIT_ARG_FD(fd);
 1472         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
 1473         if (error)
 1474                 return (error);
 1475         so = fp->f_data;
 1476         *sa = NULL;
 1477         CURVNET_SET(so->so_vnet);
 1478         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
 1479         CURVNET_RESTORE();
 1480         if (error)
 1481                 goto bad;
 1482         if (*sa == NULL)
 1483                 len = 0;
 1484         else
 1485                 len = MIN(*alen, (*sa)->sa_len);
 1486         *alen = len;
 1487 #ifdef KTRACE
 1488         if (KTRPOINT(td, KTR_STRUCT))
 1489                 ktrsockaddr(*sa);
 1490 #endif
 1491 bad:
 1492         fdrop(fp, td);
 1493         if (error && *sa) {
 1494                 free(*sa, M_SONAME);
 1495                 *sa = NULL;
 1496         }
 1497         return (error);
 1498 }
 1499 
 1500 int
 1501 getsockname(td, uap)
 1502         struct thread *td;
 1503         struct getsockname_args *uap;
 1504 {
 1505 
 1506         return (getsockname1(td, uap, 0));
 1507 }
 1508 
 1509 #ifdef COMPAT_OLDSOCK
 1510 int
 1511 ogetsockname(td, uap)
 1512         struct thread *td;
 1513         struct getsockname_args *uap;
 1514 {
 1515 
 1516         return (getsockname1(td, uap, 1));
 1517 }
 1518 #endif /* COMPAT_OLDSOCK */
 1519 
 1520 /*
 1521  * getpeername1() - Get name of peer for connected socket.
 1522  */
 1523 /* ARGSUSED */
 1524 static int
 1525 getpeername1(td, uap, compat)
 1526         struct thread *td;
 1527         struct getpeername_args /* {
 1528                 int     fdes;
 1529                 struct sockaddr * __restrict    asa;
 1530                 socklen_t * __restrict  alen;
 1531         } */ *uap;
 1532         int compat;
 1533 {
 1534         struct sockaddr *sa;
 1535         socklen_t len;
 1536         int error;
 1537 
 1538         error = copyin(uap->alen, &len, sizeof (len));
 1539         if (error)
 1540                 return (error);
 1541 
 1542         error = kern_getpeername(td, uap->fdes, &sa, &len);
 1543         if (error)
 1544                 return (error);
 1545 
 1546         if (len != 0) {
 1547 #ifdef COMPAT_OLDSOCK
 1548                 if (compat)
 1549                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1550 #endif
 1551                 error = copyout(sa, uap->asa, (u_int)len);
 1552         }
 1553         free(sa, M_SONAME);
 1554         if (error == 0)
 1555                 error = copyout(&len, uap->alen, sizeof(len));
 1556         return (error);
 1557 }
 1558 
 1559 int
 1560 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
 1561     socklen_t *alen)
 1562 {
 1563         struct socket *so;
 1564         struct file *fp;
 1565         socklen_t len;
 1566         int error;
 1567 
 1568         if (*alen < 0)
 1569                 return (EINVAL);
 1570 
 1571         AUDIT_ARG_FD(fd);
 1572         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
 1573         if (error)
 1574                 return (error);
 1575         so = fp->f_data;
 1576         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 1577                 error = ENOTCONN;
 1578                 goto done;
 1579         }
 1580         *sa = NULL;
 1581         CURVNET_SET(so->so_vnet);
 1582         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
 1583         CURVNET_RESTORE();
 1584         if (error)
 1585                 goto bad;
 1586         if (*sa == NULL)
 1587                 len = 0;
 1588         else
 1589                 len = MIN(*alen, (*sa)->sa_len);
 1590         *alen = len;
 1591 #ifdef KTRACE
 1592         if (KTRPOINT(td, KTR_STRUCT))
 1593                 ktrsockaddr(*sa);
 1594 #endif
 1595 bad:
 1596         if (error && *sa) {
 1597                 free(*sa, M_SONAME);
 1598                 *sa = NULL;
 1599         }
 1600 done:
 1601         fdrop(fp, td);
 1602         return (error);
 1603 }
 1604 
 1605 int
 1606 getpeername(td, uap)
 1607         struct thread *td;
 1608         struct getpeername_args *uap;
 1609 {
 1610 
 1611         return (getpeername1(td, uap, 0));
 1612 }
 1613 
 1614 #ifdef COMPAT_OLDSOCK
 1615 int
 1616 ogetpeername(td, uap)
 1617         struct thread *td;
 1618         struct ogetpeername_args *uap;
 1619 {
 1620 
 1621         /* XXX uap should have type `getpeername_args *' to begin with. */
 1622         return (getpeername1(td, (struct getpeername_args *)uap, 1));
 1623 }
 1624 #endif /* COMPAT_OLDSOCK */
 1625 
 1626 int
 1627 sockargs(mp, buf, buflen, type)
 1628         struct mbuf **mp;
 1629         caddr_t buf;
 1630         int buflen, type;
 1631 {
 1632         struct sockaddr *sa;
 1633         struct mbuf *m;
 1634         int error;
 1635 
 1636         if ((u_int)buflen > MLEN) {
 1637 #ifdef COMPAT_OLDSOCK
 1638                 if (type == MT_SONAME && (u_int)buflen <= 112)
 1639                         buflen = MLEN;          /* unix domain compat. hack */
 1640                 else
 1641 #endif
 1642                         if ((u_int)buflen > MCLBYTES)
 1643                                 return (EINVAL);
 1644         }
 1645         m = m_get(M_WAIT, type);
 1646         if ((u_int)buflen > MLEN)
 1647                 MCLGET(m, M_WAIT);
 1648         m->m_len = buflen;
 1649         error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 1650         if (error)
 1651                 (void) m_free(m);
 1652         else {
 1653                 *mp = m;
 1654                 if (type == MT_SONAME) {
 1655                         sa = mtod(m, struct sockaddr *);
 1656 
 1657 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1658                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1659                                 sa->sa_family = sa->sa_len;
 1660 #endif
 1661                         sa->sa_len = buflen;
 1662                 }
 1663         }
 1664         return (error);
 1665 }
 1666 
 1667 int
 1668 getsockaddr(namp, uaddr, len)
 1669         struct sockaddr **namp;
 1670         caddr_t uaddr;
 1671         size_t len;
 1672 {
 1673         struct sockaddr *sa;
 1674         int error;
 1675 
 1676         if (len > SOCK_MAXADDRLEN)
 1677                 return (ENAMETOOLONG);
 1678         if (len < offsetof(struct sockaddr, sa_data[0]))
 1679                 return (EINVAL);
 1680         sa = malloc(len, M_SONAME, M_WAITOK);
 1681         error = copyin(uaddr, sa, len);
 1682         if (error) {
 1683                 free(sa, M_SONAME);
 1684         } else {
 1685 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1686                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1687                         sa->sa_family = sa->sa_len;
 1688 #endif
 1689                 sa->sa_len = len;
 1690                 *namp = sa;
 1691         }
 1692         return (error);
 1693 }
 1694 
 1695 #include <sys/condvar.h>
 1696 
 1697 struct sendfile_sync {
 1698         struct mtx      mtx;
 1699         struct cv       cv;
 1700         unsigned        count;
 1701 };
 1702 
 1703 /*
 1704  * Detach mapped page and release resources back to the system.
 1705  */
 1706 void
 1707 sf_buf_mext(void *addr, void *args)
 1708 {
 1709         vm_page_t m;
 1710         struct sendfile_sync *sfs;
 1711 
 1712         m = sf_buf_page(args);
 1713         sf_buf_free(args);
 1714         vm_page_lock_queues();
 1715         vm_page_unwire(m, 0);
 1716         /*
 1717          * Check for the object going away on us. This can
 1718          * happen since we don't hold a reference to it.
 1719          * If so, we're responsible for freeing the page.
 1720          */
 1721         if (m->wire_count == 0 && m->object == NULL)
 1722                 vm_page_free(m);
 1723         vm_page_unlock_queues();
 1724         if (addr == NULL)
 1725                 return;
 1726         sfs = addr;
 1727         mtx_lock(&sfs->mtx);
 1728         KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
 1729         if (--sfs->count == 0)
 1730                 cv_signal(&sfs->cv);
 1731         mtx_unlock(&sfs->mtx);
 1732 }
 1733 
 1734 /*
 1735  * sendfile(2)
 1736  *
 1737  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 1738  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 1739  *
 1740  * Send a file specified by 'fd' and starting at 'offset' to a socket
 1741  * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
 1742  * 0.  Optionally add a header and/or trailer to the socket output.  If
 1743  * specified, write the total number of bytes sent into *sbytes.
 1744  */
 1745 int
 1746 sendfile(struct thread *td, struct sendfile_args *uap)
 1747 {
 1748 
 1749         return (do_sendfile(td, uap, 0));
 1750 }
 1751 
 1752 static int
 1753 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
 1754 {
 1755         struct sf_hdtr hdtr;
 1756         struct uio *hdr_uio, *trl_uio;
 1757         int error;
 1758 
 1759         hdr_uio = trl_uio = NULL;
 1760 
 1761         if (uap->hdtr != NULL) {
 1762                 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 1763                 if (error)
 1764                         goto out;
 1765                 if (hdtr.headers != NULL) {
 1766                         error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
 1767                         if (error)
 1768                                 goto out;
 1769                 }
 1770                 if (hdtr.trailers != NULL) {
 1771                         error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
 1772                         if (error)
 1773                                 goto out;
 1774 
 1775                 }
 1776         }
 1777 
 1778         error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
 1779 out:
 1780         if (hdr_uio)
 1781                 free(hdr_uio, M_IOV);
 1782         if (trl_uio)
 1783                 free(trl_uio, M_IOV);
 1784         return (error);
 1785 }
 1786 
 1787 #ifdef COMPAT_FREEBSD4
 1788 int
 1789 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
 1790 {
 1791         struct sendfile_args args;
 1792 
 1793         args.fd = uap->fd;
 1794         args.s = uap->s;
 1795         args.offset = uap->offset;
 1796         args.nbytes = uap->nbytes;
 1797         args.hdtr = uap->hdtr;
 1798         args.sbytes = uap->sbytes;
 1799         args.flags = uap->flags;
 1800 
 1801         return (do_sendfile(td, &args, 1));
 1802 }
 1803 #endif /* COMPAT_FREEBSD4 */
 1804 
 1805 int
 1806 kern_sendfile(struct thread *td, struct sendfile_args *uap,
 1807     struct uio *hdr_uio, struct uio *trl_uio, int compat)
 1808 {
 1809         struct file *sock_fp;
 1810         struct vnode *vp;
 1811         struct vm_object *obj = NULL;
 1812         struct socket *so = NULL;
 1813         struct mbuf *m = NULL;
 1814         struct sf_buf *sf;
 1815         struct vm_page *pg;
 1816         off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
 1817         int error, hdrlen = 0, mnw = 0;
 1818         int vfslocked;
 1819         struct sendfile_sync *sfs = NULL;
 1820 
 1821         /*
 1822          * The file descriptor must be a regular file and have a
 1823          * backing VM object.
 1824          * File offset must be positive.  If it goes beyond EOF
 1825          * we send only the header/trailer and no payload data.
 1826          */
 1827         AUDIT_ARG_FD(uap->fd);
 1828         if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
 1829                 goto out;
 1830         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1831         vn_lock(vp, LK_SHARED | LK_RETRY);
 1832         if (vp->v_type == VREG) {
 1833                 obj = vp->v_object;
 1834                 if (obj != NULL) {
 1835                         /*
 1836                          * Temporarily increase the backing VM
 1837                          * object's reference count so that a forced
 1838                          * reclamation of its vnode does not
 1839                          * immediately destroy it.
 1840                          */
 1841                         VM_OBJECT_LOCK(obj);
 1842                         if ((obj->flags & OBJ_DEAD) == 0) {
 1843                                 vm_object_reference_locked(obj);
 1844                                 VM_OBJECT_UNLOCK(obj);
 1845                         } else {
 1846                                 VM_OBJECT_UNLOCK(obj);
 1847                                 obj = NULL;
 1848                         }
 1849                 }
 1850         }
 1851         VOP_UNLOCK(vp, 0);
 1852         VFS_UNLOCK_GIANT(vfslocked);
 1853         if (obj == NULL) {
 1854                 error = EINVAL;
 1855                 goto out;
 1856         }
 1857         if (uap->offset < 0) {
 1858                 error = EINVAL;
 1859                 goto out;
 1860         }
 1861 
 1862         /*
 1863          * The socket must be a stream socket and connected.
 1864          * Remember if it a blocking or non-blocking socket.
 1865          */
 1866         if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
 1867             NULL)) != 0)
 1868                 goto out;
 1869         so = sock_fp->f_data;
 1870         if (so->so_type != SOCK_STREAM) {
 1871                 error = EINVAL;
 1872                 goto out;
 1873         }
 1874         if ((so->so_state & SS_ISCONNECTED) == 0) {
 1875                 error = ENOTCONN;
 1876                 goto out;
 1877         }
 1878         /*
 1879          * Do not wait on memory allocations but return ENOMEM for
 1880          * caller to retry later.
 1881          * XXX: Experimental.
 1882          */
 1883         if (uap->flags & SF_MNOWAIT)
 1884                 mnw = 1;
 1885 
 1886         if (uap->flags & SF_SYNC) {
 1887                 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK);
 1888                 memset(sfs, 0, sizeof *sfs);
 1889                 mtx_init(&sfs->mtx, "sendfile", MTX_DEF, 0);
 1890                 cv_init(&sfs->cv, "sendfile");
 1891         }
 1892 
 1893 #ifdef MAC
 1894         error = mac_socket_check_send(td->td_ucred, so);
 1895         if (error)
 1896                 goto out;
 1897 #endif
 1898 
 1899         /* If headers are specified copy them into mbufs. */
 1900         if (hdr_uio != NULL) {
 1901                 hdr_uio->uio_td = td;
 1902                 hdr_uio->uio_rw = UIO_WRITE;
 1903                 if (hdr_uio->uio_resid > 0) {
 1904                         /*
 1905                          * In FBSD < 5.0 the nbytes to send also included
 1906                          * the header.  If compat is specified subtract the
 1907                          * header size from nbytes.
 1908                          */
 1909                         if (compat) {
 1910                                 if (uap->nbytes > hdr_uio->uio_resid)
 1911                                         uap->nbytes -= hdr_uio->uio_resid;
 1912                                 else
 1913                                         uap->nbytes = 0;
 1914                         }
 1915                         m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
 1916                             0, 0, 0);
 1917                         if (m == NULL) {
 1918                                 error = mnw ? EAGAIN : ENOBUFS;
 1919                                 goto out;
 1920                         }
 1921                         hdrlen = m_length(m, NULL);
 1922                 }
 1923         }
 1924 
 1925         /*
 1926          * Protect against multiple writers to the socket.
 1927          *
 1928          * XXXRW: Historically this has assumed non-interruptibility, so now
 1929          * we implement that, but possibly shouldn't.
 1930          */
 1931         (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
 1932 
 1933         /*
 1934          * Loop through the pages of the file, starting with the requested
 1935          * offset. Get a file page (do I/O if necessary), map the file page
 1936          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 1937          * it on the socket.
 1938          * This is done in two loops.  The inner loop turns as many pages
 1939          * as it can, up to available socket buffer space, without blocking
 1940          * into mbufs to have it bulk delivered into the socket send buffer.
 1941          * The outer loop checks the state and available space of the socket
 1942          * and takes care of the overall progress.
 1943          */
 1944         for (off = uap->offset, rem = uap->nbytes; ; ) {
 1945                 int loopbytes = 0;
 1946                 int space = 0;
 1947                 int done = 0;
 1948 
 1949                 /*
 1950                  * Check the socket state for ongoing connection,
 1951                  * no errors and space in socket buffer.
 1952                  * If space is low allow for the remainder of the
 1953                  * file to be processed if it fits the socket buffer.
 1954                  * Otherwise block in waiting for sufficient space
 1955                  * to proceed, or if the socket is nonblocking, return
 1956                  * to userland with EAGAIN while reporting how far
 1957                  * we've come.
 1958                  * We wait until the socket buffer has significant free
 1959                  * space to do bulk sends.  This makes good use of file
 1960                  * system read ahead and allows packet segmentation
 1961                  * offloading hardware to take over lots of work.  If
 1962                  * we were not careful here we would send off only one
 1963                  * sfbuf at a time.
 1964                  */
 1965                 SOCKBUF_LOCK(&so->so_snd);
 1966                 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
 1967                         so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
 1968 retry_space:
 1969                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 1970                         error = EPIPE;
 1971                         SOCKBUF_UNLOCK(&so->so_snd);
 1972                         goto done;
 1973                 } else if (so->so_error) {
 1974                         error = so->so_error;
 1975                         so->so_error = 0;
 1976                         SOCKBUF_UNLOCK(&so->so_snd);
 1977                         goto done;
 1978                 }
 1979                 space = sbspace(&so->so_snd);
 1980                 if (space < rem &&
 1981                     (space <= 0 ||
 1982                      space < so->so_snd.sb_lowat)) {
 1983                         if (so->so_state & SS_NBIO) {
 1984                                 SOCKBUF_UNLOCK(&so->so_snd);
 1985                                 error = EAGAIN;
 1986                                 goto done;
 1987                         }
 1988                         /*
 1989                          * sbwait drops the lock while sleeping.
 1990                          * When we loop back to retry_space the
 1991                          * state may have changed and we retest
 1992                          * for it.
 1993                          */
 1994                         error = sbwait(&so->so_snd);
 1995                         /*
 1996                          * An error from sbwait usually indicates that we've
 1997                          * been interrupted by a signal. If we've sent anything
 1998                          * then return bytes sent, otherwise return the error.
 1999                          */
 2000                         if (error) {
 2001                                 SOCKBUF_UNLOCK(&so->so_snd);
 2002                                 goto done;
 2003                         }
 2004                         goto retry_space;
 2005                 }
 2006                 SOCKBUF_UNLOCK(&so->so_snd);
 2007 
 2008                 /*
 2009                  * Reduce space in the socket buffer by the size of
 2010                  * the header mbuf chain.
 2011                  * hdrlen is set to 0 after the first loop.
 2012                  */
 2013                 space -= hdrlen;
 2014 
 2015                 /*
 2016                  * Loop and construct maximum sized mbuf chain to be bulk
 2017                  * dumped into socket buffer.
 2018                  */
 2019                 while (space > loopbytes) {
 2020                         vm_pindex_t pindex;
 2021                         vm_offset_t pgoff;
 2022                         struct mbuf *m0;
 2023 
 2024                         VM_OBJECT_LOCK(obj);
 2025                         /*
 2026                          * Calculate the amount to transfer.
 2027                          * Not to exceed a page, the EOF,
 2028                          * or the passed in nbytes.
 2029                          */
 2030                         pgoff = (vm_offset_t)(off & PAGE_MASK);
 2031                         xfsize = omin(PAGE_SIZE - pgoff,
 2032                             obj->un_pager.vnp.vnp_size - uap->offset -
 2033                             fsbytes - loopbytes);
 2034                         if (uap->nbytes)
 2035                                 rem = (uap->nbytes - fsbytes - loopbytes);
 2036                         else
 2037                                 rem = obj->un_pager.vnp.vnp_size -
 2038                                     uap->offset - fsbytes - loopbytes;
 2039                         xfsize = omin(rem, xfsize);
 2040                         xfsize = omin(space - loopbytes, xfsize);
 2041                         if (xfsize <= 0) {
 2042                                 VM_OBJECT_UNLOCK(obj);
 2043                                 done = 1;               /* all data sent */
 2044                                 break;
 2045                         }
 2046 
 2047                         /*
 2048                          * Attempt to look up the page.  Allocate
 2049                          * if not found or wait and loop if busy.
 2050                          */
 2051                         pindex = OFF_TO_IDX(off);
 2052                         pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
 2053                             VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
 2054 
 2055                         /*
 2056                          * Check if page is valid for what we need,
 2057                          * otherwise initiate I/O.
 2058                          * If we already turned some pages into mbufs,
 2059                          * send them off before we come here again and
 2060                          * block.
 2061                          */
 2062                         if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
 2063                                 VM_OBJECT_UNLOCK(obj);
 2064                         else if (m != NULL)
 2065                                 error = EAGAIN; /* send what we already got */
 2066                         else if (uap->flags & SF_NODISKIO)
 2067                                 error = EBUSY;
 2068                         else {
 2069                                 int bsize, resid;
 2070 
 2071                                 /*
 2072                                  * Ensure that our page is still around
 2073                                  * when the I/O completes.
 2074                                  */
 2075                                 vm_page_io_start(pg);
 2076                                 VM_OBJECT_UNLOCK(obj);
 2077 
 2078                                 /*
 2079                                  * Get the page from backing store.
 2080                                  */
 2081                                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2082                                 error = vn_lock(vp, LK_SHARED);
 2083                                 if (error != 0)
 2084                                         goto after_read;
 2085                                 bsize = vp->v_mount->mnt_stat.f_iosize;
 2086 
 2087                                 /*
 2088                                  * XXXMAC: Because we don't have fp->f_cred
 2089                                  * here, we pass in NOCRED.  This is probably
 2090                                  * wrong, but is consistent with our original
 2091                                  * implementation.
 2092                                  */
 2093                                 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
 2094                                     trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
 2095                                     IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 2096                                     td->td_ucred, NOCRED, &resid, td);
 2097                                 VOP_UNLOCK(vp, 0);
 2098                         after_read:
 2099                                 VFS_UNLOCK_GIANT(vfslocked);
 2100                                 VM_OBJECT_LOCK(obj);
 2101                                 vm_page_io_finish(pg);
 2102                                 if (!error)
 2103                                         VM_OBJECT_UNLOCK(obj);
 2104                                 mbstat.sf_iocnt++;
 2105                         }
 2106                         if (error) {
 2107                                 vm_page_lock_queues();
 2108                                 vm_page_unwire(pg, 0);
 2109                                 /*
 2110                                  * See if anyone else might know about
 2111                                  * this page.  If not and it is not valid,
 2112                                  * then free it.
 2113                                  */
 2114                                 if (pg->wire_count == 0 && pg->valid == 0 &&
 2115                                     pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
 2116                                     pg->hold_count == 0) {
 2117                                         vm_page_free(pg);
 2118                                 }
 2119                                 vm_page_unlock_queues();
 2120                                 VM_OBJECT_UNLOCK(obj);
 2121                                 if (error == EAGAIN)
 2122                                         error = 0;      /* not a real error */
 2123                                 break;
 2124                         }
 2125 
 2126                         /*
 2127                          * Get a sendfile buf.  We usually wait as long
 2128                          * as necessary, but this wait can be interrupted.
 2129                          */
 2130                         if ((sf = sf_buf_alloc(pg,
 2131                             (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
 2132                                 mbstat.sf_allocfail++;
 2133                                 vm_page_lock_queues();
 2134                                 vm_page_unwire(pg, 0);
 2135                                 /*
 2136                                  * XXX: Not same check as above!?
 2137                                  */
 2138                                 if (pg->wire_count == 0 && pg->object == NULL)
 2139                                         vm_page_free(pg);
 2140                                 vm_page_unlock_queues();
 2141                                 error = (mnw ? EAGAIN : EINTR);
 2142                                 break;
 2143                         }
 2144 
 2145                         /*
 2146                          * Get an mbuf and set it up as having
 2147                          * external storage.
 2148                          */
 2149                         m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
 2150                         if (m0 == NULL) {
 2151                                 error = (mnw ? EAGAIN : ENOBUFS);
 2152                                 sf_buf_mext((void *)sf_buf_kva(sf), sf);
 2153                                 break;
 2154                         }
 2155                         MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
 2156                             sfs, sf, M_RDONLY, EXT_SFBUF);
 2157                         m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
 2158                         m0->m_len = xfsize;
 2159 
 2160                         /* Append to mbuf chain. */
 2161                         if (m != NULL)
 2162                                 m_cat(m, m0);
 2163                         else
 2164                                 m = m0;
 2165 
 2166                         /* Keep track of bits processed. */
 2167                         loopbytes += xfsize;
 2168                         off += xfsize;
 2169 
 2170                         if (sfs != NULL) {
 2171                                 mtx_lock(&sfs->mtx);
 2172                                 sfs->count++;
 2173                                 mtx_unlock(&sfs->mtx);
 2174                         }
 2175                 }
 2176 
 2177                 /* Add the buffer chain to the socket buffer. */
 2178                 if (m != NULL) {
 2179                         int mlen, err;
 2180 
 2181                         mlen = m_length(m, NULL);
 2182                         SOCKBUF_LOCK(&so->so_snd);
 2183                         if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 2184                                 error = EPIPE;
 2185                                 SOCKBUF_UNLOCK(&so->so_snd);
 2186                                 goto done;
 2187                         }
 2188                         SOCKBUF_UNLOCK(&so->so_snd);
 2189                         CURVNET_SET(so->so_vnet);
 2190                         /* Avoid error aliasing. */
 2191                         err = (*so->so_proto->pr_usrreqs->pru_send)
 2192                                     (so, 0, m, NULL, NULL, td);
 2193                         CURVNET_RESTORE();
 2194                         if (err == 0) {
 2195                                 /*
 2196                                  * We need two counters to get the
 2197                                  * file offset and nbytes to send
 2198                                  * right:
 2199                                  * - sbytes contains the total amount
 2200                                  *   of bytes sent, including headers.
 2201                                  * - fsbytes contains the total amount
 2202                                  *   of bytes sent from the file.
 2203                                  */
 2204                                 sbytes += mlen;
 2205                                 fsbytes += mlen;
 2206                                 if (hdrlen) {
 2207                                         fsbytes -= hdrlen;
 2208                                         hdrlen = 0;
 2209                                 }
 2210                         } else if (error == 0)
 2211                                 error = err;
 2212                         m = NULL;       /* pru_send always consumes */
 2213                 }
 2214 
 2215                 /* Quit outer loop on error or when we're done. */
 2216                 if (done) 
 2217                         break;
 2218                 if (error)
 2219                         goto done;
 2220         }
 2221 
 2222         /*
 2223          * Send trailers. Wimp out and use writev(2).
 2224          */
 2225         if (trl_uio != NULL) {
 2226                 sbunlock(&so->so_snd);
 2227                 error = kern_writev(td, uap->s, trl_uio);
 2228                 if (error == 0)
 2229                         sbytes += td->td_retval[0];
 2230                 goto out;
 2231         }
 2232 
 2233 done:
 2234         sbunlock(&so->so_snd);
 2235 out:
 2236         /*
 2237          * If there was no error we have to clear td->td_retval[0]
 2238          * because it may have been set by writev.
 2239          */
 2240         if (error == 0) {
 2241                 td->td_retval[0] = 0;
 2242         }
 2243         if (uap->sbytes != NULL) {
 2244                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
 2245         }
 2246         if (obj != NULL)
 2247                 vm_object_deallocate(obj);
 2248         if (vp != NULL) {
 2249                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2250                 vrele(vp);
 2251                 VFS_UNLOCK_GIANT(vfslocked);
 2252         }
 2253         if (so)
 2254                 fdrop(sock_fp, td);
 2255         if (m)
 2256                 m_freem(m);
 2257 
 2258         if (sfs != NULL) {
 2259                 mtx_lock(&sfs->mtx);
 2260                 if (sfs->count != 0)
 2261                         cv_wait(&sfs->cv, &sfs->mtx);
 2262                 KASSERT(sfs->count == 0, ("sendfile sync still busy"));
 2263                 cv_destroy(&sfs->cv);
 2264                 mtx_destroy(&sfs->mtx);
 2265                 free(sfs, M_TEMP);
 2266         }
 2267 
 2268         if (error == ERESTART)
 2269                 error = EINTR;
 2270 
 2271         return (error);
 2272 }
 2273 
 2274 /*
 2275  * SCTP syscalls.
 2276  * Functionality only compiled in if SCTP is defined in the kernel Makefile,
 2277  * otherwise all return EOPNOTSUPP.
 2278  * XXX: We should make this loadable one day.
 2279  */
 2280 int
 2281 sctp_peeloff(td, uap)
 2282         struct thread *td;
 2283         struct sctp_peeloff_args /* {
 2284                 int     sd;
 2285                 caddr_t name;
 2286         } */ *uap;
 2287 {
 2288 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2289         struct filedesc *fdp;
 2290         struct file *nfp = NULL;
 2291         int error;
 2292         struct socket *head, *so;
 2293         int fd;
 2294         u_int fflag;
 2295 
 2296         fdp = td->td_proc->p_fd;
 2297         AUDIT_ARG_FD(uap->sd);
 2298         error = fgetsock(td, uap->sd, &head, &fflag);
 2299         if (error)
 2300                 goto done2;
 2301         error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
 2302         if (error)
 2303                 goto done2;
 2304         /*
 2305          * At this point we know we do have a assoc to pull
 2306          * we proceed to get the fd setup. This may block
 2307          * but that is ok.
 2308          */
 2309 
 2310         error = falloc(td, &nfp, &fd);
 2311         if (error)
 2312                 goto done;
 2313         td->td_retval[0] = fd;
 2314 
 2315         CURVNET_SET(head->so_vnet);
 2316         so = sonewconn(head, SS_ISCONNECTED);
 2317         if (so == NULL) 
 2318                 goto noconnection;
 2319         /*
 2320          * Before changing the flags on the socket, we have to bump the
 2321          * reference count.  Otherwise, if the protocol calls sofree(),
 2322          * the socket will be released due to a zero refcount.
 2323          */
 2324         SOCK_LOCK(so);
 2325         soref(so);                      /* file descriptor reference */
 2326         SOCK_UNLOCK(so);
 2327 
 2328         ACCEPT_LOCK();
 2329 
 2330         TAILQ_REMOVE(&head->so_comp, so, so_list);
 2331         head->so_qlen--;
 2332         so->so_state |= (head->so_state & SS_NBIO);
 2333         so->so_state &= ~SS_NOFDREF;
 2334         so->so_qstate &= ~SQ_COMP;
 2335         so->so_head = NULL;
 2336         ACCEPT_UNLOCK();
 2337         finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 2338         error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
 2339         if (error)
 2340                 goto noconnection;
 2341         if (head->so_sigio != NULL)
 2342                 fsetown(fgetown(&head->so_sigio), &so->so_sigio);
 2343 
 2344 noconnection:
 2345         /*
 2346          * close the new descriptor, assuming someone hasn't ripped it
 2347          * out from under us.
 2348          */
 2349         if (error)
 2350                 fdclose(fdp, nfp, fd, td);
 2351 
 2352         /*
 2353          * Release explicitly held references before returning.
 2354          */
 2355         CURVNET_RESTORE();
 2356 done:
 2357         if (nfp != NULL)
 2358                 fdrop(nfp, td);
 2359         fputsock(head);
 2360 done2:
 2361         return (error);
 2362 #else  /* SCTP */
 2363         return (EOPNOTSUPP);
 2364 #endif /* SCTP */
 2365 }
 2366 
 2367 int
 2368 sctp_generic_sendmsg (td, uap)
 2369         struct thread *td;
 2370         struct sctp_generic_sendmsg_args /* {
 2371                 int sd, 
 2372                 caddr_t msg, 
 2373                 int mlen, 
 2374                 caddr_t to, 
 2375                 __socklen_t tolen, 
 2376                 struct sctp_sndrcvinfo *sinfo, 
 2377                 int flags
 2378         } */ *uap;
 2379 {
 2380 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2381         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2382         struct socket *so;
 2383         struct file *fp = NULL;
 2384         int use_rcvinfo = 1;
 2385         int error = 0, len;
 2386         struct sockaddr *to = NULL;
 2387 #ifdef KTRACE
 2388         struct uio *ktruio = NULL;
 2389 #endif
 2390         struct uio auio;
 2391         struct iovec iov[1];
 2392 
 2393         if (uap->sinfo) {
 2394                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2395                 if (error)
 2396                         return (error);
 2397                 u_sinfo = &sinfo;
 2398         }
 2399         if (uap->tolen) {
 2400                 error = getsockaddr(&to, uap->to, uap->tolen);
 2401                 if (error) {
 2402                         to = NULL;
 2403                         goto sctp_bad2;
 2404                 }
 2405         }
 2406 
 2407         AUDIT_ARG_FD(uap->sd);
 2408         error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
 2409         if (error)
 2410                 goto sctp_bad;
 2411 #ifdef KTRACE
 2412         if (KTRPOINT(td, KTR_STRUCT))
 2413                 ktrsockaddr(to);
 2414 #endif
 2415 
 2416         iov[0].iov_base = uap->msg;
 2417         iov[0].iov_len = uap->mlen;
 2418 
 2419         so = (struct socket *)fp->f_data;
 2420 #ifdef MAC
 2421         error = mac_socket_check_send(td->td_ucred, so);
 2422         if (error)
 2423                 goto sctp_bad;
 2424 #endif /* MAC */
 2425 
 2426         auio.uio_iov =  iov;
 2427         auio.uio_iovcnt = 1;
 2428         auio.uio_segflg = UIO_USERSPACE;
 2429         auio.uio_rw = UIO_WRITE;
 2430         auio.uio_td = td;
 2431         auio.uio_offset = 0;                    /* XXX */
 2432         auio.uio_resid = 0;
 2433         len = auio.uio_resid = uap->mlen;
 2434         CURVNET_SET(so->so_vnet);
 2435         error = sctp_lower_sosend(so, to, &auio,
 2436                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2437                     uap->flags, use_rcvinfo, u_sinfo, td);
 2438         CURVNET_RESTORE();
 2439         if (error) {
 2440                 if (auio.uio_resid != len && (error == ERESTART ||
 2441                     error == EINTR || error == EWOULDBLOCK))
 2442                         error = 0;
 2443                 /* Generation of SIGPIPE can be controlled per socket. */
 2444                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2445                     !(uap->flags & MSG_NOSIGNAL)) {
 2446                         PROC_LOCK(td->td_proc);
 2447                         psignal(td->td_proc, SIGPIPE);
 2448                         PROC_UNLOCK(td->td_proc);
 2449                 }
 2450         }
 2451         if (error == 0)
 2452                 td->td_retval[0] = len - auio.uio_resid;
 2453 #ifdef KTRACE
 2454         if (ktruio != NULL) {
 2455                 ktruio->uio_resid = td->td_retval[0];
 2456                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2457         }
 2458 #endif /* KTRACE */
 2459 sctp_bad:
 2460         if (fp)
 2461                 fdrop(fp, td);
 2462 sctp_bad2:
 2463         if (to)
 2464                 free(to, M_SONAME);
 2465         return (error);
 2466 #else  /* SCTP */
 2467         return (EOPNOTSUPP);
 2468 #endif /* SCTP */
 2469 }
 2470 
 2471 int
 2472 sctp_generic_sendmsg_iov(td, uap)
 2473         struct thread *td;
 2474         struct sctp_generic_sendmsg_iov_args /* {
 2475                 int sd, 
 2476                 struct iovec *iov, 
 2477                 int iovlen, 
 2478                 caddr_t to, 
 2479                 __socklen_t tolen, 
 2480                 struct sctp_sndrcvinfo *sinfo, 
 2481                 int flags
 2482         } */ *uap;
 2483 {
 2484 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2485         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2486         struct socket *so;
 2487         struct file *fp = NULL;
 2488         int use_rcvinfo = 1;
 2489         int error=0, len, i;
 2490         struct sockaddr *to = NULL;
 2491 #ifdef KTRACE
 2492         struct uio *ktruio = NULL;
 2493 #endif
 2494         struct uio auio;
 2495         struct iovec *iov, *tiov;
 2496 
 2497         if (uap->sinfo) {
 2498                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2499                 if (error)
 2500                         return (error);
 2501                 u_sinfo = &sinfo;
 2502         }
 2503         if (uap->tolen) {
 2504                 error = getsockaddr(&to, uap->to, uap->tolen);
 2505                 if (error) {
 2506                         to = NULL;
 2507                         goto sctp_bad2;
 2508                 }
 2509         }
 2510 
 2511         AUDIT_ARG_FD(uap->sd);
 2512         error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
 2513         if (error)
 2514                 goto sctp_bad1;
 2515 
 2516         error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2517         if (error)
 2518                 goto sctp_bad1;
 2519 #ifdef KTRACE
 2520         if (KTRPOINT(td, KTR_STRUCT))
 2521                 ktrsockaddr(to);
 2522 #endif
 2523 
 2524         so = (struct socket *)fp->f_data;
 2525 #ifdef MAC
 2526         error = mac_socket_check_send(td->td_ucred, so);
 2527         if (error)
 2528                 goto sctp_bad;
 2529 #endif /* MAC */
 2530 
 2531         auio.uio_iov =  iov;
 2532         auio.uio_iovcnt = uap->iovlen;
 2533         auio.uio_segflg = UIO_USERSPACE;
 2534         auio.uio_rw = UIO_WRITE;
 2535         auio.uio_td = td;
 2536         auio.uio_offset = 0;                    /* XXX */
 2537         auio.uio_resid = 0;
 2538         tiov = iov;
 2539         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2540                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2541                         error = EINVAL;
 2542                         goto sctp_bad;
 2543                 }
 2544         }
 2545         len = auio.uio_resid;
 2546         CURVNET_SET(so->so_vnet);
 2547         error = sctp_lower_sosend(so, to, &auio,
 2548                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2549                     uap->flags, use_rcvinfo, u_sinfo, td);
 2550         CURVNET_RESTORE();
 2551         if (error) {
 2552                 if (auio.uio_resid != len && (error == ERESTART ||
 2553                     error == EINTR || error == EWOULDBLOCK))
 2554                         error = 0;
 2555                 /* Generation of SIGPIPE can be controlled per socket */
 2556                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2557                     !(uap->flags & MSG_NOSIGNAL)) {
 2558                         PROC_LOCK(td->td_proc);
 2559                         psignal(td->td_proc, SIGPIPE);
 2560                         PROC_UNLOCK(td->td_proc);
 2561                 }
 2562         }
 2563         if (error == 0)
 2564                 td->td_retval[0] = len - auio.uio_resid;
 2565 #ifdef KTRACE
 2566         if (ktruio != NULL) {
 2567                 ktruio->uio_resid = td->td_retval[0];
 2568                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2569         }
 2570 #endif /* KTRACE */
 2571 sctp_bad:
 2572         free(iov, M_IOV);
 2573 sctp_bad1:
 2574         if (fp)
 2575                 fdrop(fp, td);
 2576 sctp_bad2:
 2577         if (to)
 2578                 free(to, M_SONAME);
 2579         return (error);
 2580 #else  /* SCTP */
 2581         return (EOPNOTSUPP);
 2582 #endif /* SCTP */
 2583 }
 2584 
 2585 int
 2586 sctp_generic_recvmsg(td, uap)
 2587         struct thread *td;
 2588         struct sctp_generic_recvmsg_args /* {
 2589                 int sd, 
 2590                 struct iovec *iov, 
 2591                 int iovlen,
 2592                 struct sockaddr *from, 
 2593                 __socklen_t *fromlenaddr,
 2594                 struct sctp_sndrcvinfo *sinfo, 
 2595                 int *msg_flags
 2596         } */ *uap;
 2597 {
 2598 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2599         u_int8_t sockbufstore[256];
 2600         struct uio auio;
 2601         struct iovec *iov, *tiov;
 2602         struct sctp_sndrcvinfo sinfo;
 2603         struct socket *so;
 2604         struct file *fp = NULL;
 2605         struct sockaddr *fromsa;
 2606         int fromlen;
 2607         int len, i, msg_flags;
 2608         int error = 0;
 2609 #ifdef KTRACE
 2610         struct uio *ktruio = NULL;
 2611 #endif
 2612 
 2613         AUDIT_ARG_FD(uap->sd);
 2614         error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
 2615         if (error) {
 2616                 return (error);
 2617         }
 2618         error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2619         if (error) {
 2620                 goto out1;
 2621         }
 2622 
 2623         so = fp->f_data;
 2624 #ifdef MAC
 2625         error = mac_socket_check_receive(td->td_ucred, so);
 2626         if (error) {
 2627                 goto out;
 2628                 return (error);
 2629         }
 2630 #endif /* MAC */
 2631 
 2632         if (uap->fromlenaddr) {
 2633                 error = copyin(uap->fromlenaddr,
 2634                     &fromlen, sizeof (fromlen));
 2635                 if (error) {
 2636                         goto out;
 2637                 }
 2638         } else {
 2639                 fromlen = 0;
 2640         }
 2641         if(uap->msg_flags) {
 2642                 error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
 2643                 if (error) {
 2644                         goto out;
 2645                 }
 2646         } else {
 2647                 msg_flags = 0;
 2648         }
 2649         auio.uio_iov = iov;
 2650         auio.uio_iovcnt = uap->iovlen;
 2651         auio.uio_segflg = UIO_USERSPACE;
 2652         auio.uio_rw = UIO_READ;
 2653         auio.uio_td = td;
 2654         auio.uio_offset = 0;                    /* XXX */
 2655         auio.uio_resid = 0;
 2656         tiov = iov;
 2657         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2658                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2659                         error = EINVAL;
 2660                         goto out;
 2661                 }
 2662         }
 2663         len = auio.uio_resid;
 2664         fromsa = (struct sockaddr *)sockbufstore;
 2665 
 2666 #ifdef KTRACE
 2667         if (KTRPOINT(td, KTR_GENIO))
 2668                 ktruio = cloneuio(&auio);
 2669 #endif /* KTRACE */
 2670         CURVNET_SET(so->so_vnet);
 2671         error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
 2672                     fromsa, fromlen, &msg_flags,
 2673                     (struct sctp_sndrcvinfo *)&sinfo, 1);
 2674         CURVNET_RESTORE();
 2675         if (error) {
 2676                 if (auio.uio_resid != (int)len && (error == ERESTART ||
 2677                     error == EINTR || error == EWOULDBLOCK))
 2678                         error = 0;
 2679         } else {
 2680                 if (uap->sinfo)
 2681                         error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
 2682         }
 2683 #ifdef KTRACE
 2684         if (ktruio != NULL) {
 2685                 ktruio->uio_resid = (int)len - auio.uio_resid;
 2686                 ktrgenio(uap->sd, UIO_READ, ktruio, error);
 2687         }
 2688 #endif /* KTRACE */
 2689         if (error)
 2690                 goto out;
 2691         td->td_retval[0] = (int)len - auio.uio_resid;
 2692 
 2693         if (fromlen && uap->from) {
 2694                 len = fromlen;
 2695                 if (len <= 0 || fromsa == 0)
 2696                         len = 0;
 2697                 else {
 2698                         len = MIN(len, fromsa->sa_len);
 2699                         error = copyout(fromsa, uap->from, (unsigned)len);
 2700                         if (error)
 2701                                 goto out;
 2702                 }
 2703                 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
 2704                 if (error) {
 2705                         goto out;
 2706                 }
 2707         }
 2708 #ifdef KTRACE
 2709         if (KTRPOINT(td, KTR_STRUCT))
 2710                 ktrsockaddr(fromsa);
 2711 #endif
 2712         if (uap->msg_flags) {
 2713                 error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
 2714                 if (error) {
 2715                         goto out;
 2716                 }
 2717         }
 2718 out:
 2719         free(iov, M_IOV);
 2720 out1:
 2721         if (fp) 
 2722                 fdrop(fp, td);
 2723 
 2724         return (error);
 2725 #else  /* SCTP */
 2726         return (EOPNOTSUPP);
 2727 #endif /* SCTP */
 2728 }

Cache object: 467455ffd395a76be18e801f2a2275b6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.