The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * sendfile(2) and related extensions:
    6  * Copyright (c) 1998, David Greenman. All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/8.1/sys/kern/uipc_syscalls.c 209711 2010-07-05 18:45:59Z tuexen $");
   37 
   38 #include "opt_inet.h"
   39 #include "opt_inet6.h"
   40 #include "opt_sctp.h"
   41 #include "opt_compat.h"
   42 #include "opt_ktrace.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/kernel.h>
   47 #include <sys/lock.h>
   48 #include <sys/mutex.h>
   49 #include <sys/sysproto.h>
   50 #include <sys/malloc.h>
   51 #include <sys/filedesc.h>
   52 #include <sys/event.h>
   53 #include <sys/proc.h>
   54 #include <sys/fcntl.h>
   55 #include <sys/file.h>
   56 #include <sys/filio.h>
   57 #include <sys/jail.h>
   58 #include <sys/mount.h>
   59 #include <sys/mbuf.h>
   60 #include <sys/protosw.h>
   61 #include <sys/sf_buf.h>
   62 #include <sys/sysent.h>
   63 #include <sys/socket.h>
   64 #include <sys/socketvar.h>
   65 #include <sys/signalvar.h>
   66 #include <sys/syscallsubr.h>
   67 #include <sys/sysctl.h>
   68 #include <sys/uio.h>
   69 #include <sys/vnode.h>
   70 #ifdef KTRACE
   71 #include <sys/ktrace.h>
   72 #endif
   73 #ifdef COMPAT_FREEBSD32
   74 #include <compat/freebsd32/freebsd32_util.h>
   75 #endif
   76 
   77 #include <net/vnet.h>
   78 
   79 #include <security/audit/audit.h>
   80 #include <security/mac/mac_framework.h>
   81 
   82 #include <vm/vm.h>
   83 #include <vm/vm_object.h>
   84 #include <vm/vm_page.h>
   85 #include <vm/vm_pageout.h>
   86 #include <vm/vm_kern.h>
   87 #include <vm/vm_extern.h>
   88 
   89 #if defined(INET) || defined(INET6)
   90 #ifdef SCTP
   91 #include <netinet/sctp.h>
   92 #include <netinet/sctp_peeloff.h>
   93 #endif /* SCTP */
   94 #endif /* INET || INET6 */
   95 
/*
 * Forward declarations for file-local helpers.  sendit() and accept1() are
 * the common back ends used by the send*() and accept()/oaccept() system
 * calls respectively.
 */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

/*
 * For accept1(), a non-zero `compat' requests the old sockaddr layout when
 * COMPAT_OLDSOCK is defined.
 * NOTE(review): presumably the same convention applies to the other
 * `compat' parameters below -- confirm against their definitions.
 */
static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
                        int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
                        int compat);
  105 
/*
 * NSFBUFS-related variables and associated sysctls
 *
 * All three counters are exported read-only under kern.ipc; nsfbufs is
 * additionally flagged CTLFLAG_RDTUN.
 */
int nsfbufs;            /* maximum number of sendfile(2) sf_bufs available */
int nsfbufspeak;        /* number of sf_bufs at peak usage */
int nsfbufsused;        /* number of sf_bufs currently in use */

SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
  119 
  120 /*
  121  * Convert a user file descriptor to a kernel file entry.  A reference on the
  122  * file entry is held upon returning.  This is lighter weight than
  123  * fgetsock(), which bumps the socket reference drops the file reference
  124  * count instead, as this approach avoids several additional mutex operations
  125  * associated with the additional reference count.  If requested, return the
  126  * open file flags.
  127  */
  128 static int
  129 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
  130 {
  131         struct file *fp;
  132         int error;
  133 
  134         fp = NULL;
  135         if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) {
  136                 error = EBADF;
  137         } else if (fp->f_type != DTYPE_SOCKET) {
  138                 fdrop(fp, curthread);
  139                 fp = NULL;
  140                 error = ENOTSOCK;
  141         } else {
  142                 if (fflagp != NULL)
  143                         *fflagp = fp->f_flag;
  144                 error = 0;
  145         }
  146         *fpp = fp;
  147         return (error);
  148 }
  149 
/*
 * System call interface to the socket abstraction.
 *
 * COMPAT_OLDSOCK is turned on whenever COMPAT_43 is configured; it guards
 * the old-style entry points (oaccept(), osend(), osendmsg()) and the
 * old sockaddr / control-message handling in this file.
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif
  156 
  157 int
  158 socket(td, uap)
  159         struct thread *td;
  160         struct socket_args /* {
  161                 int     domain;
  162                 int     type;
  163                 int     protocol;
  164         } */ *uap;
  165 {
  166         struct filedesc *fdp;
  167         struct socket *so;
  168         struct file *fp;
  169         int fd, error;
  170 
  171         AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
  172 #ifdef MAC
  173         error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
  174             uap->protocol);
  175         if (error)
  176                 return (error);
  177 #endif
  178         fdp = td->td_proc->p_fd;
  179         error = falloc(td, &fp, &fd);
  180         if (error)
  181                 return (error);
  182         /* An extra reference on `fp' has been held for us by falloc(). */
  183         error = socreate(uap->domain, &so, uap->type, uap->protocol,
  184             td->td_ucred, td);
  185         if (error) {
  186                 fdclose(fdp, fp, fd, td);
  187         } else {
  188                 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
  189                 td->td_retval[0] = fd;
  190         }
  191         fdrop(fp, td);
  192         return (error);
  193 }
  194 
  195 /* ARGSUSED */
  196 int
  197 bind(td, uap)
  198         struct thread *td;
  199         struct bind_args /* {
  200                 int     s;
  201                 caddr_t name;
  202                 int     namelen;
  203         } */ *uap;
  204 {
  205         struct sockaddr *sa;
  206         int error;
  207 
  208         if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
  209                 return (error);
  210 
  211         error = kern_bind(td, uap->s, sa);
  212         free(sa, M_SONAME);
  213         return (error);
  214 }
  215 
  216 int
  217 kern_bind(td, fd, sa)
  218         struct thread *td;
  219         int fd;
  220         struct sockaddr *sa;
  221 {
  222         struct socket *so;
  223         struct file *fp;
  224         int error;
  225 
  226         AUDIT_ARG_FD(fd);
  227         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
  228         if (error)
  229                 return (error);
  230         so = fp->f_data;
  231 #ifdef KTRACE
  232         if (KTRPOINT(td, KTR_STRUCT))
  233                 ktrsockaddr(sa);
  234 #endif
  235 #ifdef MAC
  236         error = mac_socket_check_bind(td->td_ucred, so, sa);
  237         if (error == 0)
  238 #endif
  239                 error = sobind(so, sa, td);
  240         fdrop(fp, td);
  241         return (error);
  242 }
  243 
  244 /* ARGSUSED */
  245 int
  246 listen(td, uap)
  247         struct thread *td;
  248         struct listen_args /* {
  249                 int     s;
  250                 int     backlog;
  251         } */ *uap;
  252 {
  253         struct socket *so;
  254         struct file *fp;
  255         int error;
  256 
  257         AUDIT_ARG_FD(uap->s);
  258         error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
  259         if (error == 0) {
  260                 so = fp->f_data;
  261 #ifdef MAC
  262                 error = mac_socket_check_listen(td->td_ucred, so);
  263                 if (error == 0) {
  264 #endif
  265                         CURVNET_SET(so->so_vnet);
  266                         error = solisten(so, uap->backlog, td);
  267                         CURVNET_RESTORE();
  268 #ifdef MAC
  269                 }
  270 #endif
  271                 fdrop(fp, td);
  272         }
  273         return(error);
  274 }
  275 
  276 /*
  277  * accept1()
  278  */
  279 static int
  280 accept1(td, uap, compat)
  281         struct thread *td;
  282         struct accept_args /* {
  283                 int     s;
  284                 struct sockaddr * __restrict name;
  285                 socklen_t       * __restrict anamelen;
  286         } */ *uap;
  287         int compat;
  288 {
  289         struct sockaddr *name;
  290         socklen_t namelen;
  291         struct file *fp;
  292         int error;
  293 
  294         if (uap->name == NULL)
  295                 return (kern_accept(td, uap->s, NULL, NULL, NULL));
  296 
  297         error = copyin(uap->anamelen, &namelen, sizeof (namelen));
  298         if (error)
  299                 return (error);
  300 
  301         error = kern_accept(td, uap->s, &name, &namelen, &fp);
  302 
  303         /*
  304          * return a namelen of zero for older code which might
  305          * ignore the return value from accept.
  306          */
  307         if (error) {
  308                 (void) copyout(&namelen,
  309                     uap->anamelen, sizeof(*uap->anamelen));
  310                 return (error);
  311         }
  312 
  313         if (error == 0 && name != NULL) {
  314 #ifdef COMPAT_OLDSOCK
  315                 if (compat)
  316                         ((struct osockaddr *)name)->sa_family =
  317                             name->sa_family;
  318 #endif
  319                 error = copyout(name, uap->name, namelen);
  320         }
  321         if (error == 0)
  322                 error = copyout(&namelen, uap->anamelen,
  323                     sizeof(namelen));
  324         if (error)
  325                 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
  326         fdrop(fp, td);
  327         free(name, M_SONAME);
  328         return (error);
  329 }
  330 
/*
 * Accept one completed connection from the listening socket behind
 * descriptor `s'.
 *
 * On success the new descriptor number is stored in td->td_retval[0].
 * If `name' is non-NULL, `*name' receives the peer address returned by
 * soaccept() (or NULL if there was none) and `*namelen' is clamped to the
 * address length; the caller then owns `*name'.  If `fp' is non-NULL,
 * `*fp' receives the new file with the falloc() reference still held, so
 * the caller must fdrop() it; on error `*fp' is set to NULL.
 *
 * Returns 0 or an errno value (e.g. EINVAL if the socket is not listening,
 * EWOULDBLOCK for a non-blocking socket with an empty completed queue).
 */
int
kern_accept(struct thread *td, int s, struct sockaddr **name,
    socklen_t *namelen, struct file **fp)
{
        struct filedesc *fdp;
        struct file *headfp, *nfp = NULL;
        struct sockaddr *sa = NULL;
        int error;
        struct socket *head, *so;
        int fd;
        u_int fflag;
        pid_t pgid;
        int tmp;

        if (name) {
                *name = NULL;
                /*
                 * NOTE(review): if socklen_t is an unsigned type this
                 * comparison can never be true -- confirm.
                 */
                if (*namelen < 0)
                        return (EINVAL);
        }

        AUDIT_ARG_FD(s);
        fdp = td->td_proc->p_fd;
        error = getsock(fdp, s, &headfp, &fflag);
        if (error)
                return (error);
        head = headfp->f_data;
        /* Only sockets that have been put into the listening state qualify. */
        if ((head->so_options & SO_ACCEPTCONN) == 0) {
                error = EINVAL;
                goto done;
        }
#ifdef MAC
        error = mac_socket_check_accept(td->td_ucred, head);
        if (error != 0)
                goto done;
#endif
        /* Reserve the descriptor before dequeueing a connection. */
        error = falloc(td, &nfp, &fd);
        if (error)
                goto done;
        ACCEPT_LOCK();
        /* Non-blocking listener with nothing queued: fail immediately. */
        if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
                ACCEPT_UNLOCK();
                error = EWOULDBLOCK;
                goto noconnection;
        }
        /* Sleep until a connection completes or an error is posted. */
        while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
                if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
                        head->so_error = ECONNABORTED;
                        break;
                }
                error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
                    "accept", 0);
                if (error) {
                        ACCEPT_UNLOCK();
                        goto noconnection;
                }
        }
        /* Report and clear any pending error on the listening socket. */
        if (head->so_error) {
                error = head->so_error;
                head->so_error = 0;
                ACCEPT_UNLOCK();
                goto noconnection;
        }
        so = TAILQ_FIRST(&head->so_comp);
        KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
        KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

        /*
         * Before changing the flags on the socket, we have to bump the
         * reference count.  Otherwise, if the protocol calls sofree(),
         * the socket will be released due to a zero refcount.
         */
        SOCK_LOCK(so);                  /* soref() and so_state update */
        soref(so);                      /* file descriptor reference */

        /* Detach the connection from the listener's completed queue. */
        TAILQ_REMOVE(&head->so_comp, so, so_list);
        head->so_qlen--;
        /* The new socket inherits the listener's non-blocking state. */
        so->so_state |= (head->so_state & SS_NBIO);
        so->so_qstate &= ~SQ_COMP;
        so->so_head = NULL;

        SOCK_UNLOCK(so);
        ACCEPT_UNLOCK();

        /* An extra reference on `nfp' has been held for us by falloc(). */
        td->td_retval[0] = fd;

        /* connection has been removed from the listen queue */
        KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

        /* Propagate the listener's SIGIO owner, if any, to the new socket. */
        pgid = fgetown(&head->so_sigio);
        if (pgid != 0)
                fsetown(pgid, &so->so_sigio);

        finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
        /* Sync socket nonblocking/async state with file flags */
        tmp = fflag & FNONBLOCK;
        (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
        tmp = fflag & FASYNC;
        (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
        sa = 0;
        CURVNET_SET(so->so_vnet);
        error = soaccept(so, &sa);
        CURVNET_RESTORE();
        if (error) {
                /*
                 * return a namelen of zero for older code which might
                 * ignore the return value from accept.
                 */
                if (name)
                        *namelen = 0;
                goto noconnection;
        }
        /* No address was produced: succeed with a zero-length name. */
        if (sa == NULL) {
                if (name)
                        *namelen = 0;
                goto done;
        }
        if (name) {
                /* check sa_len before it is destroyed */
                if (*namelen > sa->sa_len)
                        *namelen = sa->sa_len;
#ifdef KTRACE
                if (KTRPOINT(td, KTR_STRUCT))
                        ktrsockaddr(sa);
#endif
                /* Ownership of the address moves to the caller. */
                *name = sa;
                sa = NULL;
        }
noconnection:
        /* Free the address unless it was handed to the caller above. */
        if (sa)
                free(sa, M_SONAME);

        /*
         * close the new descriptor, assuming someone hasn't ripped it
         * out from under us.
         */
        if (error)
                fdclose(fdp, nfp, fd, td);

        /*
         * Release explicitly held references before returning.  We return
         * a reference on nfp to the caller on success if they request it.
         */
done:
        if (fp != NULL) {
                if (error == 0) {
                        *fp = nfp;
                        nfp = NULL;
                } else
                        *fp = NULL;
        }
        if (nfp != NULL)
                fdrop(nfp, td);
        fdrop(headfp, td);
        return (error);
}
  487 
  488 int
  489 accept(td, uap)
  490         struct thread *td;
  491         struct accept_args *uap;
  492 {
  493 
  494         return (accept1(td, uap, 0));
  495 }
  496 
  497 #ifdef COMPAT_OLDSOCK
  498 int
  499 oaccept(td, uap)
  500         struct thread *td;
  501         struct accept_args *uap;
  502 {
  503 
  504         return (accept1(td, uap, 1));
  505 }
  506 #endif /* COMPAT_OLDSOCK */
  507 
  508 /* ARGSUSED */
  509 int
  510 connect(td, uap)
  511         struct thread *td;
  512         struct connect_args /* {
  513                 int     s;
  514                 caddr_t name;
  515                 int     namelen;
  516         } */ *uap;
  517 {
  518         struct sockaddr *sa;
  519         int error;
  520 
  521         error = getsockaddr(&sa, uap->name, uap->namelen);
  522         if (error)
  523                 return (error);
  524 
  525         error = kern_connect(td, uap->s, sa);
  526         free(sa, M_SONAME);
  527         return (error);
  528 }
  529 
  530 
/*
 * Connect the socket behind descriptor `fd' to the kernel-space address
 * `sa'.  The caller keeps ownership of `sa'.
 *
 * Returns 0 or an errno value: EALREADY if a connection attempt is already
 * in progress, EINPROGRESS for a non-blocking socket whose connection has
 * not completed yet, and EINTR if the wait was interrupted (ERESTART is
 * mapped to EINTR).
 */
int
kern_connect(td, fd, sa)
        struct thread *td;
        int fd;
        struct sockaddr *sa;
{
        struct socket *so;
        struct file *fp;
        int error;
        int interrupted = 0;

        AUDIT_ARG_FD(fd);
        error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
        if (error)
                return (error);
        so = fp->f_data;
        if (so->so_state & SS_ISCONNECTING) {
                error = EALREADY;
                goto done1;
        }
#ifdef KTRACE
        if (KTRPOINT(td, KTR_STRUCT))
                ktrsockaddr(sa);
#endif
#ifdef MAC
        error = mac_socket_check_connect(td->td_ucred, so, sa);
        if (error)
                goto bad;
#endif
        error = soconnect(so, sa, td);
        if (error)
                goto bad;
        /* Non-blocking socket: do not wait for the connection to finish. */
        if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
                error = EINPROGRESS;
                goto done1;
        }
        SOCK_LOCK(so);
        /* Sleep until the attempt completes or an error is posted. */
        while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
                error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
                    "connec", 0);
                if (error) {
                        /* Remember signal interruption for the cleanup below. */
                        if (error == EINTR || error == ERESTART)
                                interrupted = 1;
                        break;
                }
        }
        /* The sleep ended normally: collect and clear the socket's error. */
        if (error == 0) {
                error = so->so_error;
                so->so_error = 0;
        }
        SOCK_UNLOCK(so);
bad:
        /*
         * Leave SS_ISCONNECTING set when the wait was interrupted; in every
         * other case reaching here the flag is cleared.
         */
        if (!interrupted)
                so->so_state &= ~SS_ISCONNECTING;
        /* This call is never reported as restartable. */
        if (error == ERESTART)
                error = EINTR;
done1:
        fdrop(fp, td);
        return (error);
}
  591 
/*
 * Create a pair of connected sockets of the given domain, type and
 * protocol.  On success the two new descriptor numbers are stored in
 * rsv[0] and rsv[1] (a kernel-space array supplied by the caller) and 0 is
 * returned.  On failure everything created so far is torn down in reverse
 * order via the free4..free1 labels and an errno value is returned.
 */
int
kern_socketpair(struct thread *td, int domain, int type, int protocol,
    int *rsv)
{
        struct filedesc *fdp = td->td_proc->p_fd;
        struct file *fp1, *fp2;
        struct socket *so1, *so2;
        int fd, error;

        AUDIT_ARG_SOCKET(domain, type, protocol);
#ifdef MAC
        /* We might want to have a separate check for socket pairs. */
        error = mac_socket_check_create(td->td_ucred, domain, type,
            protocol);
        if (error)
                return (error);
#endif
        /* Create both sockets first, then a descriptor for each. */
        error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
        if (error)
                return (error);
        error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
        if (error)
                goto free1;
        /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
        error = falloc(td, &fp1, &fd);
        if (error)
                goto free2;
        rsv[0] = fd;
        fp1->f_data = so1;      /* so1 already has ref count */
        error = falloc(td, &fp2, &fd);
        if (error)
                goto free3;
        fp2->f_data = so2;      /* so2 already has ref count */
        rsv[1] = fd;
        error = soconnect2(so1, so2);
        if (error)
                goto free4;
        if (type == SOCK_DGRAM) {
                /*
                 * Datagram socket connection is asymmetric.
                 */
                 error = soconnect2(so2, so1);
                 if (error)
                        goto free4;
        }
        /* Both ends are connected: publish the files as sockets. */
        finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
        finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
        /* Drop the extra references taken by falloc(). */
        fdrop(fp1, td);
        fdrop(fp2, td);
        return (0);
        /* Error unwinding: each label undoes one more step, then falls through. */
free4:
        fdclose(fdp, fp2, rsv[1], td);
        fdrop(fp2, td);
free3:
        fdclose(fdp, fp1, rsv[0], td);
        fdrop(fp1, td);
free2:
        if (so2 != NULL)
                (void)soclose(so2);
free1:
        if (so1 != NULL)
                (void)soclose(so1);
        return (error);
}
  656 
  657 int
  658 socketpair(struct thread *td, struct socketpair_args *uap)
  659 {
  660         int error, sv[2];
  661 
  662         error = kern_socketpair(td, uap->domain, uap->type,
  663             uap->protocol, sv);
  664         if (error)
  665                 return (error);
  666         error = copyout(sv, uap->rsv, 2 * sizeof(int));
  667         if (error) {
  668                 (void)kern_close(td, sv[0]);
  669                 (void)kern_close(td, sv[1]);
  670         }
  671         return (error);
  672 }
  673 
  674 static int
  675 sendit(td, s, mp, flags)
  676         struct thread *td;
  677         int s;
  678         struct msghdr *mp;
  679         int flags;
  680 {
  681         struct mbuf *control;
  682         struct sockaddr *to;
  683         int error;
  684 
  685         if (mp->msg_name != NULL) {
  686                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
  687                 if (error) {
  688                         to = NULL;
  689                         goto bad;
  690                 }
  691                 mp->msg_name = to;
  692         } else {
  693                 to = NULL;
  694         }
  695 
  696         if (mp->msg_control) {
  697                 if (mp->msg_controllen < sizeof(struct cmsghdr)
  698 #ifdef COMPAT_OLDSOCK
  699                     && mp->msg_flags != MSG_COMPAT
  700 #endif
  701                 ) {
  702                         error = EINVAL;
  703                         goto bad;
  704                 }
  705                 error = sockargs(&control, mp->msg_control,
  706                     mp->msg_controllen, MT_CONTROL);
  707                 if (error)
  708                         goto bad;
  709 #ifdef COMPAT_OLDSOCK
  710                 if (mp->msg_flags == MSG_COMPAT) {
  711                         struct cmsghdr *cm;
  712 
  713                         M_PREPEND(control, sizeof(*cm), M_WAIT);
  714                         cm = mtod(control, struct cmsghdr *);
  715                         cm->cmsg_len = control->m_len;
  716                         cm->cmsg_level = SOL_SOCKET;
  717                         cm->cmsg_type = SCM_RIGHTS;
  718                 }
  719 #endif
  720         } else {
  721                 control = NULL;
  722         }
  723 
  724         error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
  725 
  726 bad:
  727         if (to)
  728                 free(to, M_SONAME);
  729         return (error);
  730 }
  731 
  732 int
  733 kern_sendit(td, s, mp, flags, control, segflg)
  734         struct thread *td;
  735         int s;
  736         struct msghdr *mp;
  737         int flags;
  738         struct mbuf *control;
  739         enum uio_seg segflg;
  740 {
  741         struct file *fp;
  742         struct uio auio;
  743         struct iovec *iov;
  744         struct socket *so;
  745         int i;
  746         int len, error;
  747 #ifdef KTRACE
  748         struct uio *ktruio = NULL;
  749 #endif
  750 
  751         AUDIT_ARG_FD(s);
  752         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
  753         if (error)
  754                 return (error);
  755         so = (struct socket *)fp->f_data;
  756 
  757 #ifdef MAC
  758         if (mp->msg_name != NULL) {
  759                 error = mac_socket_check_connect(td->td_ucred, so,
  760                     mp->msg_name);
  761                 if (error)
  762                         goto bad;
  763         }
  764         error = mac_socket_check_send(td->td_ucred, so);
  765         if (error)
  766                 goto bad;
  767 #endif
  768 
  769         auio.uio_iov = mp->msg_iov;
  770         auio.uio_iovcnt = mp->msg_iovlen;
  771         auio.uio_segflg = segflg;
  772         auio.uio_rw = UIO_WRITE;
  773         auio.uio_td = td;
  774         auio.uio_offset = 0;                    /* XXX */
  775         auio.uio_resid = 0;
  776         iov = mp->msg_iov;
  777         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  778                 if ((auio.uio_resid += iov->iov_len) < 0) {
  779                         error = EINVAL;
  780                         goto bad;
  781                 }
  782         }
  783 #ifdef KTRACE
  784         if (KTRPOINT(td, KTR_GENIO))
  785                 ktruio = cloneuio(&auio);
  786 #endif
  787         len = auio.uio_resid;
  788         error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
  789         if (error) {
  790                 if (auio.uio_resid != len && (error == ERESTART ||
  791                     error == EINTR || error == EWOULDBLOCK))
  792                         error = 0;
  793                 /* Generation of SIGPIPE can be controlled per socket */
  794                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
  795                     !(flags & MSG_NOSIGNAL)) {
  796                         PROC_LOCK(td->td_proc);
  797                         psignal(td->td_proc, SIGPIPE);
  798                         PROC_UNLOCK(td->td_proc);
  799                 }
  800         }
  801         if (error == 0)
  802                 td->td_retval[0] = len - auio.uio_resid;
  803 #ifdef KTRACE
  804         if (ktruio != NULL) {
  805                 ktruio->uio_resid = td->td_retval[0];
  806                 ktrgenio(s, UIO_WRITE, ktruio, error);
  807         }
  808 #endif
  809 bad:
  810         fdrop(fp, td);
  811         return (error);
  812 }
  813 
  814 int
  815 sendto(td, uap)
  816         struct thread *td;
  817         struct sendto_args /* {
  818                 int     s;
  819                 caddr_t buf;
  820                 size_t  len;
  821                 int     flags;
  822                 caddr_t to;
  823                 int     tolen;
  824         } */ *uap;
  825 {
  826         struct msghdr msg;
  827         struct iovec aiov;
  828         int error;
  829 
  830         msg.msg_name = uap->to;
  831         msg.msg_namelen = uap->tolen;
  832         msg.msg_iov = &aiov;
  833         msg.msg_iovlen = 1;
  834         msg.msg_control = 0;
  835 #ifdef COMPAT_OLDSOCK
  836         msg.msg_flags = 0;
  837 #endif
  838         aiov.iov_base = uap->buf;
  839         aiov.iov_len = uap->len;
  840         error = sendit(td, uap->s, &msg, uap->flags);
  841         return (error);
  842 }
  843 
  844 #ifdef COMPAT_OLDSOCK
  845 int
  846 osend(td, uap)
  847         struct thread *td;
  848         struct osend_args /* {
  849                 int     s;
  850                 caddr_t buf;
  851                 int     len;
  852                 int     flags;
  853         } */ *uap;
  854 {
  855         struct msghdr msg;
  856         struct iovec aiov;
  857         int error;
  858 
  859         msg.msg_name = 0;
  860         msg.msg_namelen = 0;
  861         msg.msg_iov = &aiov;
  862         msg.msg_iovlen = 1;
  863         aiov.iov_base = uap->buf;
  864         aiov.iov_len = uap->len;
  865         msg.msg_control = 0;
  866         msg.msg_flags = 0;
  867         error = sendit(td, uap->s, &msg, uap->flags);
  868         return (error);
  869 }
  870 
  871 int
  872 osendmsg(td, uap)
  873         struct thread *td;
  874         struct osendmsg_args /* {
  875                 int     s;
  876                 caddr_t msg;
  877                 int     flags;
  878         } */ *uap;
  879 {
  880         struct msghdr msg;
  881         struct iovec *iov;
  882         int error;
  883 
  884         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
  885         if (error)
  886                 return (error);
  887         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  888         if (error)
  889                 return (error);
  890         msg.msg_iov = iov;
  891         msg.msg_flags = MSG_COMPAT;
  892         error = sendit(td, uap->s, &msg, uap->flags);
  893         free(iov, M_IOV);
  894         return (error);
  895 }
  896 #endif
  897 
  898 int
  899 sendmsg(td, uap)
  900         struct thread *td;
  901         struct sendmsg_args /* {
  902                 int     s;
  903                 caddr_t msg;
  904                 int     flags;
  905         } */ *uap;
  906 {
  907         struct msghdr msg;
  908         struct iovec *iov;
  909         int error;
  910 
  911         error = copyin(uap->msg, &msg, sizeof (msg));
  912         if (error)
  913                 return (error);
  914         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  915         if (error)
  916                 return (error);
  917         msg.msg_iov = iov;
  918 #ifdef COMPAT_OLDSOCK
  919         msg.msg_flags = 0;
  920 #endif
  921         error = sendit(td, uap->s, &msg, uap->flags);
  922         free(iov, M_IOV);
  923         return (error);
  924 }
  925 
  926 int
  927 kern_recvit(td, s, mp, fromseg, controlp)
  928         struct thread *td;
  929         int s;
  930         struct msghdr *mp;
  931         enum uio_seg fromseg;
  932         struct mbuf **controlp;
  933 {
  934         struct uio auio;
  935         struct iovec *iov;
  936         int i;
  937         socklen_t len;
  938         int error;
  939         struct mbuf *m, *control = 0;
  940         caddr_t ctlbuf;
  941         struct file *fp;
  942         struct socket *so;
  943         struct sockaddr *fromsa = 0;
  944 #ifdef KTRACE
  945         struct uio *ktruio = NULL;
  946 #endif
  947 
  948         if(controlp != NULL)
  949                 *controlp = 0;
  950 
  951         AUDIT_ARG_FD(s);
  952         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
  953         if (error)
  954                 return (error);
  955         so = fp->f_data;
  956 
  957 #ifdef MAC
  958         error = mac_socket_check_receive(td->td_ucred, so);
  959         if (error) {
  960                 fdrop(fp, td);
  961                 return (error);
  962         }
  963 #endif
  964 
  965         auio.uio_iov = mp->msg_iov;
  966         auio.uio_iovcnt = mp->msg_iovlen;
  967         auio.uio_segflg = UIO_USERSPACE;
  968         auio.uio_rw = UIO_READ;
  969         auio.uio_td = td;
  970         auio.uio_offset = 0;                    /* XXX */
  971         auio.uio_resid = 0;
  972         iov = mp->msg_iov;
  973         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  974                 if ((auio.uio_resid += iov->iov_len) < 0) {
  975                         fdrop(fp, td);
  976                         return (EINVAL);
  977                 }
  978         }
  979 #ifdef KTRACE
  980         if (KTRPOINT(td, KTR_GENIO))
  981                 ktruio = cloneuio(&auio);
  982 #endif
  983         len = auio.uio_resid;
  984         CURVNET_SET(so->so_vnet);
  985         error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
  986             (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
  987             &mp->msg_flags);
  988         CURVNET_RESTORE();
  989         if (error) {
  990                 if (auio.uio_resid != (int)len && (error == ERESTART ||
  991                     error == EINTR || error == EWOULDBLOCK))
  992                         error = 0;
  993         }
  994 #ifdef KTRACE
  995         if (ktruio != NULL) {
  996                 ktruio->uio_resid = (int)len - auio.uio_resid;
  997                 ktrgenio(s, UIO_READ, ktruio, error);
  998         }
  999 #endif
 1000         if (error)
 1001                 goto out;
 1002         td->td_retval[0] = (int)len - auio.uio_resid;
 1003         if (mp->msg_name) {
 1004                 len = mp->msg_namelen;
 1005                 if (len <= 0 || fromsa == 0)
 1006                         len = 0;
 1007                 else {
 1008                         /* save sa_len before it is destroyed by MSG_COMPAT */
 1009                         len = MIN(len, fromsa->sa_len);
 1010 #ifdef COMPAT_OLDSOCK
 1011                         if (mp->msg_flags & MSG_COMPAT)
 1012                                 ((struct osockaddr *)fromsa)->sa_family =
 1013                                     fromsa->sa_family;
 1014 #endif
 1015                         if (fromseg == UIO_USERSPACE) {
 1016                                 error = copyout(fromsa, mp->msg_name,
 1017                                     (unsigned)len);
 1018                                 if (error)
 1019                                         goto out;
 1020                         } else
 1021                                 bcopy(fromsa, mp->msg_name, len);
 1022                 }
 1023                 mp->msg_namelen = len;
 1024         }
 1025         if (mp->msg_control && controlp == NULL) {
 1026 #ifdef COMPAT_OLDSOCK
 1027                 /*
 1028                  * We assume that old recvmsg calls won't receive access
 1029                  * rights and other control info, esp. as control info
 1030                  * is always optional and those options didn't exist in 4.3.
 1031                  * If we receive rights, trim the cmsghdr; anything else
 1032                  * is tossed.
 1033                  */
 1034                 if (control && mp->msg_flags & MSG_COMPAT) {
 1035                         if (mtod(control, struct cmsghdr *)->cmsg_level !=
 1036                             SOL_SOCKET ||
 1037                             mtod(control, struct cmsghdr *)->cmsg_type !=
 1038                             SCM_RIGHTS) {
 1039                                 mp->msg_controllen = 0;
 1040                                 goto out;
 1041                         }
 1042                         control->m_len -= sizeof (struct cmsghdr);
 1043                         control->m_data += sizeof (struct cmsghdr);
 1044                 }
 1045 #endif
 1046                 len = mp->msg_controllen;
 1047                 m = control;
 1048                 mp->msg_controllen = 0;
 1049                 ctlbuf = mp->msg_control;
 1050 
 1051                 while (m && len > 0) {
 1052                         unsigned int tocopy;
 1053 
 1054                         if (len >= m->m_len)
 1055                                 tocopy = m->m_len;
 1056                         else {
 1057                                 mp->msg_flags |= MSG_CTRUNC;
 1058                                 tocopy = len;
 1059                         }
 1060 
 1061                         if ((error = copyout(mtod(m, caddr_t),
 1062                                         ctlbuf, tocopy)) != 0)
 1063                                 goto out;
 1064 
 1065                         ctlbuf += tocopy;
 1066                         len -= tocopy;
 1067                         m = m->m_next;
 1068                 }
 1069                 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
 1070         }
 1071 out:
 1072         fdrop(fp, td);
 1073 #ifdef KTRACE
 1074         if (fromsa && KTRPOINT(td, KTR_STRUCT))
 1075                 ktrsockaddr(fromsa);
 1076 #endif
 1077         if (fromsa)
 1078                 free(fromsa, M_SONAME);
 1079 
 1080         if (error == 0 && controlp != NULL)  
 1081                 *controlp = control;
 1082         else  if (control)
 1083                 m_freem(control);
 1084 
 1085         return (error);
 1086 }
 1087 
 1088 static int
 1089 recvit(td, s, mp, namelenp)
 1090         struct thread *td;
 1091         int s;
 1092         struct msghdr *mp;
 1093         void *namelenp;
 1094 {
 1095         int error;
 1096 
 1097         error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
 1098         if (error)
 1099                 return (error);
 1100         if (namelenp) {
 1101                 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
 1102 #ifdef COMPAT_OLDSOCK
 1103                 if (mp->msg_flags & MSG_COMPAT)
 1104                         error = 0;      /* old recvfrom didn't check */
 1105 #endif
 1106         }
 1107         return (error);
 1108 }
 1109 
 1110 int
 1111 recvfrom(td, uap)
 1112         struct thread *td;
 1113         struct recvfrom_args /* {
 1114                 int     s;
 1115                 caddr_t buf;
 1116                 size_t  len;
 1117                 int     flags;
 1118                 struct sockaddr * __restrict    from;
 1119                 socklen_t * __restrict fromlenaddr;
 1120         } */ *uap;
 1121 {
 1122         struct msghdr msg;
 1123         struct iovec aiov;
 1124         int error;
 1125 
 1126         if (uap->fromlenaddr) {
 1127                 error = copyin(uap->fromlenaddr,
 1128                     &msg.msg_namelen, sizeof (msg.msg_namelen));
 1129                 if (error)
 1130                         goto done2;
 1131         } else {
 1132                 msg.msg_namelen = 0;
 1133         }
 1134         msg.msg_name = uap->from;
 1135         msg.msg_iov = &aiov;
 1136         msg.msg_iovlen = 1;
 1137         aiov.iov_base = uap->buf;
 1138         aiov.iov_len = uap->len;
 1139         msg.msg_control = 0;
 1140         msg.msg_flags = uap->flags;
 1141         error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 1142 done2:
 1143         return(error);
 1144 }
 1145 
 1146 #ifdef COMPAT_OLDSOCK
 1147 int
 1148 orecvfrom(td, uap)
 1149         struct thread *td;
 1150         struct recvfrom_args *uap;
 1151 {
 1152 
 1153         uap->flags |= MSG_COMPAT;
 1154         return (recvfrom(td, uap));
 1155 }
 1156 #endif
 1157 
 1158 #ifdef COMPAT_OLDSOCK
 1159 int
 1160 orecv(td, uap)
 1161         struct thread *td;
 1162         struct orecv_args /* {
 1163                 int     s;
 1164                 caddr_t buf;
 1165                 int     len;
 1166                 int     flags;
 1167         } */ *uap;
 1168 {
 1169         struct msghdr msg;
 1170         struct iovec aiov;
 1171         int error;
 1172 
 1173         msg.msg_name = 0;
 1174         msg.msg_namelen = 0;
 1175         msg.msg_iov = &aiov;
 1176         msg.msg_iovlen = 1;
 1177         aiov.iov_base = uap->buf;
 1178         aiov.iov_len = uap->len;
 1179         msg.msg_control = 0;
 1180         msg.msg_flags = uap->flags;
 1181         error = recvit(td, uap->s, &msg, NULL);
 1182         return (error);
 1183 }
 1184 
 1185 /*
 1186  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 1187  * overlays the new one, missing only the flags, and with the (old) access
 1188  * rights where the control fields are now.
 1189  */
 1190 int
 1191 orecvmsg(td, uap)
 1192         struct thread *td;
 1193         struct orecvmsg_args /* {
 1194                 int     s;
 1195                 struct  omsghdr *msg;
 1196                 int     flags;
 1197         } */ *uap;
 1198 {
 1199         struct msghdr msg;
 1200         struct iovec *iov;
 1201         int error;
 1202 
 1203         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 1204         if (error)
 1205                 return (error);
 1206         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1207         if (error)
 1208                 return (error);
 1209         msg.msg_flags = uap->flags | MSG_COMPAT;
 1210         msg.msg_iov = iov;
 1211         error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
 1212         if (msg.msg_controllen && error == 0)
 1213                 error = copyout(&msg.msg_controllen,
 1214                     &uap->msg->msg_accrightslen, sizeof (int));
 1215         free(iov, M_IOV);
 1216         return (error);
 1217 }
 1218 #endif
 1219 
 1220 int
 1221 recvmsg(td, uap)
 1222         struct thread *td;
 1223         struct recvmsg_args /* {
 1224                 int     s;
 1225                 struct  msghdr *msg;
 1226                 int     flags;
 1227         } */ *uap;
 1228 {
 1229         struct msghdr msg;
 1230         struct iovec *uiov, *iov;
 1231         int error;
 1232 
 1233         error = copyin(uap->msg, &msg, sizeof (msg));
 1234         if (error)
 1235                 return (error);
 1236         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1237         if (error)
 1238                 return (error);
 1239         msg.msg_flags = uap->flags;
 1240 #ifdef COMPAT_OLDSOCK
 1241         msg.msg_flags &= ~MSG_COMPAT;
 1242 #endif
 1243         uiov = msg.msg_iov;
 1244         msg.msg_iov = iov;
 1245         error = recvit(td, uap->s, &msg, NULL);
 1246         if (error == 0) {
 1247                 msg.msg_iov = uiov;
 1248                 error = copyout(&msg, uap->msg, sizeof(msg));
 1249         }
 1250         free(iov, M_IOV);
 1251         return (error);
 1252 }
 1253 
 1254 /* ARGSUSED */
 1255 int
 1256 shutdown(td, uap)
 1257         struct thread *td;
 1258         struct shutdown_args /* {
 1259                 int     s;
 1260                 int     how;
 1261         } */ *uap;
 1262 {
 1263         struct socket *so;
 1264         struct file *fp;
 1265         int error;
 1266 
 1267         AUDIT_ARG_FD(uap->s);
 1268         error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
 1269         if (error == 0) {
 1270                 so = fp->f_data;
 1271                 error = soshutdown(so, uap->how);
 1272                 fdrop(fp, td);
 1273         }
 1274         return (error);
 1275 }
 1276 
 1277 /* ARGSUSED */
 1278 int
 1279 setsockopt(td, uap)
 1280         struct thread *td;
 1281         struct setsockopt_args /* {
 1282                 int     s;
 1283                 int     level;
 1284                 int     name;
 1285                 caddr_t val;
 1286                 int     valsize;
 1287         } */ *uap;
 1288 {
 1289 
 1290         return (kern_setsockopt(td, uap->s, uap->level, uap->name,
 1291             uap->val, UIO_USERSPACE, uap->valsize));
 1292 }
 1293 
 1294 int
 1295 kern_setsockopt(td, s, level, name, val, valseg, valsize)
 1296         struct thread *td;
 1297         int s;
 1298         int level;
 1299         int name;
 1300         void *val;
 1301         enum uio_seg valseg;
 1302         socklen_t valsize;
 1303 {
 1304         int error;
 1305         struct socket *so;
 1306         struct file *fp;
 1307         struct sockopt sopt;
 1308 
 1309         if (val == NULL && valsize != 0)
 1310                 return (EFAULT);
 1311         if ((int)valsize < 0)
 1312                 return (EINVAL);
 1313 
 1314         sopt.sopt_dir = SOPT_SET;
 1315         sopt.sopt_level = level;
 1316         sopt.sopt_name = name;
 1317         sopt.sopt_val = val;
 1318         sopt.sopt_valsize = valsize;
 1319         switch (valseg) {
 1320         case UIO_USERSPACE:
 1321                 sopt.sopt_td = td;
 1322                 break;
 1323         case UIO_SYSSPACE:
 1324                 sopt.sopt_td = NULL;
 1325                 break;
 1326         default:
 1327                 panic("kern_setsockopt called with bad valseg");
 1328         }
 1329 
 1330         AUDIT_ARG_FD(s);
 1331         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
 1332         if (error == 0) {
 1333                 so = fp->f_data;
 1334                 CURVNET_SET(so->so_vnet);
 1335                 error = sosetopt(so, &sopt);
 1336                 CURVNET_RESTORE();
 1337                 fdrop(fp, td);
 1338         }
 1339         return(error);
 1340 }
 1341 
 1342 /* ARGSUSED */
 1343 int
 1344 getsockopt(td, uap)
 1345         struct thread *td;
 1346         struct getsockopt_args /* {
 1347                 int     s;
 1348                 int     level;
 1349                 int     name;
 1350                 void * __restrict       val;
 1351                 socklen_t * __restrict avalsize;
 1352         } */ *uap;
 1353 {
 1354         socklen_t valsize;
 1355         int     error;
 1356 
 1357         if (uap->val) {
 1358                 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 1359                 if (error)
 1360                         return (error);
 1361         }
 1362 
 1363         error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 1364             uap->val, UIO_USERSPACE, &valsize);
 1365 
 1366         if (error == 0)
 1367                 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 1368         return (error);
 1369 }
 1370 
 1371 /*
 1372  * Kernel version of getsockopt.
 1373  * optval can be a userland or userspace. optlen is always a kernel pointer.
 1374  */
 1375 int
 1376 kern_getsockopt(td, s, level, name, val, valseg, valsize)
 1377         struct thread *td;
 1378         int s;
 1379         int level;
 1380         int name;
 1381         void *val;
 1382         enum uio_seg valseg;
 1383         socklen_t *valsize;
 1384 {
 1385         int error;
 1386         struct  socket *so;
 1387         struct file *fp;
 1388         struct  sockopt sopt;
 1389 
 1390         if (val == NULL)
 1391                 *valsize = 0;
 1392         if ((int)*valsize < 0)
 1393                 return (EINVAL);
 1394 
 1395         sopt.sopt_dir = SOPT_GET;
 1396         sopt.sopt_level = level;
 1397         sopt.sopt_name = name;
 1398         sopt.sopt_val = val;
 1399         sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
 1400         switch (valseg) {
 1401         case UIO_USERSPACE:
 1402                 sopt.sopt_td = td;
 1403                 break;
 1404         case UIO_SYSSPACE:
 1405                 sopt.sopt_td = NULL;
 1406                 break;
 1407         default:
 1408                 panic("kern_getsockopt called with bad valseg");
 1409         }
 1410 
 1411         AUDIT_ARG_FD(s);
 1412         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
 1413         if (error == 0) {
 1414                 so = fp->f_data;
 1415                 CURVNET_SET(so->so_vnet);
 1416                 error = sogetopt(so, &sopt);
 1417                 CURVNET_RESTORE();
 1418                 *valsize = sopt.sopt_valsize;
 1419                 fdrop(fp, td);
 1420         }
 1421         return (error);
 1422 }
 1423 
 1424 /*
 1425  * getsockname1() - Get socket name.
 1426  */
 1427 /* ARGSUSED */
 1428 static int
 1429 getsockname1(td, uap, compat)
 1430         struct thread *td;
 1431         struct getsockname_args /* {
 1432                 int     fdes;
 1433                 struct sockaddr * __restrict asa;
 1434                 socklen_t * __restrict alen;
 1435         } */ *uap;
 1436         int compat;
 1437 {
 1438         struct sockaddr *sa;
 1439         socklen_t len;
 1440         int error;
 1441 
 1442         error = copyin(uap->alen, &len, sizeof(len));
 1443         if (error)
 1444                 return (error);
 1445 
 1446         error = kern_getsockname(td, uap->fdes, &sa, &len);
 1447         if (error)
 1448                 return (error);
 1449 
 1450         if (len != 0) {
 1451 #ifdef COMPAT_OLDSOCK
 1452                 if (compat)
 1453                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1454 #endif
 1455                 error = copyout(sa, uap->asa, (u_int)len);
 1456         }
 1457         free(sa, M_SONAME);
 1458         if (error == 0)
 1459                 error = copyout(&len, uap->alen, sizeof(len));
 1460         return (error);
 1461 }
 1462 
 1463 int
 1464 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
 1465     socklen_t *alen)
 1466 {
 1467         struct socket *so;
 1468         struct file *fp;
 1469         socklen_t len;
 1470         int error;
 1471 
 1472         if (*alen < 0)
 1473                 return (EINVAL);
 1474 
 1475         AUDIT_ARG_FD(fd);
 1476         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
 1477         if (error)
 1478                 return (error);
 1479         so = fp->f_data;
 1480         *sa = NULL;
 1481         CURVNET_SET(so->so_vnet);
 1482         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
 1483         CURVNET_RESTORE();
 1484         if (error)
 1485                 goto bad;
 1486         if (*sa == NULL)
 1487                 len = 0;
 1488         else
 1489                 len = MIN(*alen, (*sa)->sa_len);
 1490         *alen = len;
 1491 #ifdef KTRACE
 1492         if (KTRPOINT(td, KTR_STRUCT))
 1493                 ktrsockaddr(*sa);
 1494 #endif
 1495 bad:
 1496         fdrop(fp, td);
 1497         if (error && *sa) {
 1498                 free(*sa, M_SONAME);
 1499                 *sa = NULL;
 1500         }
 1501         return (error);
 1502 }
 1503 
 1504 int
 1505 getsockname(td, uap)
 1506         struct thread *td;
 1507         struct getsockname_args *uap;
 1508 {
 1509 
 1510         return (getsockname1(td, uap, 0));
 1511 }
 1512 
 1513 #ifdef COMPAT_OLDSOCK
 1514 int
 1515 ogetsockname(td, uap)
 1516         struct thread *td;
 1517         struct getsockname_args *uap;
 1518 {
 1519 
 1520         return (getsockname1(td, uap, 1));
 1521 }
 1522 #endif /* COMPAT_OLDSOCK */
 1523 
 1524 /*
 1525  * getpeername1() - Get name of peer for connected socket.
 1526  */
 1527 /* ARGSUSED */
 1528 static int
 1529 getpeername1(td, uap, compat)
 1530         struct thread *td;
 1531         struct getpeername_args /* {
 1532                 int     fdes;
 1533                 struct sockaddr * __restrict    asa;
 1534                 socklen_t * __restrict  alen;
 1535         } */ *uap;
 1536         int compat;
 1537 {
 1538         struct sockaddr *sa;
 1539         socklen_t len;
 1540         int error;
 1541 
 1542         error = copyin(uap->alen, &len, sizeof (len));
 1543         if (error)
 1544                 return (error);
 1545 
 1546         error = kern_getpeername(td, uap->fdes, &sa, &len);
 1547         if (error)
 1548                 return (error);
 1549 
 1550         if (len != 0) {
 1551 #ifdef COMPAT_OLDSOCK
 1552                 if (compat)
 1553                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1554 #endif
 1555                 error = copyout(sa, uap->asa, (u_int)len);
 1556         }
 1557         free(sa, M_SONAME);
 1558         if (error == 0)
 1559                 error = copyout(&len, uap->alen, sizeof(len));
 1560         return (error);
 1561 }
 1562 
 1563 int
 1564 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
 1565     socklen_t *alen)
 1566 {
 1567         struct socket *so;
 1568         struct file *fp;
 1569         socklen_t len;
 1570         int error;
 1571 
 1572         if (*alen < 0)
 1573                 return (EINVAL);
 1574 
 1575         AUDIT_ARG_FD(fd);
 1576         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
 1577         if (error)
 1578                 return (error);
 1579         so = fp->f_data;
 1580         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 1581                 error = ENOTCONN;
 1582                 goto done;
 1583         }
 1584         *sa = NULL;
 1585         CURVNET_SET(so->so_vnet);
 1586         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
 1587         CURVNET_RESTORE();
 1588         if (error)
 1589                 goto bad;
 1590         if (*sa == NULL)
 1591                 len = 0;
 1592         else
 1593                 len = MIN(*alen, (*sa)->sa_len);
 1594         *alen = len;
 1595 #ifdef KTRACE
 1596         if (KTRPOINT(td, KTR_STRUCT))
 1597                 ktrsockaddr(*sa);
 1598 #endif
 1599 bad:
 1600         if (error && *sa) {
 1601                 free(*sa, M_SONAME);
 1602                 *sa = NULL;
 1603         }
 1604 done:
 1605         fdrop(fp, td);
 1606         return (error);
 1607 }
 1608 
 1609 int
 1610 getpeername(td, uap)
 1611         struct thread *td;
 1612         struct getpeername_args *uap;
 1613 {
 1614 
 1615         return (getpeername1(td, uap, 0));
 1616 }
 1617 
 1618 #ifdef COMPAT_OLDSOCK
 1619 int
 1620 ogetpeername(td, uap)
 1621         struct thread *td;
 1622         struct ogetpeername_args *uap;
 1623 {
 1624 
 1625         /* XXX uap should have type `getpeername_args *' to begin with. */
 1626         return (getpeername1(td, (struct getpeername_args *)uap, 1));
 1627 }
 1628 #endif /* COMPAT_OLDSOCK */
 1629 
 1630 int
 1631 sockargs(mp, buf, buflen, type)
 1632         struct mbuf **mp;
 1633         caddr_t buf;
 1634         int buflen, type;
 1635 {
 1636         struct sockaddr *sa;
 1637         struct mbuf *m;
 1638         int error;
 1639 
 1640         if ((u_int)buflen > MLEN) {
 1641 #ifdef COMPAT_OLDSOCK
 1642                 if (type == MT_SONAME && (u_int)buflen <= 112)
 1643                         buflen = MLEN;          /* unix domain compat. hack */
 1644                 else
 1645 #endif
 1646                         if ((u_int)buflen > MCLBYTES)
 1647                                 return (EINVAL);
 1648         }
 1649         m = m_get(M_WAIT, type);
 1650         if ((u_int)buflen > MLEN)
 1651                 MCLGET(m, M_WAIT);
 1652         m->m_len = buflen;
 1653         error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 1654         if (error)
 1655                 (void) m_free(m);
 1656         else {
 1657                 *mp = m;
 1658                 if (type == MT_SONAME) {
 1659                         sa = mtod(m, struct sockaddr *);
 1660 
 1661 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1662                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1663                                 sa->sa_family = sa->sa_len;
 1664 #endif
 1665                         sa->sa_len = buflen;
 1666                 }
 1667         }
 1668         return (error);
 1669 }
 1670 
 1671 int
 1672 getsockaddr(namp, uaddr, len)
 1673         struct sockaddr **namp;
 1674         caddr_t uaddr;
 1675         size_t len;
 1676 {
 1677         struct sockaddr *sa;
 1678         int error;
 1679 
 1680         if (len > SOCK_MAXADDRLEN)
 1681                 return (ENAMETOOLONG);
 1682         if (len < offsetof(struct sockaddr, sa_data[0]))
 1683                 return (EINVAL);
 1684         sa = malloc(len, M_SONAME, M_WAITOK);
 1685         error = copyin(uaddr, sa, len);
 1686         if (error) {
 1687                 free(sa, M_SONAME);
 1688         } else {
 1689 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1690                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1691                         sa->sa_family = sa->sa_len;
 1692 #endif
 1693                 sa->sa_len = len;
 1694                 *namp = sa;
 1695         }
 1696         return (error);
 1697 }
 1698 
 1699 #include <sys/condvar.h>
 1700 
 1701 struct sendfile_sync {
 1702         struct mtx      mtx;
 1703         struct cv       cv;
 1704         unsigned        count;
 1705 };
 1706 
 1707 /*
 1708  * Detach mapped page and release resources back to the system.
 1709  */
 1710 void
 1711 sf_buf_mext(void *addr, void *args)
 1712 {
 1713         vm_page_t m;
 1714         struct sendfile_sync *sfs;
 1715 
 1716         m = sf_buf_page(args);
 1717         sf_buf_free(args);
 1718         vm_page_lock_queues();
 1719         vm_page_unwire(m, 0);
 1720         /*
 1721          * Check for the object going away on us. This can
 1722          * happen since we don't hold a reference to it.
 1723          * If so, we're responsible for freeing the page.
 1724          */
 1725         if (m->wire_count == 0 && m->object == NULL)
 1726                 vm_page_free(m);
 1727         vm_page_unlock_queues();
 1728         if (addr == NULL)
 1729                 return;
 1730         sfs = addr;
 1731         mtx_lock(&sfs->mtx);
 1732         KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
 1733         if (--sfs->count == 0)
 1734                 cv_signal(&sfs->cv);
 1735         mtx_unlock(&sfs->mtx);
 1736 }
 1737 
 1738 /*
 1739  * sendfile(2)
 1740  *
 1741  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 1742  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 1743  *
 1744  * Send a file specified by 'fd' and starting at 'offset' to a socket
 1745  * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
 1746  * 0.  Optionally add a header and/or trailer to the socket output.  If
 1747  * specified, write the total number of bytes sent into *sbytes.
 1748  */
 1749 int
 1750 sendfile(struct thread *td, struct sendfile_args *uap)
 1751 {
 1752 
 1753         return (do_sendfile(td, uap, 0));
 1754 }
 1755 
 1756 static int
 1757 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
 1758 {
 1759         struct sf_hdtr hdtr;
 1760         struct uio *hdr_uio, *trl_uio;
 1761         int error;
 1762 
 1763         hdr_uio = trl_uio = NULL;
 1764 
 1765         if (uap->hdtr != NULL) {
 1766                 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 1767                 if (error)
 1768                         goto out;
 1769                 if (hdtr.headers != NULL) {
 1770                         error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
 1771                         if (error)
 1772                                 goto out;
 1773                 }
 1774                 if (hdtr.trailers != NULL) {
 1775                         error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
 1776                         if (error)
 1777                                 goto out;
 1778 
 1779                 }
 1780         }
 1781 
 1782         error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
 1783 out:
 1784         if (hdr_uio)
 1785                 free(hdr_uio, M_IOV);
 1786         if (trl_uio)
 1787                 free(trl_uio, M_IOV);
 1788         return (error);
 1789 }
 1790 
 1791 #ifdef COMPAT_FREEBSD4
 1792 int
 1793 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
 1794 {
 1795         struct sendfile_args args;
 1796 
 1797         args.fd = uap->fd;
 1798         args.s = uap->s;
 1799         args.offset = uap->offset;
 1800         args.nbytes = uap->nbytes;
 1801         args.hdtr = uap->hdtr;
 1802         args.sbytes = uap->sbytes;
 1803         args.flags = uap->flags;
 1804 
 1805         return (do_sendfile(td, &args, 1));
 1806 }
 1807 #endif /* COMPAT_FREEBSD4 */
 1808 
 1809 int
 1810 kern_sendfile(struct thread *td, struct sendfile_args *uap,
 1811     struct uio *hdr_uio, struct uio *trl_uio, int compat)
 1812 {
              /*
               * Send the contents of the regular file uap->fd, starting at
               * uap->offset, over the connected stream socket uap->s.
               *
               *   td       calling thread; used for descriptor lookups,
               *            credentials and td_retval.
               *   uap      fd, s, offset, nbytes, flags and the optional
               *            user pointer sbytes.  When nbytes is 0 the amount
               *            to send is derived from the size of the file's
               *            VM object instead.
               *   hdr_uio  optional headers; copied into an mbuf chain that
               *            is queued in front of the file data.
               *   trl_uio  optional trailers; written with kern_writev()
               *            once the file data loop has finished.
               *   compat   when non-zero the header length is subtracted
               *            from uap->nbytes (pre-5.0 semantics).
               *
               * Flags examined here: SF_MNOWAIT (non-sleeping allocations,
               * failures reported as EAGAIN), SF_NODISKIO (EBUSY instead of
               * reading a non-resident page) and SF_SYNC (wait at the end
               * until sfs->count is zero).
               *
               * Returns 0 or an errno value; ERESTART is converted to EINTR.
               * If uap->sbytes is not NULL the running byte total is copied
               * out on every exit path, including error exits.
               */
 1813         struct file *sock_fp;
 1814         struct vnode *vp;
 1815         struct vm_object *obj = NULL;
 1816         struct socket *so = NULL;
 1817         struct mbuf *m = NULL;
 1818         struct sf_buf *sf;
 1819         struct vm_page *pg;
 1820         off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
 1821         int error, hdrlen = 0, mnw = 0;
 1822         int vfslocked;
 1823         struct sendfile_sync *sfs = NULL;
 1824 
 1825         /*
 1826          * The file descriptor must be a regular file and have a
 1827          * backing VM object.
 1828          * File offset must be positive.  If it goes beyond EOF
 1829          * we send only the header/trailer and no payload data.
 1830          */
 1831         AUDIT_ARG_FD(uap->fd);
 1832         if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
 1833                 goto out;
 1834         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1835         vn_lock(vp, LK_SHARED | LK_RETRY);
 1836         if (vp->v_type == VREG) {
 1837                 obj = vp->v_object;
 1838                 if (obj != NULL) {
 1839                         /*
 1840                          * Temporarily increase the backing VM
 1841                          * object's reference count so that a forced
 1842                          * reclamation of its vnode does not
 1843                          * immediately destroy it.
 1844                          */
 1845                         VM_OBJECT_LOCK(obj);
 1846                         if ((obj->flags & OBJ_DEAD) == 0) {
 1847                                 vm_object_reference_locked(obj);
 1848                                 VM_OBJECT_UNLOCK(obj);
 1849                         } else {
 1850                                 VM_OBJECT_UNLOCK(obj);
 1851                                 obj = NULL;
 1852                         }
 1853                 }
 1854         }
 1855         VOP_UNLOCK(vp, 0);
 1856         VFS_UNLOCK_GIANT(vfslocked);
              /*
               * obj is NULL here for a non-regular file, a file without a
               * VM object, or an object already marked OBJ_DEAD.
               */
 1857         if (obj == NULL) {
 1858                 error = EINVAL;
 1859                 goto out;
 1860         }
 1861         if (uap->offset < 0) {
 1862                 error = EINVAL;
 1863                 goto out;
 1864         }
 1865 
 1866         /*
 1867          * The socket must be a stream socket and connected.
 1868          * Remember if it is a blocking or non-blocking socket.
 1869          */
 1870         if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
 1871             NULL)) != 0)
 1872                 goto out;
              /*
               * so stays NULL until getsock() has succeeded; the cleanup
               * code at "out" uses that to decide whether sock_fp must be
               * dropped.
               */
 1873         so = sock_fp->f_data;
 1874         if (so->so_type != SOCK_STREAM) {
 1875                 error = EINVAL;
 1876                 goto out;
 1877         }
 1878         if ((so->so_state & SS_ISCONNECTED) == 0) {
 1879                 error = ENOTCONN;
 1880                 goto out;
 1881         }
 1882         /*
 1883          * Do not wait on memory allocations; the allocation failure
 1884          * paths below return EAGAIN so the caller can retry later.
 1885          * XXX: Experimental.
 1886          */
 1887         if (uap->flags & SF_MNOWAIT)
 1888                 mnw = 1;
 1889 
              /*
               * SF_SYNC: set up a zeroed sendfile_sync.  It is handed to
               * MEXTADD() for every file mbuf, its count is bumped once per
               * mbuf, and the code at "out" sleeps until the count is zero.
               * The decrement itself is not in this function (presumably
               * done by sf_buf_mext -- confirm).
               */
 1890         if (uap->flags & SF_SYNC) {
 1891                 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK);
 1892                 memset(sfs, 0, sizeof *sfs);
 1893                 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
 1894                 cv_init(&sfs->cv, "sendfile");
 1895         }
 1896 
 1897 #ifdef MAC
 1898         error = mac_socket_check_send(td->td_ucred, so);
 1899         if (error)
 1900                 goto out;
 1901 #endif
 1902 
 1903         /* If headers are specified copy them into mbufs. */
 1904         if (hdr_uio != NULL) {
 1905                 hdr_uio->uio_td = td;
 1906                 hdr_uio->uio_rw = UIO_WRITE;
 1907                 if (hdr_uio->uio_resid > 0) {
 1908                         /*
 1909                          * In FBSD < 5.0 the nbytes to send also included
 1910                          * the header.  If compat is specified subtract the
 1911                          * header size from nbytes.
 1912                          */
 1913                         if (compat) {
 1914                                 if (uap->nbytes > hdr_uio->uio_resid)
 1915                                         uap->nbytes -= hdr_uio->uio_resid;
 1916                                 else
 1917                                         uap->nbytes = 0;
 1918                         }
 1919                         m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
 1920                             0, 0, 0);
 1921                         if (m == NULL) {
 1922                                 error = mnw ? EAGAIN : ENOBUFS;
 1923                                 goto out;
 1924                         }
 1925                         hdrlen = m_length(m, NULL);
 1926                 }
 1927         }
 1928 
 1929         /*
 1930          * Protect against multiple writers to the socket.
 1931          *
 1932          * XXXRW: Historically this has assumed non-interruptibility, so now
 1933          * we implement that, but possibly shouldn't.
 1934          */
              /*
               * From here on error exits must use "done" (which releases
               * the sblock) rather than "out".
               */
 1935         (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
 1936 
 1937         /*
 1938          * Loop through the pages of the file, starting with the requested
 1939          * offset. Get a file page (do I/O if necessary), map the file page
 1940          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 1941          * it on the socket.
 1942          * This is done in two loops.  The inner loop turns as many pages
 1943          * as it can, up to available socket buffer space, without blocking
 1944          * into mbufs to have it bulk delivered into the socket send buffer.
 1945          * The outer loop checks the state and available space of the socket
 1946          * and takes care of the overall progress.
 1947          */
 1948         for (off = uap->offset, rem = uap->nbytes; ; ) {
 1949                 int loopbytes = 0;
 1950                 int space = 0;
 1951                 int done = 0;
 1952 
 1953                 /*
 1954                  * Check the socket state for ongoing connection,
 1955                  * no errors and space in socket buffer.
 1956                  * If space is low allow for the remainder of the
 1957                  * file to be processed if it fits the socket buffer.
 1958                  * Otherwise block in waiting for sufficient space
 1959                  * to proceed, or if the socket is nonblocking, return
 1960                  * to userland with EAGAIN while reporting how far
 1961                  * we've come.
 1962                  * We wait until the socket buffer has significant free
 1963                  * space to do bulk sends.  This makes good use of file
 1964                  * system read ahead and allows packet segmentation
 1965                  * offloading hardware to take over lots of work.  If
 1966                  * we were not careful here we would send off only one
 1967                  * sfbuf at a time.
 1968                  */
 1969                 SOCKBUF_LOCK(&so->so_snd);
                      /* Raise the low-water mark to at least half of hiwat. */
 1970                 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
 1971                         so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
 1972 retry_space:
 1973                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 1974                         error = EPIPE;
 1975                         SOCKBUF_UNLOCK(&so->so_snd);
 1976                         goto done;
 1977                 } else if (so->so_error) {
 1978                         error = so->so_error;
 1979                         so->so_error = 0;
 1980                         SOCKBUF_UNLOCK(&so->so_snd);
 1981                         goto done;
 1982                 }
 1983                 space = sbspace(&so->so_snd);
 1984                 if (space < rem &&
 1985                     (space <= 0 ||
 1986                      space < so->so_snd.sb_lowat)) {
 1987                         if (so->so_state & SS_NBIO) {
 1988                                 SOCKBUF_UNLOCK(&so->so_snd);
 1989                                 error = EAGAIN;
 1990                                 goto done;
 1991                         }
 1992                         /*
 1993                          * sbwait drops the lock while sleeping.
 1994                          * When we loop back to retry_space the
 1995                          * state may have changed and we retest
 1996                          * for it.
 1997                          */
 1998                         error = sbwait(&so->so_snd);
 1999                         /*
 2000                          * An error from sbwait usually indicates that we've
 2001                          * been interrupted by a signal. If we've sent anything
 2002                          * then return bytes sent, otherwise return the error.
 2003                          */
 2004                         if (error) {
 2005                                 SOCKBUF_UNLOCK(&so->so_snd);
 2006                                 goto done;
 2007                         }
 2008                         goto retry_space;
 2009                 }
 2010                 SOCKBUF_UNLOCK(&so->so_snd);
 2011 
 2012                 /*
 2013                  * Reduce space in the socket buffer by the size of
 2014                  * the header mbuf chain.
 2015                  * hdrlen is set to 0 after the first loop.
 2016                  */
 2017                 space -= hdrlen;
 2018 
 2019                 /*
 2020                  * Loop and construct maximum sized mbuf chain to be bulk
 2021                  * dumped into socket buffer.
 2022                  */
 2023                 while(space > loopbytes) {
 2024                         vm_pindex_t pindex;
 2025                         vm_offset_t pgoff;
 2026                         struct mbuf *m0;
 2027 
 2028                         VM_OBJECT_LOCK(obj);
 2029                         /*
 2030                          * Calculate the amount to transfer.
 2031                          * Not to exceed a page, the EOF,
 2032                          * or the passed in nbytes.
 2033                          */
 2034                         pgoff = (vm_offset_t)(off & PAGE_MASK);
 2035                         xfsize = omin(PAGE_SIZE - pgoff,
 2036                             obj->un_pager.vnp.vnp_size - uap->offset -
 2037                             fsbytes - loopbytes);
 2038                         if (uap->nbytes)
 2039                                 rem = (uap->nbytes - fsbytes - loopbytes);
 2040                         else
 2041                                 rem = obj->un_pager.vnp.vnp_size -
 2042                                     uap->offset - fsbytes - loopbytes;
 2043                         xfsize = omin(rem, xfsize);
 2044                         xfsize = omin(space - loopbytes, xfsize);
 2045                         if (xfsize <= 0) {
 2046                                 VM_OBJECT_UNLOCK(obj);
 2047                                 done = 1;               /* all data sent */
 2048                                 break;
 2049                         }
 2050 
 2051                         /*
 2052                          * Attempt to look up the page.  Allocate
 2053                          * if not found or wait and loop if busy.
 2054                          */
 2055                         pindex = OFF_TO_IDX(off);
 2056                         pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
 2057                             VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
 2058 
 2059                         /*
 2060                          * Check if page is valid for what we need,
 2061                          * otherwise initiate I/O.
 2062                          * If we already turned some pages into mbufs,
 2063                          * send them off before we come here again and
 2064                          * block.
 2065                          */
 2066                         if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
 2067                                 VM_OBJECT_UNLOCK(obj);
 2068                         else if (m != NULL)
 2069                                 error = EAGAIN; /* send what we already got */
 2070                         else if (uap->flags & SF_NODISKIO)
 2071                                 error = EBUSY;
 2072                         else {
 2073                                 int bsize, resid;
 2074 
 2075                                 /*
 2076                                  * Ensure that our page is still around
 2077                                  * when the I/O completes.
 2078                                  */
 2079                                 vm_page_io_start(pg);
 2080                                 VM_OBJECT_UNLOCK(obj);
 2081 
 2082                                 /*
 2083                                  * Get the page from backing store.
 2084                                  */
 2085                                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2086                                 error = vn_lock(vp, LK_SHARED);
 2087                                 if (error != 0)
 2088                                         goto after_read;
 2089                                 bsize = vp->v_mount->mnt_stat.f_iosize;
 2090 
 2091                                 /*
 2092                                  * XXXMAC: Because we don't have fp->f_cred
 2093                                  * here, we pass in NOCRED.  This is probably
 2094                                  * wrong, but is consistent with our original
 2095                                  * implementation.
 2096                                  */
 2097                                 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
 2098                                     trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
 2099                                     IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 2100                                     td->td_ucred, NOCRED, &resid, td);
 2101                                 VOP_UNLOCK(vp, 0);
 2102                         after_read:
 2103                                 VFS_UNLOCK_GIANT(vfslocked);
 2104                                 VM_OBJECT_LOCK(obj);
 2105                                 vm_page_io_finish(pg);
 2106                                 if (!error)
 2107                                         VM_OBJECT_UNLOCK(obj);
 2108                                 mbstat.sf_iocnt++;
 2109                         }
                              /*
                               * Every path that reaches here with error set
                               * still holds the VM object lock; it is
                               * released below once the page is unwired.
                               */
 2110                         if (error) {
 2111                                 vm_page_lock_queues();
 2112                                 vm_page_unwire(pg, 0);
 2113                                 /*
 2114                                  * See if anyone else might know about
 2115                                  * this page.  If not and it is not valid,
 2116                                  * then free it.
 2117                                  */
 2118                                 if (pg->wire_count == 0 && pg->valid == 0 &&
 2119                                     pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
 2120                                     pg->hold_count == 0) {
 2121                                         vm_page_free(pg);
 2122                                 }
 2123                                 vm_page_unlock_queues();
 2124                                 VM_OBJECT_UNLOCK(obj);
 2125                                 if (error == EAGAIN)
 2126                                         error = 0;      /* not a real error */
 2127                                 break;
 2128                         }
 2129 
 2130                         /*
 2131                          * Get a sendfile buf.  We usually wait as long
 2132                          * as necessary, but this wait can be interrupted.
 2133                          */
 2134                         if ((sf = sf_buf_alloc(pg,
 2135                             (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
 2136                                 mbstat.sf_allocfail++;
 2137                                 vm_page_lock_queues();
 2138                                 vm_page_unwire(pg, 0);
 2139                                 /*
 2140                                  * XXX: Not same check as above!?
 2141                                  */
 2142                                 if (pg->wire_count == 0 && pg->object == NULL)
 2143                                         vm_page_free(pg);
 2144                                 vm_page_unlock_queues();
 2145                                 error = (mnw ? EAGAIN : EINTR);
 2146                                 break;
 2147                         }
 2148 
 2149                         /*
 2150                          * Get an mbuf and set it up as having
 2151                          * external storage.
 2152                          */
 2153                         m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
 2154                         if (m0 == NULL) {
 2155                                 error = (mnw ? EAGAIN : ENOBUFS);
                                      /*
                                       * No mbuf will own the sf_buf, so hand
                                       * it to the external-free routine
                                       * directly.
                                       */
 2156                                 sf_buf_mext((void *)sf_buf_kva(sf), sf);
 2157                                 break;
 2158                         }
 2159                         MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
 2160                             sfs, sf, M_RDONLY, EXT_SFBUF);
 2161                         m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
 2162                         m0->m_len = xfsize;
 2163 
 2164                         /* Append to mbuf chain. */
 2165                         if (m != NULL)
 2166                                 m_cat(m, m0);
 2167                         else
 2168                                 m = m0;
 2169 
 2170                         /* Keep track of bits processed. */
 2171                         loopbytes += xfsize;
 2172                         off += xfsize;
 2173 
                              /* SF_SYNC: one more outstanding mbuf to wait for. */
 2174                         if (sfs != NULL) {
 2175                                 mtx_lock(&sfs->mtx);
 2176                                 sfs->count++;
 2177                                 mtx_unlock(&sfs->mtx);
 2178                         }
 2179                 }
 2180 
 2181                 /* Add the buffer chain to the socket buffer. */
 2182                 if (m != NULL) {
 2183                         int mlen, err;
 2184 
 2185                         mlen = m_length(m, NULL);
 2186                         SOCKBUF_LOCK(&so->so_snd);
 2187                         if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 2188                                 error = EPIPE;
 2189                                 SOCKBUF_UNLOCK(&so->so_snd);
                                      /* m is still set; it is freed at "out". */
 2190                                 goto done;
 2191                         }
 2192                         SOCKBUF_UNLOCK(&so->so_snd);
 2193                         CURVNET_SET(so->so_vnet);
 2194                         /* Avoid error aliasing. */
 2195                         err = (*so->so_proto->pr_usrreqs->pru_send)
 2196                                     (so, 0, m, NULL, NULL, td);
 2197                         CURVNET_RESTORE();
 2198                         if (err == 0) {
 2199                                 /*
 2200                                  * We need two counters to get the
 2201                                  * file offset and nbytes to send
 2202                                  * right:
 2203                                  * - sbytes contains the total amount
 2204                                  *   of bytes sent, including headers.
 2205                                  * - fsbytes contains the total amount
 2206                                  *   of bytes sent from the file.
 2207                                  */
 2208                                 sbytes += mlen;
 2209                                 fsbytes += mlen;
                                      /*
                                       * The first chain sent also carried the
                                       * header mbufs; take their length back
                                       * out of the file byte count.
                                       */
 2210                                 if (hdrlen) {
 2211                                         fsbytes -= hdrlen;
 2212                                         hdrlen = 0;
 2213                                 }
 2214                         } else if (error == 0)
 2215                                 error = err;
 2216                         m = NULL;       /* pru_send always consumes */
 2217                 }
 2218 
 2219                 /* Quit outer loop on error or when we're done. */
 2220                 if (done) 
 2221                         break;
 2222                 if (error)
 2223                         goto done;
 2224         }
 2225 
 2226         /*
 2227          * Send trailers. Wimp out and use writev(2).
 2228          */
 2229         if (trl_uio != NULL) {
                      /* The sblock is dropped here, so skip "done" below. */
 2230                 sbunlock(&so->so_snd);
 2231                 error = kern_writev(td, uap->s, trl_uio);
 2232                 if (error == 0)
 2233                         sbytes += td->td_retval[0];
 2234                 goto out;
 2235         }
 2236 
              /* done: the sblock is held.  out: the sblock is not held. */
 2237 done:
 2238         sbunlock(&so->so_snd);
 2239 out:
 2240         /*
 2241          * If there was no error we have to clear td->td_retval[0]
 2242          * because it may have been set by writev.
 2243          */
 2244         if (error == 0) {
 2245                 td->td_retval[0] = 0;
 2246         }
              /*
               * The byte count is reported regardless of error; the result
               * of copyout() itself is not checked.
               */
 2247         if (uap->sbytes != NULL) {
 2248                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
 2249         }
 2250         if (obj != NULL)
 2251                 vm_object_deallocate(obj);
              /*
               * NOTE(review): vp has no initializer, so when the
               * fgetvp_read() call at the top fails this test relies on
               * that function having stored NULL through &vp -- confirm.
               */
 2252         if (vp != NULL) {
 2253                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2254                 vrele(vp);
 2255                 VFS_UNLOCK_GIANT(vfslocked);
 2256         }
 2257         if (so)
 2258                 fdrop(sock_fp, td);
 2259         if (m)
 2260                 m_freem(m);
 2261 
              /*
               * SF_SYNC: sleep (once) on the condition variable if any
               * mbufs created above are still outstanding, then tear the
               * sync structure down.  The mutex is still held when it is
               * destroyed.
               */
 2262         if (sfs != NULL) {
 2263                 mtx_lock(&sfs->mtx);
 2264                 if (sfs->count != 0)
 2265                         cv_wait(&sfs->cv, &sfs->mtx);
 2266                 KASSERT(sfs->count == 0, ("sendfile sync still busy"));
 2267                 cv_destroy(&sfs->cv);
 2268                 mtx_destroy(&sfs->mtx);
 2269                 free(sfs, M_TEMP);
 2270         }
 2271 
 2272         if (error == ERESTART)
 2273                 error = EINTR;
 2274 
 2275         return (error);
 2276 }
 2277 
 2278 /*
 2279  * SCTP syscalls.
 2280  * Functionality only compiled in if SCTP is defined in the kernel Makefile,
 2281  * otherwise all return EOPNOTSUPP.
 2282  * XXX: We should make this loadable one day.
 2283  */
 2284 int
 2285 sctp_peeloff(td, uap)
 2286         struct thread *td;
 2287         struct sctp_peeloff_args /* {
 2288                 int     sd;
 2289                 caddr_t name;
 2290         } */ *uap;
 2291 {
              /*
               * Create a new socket for the association uap->name of the
               * SCTP socket uap->sd and return a descriptor for it in
               * td->td_retval[0].
               *
               * Steps: look up the socket behind uap->sd, ask
               * sctp_can_peel_off() whether the association may be
               * detached, allocate a file/descriptor pair, create the new
               * socket with sonewconn(), take it off the head's completed
               * queue, attach it to the new file and call
               * sctp_do_peeloff().
               *
               * Returns 0 or an errno value.  On any failure after the
               * descriptor has been allocated the descriptor is closed
               * again.  Without SCTP (and INET or INET6) compiled in, the
               * function returns EOPNOTSUPP.
               */
 2292 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2293         struct filedesc *fdp;
 2294         struct file *nfp = NULL;
 2295         int error;
 2296         struct socket *head, *so;
 2297         int fd;
 2298         u_int fflag;
 2299 
 2300         fdp = td->td_proc->p_fd;
 2301         AUDIT_ARG_FD(uap->sd);
 2302         error = fgetsock(td, uap->sd, &head, &fflag);
 2303         if (error)
 2304                 goto done2;
 2305         error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
 2306         if (error)
 2307                 goto done2;
 2308         /*
 2309          * At this point we know we do have an assoc to pull
 2310          * we proceed to get the fd setup. This may block
 2311          * but that is ok.
 2312          */
 2313 
 2314         error = falloc(td, &nfp, &fd);
 2315         if (error)
 2316                 goto done;
 2317         td->td_retval[0] = fd;
 2318 
 2319         CURVNET_SET(head->so_vnet);
 2320         so = sonewconn(head, SS_ISCONNECTED);
 2321         if (so == NULL) {
                      /*
                       * error is still 0 at this point.  Without an
                       * explicit error the descriptor allocated above
                       * would not be closed at "noconnection" and the
                       * caller would be handed a descriptor that has no
                       * socket attached.
                       */
                      error = ENOMEM;
 2322                 goto noconnection;
              }
 2323         /*
 2324          * Before changing the flags on the socket, we have to bump the
 2325          * reference count.  Otherwise, if the protocol calls sofree(),
 2326          * the socket will be released due to a zero refcount.
 2327          */
 2328         SOCK_LOCK(so);
 2329         soref(so);                      /* file descriptor reference */
 2330         SOCK_UNLOCK(so);
 2331 
 2332         ACCEPT_LOCK();
 2333 
              /*
               * Take the new socket off the head's completed queue and
               * make it a stand-alone socket that inherits the head's
               * non-blocking setting.
               */
 2334         TAILQ_REMOVE(&head->so_comp, so, so_list);
 2335         head->so_qlen--;
 2336         so->so_state |= (head->so_state & SS_NBIO);
 2337         so->so_state &= ~SS_NOFDREF;
 2338         so->so_qstate &= ~SQ_COMP;
 2339         so->so_head = NULL;
 2340         ACCEPT_UNLOCK();
 2341         finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 2342         error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
 2343         if (error)
 2344                 goto noconnection;
              /* Copy the head's SIGIO ownership, if any, to the new socket. */
 2345         if (head->so_sigio != NULL)
 2346                 fsetown(fgetown(&head->so_sigio), &so->so_sigio);
 2347 
 2348 noconnection:
 2349         /*
 2350          * close the new descriptor, assuming someone hasn't ripped it
 2351          * out from under us.
 2352          */
 2353         if (error)
 2354                 fdclose(fdp, nfp, fd, td);
 2355 
 2356         /*
 2357          * Release explicitly held references before returning.
 2358          */
 2359         CURVNET_RESTORE();
 2360 done:
              /* nfp is NULL only when falloc() itself failed. */
 2361         if (nfp != NULL)
 2362                 fdrop(nfp, td);
 2363         fputsock(head);
 2364 done2:
 2365         return (error);
 2366 #else  /* SCTP */
 2367         return (EOPNOTSUPP);
 2368 #endif /* SCTP */
 2369 }
 2370 
 2371 int
 2372 sctp_generic_sendmsg (td, uap)
 2373         struct thread *td;
 2374         struct sctp_generic_sendmsg_args /* {
 2375                 int sd, 
 2376                 caddr_t msg, 
 2377                 int mlen, 
 2378                 caddr_t to, 
 2379                 __socklen_t tolen, 
 2380                 struct sctp_sndrcvinfo *sinfo, 
 2381                 int flags
 2382         } */ *uap;
 2383 {
              /*
               * Send the single user buffer uap->msg (uap->mlen bytes) on
               * the socket uap->sd through sctp_lower_sosend().
               *
               * uap->sinfo, when non-NULL, is copied in and passed along;
               * uap->to is copied in with getsockaddr() when uap->tolen is
               * non-zero.
               *
               * On success td->td_retval[0] holds the number of bytes
               * consumed from the uio.  Returns 0 or an errno value;
               * without SCTP (and INET or INET6) compiled in, returns
               * EOPNOTSUPP.
               */
 2384 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2385         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2386         struct socket *so;
 2387         struct file *fp = NULL;
 2388         int use_rcvinfo = 1;
 2389         int error = 0, len;
 2390         struct sockaddr *to = NULL;
 2391 #ifdef KTRACE
 2392         struct uio *ktruio = NULL;
 2393 #endif
 2394         struct uio auio;
 2395         struct iovec iov[1];
 2396 
 2397         if (uap->sinfo) {
 2398                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2399                 if (error)
 2400                         return (error);
 2401                 u_sinfo = &sinfo;
 2402         }
 2403         if (uap->tolen) {
 2404                 error = getsockaddr(&to, uap->to, uap->tolen);
 2405                 if (error) {
 2406                         to = NULL;
 2407                         goto sctp_bad2;
 2408                 }
 2409         }
 2410 
 2411         AUDIT_ARG_FD(uap->sd);
 2412         error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
 2413         if (error)
 2414                 goto sctp_bad;
 2415 #ifdef KTRACE
 2416         if (to && (KTRPOINT(td, KTR_STRUCT)))
 2417                 ktrsockaddr(to);
 2418 #endif
 2419 
 2420         iov[0].iov_base = uap->msg;
 2421         iov[0].iov_len = uap->mlen;
 2422 
 2423         so = (struct socket *)fp->f_data;
 2424 #ifdef MAC
 2425         error = mac_socket_check_send(td->td_ucred, so);
 2426         if (error)
 2427                 goto sctp_bad;
 2428 #endif /* MAC */
 2429 
              /* Describe the one user-space buffer as a write uio. */
 2430         auio.uio_iov =  iov;
 2431         auio.uio_iovcnt = 1;
 2432         auio.uio_segflg = UIO_USERSPACE;
 2433         auio.uio_rw = UIO_WRITE;
 2434         auio.uio_td = td;
 2435         auio.uio_offset = 0;                    /* XXX */
              /* This store is overwritten by the assignment that follows. */
 2436         auio.uio_resid = 0;
 2437         len = auio.uio_resid = uap->mlen;
 2438         CURVNET_SET(so->so_vnet);
 2439         error = sctp_lower_sosend(so, to, &auio,
 2440                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2441                     uap->flags, use_rcvinfo, u_sinfo, td);
 2442         CURVNET_RESTORE();
 2443         if (error) {
                      /*
                       * If part of the data was consumed before the call
                       * was interrupted or would have blocked, report the
                       * partial transfer as success.
                       */
 2444                 if (auio.uio_resid != len && (error == ERESTART ||
 2445                     error == EINTR || error == EWOULDBLOCK))
 2446                         error = 0;
 2447                 /* Generation of SIGPIPE can be controlled per socket. */
 2448                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2449                     !(uap->flags & MSG_NOSIGNAL)) {
 2450                         PROC_LOCK(td->td_proc);
 2451                         psignal(td->td_proc, SIGPIPE);
 2452                         PROC_UNLOCK(td->td_proc);
 2453                 }
 2454         }
 2455         if (error == 0)
 2456                 td->td_retval[0] = len - auio.uio_resid;
 2457 #ifdef KTRACE
              /*
               * NOTE(review): ktruio is initialised to NULL and never
               * assigned in this function, so this branch cannot run as
               * written.
               */
 2458         if (ktruio != NULL) {
 2459                 ktruio->uio_resid = td->td_retval[0];
 2460                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2461         }
 2462 #endif /* KTRACE */
              /*
               * Cleanup ladder: sctp_bad drops the file reference (fp is
               * still NULL when getsock() failed), sctp_bad2 frees the
               * copied-in destination address.
               */
 2463 sctp_bad:
 2464         if (fp)
 2465                 fdrop(fp, td);
 2466 sctp_bad2:
 2467         if (to)
 2468                 free(to, M_SONAME);
 2469         return (error);
 2470 #else  /* SCTP */
 2471         return (EOPNOTSUPP);
 2472 #endif /* SCTP */
 2473 }
 2474 
 2475 int
 2476 sctp_generic_sendmsg_iov(td, uap)
 2477         struct thread *td;
 2478         struct sctp_generic_sendmsg_iov_args /* {
 2479                 int sd, 
 2480                 struct iovec *iov, 
 2481                 int iovlen, 
 2482                 caddr_t to, 
 2483                 __socklen_t tolen, 
 2484                 struct sctp_sndrcvinfo *sinfo, 
 2485                 int flags
 2486         } */ *uap;
 2487 {
              /*
               * Scatter/gather variant of sctp_generic_sendmsg: send the
               * uap->iovlen user buffers described by uap->iov on the
               * socket uap->sd through sctp_lower_sosend().
               *
               * uap->sinfo, when non-NULL, is copied in and passed along;
               * uap->to is copied in with getsockaddr() when uap->tolen is
               * non-zero.
               *
               * On success td->td_retval[0] holds the number of bytes
               * consumed from the uio.  Returns 0 or an errno value
               * (EINVAL when the summed iovec lengths go negative);
               * without SCTP (and INET or INET6) compiled in, returns
               * EOPNOTSUPP.
               */
 2488 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2489         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2490         struct socket *so;
 2491         struct file *fp = NULL;
 2492         int use_rcvinfo = 1;
 2493         int error=0, len, i;
 2494         struct sockaddr *to = NULL;
 2495 #ifdef KTRACE
 2496         struct uio *ktruio = NULL;
 2497 #endif
 2498         struct uio auio;
 2499         struct iovec *iov, *tiov;
 2500 
 2501         if (uap->sinfo) {
 2502                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2503                 if (error)
 2504                         return (error);
 2505                 u_sinfo = &sinfo;
 2506         }
 2507         if (uap->tolen) {
 2508                 error = getsockaddr(&to, uap->to, uap->tolen);
 2509                 if (error) {
 2510                         to = NULL;
 2511                         goto sctp_bad2;
 2512                 }
 2513         }
 2514 
 2515         AUDIT_ARG_FD(uap->sd);
 2516         error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
 2517         if (error)
 2518                 goto sctp_bad1;
 2519 
              /*
               * Copy the iovec array in from user space; processes flagged
               * SV_ILP32 go through the 32-bit copy routine.  Failures
               * leave via sctp_bad1, which does not free iov.
               */
 2520 #ifdef COMPAT_FREEBSD32
 2521         if (SV_CURPROC_FLAG(SV_ILP32))
 2522                 error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 2523                     uap->iovlen, &iov, EMSGSIZE);
 2524         else
 2525 #endif
 2526                 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2527         if (error)
 2528                 goto sctp_bad1;
 2529 #ifdef KTRACE
 2530         if (to && (KTRPOINT(td, KTR_STRUCT)))
 2531                 ktrsockaddr(to);
 2532 #endif
 2533 
 2534         so = (struct socket *)fp->f_data;
 2535 #ifdef MAC
 2536         error = mac_socket_check_send(td->td_ucred, so);
 2537         if (error)
 2538                 goto sctp_bad;
 2539 #endif /* MAC */
 2540 
 2541         auio.uio_iov = iov;
 2542         auio.uio_iovcnt = uap->iovlen;
 2543         auio.uio_segflg = UIO_USERSPACE;
 2544         auio.uio_rw = UIO_WRITE;
 2545         auio.uio_td = td;
 2546         auio.uio_offset = 0;                    /* XXX */
 2547         auio.uio_resid = 0;
              /*
               * Sum the segment lengths into uio_resid, rejecting the
               * request as soon as the running total becomes negative.
               */
 2548         tiov = iov;
 2549         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2550                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2551                         error = EINVAL;
 2552                         goto sctp_bad;
 2553                 }
 2554         }
 2555         len = auio.uio_resid;
 2556         CURVNET_SET(so->so_vnet);
 2557         error = sctp_lower_sosend(so, to, &auio,
 2558                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2559                     uap->flags, use_rcvinfo, u_sinfo, td);
 2560         CURVNET_RESTORE();
 2561         if (error) {
                      /*
                       * If part of the data was consumed before the call
                       * was interrupted or would have blocked, report the
                       * partial transfer as success.
                       */
 2562                 if (auio.uio_resid != len && (error == ERESTART ||
 2563                     error == EINTR || error == EWOULDBLOCK))
 2564                         error = 0;
 2565                 /* Generation of SIGPIPE can be controlled per socket */
 2566                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2567                     !(uap->flags & MSG_NOSIGNAL)) {
 2568                         PROC_LOCK(td->td_proc);
 2569                         psignal(td->td_proc, SIGPIPE);
 2570                         PROC_UNLOCK(td->td_proc);
 2571                 }
 2572         }
 2573         if (error == 0)
 2574                 td->td_retval[0] = len - auio.uio_resid;
 2575 #ifdef KTRACE
              /*
               * NOTE(review): ktruio is initialised to NULL and never
               * assigned in this function, so this branch cannot run as
               * written.
               */
 2576         if (ktruio != NULL) {
 2577                 ktruio->uio_resid = td->td_retval[0];
 2578                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2579         }
 2580 #endif /* KTRACE */
              /*
               * Cleanup ladder, entered at the label matching how far
               * setup got: sctp_bad frees the copied-in iovec array,
               * sctp_bad1 drops the file reference, sctp_bad2 frees the
               * copied-in destination address.
               */
 2581 sctp_bad:
 2582         free(iov, M_IOV);
 2583 sctp_bad1:
 2584         if (fp)
 2585                 fdrop(fp, td);
 2586 sctp_bad2:
 2587         if (to)
 2588                 free(to, M_SONAME);
 2589         return (error);
 2590 #else  /* SCTP */
 2591         return (EOPNOTSUPP);
 2592 #endif /* SCTP */
 2593 }
 2594 
 2595 int
 2596 sctp_generic_recvmsg(td, uap)
 2597         struct thread *td;
 2598         struct sctp_generic_recvmsg_args /* {
 2599                 int sd, 
 2600                 struct iovec *iov, 
 2601                 int iovlen,
 2602                 struct sockaddr *from, 
 2603                 __socklen_t *fromlenaddr,
 2604                 struct sctp_sndrcvinfo *sinfo, 
 2605                 int *msg_flags
 2606         } */ *uap;
 2607 {
 2608 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 2609         u_int8_t sockbufstore[256];
 2610         struct uio auio;
 2611         struct iovec *iov, *tiov;
 2612         struct sctp_sndrcvinfo sinfo;
 2613         struct socket *so;
 2614         struct file *fp = NULL;
 2615         struct sockaddr *fromsa;
 2616         int fromlen;
 2617         int len, i, msg_flags;
 2618         int error = 0;
 2619 #ifdef KTRACE
 2620         struct uio *ktruio = NULL;
 2621 #endif
 2622 
 2623         AUDIT_ARG_FD(uap->sd);
 2624         error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
 2625         if (error) {
 2626                 return (error);
 2627         }
 2628 #ifdef COMPAT_FREEBSD32
 2629         if (SV_CURPROC_FLAG(SV_ILP32))
 2630                 error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 2631                     uap->iovlen, &iov, EMSGSIZE);
 2632         else
 2633 #endif
 2634                 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2635         if (error)
 2636                 goto out1;
 2637 
 2638         so = fp->f_data;
 2639 #ifdef MAC
 2640         error = mac_socket_check_receive(td->td_ucred, so);
 2641         if (error) {
 2642                 goto out;
 2643         }
 2644 #endif /* MAC */
 2645 
 2646         if (uap->fromlenaddr) {
 2647                 error = copyin(uap->fromlenaddr,
 2648                     &fromlen, sizeof (fromlen));
 2649                 if (error) {
 2650                         goto out;
 2651                 }
 2652         } else {
 2653                 fromlen = 0;
 2654         }
 2655         if (uap->msg_flags) {
 2656                 error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
 2657                 if (error) {
 2658                         goto out;
 2659                 }
 2660         } else {
 2661                 msg_flags = 0;
 2662         }
 2663         auio.uio_iov = iov;
 2664         auio.uio_iovcnt = uap->iovlen;
 2665         auio.uio_segflg = UIO_USERSPACE;
 2666         auio.uio_rw = UIO_READ;
 2667         auio.uio_td = td;
 2668         auio.uio_offset = 0;                    /* XXX */
 2669         auio.uio_resid = 0;
 2670         tiov = iov;
 2671         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2672                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2673                         error = EINVAL;
 2674                         goto out;
 2675                 }
 2676         }
 2677         len = auio.uio_resid;
 2678         fromsa = (struct sockaddr *)sockbufstore;
 2679 
 2680 #ifdef KTRACE
 2681         if (KTRPOINT(td, KTR_GENIO))
 2682                 ktruio = cloneuio(&auio);
 2683 #endif /* KTRACE */
 2684         memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
 2685         CURVNET_SET(so->so_vnet);
 2686         error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
 2687                     fromsa, fromlen, &msg_flags,
 2688                     (struct sctp_sndrcvinfo *)&sinfo, 1);
 2689         CURVNET_RESTORE();
 2690         if (error) {
 2691                 if (auio.uio_resid != (int)len && (error == ERESTART ||
 2692                     error == EINTR || error == EWOULDBLOCK))
 2693                         error = 0;
 2694         } else {
 2695                 if (uap->sinfo)
 2696                         error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
 2697         }
 2698 #ifdef KTRACE
 2699         if (ktruio != NULL) {
 2700                 ktruio->uio_resid = (int)len - auio.uio_resid;
 2701                 ktrgenio(uap->sd, UIO_READ, ktruio, error);
 2702         }
 2703 #endif /* KTRACE */
 2704         if (error)
 2705                 goto out;
 2706         td->td_retval[0] = (int)len - auio.uio_resid;
 2707 
 2708         if (fromlen && uap->from) {
 2709                 len = fromlen;
 2710                 if (len <= 0 || fromsa == 0)
 2711                         len = 0;
 2712                 else {
 2713                         len = MIN(len, fromsa->sa_len);
 2714                         error = copyout(fromsa, uap->from, (unsigned)len);
 2715                         if (error)
 2716                                 goto out;
 2717                 }
 2718                 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
 2719                 if (error) {
 2720                         goto out;
 2721                 }
 2722         }
 2723 #ifdef KTRACE
 2724         if (KTRPOINT(td, KTR_STRUCT))
 2725                 ktrsockaddr(fromsa);
 2726 #endif
 2727         if (uap->msg_flags) {
 2728                 error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
 2729                 if (error) {
 2730                         goto out;
 2731                 }
 2732         }
 2733 out:
 2734         free(iov, M_IOV);
 2735 out1:
 2736         if (fp) 
 2737                 fdrop(fp, td);
 2738 
 2739         return (error);
 2740 #else  /* SCTP */
 2741         return (EOPNOTSUPP);
 2742 #endif /* SCTP */
 2743 }

Cache object: 11d3666301fcc7b77eccd752932e4ee0


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.