The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * sendfile(2) and related extensions:
    6  * Copyright (c) 1998, David Greenman. All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD$");
   37 
   38 #include "opt_sctp.h"
   39 #include "opt_compat.h"
   40 #include "opt_ktrace.h"
   41 #include "opt_mac.h"
   42 
   43 #include <sys/param.h>
   44 #include <sys/systm.h>
   45 #include <sys/kernel.h>
   46 #include <sys/lock.h>
   47 #include <sys/mutex.h>
   48 #include <sys/sysproto.h>
   49 #include <sys/malloc.h>
   50 #include <sys/filedesc.h>
   51 #include <sys/event.h>
   52 #include <sys/proc.h>
   53 #include <sys/fcntl.h>
   54 #include <sys/file.h>
   55 #include <sys/filio.h>
   56 #include <sys/mount.h>
   57 #include <sys/mbuf.h>
   58 #include <sys/protosw.h>
   59 #include <sys/sf_buf.h>
   60 #include <sys/socket.h>
   61 #include <sys/socketvar.h>
   62 #include <sys/signalvar.h>
   63 #include <sys/syscallsubr.h>
   64 #include <sys/sysctl.h>
   65 #include <sys/uio.h>
   66 #include <sys/vnode.h>
   67 #ifdef KTRACE
   68 #include <sys/ktrace.h>
   69 #endif
   70 
   71 #include <security/mac/mac_framework.h>
   72 
   73 #include <vm/vm.h>
   74 #include <vm/vm_object.h>
   75 #include <vm/vm_page.h>
   76 #include <vm/vm_pageout.h>
   77 #include <vm/vm_kern.h>
   78 #include <vm/vm_extern.h>
   79 
   80 #ifdef SCTP
   81 #include <netinet/sctp.h>
   82 #include <netinet/sctp_peeloff.h>
   83 #endif /* SCTP */
   84 
   85 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
   86 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
   87 
   88 static int accept1(struct thread *td, struct accept_args *uap, int compat);
   89 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
   90 static int getsockname1(struct thread *td, struct getsockname_args *uap,
   91                         int compat);
   92 static int getpeername1(struct thread *td, struct getpeername_args *uap,
   93                         int compat);
   94 
   95 /*
   96  * NSFBUFS-related variables and associated sysctls
   97  */
   98 int nsfbufs;
   99 int nsfbufspeak;
  100 int nsfbufsused;
  101 
  102 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
  103     "Maximum number of sendfile(2) sf_bufs available");
  104 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
  105     "Number of sendfile(2) sf_bufs at peak usage");
  106 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
  107     "Number of sendfile(2) sf_bufs in use");
  108 
  109 /*
  110  * Convert a user file descriptor to a kernel file entry.  A reference on the
  111  * file entry is held upon returning.  This is lighter weight than
  112  * fgetsock(), which bumps the socket reference drops the file reference
  113  * count instead, as this approach avoids several additional mutex operations
  114  * associated with the additional reference count.  If requested, return the
  115  * open file flags.
  116  */
  117 static int
  118 getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
  119 {
  120         struct file *fp;
  121         int error;
  122 
  123         fp = NULL;
  124         if (fdp == NULL)
  125                 error = EBADF;
  126         else {
  127                 FILEDESC_SLOCK(fdp);
  128                 fp = fget_locked(fdp, fd);
  129                 if (fp == NULL)
  130                         error = EBADF;
  131                 else if (fp->f_type != DTYPE_SOCKET) {
  132                         fp = NULL;
  133                         error = ENOTSOCK;
  134                 } else {
  135                         fhold(fp);
  136                         if (fflagp != NULL)
  137                                 *fflagp = fp->f_flag;
  138                         error = 0;
  139                 }
  140                 FILEDESC_SUNLOCK(fdp);
  141         }
  142         *fpp = fp;
  143         return (error);
  144 }
  145 
  146 /*
  147  * System call interface to the socket abstraction.
  148  */
  149 #if defined(COMPAT_43)
  150 #define COMPAT_OLDSOCK
  151 #endif
  152 
  153 int
  154 socket(td, uap)
  155         struct thread *td;
  156         struct socket_args /* {
  157                 int     domain;
  158                 int     type;
  159                 int     protocol;
  160         } */ *uap;
  161 {
  162         struct filedesc *fdp;
  163         struct socket *so;
  164         struct file *fp;
  165         int fd, error;
  166 
  167 #ifdef MAC
  168         error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
  169             uap->protocol);
  170         if (error)
  171                 return (error);
  172 #endif
  173         fdp = td->td_proc->p_fd;
  174         error = falloc(td, &fp, &fd);
  175         if (error)
  176                 return (error);
  177         /* An extra reference on `fp' has been held for us by falloc(). */
  178         error = socreate(uap->domain, &so, uap->type, uap->protocol,
  179             td->td_ucred, td);
  180         if (error) {
  181                 fdclose(fdp, fp, fd, td);
  182         } else {
  183                 FILE_LOCK(fp);
  184                 fp->f_data = so;        /* already has ref count */
  185                 fp->f_flag = FREAD|FWRITE;
  186                 fp->f_type = DTYPE_SOCKET;
  187                 fp->f_ops = &socketops;
  188                 FILE_UNLOCK(fp);
  189                 td->td_retval[0] = fd;
  190         }
  191         fdrop(fp, td);
  192         return (error);
  193 }
  194 
  195 /* ARGSUSED */
  196 int
  197 bind(td, uap)
  198         struct thread *td;
  199         struct bind_args /* {
  200                 int     s;
  201                 caddr_t name;
  202                 int     namelen;
  203         } */ *uap;
  204 {
  205         struct sockaddr *sa;
  206         int error;
  207 
  208         if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
  209                 return (error);
  210 
  211         error = kern_bind(td, uap->s, sa);
  212         free(sa, M_SONAME);
  213         return (error);
  214 }
  215 
  216 int
  217 kern_bind(td, fd, sa)
  218         struct thread *td;
  219         int fd;
  220         struct sockaddr *sa;
  221 {
  222         struct socket *so;
  223         struct file *fp;
  224         int error;
  225 
  226         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
  227         if (error)
  228                 return (error);
  229         so = fp->f_data;
  230 #ifdef MAC
  231         SOCK_LOCK(so);
  232         error = mac_check_socket_bind(td->td_ucred, so, sa);
  233         SOCK_UNLOCK(so);
  234         if (error)
  235                 goto done;
  236 #endif
  237         error = sobind(so, sa, td);
  238 #ifdef MAC
  239 done:
  240 #endif
  241         fdrop(fp, td);
  242         return (error);
  243 }
  244 
  245 /* ARGSUSED */
  246 int
  247 listen(td, uap)
  248         struct thread *td;
  249         struct listen_args /* {
  250                 int     s;
  251                 int     backlog;
  252         } */ *uap;
  253 {
  254         struct socket *so;
  255         struct file *fp;
  256         int error;
  257 
  258         error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
  259         if (error == 0) {
  260                 so = fp->f_data;
  261 #ifdef MAC
  262                 SOCK_LOCK(so);
  263                 error = mac_check_socket_listen(td->td_ucred, so);
  264                 SOCK_UNLOCK(so);
  265                 if (error)
  266                         goto done;
  267 #endif
  268                 error = solisten(so, uap->backlog, td);
  269 #ifdef MAC
  270 done:
  271 #endif
  272                 fdrop(fp, td);
  273         }
  274         return(error);
  275 }
  276 
  277 /*
  278  * accept1()
  279  */
  280 static int
  281 accept1(td, uap, compat)
  282         struct thread *td;
  283         struct accept_args /* {
  284                 int     s;
  285                 struct sockaddr * __restrict name;
  286                 socklen_t       * __restrict anamelen;
  287         } */ *uap;
  288         int compat;
  289 {
  290         struct sockaddr *name;
  291         socklen_t namelen;
  292         struct file *fp;
  293         int error;
  294 
  295         if (uap->name == NULL)
  296                 return (kern_accept(td, uap->s, NULL, NULL, NULL));
  297 
  298         error = copyin(uap->anamelen, &namelen, sizeof (namelen));
  299         if (error)
  300                 return (error);
  301 
  302         error = kern_accept(td, uap->s, &name, &namelen, &fp);
  303 
  304         /*
  305          * return a namelen of zero for older code which might
  306          * ignore the return value from accept.
  307          */
  308         if (error) {
  309                 (void) copyout(&namelen,
  310                     uap->anamelen, sizeof(*uap->anamelen));
  311                 return (error);
  312         }
  313 
  314         if (error == 0 && name != NULL) {
  315 #ifdef COMPAT_OLDSOCK
  316                 if (compat)
  317                         ((struct osockaddr *)name)->sa_family =
  318                             name->sa_family;
  319 #endif
  320                 error = copyout(name, uap->name, namelen);
  321         }
  322         if (error == 0)
  323                 error = copyout(&namelen, uap->anamelen,
  324                     sizeof(namelen));
  325         if (error)
  326                 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td);
  327         fdrop(fp, td);
  328         free(name, M_SONAME);
  329         return (error);
  330 }
  331 
  332 int
  333 kern_accept(struct thread *td, int s, struct sockaddr **name,
  334     socklen_t *namelen, struct file **fp)
  335 {
  336         struct filedesc *fdp;
  337         struct file *headfp, *nfp = NULL;
  338         struct sockaddr *sa = NULL;
  339         int error;
  340         struct socket *head, *so;
  341         int fd;
  342         u_int fflag;
  343         pid_t pgid;
  344         int tmp;
  345 
  346         if (name) {
  347                 *name = NULL;
  348                 if (*namelen < 0)
  349                         return (EINVAL);
  350         }
  351 
  352         fdp = td->td_proc->p_fd;
  353         error = getsock(fdp, s, &headfp, &fflag);
  354         if (error)
  355                 return (error);
  356         head = headfp->f_data;
  357         if ((head->so_options & SO_ACCEPTCONN) == 0) {
  358                 error = EINVAL;
  359                 goto done;
  360         }
  361 #ifdef MAC
  362         SOCK_LOCK(head);
  363         error = mac_check_socket_accept(td->td_ucred, head);
  364         SOCK_UNLOCK(head);
  365         if (error != 0)
  366                 goto done;
  367 #endif
  368         error = falloc(td, &nfp, &fd);
  369         if (error)
  370                 goto done;
  371         ACCEPT_LOCK();
  372         if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
  373                 ACCEPT_UNLOCK();
  374                 error = EWOULDBLOCK;
  375                 goto noconnection;
  376         }
  377         while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
  378                 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
  379                         head->so_error = ECONNABORTED;
  380                         break;
  381                 }
  382                 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
  383                     "accept", 0);
  384                 if (error) {
  385                         ACCEPT_UNLOCK();
  386                         goto noconnection;
  387                 }
  388         }
  389         if (head->so_error) {
  390                 error = head->so_error;
  391                 head->so_error = 0;
  392                 ACCEPT_UNLOCK();
  393                 goto noconnection;
  394         }
  395         so = TAILQ_FIRST(&head->so_comp);
  396         KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
  397         KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
  398 
  399         /*
  400          * Before changing the flags on the socket, we have to bump the
  401          * reference count.  Otherwise, if the protocol calls sofree(),
  402          * the socket will be released due to a zero refcount.
  403          */
  404         SOCK_LOCK(so);                  /* soref() and so_state update */
  405         soref(so);                      /* file descriptor reference */
  406 
  407         TAILQ_REMOVE(&head->so_comp, so, so_list);
  408         head->so_qlen--;
  409         so->so_state |= (head->so_state & SS_NBIO);
  410         so->so_qstate &= ~SQ_COMP;
  411         so->so_head = NULL;
  412 
  413         SOCK_UNLOCK(so);
  414         ACCEPT_UNLOCK();
  415 
  416         /* An extra reference on `nfp' has been held for us by falloc(). */
  417         td->td_retval[0] = fd;
  418 
  419         /* connection has been removed from the listen queue */
  420         KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
  421 
  422         pgid = fgetown(&head->so_sigio);
  423         if (pgid != 0)
  424                 fsetown(pgid, &so->so_sigio);
  425 
  426         FILE_LOCK(nfp);
  427         nfp->f_data = so;       /* nfp has ref count from falloc */
  428         nfp->f_flag = fflag;
  429         nfp->f_type = DTYPE_SOCKET;
  430         nfp->f_ops = &socketops;
  431         FILE_UNLOCK(nfp);
  432         /* Sync socket nonblocking/async state with file flags */
  433         tmp = fflag & FNONBLOCK;
  434         (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
  435         tmp = fflag & FASYNC;
  436         (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
  437         sa = 0;
  438         error = soaccept(so, &sa);
  439         if (error) {
  440                 /*
  441                  * return a namelen of zero for older code which might
  442                  * ignore the return value from accept.
  443                  */
  444                 if (name)
  445                         *namelen = 0;
  446                 goto noconnection;
  447         }
  448         if (sa == NULL) {
  449                 if (name)
  450                         *namelen = 0;
  451                 goto done;
  452         }
  453         if (name) {
  454                 /* check sa_len before it is destroyed */
  455                 if (*namelen > sa->sa_len)
  456                         *namelen = sa->sa_len;
  457                 *name = sa;
  458                 sa = NULL;
  459         }
  460 noconnection:
  461         if (sa)
  462                 FREE(sa, M_SONAME);
  463 
  464         /*
  465          * close the new descriptor, assuming someone hasn't ripped it
  466          * out from under us.
  467          */
  468         if (error)
  469                 fdclose(fdp, nfp, fd, td);
  470 
  471         /*
  472          * Release explicitly held references before returning.  We return
  473          * a reference on nfp to the caller on success if they request it.
  474          */
  475 done:
  476         if (fp != NULL) {
  477                 if (error == 0) {
  478                         *fp = nfp;
  479                         nfp = NULL;
  480                 } else
  481                         *fp = NULL;
  482         }
  483         if (nfp != NULL)
  484                 fdrop(nfp, td);
  485         fdrop(headfp, td);
  486         return (error);
  487 }
  488 
  489 int
  490 accept(td, uap)
  491         struct thread *td;
  492         struct accept_args *uap;
  493 {
  494 
  495         return (accept1(td, uap, 0));
  496 }
  497 
  498 #ifdef COMPAT_OLDSOCK
  499 int
  500 oaccept(td, uap)
  501         struct thread *td;
  502         struct accept_args *uap;
  503 {
  504 
  505         return (accept1(td, uap, 1));
  506 }
  507 #endif /* COMPAT_OLDSOCK */
  508 
  509 /* ARGSUSED */
  510 int
  511 connect(td, uap)
  512         struct thread *td;
  513         struct connect_args /* {
  514                 int     s;
  515                 caddr_t name;
  516                 int     namelen;
  517         } */ *uap;
  518 {
  519         struct sockaddr *sa;
  520         int error;
  521 
  522         error = getsockaddr(&sa, uap->name, uap->namelen);
  523         if (error)
  524                 return (error);
  525 
  526         error = kern_connect(td, uap->s, sa);
  527         free(sa, M_SONAME);
  528         return (error);
  529 }
  530 
  531 
  532 int
  533 kern_connect(td, fd, sa)
  534         struct thread *td;
  535         int fd;
  536         struct sockaddr *sa;
  537 {
  538         struct socket *so;
  539         struct file *fp;
  540         int error;
  541         int interrupted = 0;
  542 
  543         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
  544         if (error)
  545                 return (error);
  546         so = fp->f_data;
  547         if (so->so_state & SS_ISCONNECTING) {
  548                 error = EALREADY;
  549                 goto done1;
  550         }
  551 #ifdef MAC
  552         SOCK_LOCK(so);
  553         error = mac_check_socket_connect(td->td_ucred, so, sa);
  554         SOCK_UNLOCK(so);
  555         if (error)
  556                 goto bad;
  557 #endif
  558         error = soconnect(so, sa, td);
  559         if (error)
  560                 goto bad;
  561         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
  562                 error = EINPROGRESS;
  563                 goto done1;
  564         }
  565         SOCK_LOCK(so);
  566         while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  567                 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
  568                     "connec", 0);
  569                 if (error) {
  570                         if (error == EINTR || error == ERESTART)
  571                                 interrupted = 1;
  572                         break;
  573                 }
  574         }
  575         if (error == 0) {
  576                 error = so->so_error;
  577                 so->so_error = 0;
  578         }
  579         SOCK_UNLOCK(so);
  580 bad:
  581         if (!interrupted)
  582                 so->so_state &= ~SS_ISCONNECTING;
  583         if (error == ERESTART)
  584                 error = EINTR;
  585 done1:
  586         fdrop(fp, td);
  587         return (error);
  588 }
  589 
  590 int
  591 socketpair(td, uap)
  592         struct thread *td;
  593         struct socketpair_args /* {
  594                 int     domain;
  595                 int     type;
  596                 int     protocol;
  597                 int     *rsv;
  598         } */ *uap;
  599 {
  600         struct filedesc *fdp = td->td_proc->p_fd;
  601         struct file *fp1, *fp2;
  602         struct socket *so1, *so2;
  603         int fd, error, sv[2];
  604 
  605 #ifdef MAC
  606         /* We might want to have a separate check for socket pairs. */
  607         error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
  608             uap->protocol);
  609         if (error)
  610                 return (error);
  611 #endif
  612 
  613         error = socreate(uap->domain, &so1, uap->type, uap->protocol,
  614             td->td_ucred, td);
  615         if (error)
  616                 return (error);
  617         error = socreate(uap->domain, &so2, uap->type, uap->protocol,
  618             td->td_ucred, td);
  619         if (error)
  620                 goto free1;
  621         /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
  622         error = falloc(td, &fp1, &fd);
  623         if (error)
  624                 goto free2;
  625         sv[0] = fd;
  626         fp1->f_data = so1;      /* so1 already has ref count */
  627         error = falloc(td, &fp2, &fd);
  628         if (error)
  629                 goto free3;
  630         fp2->f_data = so2;      /* so2 already has ref count */
  631         sv[1] = fd;
  632         error = soconnect2(so1, so2);
  633         if (error)
  634                 goto free4;
  635         if (uap->type == SOCK_DGRAM) {
  636                 /*
  637                  * Datagram socket connection is asymmetric.
  638                  */
  639                  error = soconnect2(so2, so1);
  640                  if (error)
  641                         goto free4;
  642         }
  643         FILE_LOCK(fp1);
  644         fp1->f_flag = FREAD|FWRITE;
  645         fp1->f_type = DTYPE_SOCKET;
  646         fp1->f_ops = &socketops;
  647         FILE_UNLOCK(fp1);
  648         FILE_LOCK(fp2);
  649         fp2->f_flag = FREAD|FWRITE;
  650         fp2->f_type = DTYPE_SOCKET;
  651         fp2->f_ops = &socketops;
  652         FILE_UNLOCK(fp2);
  653         so1 = so2 = NULL;
  654         error = copyout(sv, uap->rsv, 2 * sizeof (int));
  655         if (error)
  656                 goto free4;
  657         fdrop(fp1, td);
  658         fdrop(fp2, td);
  659         return (0);
  660 free4:
  661         fdclose(fdp, fp2, sv[1], td);
  662         fdrop(fp2, td);
  663 free3:
  664         fdclose(fdp, fp1, sv[0], td);
  665         fdrop(fp1, td);
  666 free2:
  667         if (so2 != NULL)
  668                 (void)soclose(so2);
  669 free1:
  670         if (so1 != NULL)
  671                 (void)soclose(so1);
  672         return (error);
  673 }
  674 
  675 static int
  676 sendit(td, s, mp, flags)
  677         struct thread *td;
  678         int s;
  679         struct msghdr *mp;
  680         int flags;
  681 {
  682         struct mbuf *control;
  683         struct sockaddr *to;
  684         int error;
  685 
  686         if (mp->msg_name != NULL) {
  687                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
  688                 if (error) {
  689                         to = NULL;
  690                         goto bad;
  691                 }
  692                 mp->msg_name = to;
  693         } else {
  694                 to = NULL;
  695         }
  696 
  697         if (mp->msg_control) {
  698                 if (mp->msg_controllen < sizeof(struct cmsghdr)
  699 #ifdef COMPAT_OLDSOCK
  700                     && mp->msg_flags != MSG_COMPAT
  701 #endif
  702                 ) {
  703                         error = EINVAL;
  704                         goto bad;
  705                 }
  706                 error = sockargs(&control, mp->msg_control,
  707                     mp->msg_controllen, MT_CONTROL);
  708                 if (error)
  709                         goto bad;
  710 #ifdef COMPAT_OLDSOCK
  711                 if (mp->msg_flags == MSG_COMPAT) {
  712                         struct cmsghdr *cm;
  713 
  714                         M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
  715                         if (control == 0) {
  716                                 error = ENOBUFS;
  717                                 goto bad;
  718                         } else {
  719                                 cm = mtod(control, struct cmsghdr *);
  720                                 cm->cmsg_len = control->m_len;
  721                                 cm->cmsg_level = SOL_SOCKET;
  722                                 cm->cmsg_type = SCM_RIGHTS;
  723                         }
  724                 }
  725 #endif
  726         } else {
  727                 control = NULL;
  728         }
  729 
  730         error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
  731 
  732 bad:
  733         if (to)
  734                 FREE(to, M_SONAME);
  735         return (error);
  736 }
  737 
  738 int
  739 kern_sendit(td, s, mp, flags, control, segflg)
  740         struct thread *td;
  741         int s;
  742         struct msghdr *mp;
  743         int flags;
  744         struct mbuf *control;
  745         enum uio_seg segflg;
  746 {
  747         struct file *fp;
  748         struct uio auio;
  749         struct iovec *iov;
  750         struct socket *so;
  751         int i;
  752         int len, error;
  753 #ifdef KTRACE
  754         struct uio *ktruio = NULL;
  755 #endif
  756 
  757         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
  758         if (error)
  759                 return (error);
  760         so = (struct socket *)fp->f_data;
  761 
  762 #ifdef MAC
  763         SOCK_LOCK(so);
  764         error = mac_check_socket_send(td->td_ucred, so);
  765         SOCK_UNLOCK(so);
  766         if (error)
  767                 goto bad;
  768 #endif
  769 
  770         auio.uio_iov = mp->msg_iov;
  771         auio.uio_iovcnt = mp->msg_iovlen;
  772         auio.uio_segflg = segflg;
  773         auio.uio_rw = UIO_WRITE;
  774         auio.uio_td = td;
  775         auio.uio_offset = 0;                    /* XXX */
  776         auio.uio_resid = 0;
  777         iov = mp->msg_iov;
  778         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  779                 if ((auio.uio_resid += iov->iov_len) < 0) {
  780                         error = EINVAL;
  781                         goto bad;
  782                 }
  783         }
  784 #ifdef KTRACE
  785         if (KTRPOINT(td, KTR_GENIO))
  786                 ktruio = cloneuio(&auio);
  787 #endif
  788         len = auio.uio_resid;
  789         error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
  790         if (error) {
  791                 if (auio.uio_resid != len && (error == ERESTART ||
  792                     error == EINTR || error == EWOULDBLOCK))
  793                         error = 0;
  794                 /* Generation of SIGPIPE can be controlled per socket */
  795                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
  796                     !(flags & MSG_NOSIGNAL)) {
  797                         PROC_LOCK(td->td_proc);
  798                         psignal(td->td_proc, SIGPIPE);
  799                         PROC_UNLOCK(td->td_proc);
  800                 }
  801         }
  802         if (error == 0)
  803                 td->td_retval[0] = len - auio.uio_resid;
  804 #ifdef KTRACE
  805         if (ktruio != NULL) {
  806                 ktruio->uio_resid = td->td_retval[0];
  807                 ktrgenio(s, UIO_WRITE, ktruio, error);
  808         }
  809 #endif
  810 bad:
  811         fdrop(fp, td);
  812         return (error);
  813 }
  814 
  815 int
  816 sendto(td, uap)
  817         struct thread *td;
  818         struct sendto_args /* {
  819                 int     s;
  820                 caddr_t buf;
  821                 size_t  len;
  822                 int     flags;
  823                 caddr_t to;
  824                 int     tolen;
  825         } */ *uap;
  826 {
  827         struct msghdr msg;
  828         struct iovec aiov;
  829         int error;
  830 
  831         msg.msg_name = uap->to;
  832         msg.msg_namelen = uap->tolen;
  833         msg.msg_iov = &aiov;
  834         msg.msg_iovlen = 1;
  835         msg.msg_control = 0;
  836 #ifdef COMPAT_OLDSOCK
  837         msg.msg_flags = 0;
  838 #endif
  839         aiov.iov_base = uap->buf;
  840         aiov.iov_len = uap->len;
  841         error = sendit(td, uap->s, &msg, uap->flags);
  842         return (error);
  843 }
  844 
  845 #ifdef COMPAT_OLDSOCK
  846 int
  847 osend(td, uap)
  848         struct thread *td;
  849         struct osend_args /* {
  850                 int     s;
  851                 caddr_t buf;
  852                 int     len;
  853                 int     flags;
  854         } */ *uap;
  855 {
  856         struct msghdr msg;
  857         struct iovec aiov;
  858         int error;
  859 
  860         msg.msg_name = 0;
  861         msg.msg_namelen = 0;
  862         msg.msg_iov = &aiov;
  863         msg.msg_iovlen = 1;
  864         aiov.iov_base = uap->buf;
  865         aiov.iov_len = uap->len;
  866         msg.msg_control = 0;
  867         msg.msg_flags = 0;
  868         error = sendit(td, uap->s, &msg, uap->flags);
  869         return (error);
  870 }
  871 
  872 int
  873 osendmsg(td, uap)
  874         struct thread *td;
  875         struct osendmsg_args /* {
  876                 int     s;
  877                 caddr_t msg;
  878                 int     flags;
  879         } */ *uap;
  880 {
  881         struct msghdr msg;
  882         struct iovec *iov;
  883         int error;
  884 
  885         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
  886         if (error)
  887                 return (error);
  888         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  889         if (error)
  890                 return (error);
  891         msg.msg_iov = iov;
  892         msg.msg_flags = MSG_COMPAT;
  893         error = sendit(td, uap->s, &msg, uap->flags);
  894         free(iov, M_IOV);
  895         return (error);
  896 }
  897 #endif
  898 
  899 int
  900 sendmsg(td, uap)
  901         struct thread *td;
  902         struct sendmsg_args /* {
  903                 int     s;
  904                 caddr_t msg;
  905                 int     flags;
  906         } */ *uap;
  907 {
  908         struct msghdr msg;
  909         struct iovec *iov;
  910         int error;
  911 
  912         error = copyin(uap->msg, &msg, sizeof (msg));
  913         if (error)
  914                 return (error);
  915         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  916         if (error)
  917                 return (error);
  918         msg.msg_iov = iov;
  919 #ifdef COMPAT_OLDSOCK
  920         msg.msg_flags = 0;
  921 #endif
  922         error = sendit(td, uap->s, &msg, uap->flags);
  923         free(iov, M_IOV);
  924         return (error);
  925 }
  926 
  927 int
  928 kern_recvit(td, s, mp, fromseg, controlp)
  929         struct thread *td;
  930         int s;
  931         struct msghdr *mp;
  932         enum uio_seg fromseg;
  933         struct mbuf **controlp;
  934 {
  935         struct uio auio;
  936         struct iovec *iov;
  937         int i;
  938         socklen_t len;
  939         int error;
  940         struct mbuf *m, *control = 0;
  941         caddr_t ctlbuf;
  942         struct file *fp;
  943         struct socket *so;
  944         struct sockaddr *fromsa = 0;
  945 #ifdef KTRACE
  946         struct uio *ktruio = NULL;
  947 #endif
  948 
  949         if(controlp != NULL)
  950                 *controlp = 0;
  951 
  952         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
  953         if (error)
  954                 return (error);
  955         so = fp->f_data;
  956 
  957 #ifdef MAC
  958         SOCK_LOCK(so);
  959         error = mac_check_socket_receive(td->td_ucred, so);
  960         SOCK_UNLOCK(so);
  961         if (error) {
  962                 fdrop(fp, td);
  963                 return (error);
  964         }
  965 #endif
  966 
  967         auio.uio_iov = mp->msg_iov;
  968         auio.uio_iovcnt = mp->msg_iovlen;
  969         auio.uio_segflg = UIO_USERSPACE;
  970         auio.uio_rw = UIO_READ;
  971         auio.uio_td = td;
  972         auio.uio_offset = 0;                    /* XXX */
  973         auio.uio_resid = 0;
  974         iov = mp->msg_iov;
  975         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  976                 if ((auio.uio_resid += iov->iov_len) < 0) {
  977                         fdrop(fp, td);
  978                         return (EINVAL);
  979                 }
  980         }
  981 #ifdef KTRACE
  982         if (KTRPOINT(td, KTR_GENIO))
  983                 ktruio = cloneuio(&auio);
  984 #endif
  985         len = auio.uio_resid;
  986         error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
  987             (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
  988             &mp->msg_flags);
  989         if (error) {
  990                 if (auio.uio_resid != (int)len && (error == ERESTART ||
  991                     error == EINTR || error == EWOULDBLOCK))
  992                         error = 0;
  993         }
  994 #ifdef KTRACE
  995         if (ktruio != NULL) {
  996                 ktruio->uio_resid = (int)len - auio.uio_resid;
  997                 ktrgenio(s, UIO_READ, ktruio, error);
  998         }
  999 #endif
 1000         if (error)
 1001                 goto out;
 1002         td->td_retval[0] = (int)len - auio.uio_resid;
 1003         if (mp->msg_name) {
 1004                 len = mp->msg_namelen;
 1005                 if (len <= 0 || fromsa == 0)
 1006                         len = 0;
 1007                 else {
 1008                         /* save sa_len before it is destroyed by MSG_COMPAT */
 1009                         len = MIN(len, fromsa->sa_len);
 1010 #ifdef COMPAT_OLDSOCK
 1011                         if (mp->msg_flags & MSG_COMPAT)
 1012                                 ((struct osockaddr *)fromsa)->sa_family =
 1013                                     fromsa->sa_family;
 1014 #endif
 1015                         if (fromseg == UIO_USERSPACE) {
 1016                                 error = copyout(fromsa, mp->msg_name,
 1017                                     (unsigned)len);
 1018                                 if (error)
 1019                                         goto out;
 1020                         } else
 1021                                 bcopy(fromsa, mp->msg_name, len);
 1022                 }
 1023                 mp->msg_namelen = len;
 1024         }
 1025         if (mp->msg_control && controlp == NULL) {
 1026 #ifdef COMPAT_OLDSOCK
 1027                 /*
 1028                  * We assume that old recvmsg calls won't receive access
 1029                  * rights and other control info, esp. as control info
 1030                  * is always optional and those options didn't exist in 4.3.
 1031                  * If we receive rights, trim the cmsghdr; anything else
 1032                  * is tossed.
 1033                  */
 1034                 if (control && mp->msg_flags & MSG_COMPAT) {
 1035                         if (mtod(control, struct cmsghdr *)->cmsg_level !=
 1036                             SOL_SOCKET ||
 1037                             mtod(control, struct cmsghdr *)->cmsg_type !=
 1038                             SCM_RIGHTS) {
 1039                                 mp->msg_controllen = 0;
 1040                                 goto out;
 1041                         }
 1042                         control->m_len -= sizeof (struct cmsghdr);
 1043                         control->m_data += sizeof (struct cmsghdr);
 1044                 }
 1045 #endif
 1046                 len = mp->msg_controllen;
 1047                 m = control;
 1048                 mp->msg_controllen = 0;
 1049                 ctlbuf = mp->msg_control;
 1050 
 1051                 while (m && len > 0) {
 1052                         unsigned int tocopy;
 1053 
 1054                         if (len >= m->m_len)
 1055                                 tocopy = m->m_len;
 1056                         else {
 1057                                 mp->msg_flags |= MSG_CTRUNC;
 1058                                 tocopy = len;
 1059                         }
 1060 
 1061                         if ((error = copyout(mtod(m, caddr_t),
 1062                                         ctlbuf, tocopy)) != 0)
 1063                                 goto out;
 1064 
 1065                         ctlbuf += tocopy;
 1066                         len -= tocopy;
 1067                         m = m->m_next;
 1068                 }
 1069                 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
 1070         }
 1071 out:
 1072         fdrop(fp, td);
 1073         if (fromsa)
 1074                 FREE(fromsa, M_SONAME);
 1075 
 1076         if (error == 0 && controlp != NULL)  
 1077                 *controlp = control;
 1078         else  if (control)
 1079                 m_freem(control);
 1080 
 1081         return (error);
 1082 }
 1083 
 1084 static int
 1085 recvit(td, s, mp, namelenp)
 1086         struct thread *td;
 1087         int s;
 1088         struct msghdr *mp;
 1089         void *namelenp;
 1090 {
 1091         int error;
 1092 
 1093         error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
 1094         if (error)
 1095                 return (error);
 1096         if (namelenp) {
 1097                 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
 1098 #ifdef COMPAT_OLDSOCK
 1099                 if (mp->msg_flags & MSG_COMPAT)
 1100                         error = 0;      /* old recvfrom didn't check */
 1101 #endif
 1102         }
 1103         return (error);
 1104 }
 1105 
 1106 int
 1107 recvfrom(td, uap)
 1108         struct thread *td;
 1109         struct recvfrom_args /* {
 1110                 int     s;
 1111                 caddr_t buf;
 1112                 size_t  len;
 1113                 int     flags;
 1114                 struct sockaddr * __restrict    from;
 1115                 socklen_t * __restrict fromlenaddr;
 1116         } */ *uap;
 1117 {
 1118         struct msghdr msg;
 1119         struct iovec aiov;
 1120         int error;
 1121 
 1122         if (uap->fromlenaddr) {
 1123                 error = copyin(uap->fromlenaddr,
 1124                     &msg.msg_namelen, sizeof (msg.msg_namelen));
 1125                 if (error)
 1126                         goto done2;
 1127         } else {
 1128                 msg.msg_namelen = 0;
 1129         }
 1130         msg.msg_name = uap->from;
 1131         msg.msg_iov = &aiov;
 1132         msg.msg_iovlen = 1;
 1133         aiov.iov_base = uap->buf;
 1134         aiov.iov_len = uap->len;
 1135         msg.msg_control = 0;
 1136         msg.msg_flags = uap->flags;
 1137         error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 1138 done2:
 1139         return(error);
 1140 }
 1141 
 1142 #ifdef COMPAT_OLDSOCK
 1143 int
 1144 orecvfrom(td, uap)
 1145         struct thread *td;
 1146         struct recvfrom_args *uap;
 1147 {
 1148 
 1149         uap->flags |= MSG_COMPAT;
 1150         return (recvfrom(td, uap));
 1151 }
 1152 #endif
 1153 
 1154 #ifdef COMPAT_OLDSOCK
 1155 int
 1156 orecv(td, uap)
 1157         struct thread *td;
 1158         struct orecv_args /* {
 1159                 int     s;
 1160                 caddr_t buf;
 1161                 int     len;
 1162                 int     flags;
 1163         } */ *uap;
 1164 {
 1165         struct msghdr msg;
 1166         struct iovec aiov;
 1167         int error;
 1168 
 1169         msg.msg_name = 0;
 1170         msg.msg_namelen = 0;
 1171         msg.msg_iov = &aiov;
 1172         msg.msg_iovlen = 1;
 1173         aiov.iov_base = uap->buf;
 1174         aiov.iov_len = uap->len;
 1175         msg.msg_control = 0;
 1176         msg.msg_flags = uap->flags;
 1177         error = recvit(td, uap->s, &msg, NULL);
 1178         return (error);
 1179 }
 1180 
 1181 /*
 1182  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 1183  * overlays the new one, missing only the flags, and with the (old) access
 1184  * rights where the control fields are now.
 1185  */
 1186 int
 1187 orecvmsg(td, uap)
 1188         struct thread *td;
 1189         struct orecvmsg_args /* {
 1190                 int     s;
 1191                 struct  omsghdr *msg;
 1192                 int     flags;
 1193         } */ *uap;
 1194 {
 1195         struct msghdr msg;
 1196         struct iovec *iov;
 1197         int error;
 1198 
 1199         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 1200         if (error)
 1201                 return (error);
 1202         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1203         if (error)
 1204                 return (error);
 1205         msg.msg_flags = uap->flags | MSG_COMPAT;
 1206         msg.msg_iov = iov;
 1207         error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
 1208         if (msg.msg_controllen && error == 0)
 1209                 error = copyout(&msg.msg_controllen,
 1210                     &uap->msg->msg_accrightslen, sizeof (int));
 1211         free(iov, M_IOV);
 1212         return (error);
 1213 }
 1214 #endif
 1215 
 1216 int
 1217 recvmsg(td, uap)
 1218         struct thread *td;
 1219         struct recvmsg_args /* {
 1220                 int     s;
 1221                 struct  msghdr *msg;
 1222                 int     flags;
 1223         } */ *uap;
 1224 {
 1225         struct msghdr msg;
 1226         struct iovec *uiov, *iov;
 1227         int error;
 1228 
 1229         error = copyin(uap->msg, &msg, sizeof (msg));
 1230         if (error)
 1231                 return (error);
 1232         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1233         if (error)
 1234                 return (error);
 1235         msg.msg_flags = uap->flags;
 1236 #ifdef COMPAT_OLDSOCK
 1237         msg.msg_flags &= ~MSG_COMPAT;
 1238 #endif
 1239         uiov = msg.msg_iov;
 1240         msg.msg_iov = iov;
 1241         error = recvit(td, uap->s, &msg, NULL);
 1242         if (error == 0) {
 1243                 msg.msg_iov = uiov;
 1244                 error = copyout(&msg, uap->msg, sizeof(msg));
 1245         }
 1246         free(iov, M_IOV);
 1247         return (error);
 1248 }
 1249 
 1250 /* ARGSUSED */
 1251 int
 1252 shutdown(td, uap)
 1253         struct thread *td;
 1254         struct shutdown_args /* {
 1255                 int     s;
 1256                 int     how;
 1257         } */ *uap;
 1258 {
 1259         struct socket *so;
 1260         struct file *fp;
 1261         int error;
 1262 
 1263         error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL);
 1264         if (error == 0) {
 1265                 so = fp->f_data;
 1266                 error = soshutdown(so, uap->how);
 1267                 fdrop(fp, td);
 1268         }
 1269         return (error);
 1270 }
 1271 
 1272 /* ARGSUSED */
 1273 int
 1274 setsockopt(td, uap)
 1275         struct thread *td;
 1276         struct setsockopt_args /* {
 1277                 int     s;
 1278                 int     level;
 1279                 int     name;
 1280                 caddr_t val;
 1281                 int     valsize;
 1282         } */ *uap;
 1283 {
 1284 
 1285         return (kern_setsockopt(td, uap->s, uap->level, uap->name,
 1286             uap->val, UIO_USERSPACE, uap->valsize));
 1287 }
 1288 
 1289 int
 1290 kern_setsockopt(td, s, level, name, val, valseg, valsize)
 1291         struct thread *td;
 1292         int s;
 1293         int level;
 1294         int name;
 1295         void *val;
 1296         enum uio_seg valseg;
 1297         socklen_t valsize;
 1298 {
 1299         int error;
 1300         struct socket *so;
 1301         struct file *fp;
 1302         struct sockopt sopt;
 1303 
 1304         if (val == NULL && valsize != 0)
 1305                 return (EFAULT);
 1306         if ((int)valsize < 0)
 1307                 return (EINVAL);
 1308 
 1309         sopt.sopt_dir = SOPT_SET;
 1310         sopt.sopt_level = level;
 1311         sopt.sopt_name = name;
 1312         sopt.sopt_val = val;
 1313         sopt.sopt_valsize = valsize;
 1314         switch (valseg) {
 1315         case UIO_USERSPACE:
 1316                 sopt.sopt_td = td;
 1317                 break;
 1318         case UIO_SYSSPACE:
 1319                 sopt.sopt_td = NULL;
 1320                 break;
 1321         default:
 1322                 panic("kern_setsockopt called with bad valseg");
 1323         }
 1324 
 1325         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
 1326         if (error == 0) {
 1327                 so = fp->f_data;
 1328                 error = sosetopt(so, &sopt);
 1329                 fdrop(fp, td);
 1330         }
 1331         return(error);
 1332 }
 1333 
 1334 /* ARGSUSED */
 1335 int
 1336 getsockopt(td, uap)
 1337         struct thread *td;
 1338         struct getsockopt_args /* {
 1339                 int     s;
 1340                 int     level;
 1341                 int     name;
 1342                 void * __restrict       val;
 1343                 socklen_t * __restrict avalsize;
 1344         } */ *uap;
 1345 {
 1346         socklen_t valsize;
 1347         int     error;
 1348 
 1349         if (uap->val) {
 1350                 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 1351                 if (error)
 1352                         return (error);
 1353         }
 1354 
 1355         error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 1356             uap->val, UIO_USERSPACE, &valsize);
 1357 
 1358         if (error == 0)
 1359                 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 1360         return (error);
 1361 }
 1362 
 1363 /*
 1364  * Kernel version of getsockopt.
 1365  * optval can be a userland or userspace. optlen is always a kernel pointer.
 1366  */
 1367 int
 1368 kern_getsockopt(td, s, level, name, val, valseg, valsize)
 1369         struct thread *td;
 1370         int s;
 1371         int level;
 1372         int name;
 1373         void *val;
 1374         enum uio_seg valseg;
 1375         socklen_t *valsize;
 1376 {
 1377         int error;
 1378         struct  socket *so;
 1379         struct file *fp;
 1380         struct  sockopt sopt;
 1381 
 1382         if (val == NULL)
 1383                 *valsize = 0;
 1384         if ((int)*valsize < 0)
 1385                 return (EINVAL);
 1386 
 1387         sopt.sopt_dir = SOPT_GET;
 1388         sopt.sopt_level = level;
 1389         sopt.sopt_name = name;
 1390         sopt.sopt_val = val;
 1391         sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
 1392         switch (valseg) {
 1393         case UIO_USERSPACE:
 1394                 sopt.sopt_td = td;
 1395                 break;
 1396         case UIO_SYSSPACE:
 1397                 sopt.sopt_td = NULL;
 1398                 break;
 1399         default:
 1400                 panic("kern_getsockopt called with bad valseg");
 1401         }
 1402 
 1403         error = getsock(td->td_proc->p_fd, s, &fp, NULL);
 1404         if (error == 0) {
 1405                 so = fp->f_data;
 1406                 error = sogetopt(so, &sopt);
 1407                 *valsize = sopt.sopt_valsize;
 1408                 fdrop(fp, td);
 1409         }
 1410         return (error);
 1411 }
 1412 
 1413 /*
 1414  * getsockname1() - Get socket name.
 1415  */
 1416 /* ARGSUSED */
 1417 static int
 1418 getsockname1(td, uap, compat)
 1419         struct thread *td;
 1420         struct getsockname_args /* {
 1421                 int     fdes;
 1422                 struct sockaddr * __restrict asa;
 1423                 socklen_t * __restrict alen;
 1424         } */ *uap;
 1425         int compat;
 1426 {
 1427         struct sockaddr *sa;
 1428         socklen_t len;
 1429         int error;
 1430 
 1431         error = copyin(uap->alen, &len, sizeof(len));
 1432         if (error)
 1433                 return (error);
 1434 
 1435         error = kern_getsockname(td, uap->fdes, &sa, &len);
 1436         if (error)
 1437                 return (error);
 1438 
 1439         if (len != 0) {
 1440 #ifdef COMPAT_OLDSOCK
 1441                 if (compat)
 1442                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1443 #endif
 1444                 error = copyout(sa, uap->asa, (u_int)len);
 1445         }
 1446         free(sa, M_SONAME);
 1447         if (error == 0)
 1448                 error = copyout(&len, uap->alen, sizeof(len));
 1449         return (error);
 1450 }
 1451 
 1452 int
 1453 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
 1454     socklen_t *alen)
 1455 {
 1456         struct socket *so;
 1457         struct file *fp;
 1458         socklen_t len;
 1459         int error;
 1460 
 1461         if (*alen < 0)
 1462                 return (EINVAL);
 1463 
 1464         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
 1465         if (error)
 1466                 return (error);
 1467         so = fp->f_data;
 1468         *sa = NULL;
 1469         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
 1470         if (error)
 1471                 goto bad;
 1472         if (*sa == NULL)
 1473                 len = 0;
 1474         else
 1475                 len = MIN(*alen, (*sa)->sa_len);
 1476         *alen = len;
 1477 bad:
 1478         fdrop(fp, td);
 1479         if (error && *sa) {
 1480                 free(*sa, M_SONAME);
 1481                 *sa = NULL;
 1482         }
 1483         return (error);
 1484 }
 1485 
 1486 int
 1487 getsockname(td, uap)
 1488         struct thread *td;
 1489         struct getsockname_args *uap;
 1490 {
 1491 
 1492         return (getsockname1(td, uap, 0));
 1493 }
 1494 
 1495 #ifdef COMPAT_OLDSOCK
 1496 int
 1497 ogetsockname(td, uap)
 1498         struct thread *td;
 1499         struct getsockname_args *uap;
 1500 {
 1501 
 1502         return (getsockname1(td, uap, 1));
 1503 }
 1504 #endif /* COMPAT_OLDSOCK */
 1505 
 1506 /*
 1507  * getpeername1() - Get name of peer for connected socket.
 1508  */
 1509 /* ARGSUSED */
 1510 static int
 1511 getpeername1(td, uap, compat)
 1512         struct thread *td;
 1513         struct getpeername_args /* {
 1514                 int     fdes;
 1515                 struct sockaddr * __restrict    asa;
 1516                 socklen_t * __restrict  alen;
 1517         } */ *uap;
 1518         int compat;
 1519 {
 1520         struct sockaddr *sa;
 1521         socklen_t len;
 1522         int error;
 1523 
 1524         error = copyin(uap->alen, &len, sizeof (len));
 1525         if (error)
 1526                 return (error);
 1527 
 1528         error = kern_getpeername(td, uap->fdes, &sa, &len);
 1529         if (error)
 1530                 return (error);
 1531 
 1532         if (len != 0) {
 1533 #ifdef COMPAT_OLDSOCK
 1534                 if (compat)
 1535                         ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1536 #endif
 1537                 error = copyout(sa, uap->asa, (u_int)len);
 1538         }
 1539         free(sa, M_SONAME);
 1540         if (error == 0)
 1541                 error = copyout(&len, uap->alen, sizeof(len));
 1542         return (error);
 1543 }
 1544 
 1545 int
 1546 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
 1547     socklen_t *alen)
 1548 {
 1549         struct socket *so;
 1550         struct file *fp;
 1551         socklen_t len;
 1552         int error;
 1553 
 1554         if (*alen < 0)
 1555                 return (EINVAL);
 1556 
 1557         error = getsock(td->td_proc->p_fd, fd, &fp, NULL);
 1558         if (error)
 1559                 return (error);
 1560         so = fp->f_data;
 1561         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 1562                 error = ENOTCONN;
 1563                 goto done;
 1564         }
 1565         *sa = NULL;
 1566         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
 1567         if (error)
 1568                 goto bad;
 1569         if (*sa == NULL)
 1570                 len = 0;
 1571         else
 1572                 len = MIN(*alen, (*sa)->sa_len);
 1573         *alen = len;
 1574 bad:
 1575         if (error && *sa) {
 1576                 free(*sa, M_SONAME);
 1577                 *sa = NULL;
 1578         }
 1579 done:
 1580         fdrop(fp, td);
 1581         return (error);
 1582 }
 1583 
 1584 int
 1585 getpeername(td, uap)
 1586         struct thread *td;
 1587         struct getpeername_args *uap;
 1588 {
 1589 
 1590         return (getpeername1(td, uap, 0));
 1591 }
 1592 
 1593 #ifdef COMPAT_OLDSOCK
 1594 int
 1595 ogetpeername(td, uap)
 1596         struct thread *td;
 1597         struct ogetpeername_args *uap;
 1598 {
 1599 
 1600         /* XXX uap should have type `getpeername_args *' to begin with. */
 1601         return (getpeername1(td, (struct getpeername_args *)uap, 1));
 1602 }
 1603 #endif /* COMPAT_OLDSOCK */
 1604 
 1605 int
 1606 sockargs(mp, buf, buflen, type)
 1607         struct mbuf **mp;
 1608         caddr_t buf;
 1609         int buflen, type;
 1610 {
 1611         struct sockaddr *sa;
 1612         struct mbuf *m;
 1613         int error;
 1614 
 1615         if ((u_int)buflen > MLEN) {
 1616 #ifdef COMPAT_OLDSOCK
 1617                 if (type == MT_SONAME && (u_int)buflen <= 112)
 1618                         buflen = MLEN;          /* unix domain compat. hack */
 1619                 else
 1620 #endif
 1621                         if ((u_int)buflen > MCLBYTES)
 1622                                 return (EINVAL);
 1623         }
 1624         m = m_get(M_TRYWAIT, type);
 1625         if (m == NULL)
 1626                 return (ENOBUFS);
 1627         if ((u_int)buflen > MLEN) {
 1628                 MCLGET(m, M_TRYWAIT);
 1629                 if ((m->m_flags & M_EXT) == 0) {
 1630                         m_free(m);
 1631                         return (ENOBUFS);
 1632                 }
 1633         }
 1634         m->m_len = buflen;
 1635         error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 1636         if (error)
 1637                 (void) m_free(m);
 1638         else {
 1639                 *mp = m;
 1640                 if (type == MT_SONAME) {
 1641                         sa = mtod(m, struct sockaddr *);
 1642 
 1643 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1644                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1645                                 sa->sa_family = sa->sa_len;
 1646 #endif
 1647                         sa->sa_len = buflen;
 1648                 }
 1649         }
 1650         return (error);
 1651 }
 1652 
 1653 int
 1654 getsockaddr(namp, uaddr, len)
 1655         struct sockaddr **namp;
 1656         caddr_t uaddr;
 1657         size_t len;
 1658 {
 1659         struct sockaddr *sa;
 1660         int error;
 1661 
 1662         if (len > SOCK_MAXADDRLEN)
 1663                 return (ENAMETOOLONG);
 1664         if (len < offsetof(struct sockaddr, sa_data[0]))
 1665                 return (EINVAL);
 1666         MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
 1667         error = copyin(uaddr, sa, len);
 1668         if (error) {
 1669                 FREE(sa, M_SONAME);
 1670         } else {
 1671 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1672                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1673                         sa->sa_family = sa->sa_len;
 1674 #endif
 1675                 sa->sa_len = len;
 1676                 *namp = sa;
 1677         }
 1678         return (error);
 1679 }
 1680 
 1681 /*
 1682  * Detach mapped page and release resources back to the system.
 1683  */
 1684 void
 1685 sf_buf_mext(void *addr, void *args)
 1686 {
 1687         vm_page_t m;
 1688 
 1689         m = sf_buf_page(args);
 1690         sf_buf_free(args);
 1691         vm_page_lock_queues();
 1692         vm_page_unwire(m, 0);
 1693         /*
 1694          * Check for the object going away on us. This can
 1695          * happen since we don't hold a reference to it.
 1696          * If so, we're responsible for freeing the page.
 1697          */
 1698         if (m->wire_count == 0 && m->object == NULL)
 1699                 vm_page_free(m);
 1700         vm_page_unlock_queues();
 1701 }
 1702 
 1703 /*
 1704  * sendfile(2)
 1705  *
 1706  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 1707  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 1708  *
 1709  * Send a file specified by 'fd' and starting at 'offset' to a socket
 1710  * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
 1711  * 0.  Optionally add a header and/or trailer to the socket output.  If
 1712  * specified, write the total number of bytes sent into *sbytes.
 1713  */
 1714 int
 1715 sendfile(struct thread *td, struct sendfile_args *uap)
 1716 {
 1717 
 1718         return (do_sendfile(td, uap, 0));
 1719 }
 1720 
 1721 static int
 1722 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
 1723 {
 1724         struct sf_hdtr hdtr;
 1725         struct uio *hdr_uio, *trl_uio;
 1726         int error;
 1727 
 1728         hdr_uio = trl_uio = NULL;
 1729 
 1730         if (uap->hdtr != NULL) {
 1731                 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 1732                 if (error)
 1733                         goto out;
 1734                 if (hdtr.headers != NULL) {
 1735                         error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
 1736                         if (error)
 1737                                 goto out;
 1738                 }
 1739                 if (hdtr.trailers != NULL) {
 1740                         error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
 1741                         if (error)
 1742                                 goto out;
 1743 
 1744                 }
 1745         }
 1746 
 1747         error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
 1748 out:
 1749         if (hdr_uio)
 1750                 free(hdr_uio, M_IOV);
 1751         if (trl_uio)
 1752                 free(trl_uio, M_IOV);
 1753         return (error);
 1754 }
 1755 
 1756 #ifdef COMPAT_FREEBSD4
 1757 int
 1758 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
 1759 {
 1760         struct sendfile_args args;
 1761 
 1762         args.fd = uap->fd;
 1763         args.s = uap->s;
 1764         args.offset = uap->offset;
 1765         args.nbytes = uap->nbytes;
 1766         args.hdtr = uap->hdtr;
 1767         args.sbytes = uap->sbytes;
 1768         args.flags = uap->flags;
 1769 
 1770         return (do_sendfile(td, &args, 1));
 1771 }
 1772 #endif /* COMPAT_FREEBSD4 */
 1773 
 1774 int
 1775 kern_sendfile(struct thread *td, struct sendfile_args *uap,
 1776     struct uio *hdr_uio, struct uio *trl_uio, int compat)
 1777 {
 1778         struct file *sock_fp;
 1779         struct vnode *vp;
 1780         struct vm_object *obj = NULL;
 1781         struct socket *so = NULL;
 1782         struct mbuf *m = NULL;
 1783         struct sf_buf *sf;
 1784         struct vm_page *pg;
 1785         off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
 1786         int error, hdrlen = 0, mnw = 0;
 1787         int vfslocked;
 1788 
 1789         /*
 1790          * The file descriptor must be a regular file and have a
 1791          * backing VM object.
 1792          * File offset must be positive.  If it goes beyond EOF
 1793          * we send only the header/trailer and no payload data.
 1794          */
 1795         if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
 1796                 goto out;
 1797         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1798         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1799         if (vp->v_type == VREG) {
 1800                 obj = vp->v_object;
 1801                 if (obj != NULL) {
 1802                         /*
 1803                          * Temporarily increase the backing VM
 1804                          * object's reference count so that a forced
 1805                          * reclamation of its vnode does not
 1806                          * immediately destroy it.
 1807                          */
 1808                         VM_OBJECT_LOCK(obj);
 1809                         if ((obj->flags & OBJ_DEAD) == 0) {
 1810                                 vm_object_reference_locked(obj);
 1811                                 VM_OBJECT_UNLOCK(obj);
 1812                         } else {
 1813                                 VM_OBJECT_UNLOCK(obj);
 1814                                 obj = NULL;
 1815                         }
 1816                 }
 1817         }
 1818         VOP_UNLOCK(vp, 0, td);
 1819         VFS_UNLOCK_GIANT(vfslocked);
 1820         if (obj == NULL) {
 1821                 error = EINVAL;
 1822                 goto out;
 1823         }
 1824         if (uap->offset < 0) {
 1825                 error = EINVAL;
 1826                 goto out;
 1827         }
 1828 
 1829         /*
 1830          * The socket must be a stream socket and connected.
 1831          * Remember if it a blocking or non-blocking socket.
 1832          */
 1833         if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp,
 1834             NULL)) != 0)
 1835                 goto out;
 1836         so = sock_fp->f_data;
 1837         if (so->so_type != SOCK_STREAM) {
 1838                 error = EINVAL;
 1839                 goto out;
 1840         }
 1841         if ((so->so_state & SS_ISCONNECTED) == 0) {
 1842                 error = ENOTCONN;
 1843                 goto out;
 1844         }
 1845         /*
 1846          * Do not wait on memory allocations but return ENOMEM for
 1847          * caller to retry later.
 1848          * XXX: Experimental.
 1849          */
 1850         if (uap->flags & SF_MNOWAIT)
 1851                 mnw = 1;
 1852 
 1853 #ifdef MAC
 1854         SOCK_LOCK(so);
 1855         error = mac_check_socket_send(td->td_ucred, so);
 1856         SOCK_UNLOCK(so);
 1857         if (error)
 1858                 goto out;
 1859 #endif
 1860 
 1861         /* If headers are specified copy them into mbufs. */
 1862         if (hdr_uio != NULL) {
 1863                 hdr_uio->uio_td = td;
 1864                 hdr_uio->uio_rw = UIO_WRITE;
 1865                 if (hdr_uio->uio_resid > 0) {
 1866                         /*
 1867                          * In FBSD < 5.0 the nbytes to send also included
 1868                          * the header.  If compat is specified subtract the
 1869                          * header size from nbytes.
 1870                          */
 1871                         if (compat) {
 1872                                 if (uap->nbytes > hdr_uio->uio_resid)
 1873                                         uap->nbytes -= hdr_uio->uio_resid;
 1874                                 else
 1875                                         uap->nbytes = 0;
 1876                         }
 1877                         m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
 1878                             0, 0, 0);
 1879                         if (m == NULL) {
 1880                                 error = mnw ? EAGAIN : ENOBUFS;
 1881                                 goto out;
 1882                         }
 1883                         hdrlen = m_length(m, NULL);
 1884                 }
 1885         }
 1886 
 1887         /*
 1888          * Protect against multiple writers to the socket.
 1889          *
 1890          * XXXRW: Historically this has assumed non-interruptibility, so now
 1891          * we implement that, but possibly shouldn't.
 1892          */
 1893         (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
 1894 
 1895         /*
 1896          * Loop through the pages of the file, starting with the requested
 1897          * offset. Get a file page (do I/O if necessary), map the file page
 1898          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 1899          * it on the socket.
 1900          * This is done in two loops.  The inner loop turns as many pages
 1901          * as it can, up to available socket buffer space, without blocking
 1902          * into mbufs to have it bulk delivered into the socket send buffer.
 1903          * The outer loop checks the state and available space of the socket
 1904          * and takes care of the overall progress.
 1905          */
 1906         for (off = uap->offset, rem = uap->nbytes; ; ) {
 1907                 int loopbytes = 0;
 1908                 int space = 0;
 1909                 int done = 0;
 1910 
 1911                 /*
 1912                  * Check the socket state for ongoing connection,
 1913                  * no errors and space in socket buffer.
 1914                  * If space is low allow for the remainder of the
 1915                  * file to be processed if it fits the socket buffer.
 1916                  * Otherwise block in waiting for sufficient space
 1917                  * to proceed, or if the socket is nonblocking, return
 1918                  * to userland with EAGAIN while reporting how far
 1919                  * we've come.
 1920                  * We wait until the socket buffer has significant free
 1921                  * space to do bulk sends.  This makes good use of file
 1922                  * system read ahead and allows packet segmentation
 1923                  * offloading hardware to take over lots of work.  If
 1924                  * we were not careful here we would send off only one
 1925                  * sfbuf at a time.
 1926                  */
 1927                 SOCKBUF_LOCK(&so->so_snd);
 1928                 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
 1929                         so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
 1930 retry_space:
 1931                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 1932                         error = EPIPE;
 1933                         SOCKBUF_UNLOCK(&so->so_snd);
 1934                         goto done;
 1935                 } else if (so->so_error) {
 1936                         error = so->so_error;
 1937                         so->so_error = 0;
 1938                         SOCKBUF_UNLOCK(&so->so_snd);
 1939                         goto done;
 1940                 }
 1941                 space = sbspace(&so->so_snd);
 1942                 if (space < rem &&
 1943                     (space <= 0 ||
 1944                      space < so->so_snd.sb_lowat)) {
 1945                         if (so->so_state & SS_NBIO) {
 1946                                 SOCKBUF_UNLOCK(&so->so_snd);
 1947                                 error = EAGAIN;
 1948                                 goto done;
 1949                         }
 1950                         /*
 1951                          * sbwait drops the lock while sleeping.
 1952                          * When we loop back to retry_space the
 1953                          * state may have changed and we retest
 1954                          * for it.
 1955                          */
 1956                         error = sbwait(&so->so_snd);
 1957                         /*
 1958                          * An error from sbwait usually indicates that we've
 1959                          * been interrupted by a signal. If we've sent anything
 1960                          * then return bytes sent, otherwise return the error.
 1961                          */
 1962                         if (error) {
 1963                                 SOCKBUF_UNLOCK(&so->so_snd);
 1964                                 goto done;
 1965                         }
 1966                         goto retry_space;
 1967                 }
 1968                 SOCKBUF_UNLOCK(&so->so_snd);
 1969 
 1970                 /*
 1971                  * Reduce space in the socket buffer by the size of
 1972                  * the header mbuf chain.
 1973                  * hdrlen is set to 0 after the first loop.
 1974                  */
 1975                 space -= hdrlen;
 1976 
 1977                 /*
 1978                  * Loop and construct maximum sized mbuf chain to be bulk
 1979                  * dumped into socket buffer.
 1980                  */
 1981                 while(space > loopbytes) {
 1982                         vm_pindex_t pindex;
 1983                         vm_offset_t pgoff;
 1984                         struct mbuf *m0;
 1985 
 1986                         VM_OBJECT_LOCK(obj);
 1987                         /*
 1988                          * Calculate the amount to transfer.
 1989                          * Not to exceed a page, the EOF,
 1990                          * or the passed in nbytes.
 1991                          */
 1992                         pgoff = (vm_offset_t)(off & PAGE_MASK);
 1993                         xfsize = omin(PAGE_SIZE - pgoff,
 1994                             obj->un_pager.vnp.vnp_size - uap->offset -
 1995                             fsbytes - loopbytes);
 1996                         if (uap->nbytes)
 1997                                 rem = (uap->nbytes - fsbytes - loopbytes);
 1998                         else
 1999                                 rem = obj->un_pager.vnp.vnp_size -
 2000                                     uap->offset - fsbytes - loopbytes;
 2001                         xfsize = omin(rem, xfsize);
 2002                         if (xfsize <= 0) {
 2003                                 VM_OBJECT_UNLOCK(obj);
 2004                                 done = 1;               /* all data sent */
 2005                                 break;
 2006                         }
 2007                         /*
 2008                          * Don't overflow the send buffer.
 2009                          * Stop here and send out what we've
 2010                          * already got.
 2011                          */
 2012                         if (space < loopbytes + xfsize) {
 2013                                 VM_OBJECT_UNLOCK(obj);
 2014                                 break;
 2015                         }
 2016 
 2017                         /*
 2018                          * Attempt to look up the page.  Allocate
 2019                          * if not found or wait and loop if busy.
 2020                          */
 2021                         pindex = OFF_TO_IDX(off);
 2022                         pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
 2023                             VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY);
 2024 
 2025                         /*
 2026                          * Check if page is valid for what we need,
 2027                          * otherwise initiate I/O.
 2028                          * If we already turned some pages into mbufs,
 2029                          * send them off before we come here again and
 2030                          * block.
 2031                          */
 2032                         if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
 2033                                 VM_OBJECT_UNLOCK(obj);
 2034                         else if (m != NULL)
 2035                                 error = EAGAIN; /* send what we already got */
 2036                         else if (uap->flags & SF_NODISKIO)
 2037                                 error = EBUSY;
 2038                         else {
 2039                                 int bsize, resid;
 2040 
 2041                                 /*
 2042                                  * Ensure that our page is still around
 2043                                  * when the I/O completes.
 2044                                  */
 2045                                 vm_page_io_start(pg);
 2046                                 VM_OBJECT_UNLOCK(obj);
 2047 
 2048                                 /*
 2049                                  * Get the page from backing store.
 2050                                  */
 2051                                 bsize = vp->v_mount->mnt_stat.f_iosize;
 2052                                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2053                                 vn_lock(vp, LK_SHARED | LK_RETRY, td);
 2054 
 2055                                 /*
 2056                                  * XXXMAC: Because we don't have fp->f_cred
 2057                                  * here, we pass in NOCRED.  This is probably
 2058                                  * wrong, but is consistent with our original
 2059                                  * implementation.
 2060                                  */
 2061                                 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
 2062                                     trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
 2063                                     IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 2064                                     td->td_ucred, NOCRED, &resid, td);
 2065                                 VOP_UNLOCK(vp, 0, td);
 2066                                 VFS_UNLOCK_GIANT(vfslocked);
 2067                                 VM_OBJECT_LOCK(obj);
 2068                                 vm_page_io_finish(pg);
 2069                                 if (!error)
 2070                                         VM_OBJECT_UNLOCK(obj);
 2071                                 mbstat.sf_iocnt++;
 2072                         }
 2073                         if (error) {
 2074                                 vm_page_lock_queues();
 2075                                 vm_page_unwire(pg, 0);
 2076                                 /*
 2077                                  * See if anyone else might know about
 2078                                  * this page.  If not and it is not valid,
 2079                                  * then free it.
 2080                                  */
 2081                                 if (pg->wire_count == 0 && pg->valid == 0 &&
 2082                                     pg->busy == 0 && !(pg->oflags & VPO_BUSY) &&
 2083                                     pg->hold_count == 0) {
 2084                                         vm_page_free(pg);
 2085                                 }
 2086                                 vm_page_unlock_queues();
 2087                                 VM_OBJECT_UNLOCK(obj);
 2088                                 if (error == EAGAIN)
 2089                                         error = 0;      /* not a real error */
 2090                                 break;
 2091                         }
 2092 
 2093                         /*
 2094                          * Get a sendfile buf.  We usually wait as long
 2095                          * as necessary, but this wait can be interrupted.
 2096                          */
 2097                         if ((sf = sf_buf_alloc(pg,
 2098                             (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
 2099                                 mbstat.sf_allocfail++;
 2100                                 vm_page_lock_queues();
 2101                                 vm_page_unwire(pg, 0);
 2102                                 /*
 2103                                  * XXX: Not same check as above!?
 2104                                  */
 2105                                 if (pg->wire_count == 0 && pg->object == NULL)
 2106                                         vm_page_free(pg);
 2107                                 vm_page_unlock_queues();
 2108                                 error = (mnw ? EAGAIN : EINTR);
 2109                                 break;
 2110                         }
 2111 
 2112                         /*
 2113                          * Get an mbuf and set it up as having
 2114                          * external storage.
 2115                          */
 2116                         m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
 2117                         if (m0 == NULL) {
 2118                                 error = (mnw ? EAGAIN : ENOBUFS);
 2119                                 sf_buf_mext((void *)sf_buf_kva(sf), sf);
 2120                                 break;
 2121                         }
 2122                         MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext,
 2123                             sf, M_RDONLY, EXT_SFBUF);
 2124                         m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
 2125                         m0->m_len = xfsize;
 2126 
 2127                         /* Append to mbuf chain. */
 2128                         if (m != NULL)
 2129                                 m_cat(m, m0);
 2130                         else
 2131                                 m = m0;
 2132 
 2133                         /* Keep track of bits processed. */
 2134                         loopbytes += xfsize;
 2135                         off += xfsize;
 2136                 }
 2137 
 2138                 /* Add the buffer chain to the socket buffer. */
 2139                 if (m != NULL) {
 2140                         int mlen, err;
 2141 
 2142                         mlen = m_length(m, NULL);
 2143                         SOCKBUF_LOCK(&so->so_snd);
 2144                         if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 2145                                 error = EPIPE;
 2146                                 SOCKBUF_UNLOCK(&so->so_snd);
 2147                                 goto done;
 2148                         }
 2149                         SOCKBUF_UNLOCK(&so->so_snd);
 2150                         /* Avoid error aliasing. */
 2151                         err = (*so->so_proto->pr_usrreqs->pru_send)
 2152                                     (so, 0, m, NULL, NULL, td);
 2153                         if (err == 0) {
 2154                                 /*
 2155                                  * We need two counters to get the
 2156                                  * file offset and nbytes to send
 2157                                  * right:
 2158                                  * - sbytes contains the total amount
 2159                                  *   of bytes sent, including headers.
 2160                                  * - fsbytes contains the total amount
 2161                                  *   of bytes sent from the file.
 2162                                  */
 2163                                 sbytes += mlen;
 2164                                 fsbytes += mlen;
 2165                                 if (hdrlen) {
 2166                                         fsbytes -= hdrlen;
 2167                                         hdrlen = 0;
 2168                                 }
 2169                         } else if (error == 0)
 2170                                 error = err;
 2171                         m = NULL;       /* pru_send always consumes */
 2172                 }
 2173 
 2174                 /* Quit outer loop on error or when we're done. */
 2175                 if (error || done)
 2176                         goto done;
 2177         }
 2178 
 2179         /*
 2180          * Send trailers. Wimp out and use writev(2).
 2181          */
 2182         if (trl_uio != NULL) {
 2183                 error = kern_writev(td, uap->s, trl_uio);
 2184                 if (error)
 2185                         goto done;
 2186                 sbytes += td->td_retval[0];
 2187         }
 2188 
 2189 done:
 2190         sbunlock(&so->so_snd);
 2191 out:
 2192         /*
 2193          * If there was no error we have to clear td->td_retval[0]
 2194          * because it may have been set by writev.
 2195          */
 2196         if (error == 0) {
 2197                 td->td_retval[0] = 0;
 2198         }
 2199         if (uap->sbytes != NULL) {
 2200                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
 2201         }
 2202         if (obj != NULL)
 2203                 vm_object_deallocate(obj);
 2204         if (vp != NULL) {
 2205                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2206                 vrele(vp);
 2207                 VFS_UNLOCK_GIANT(vfslocked);
 2208         }
 2209         if (so)
 2210                 fdrop(sock_fp, td);
 2211         if (m)
 2212                 m_freem(m);
 2213 
 2214         if (error == ERESTART)
 2215                 error = EINTR;
 2216 
 2217         return (error);
 2218 }
 2219 
 2220 /*
 2221  * SCTP syscalls.
 2222  * Functionality only compiled in if SCTP is defined in the kernel Makefile,
 2223  * otherwise all return EOPNOTSUPP.
 2224  * XXX: We should make this loadable one day.
 2225  */
 2226 int
 2227 sctp_peeloff(td, uap)
 2228         struct thread *td;
 2229         struct sctp_peeloff_args /* {
 2230                 int     sd;
 2231                 caddr_t name;
 2232         } */ *uap;
 2233 {
 2234 #ifdef SCTP
 2235         struct filedesc *fdp;
 2236         struct file *nfp = NULL;
 2237         int error;
 2238         struct socket *head, *so;
 2239         int fd;
 2240         u_int fflag;
 2241 
 2242         fdp = td->td_proc->p_fd;
 2243         error = fgetsock(td, uap->sd, &head, &fflag);
 2244         if (error)
 2245                 goto done2;
 2246         error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
 2247         if (error)
 2248                 goto done2;
 2249         /*
 2250          * At this point we know we do have a assoc to pull
 2251          * we proceed to get the fd setup. This may block
 2252          * but that is ok.
 2253          */
 2254 
 2255         error = falloc(td, &nfp, &fd);
 2256         if (error)
 2257                 goto done;
 2258         td->td_retval[0] = fd;
 2259 
 2260         so = sonewconn(head, SS_ISCONNECTED);
 2261         if (so == NULL) 
 2262                 goto noconnection;
 2263         /*
 2264          * Before changing the flags on the socket, we have to bump the
 2265          * reference count.  Otherwise, if the protocol calls sofree(),
 2266          * the socket will be released due to a zero refcount.
 2267          */
 2268         SOCK_LOCK(so);
 2269         soref(so);                      /* file descriptor reference */
 2270         SOCK_UNLOCK(so);
 2271 
 2272         ACCEPT_LOCK();
 2273 
 2274         TAILQ_REMOVE(&head->so_comp, so, so_list);
 2275         head->so_qlen--;
 2276         so->so_state |= (head->so_state & SS_NBIO);
 2277         so->so_state &= ~SS_NOFDREF;
 2278         so->so_qstate &= ~SQ_COMP;
 2279         so->so_head = NULL;
 2280         ACCEPT_UNLOCK();
 2281         FILE_LOCK(nfp);
 2282         nfp->f_data = so;
 2283         nfp->f_flag = fflag;
 2284         nfp->f_type = DTYPE_SOCKET;
 2285         nfp->f_ops = &socketops;
 2286         FILE_UNLOCK(nfp);
 2287         error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
 2288         if (error)
 2289                 goto noconnection;
 2290         if (head->so_sigio != NULL)
 2291                 fsetown(fgetown(&head->so_sigio), &so->so_sigio);
 2292 
 2293 noconnection:
 2294         /*
 2295          * close the new descriptor, assuming someone hasn't ripped it
 2296          * out from under us.
 2297          */
 2298         if (error)
 2299                 fdclose(fdp, nfp, fd, td);
 2300 
 2301         /*
 2302          * Release explicitly held references before returning.
 2303          */
 2304 done:
 2305         if (nfp != NULL)
 2306                 fdrop(nfp, td);
 2307         fputsock(head);
 2308 done2:
 2309         return (error);
 2310 #else  /* SCTP */
 2311         return (EOPNOTSUPP);
 2312 #endif /* SCTP */
 2313 }
 2314 
 2315 int
 2316 sctp_generic_sendmsg (td, uap)
 2317         struct thread *td;
 2318         struct sctp_generic_sendmsg_args /* {
 2319                 int sd, 
 2320                 caddr_t msg, 
 2321                 int mlen, 
 2322                 caddr_t to, 
 2323                 __socklen_t tolen, 
 2324                 struct sctp_sndrcvinfo *sinfo, 
 2325                 int flags
 2326         } */ *uap;
 2327 {
 2328 #ifdef SCTP
 2329         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2330         struct socket *so;
 2331         struct file *fp = NULL;
 2332         int use_rcvinfo = 1;
 2333         int error = 0, len;
 2334         struct sockaddr *to = NULL;
 2335 #ifdef KTRACE
 2336         struct uio *ktruio = NULL;
 2337 #endif
 2338         struct uio auio;
 2339         struct iovec iov[1];
 2340 
 2341         if (uap->sinfo) {
 2342                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2343                 if (error)
 2344                         return (error);
 2345                 u_sinfo = &sinfo;
 2346         }
 2347         if (uap->tolen) {
 2348                 error = getsockaddr(&to, uap->to, uap->tolen);
 2349                 if (error) {
 2350                         to = NULL;
 2351                         goto sctp_bad2;
 2352                 }
 2353         }
 2354 
 2355         error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
 2356         if (error)
 2357                 goto sctp_bad;
 2358 
 2359         iov[0].iov_base = uap->msg;
 2360         iov[0].iov_len = uap->mlen;
 2361 
 2362         so = (struct socket *)fp->f_data;
 2363 #ifdef MAC
 2364         SOCK_LOCK(so);
 2365         error = mac_check_socket_send(td->td_ucred, so);
 2366         SOCK_UNLOCK(so);
 2367         if (error)
 2368                 goto sctp_bad;
 2369 #endif /* MAC */
 2370 
 2371         auio.uio_iov =  iov;
 2372         auio.uio_iovcnt = 1;
 2373         auio.uio_segflg = UIO_USERSPACE;
 2374         auio.uio_rw = UIO_WRITE;
 2375         auio.uio_td = td;
 2376         auio.uio_offset = 0;                    /* XXX */
 2377         auio.uio_resid = 0;
 2378         len = auio.uio_resid = uap->mlen;
 2379         error = sctp_lower_sosend(so, to, &auio,
 2380                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2381                     uap->flags, use_rcvinfo, u_sinfo, td);
 2382         if (error) {
 2383                 if (auio.uio_resid != len && (error == ERESTART ||
 2384                     error == EINTR || error == EWOULDBLOCK))
 2385                         error = 0;
 2386                 /* Generation of SIGPIPE can be controlled per socket. */
 2387                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2388                     !(uap->flags & MSG_NOSIGNAL)) {
 2389                         PROC_LOCK(td->td_proc);
 2390                         psignal(td->td_proc, SIGPIPE);
 2391                         PROC_UNLOCK(td->td_proc);
 2392                 }
 2393         }
 2394         if (error == 0)
 2395                 td->td_retval[0] = len - auio.uio_resid;
 2396 #ifdef KTRACE
 2397         if (ktruio != NULL) {
 2398                 ktruio->uio_resid = td->td_retval[0];
 2399                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2400         }
 2401 #endif /* KTRACE */
 2402 sctp_bad:
 2403         if (fp)
 2404                 fdrop(fp, td);
 2405 sctp_bad2:
 2406         if (to)
 2407                 free(to, M_SONAME);
 2408         return (error);
 2409 #else  /* SCTP */
 2410         return (EOPNOTSUPP);
 2411 #endif /* SCTP */
 2412 }
 2413 
 2414 int
 2415 sctp_generic_sendmsg_iov(td, uap)
 2416         struct thread *td;
 2417         struct sctp_generic_sendmsg_iov_args /* {
 2418                 int sd, 
 2419                 struct iovec *iov, 
 2420                 int iovlen, 
 2421                 caddr_t to, 
 2422                 __socklen_t tolen, 
 2423                 struct sctp_sndrcvinfo *sinfo, 
 2424                 int flags
 2425         } */ *uap;
 2426 {
 2427 #ifdef SCTP
 2428         struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL;
 2429         struct socket *so;
 2430         struct file *fp = NULL;
 2431         int use_rcvinfo = 1;
 2432         int error=0, len, i;
 2433         struct sockaddr *to = NULL;
 2434 #ifdef KTRACE
 2435         struct uio *ktruio = NULL;
 2436 #endif
 2437         struct uio auio;
 2438         struct iovec *iov, *tiov;
 2439 
 2440         if (uap->sinfo) {
 2441                 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo));
 2442                 if (error)
 2443                         return (error);
 2444                 u_sinfo = &sinfo;
 2445         }
 2446         if (uap->tolen) {
 2447                 error = getsockaddr(&to, uap->to, uap->tolen);
 2448                 if (error) {
 2449                         to = NULL;
 2450                         goto sctp_bad2;
 2451                 }
 2452         }
 2453 
 2454         error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
 2455         if (error)
 2456                 goto sctp_bad1;
 2457 
 2458         error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2459         if (error)
 2460                 goto sctp_bad1;
 2461 
 2462         so = (struct socket *)fp->f_data;
 2463 #ifdef MAC
 2464         SOCK_LOCK(so);
 2465         error = mac_check_socket_send(td->td_ucred, so);
 2466         SOCK_UNLOCK(so);
 2467         if (error)
 2468                 goto sctp_bad;
 2469 #endif /* MAC */
 2470 
 2471         auio.uio_iov =  iov;
 2472         auio.uio_iovcnt = uap->iovlen;
 2473         auio.uio_segflg = UIO_USERSPACE;
 2474         auio.uio_rw = UIO_WRITE;
 2475         auio.uio_td = td;
 2476         auio.uio_offset = 0;                    /* XXX */
 2477         auio.uio_resid = 0;
 2478         tiov = iov;
 2479         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2480                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2481                         error = EINVAL;
 2482                         goto sctp_bad;
 2483                 }
 2484         }
 2485         len = auio.uio_resid;
 2486         error = sctp_lower_sosend(so, to, &auio,
 2487                     (struct mbuf *)NULL, (struct mbuf *)NULL,
 2488                     uap->flags, use_rcvinfo, u_sinfo, td);
 2489         if (error) {
 2490                 if (auio.uio_resid != len && (error == ERESTART ||
 2491                     error == EINTR || error == EWOULDBLOCK))
 2492                         error = 0;
 2493                 /* Generation of SIGPIPE can be controlled per socket */
 2494                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 2495                     !(uap->flags & MSG_NOSIGNAL)) {
 2496                         PROC_LOCK(td->td_proc);
 2497                         psignal(td->td_proc, SIGPIPE);
 2498                         PROC_UNLOCK(td->td_proc);
 2499                 }
 2500         }
 2501         if (error == 0)
 2502                 td->td_retval[0] = len - auio.uio_resid;
 2503 #ifdef KTRACE
 2504         if (ktruio != NULL) {
 2505                 ktruio->uio_resid = td->td_retval[0];
 2506                 ktrgenio(uap->sd, UIO_WRITE, ktruio, error);
 2507         }
 2508 #endif /* KTRACE */
 2509 sctp_bad:
 2510         free(iov, M_IOV);
 2511 sctp_bad1:
 2512         if (fp)
 2513                 fdrop(fp, td);
 2514 sctp_bad2:
 2515         if (to)
 2516                 free(to, M_SONAME);
 2517         return (error);
 2518 #else  /* SCTP */
 2519         return (EOPNOTSUPP);
 2520 #endif /* SCTP */
 2521 }
 2522 
 2523 int
 2524 sctp_generic_recvmsg(td, uap)
 2525         struct thread *td;
 2526         struct sctp_generic_recvmsg_args /* {
 2527                 int sd, 
 2528                 struct iovec *iov, 
 2529                 int iovlen,
 2530                 struct sockaddr *from, 
 2531                 __socklen_t *fromlenaddr,
 2532                 struct sctp_sndrcvinfo *sinfo, 
 2533                 int *msg_flags
 2534         } */ *uap;
 2535 {
 2536 #ifdef SCTP
 2537         u_int8_t sockbufstore[256];
 2538         struct uio auio;
 2539         struct iovec *iov, *tiov;
 2540         struct sctp_sndrcvinfo sinfo;
 2541         struct socket *so;
 2542         struct file *fp = NULL;
 2543         struct sockaddr *fromsa;
 2544         int fromlen;
 2545         int len, i, msg_flags;
 2546         int error = 0;
 2547 #ifdef KTRACE
 2548         struct uio *ktruio = NULL;
 2549 #endif
 2550         error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL);
 2551         if (error) {
 2552                 return (error);
 2553         }
 2554         error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 2555         if (error) {
 2556                 goto out1;
 2557         }
 2558 
 2559         so = fp->f_data;
 2560 #ifdef MAC
 2561         SOCK_LOCK(so);
 2562         error = mac_check_socket_receive(td->td_ucred, so);
 2563         SOCK_UNLOCK(so);
 2564         if (error) {
 2565                 goto out;
 2566                 return (error);
 2567         }
 2568 #endif /* MAC */
 2569 
 2570         if (uap->fromlenaddr) {
 2571                 error = copyin(uap->fromlenaddr,
 2572                     &fromlen, sizeof (fromlen));
 2573                 if (error) {
 2574                         goto out;
 2575                 }
 2576         } else {
 2577                 fromlen = 0;
 2578         }
 2579         if(uap->msg_flags) {
 2580                 error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
 2581                 if (error) {
 2582                         goto out;
 2583                 }
 2584         } else {
 2585                 msg_flags = 0;
 2586         }
 2587         auio.uio_iov = iov;
 2588         auio.uio_iovcnt = uap->iovlen;
 2589         auio.uio_segflg = UIO_USERSPACE;
 2590         auio.uio_rw = UIO_READ;
 2591         auio.uio_td = td;
 2592         auio.uio_offset = 0;                    /* XXX */
 2593         auio.uio_resid = 0;
 2594         tiov = iov;
 2595         for (i = 0; i <uap->iovlen; i++, tiov++) {
 2596                 if ((auio.uio_resid += tiov->iov_len) < 0) {
 2597                         error = EINVAL;
 2598                         goto out;
 2599                 }
 2600         }
 2601         len = auio.uio_resid;
 2602         fromsa = (struct sockaddr *)sockbufstore;
 2603 
 2604 #ifdef KTRACE
 2605         if (KTRPOINT(td, KTR_GENIO))
 2606                 ktruio = cloneuio(&auio);
 2607 #endif /* KTRACE */
 2608         error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
 2609                     fromsa, fromlen, &msg_flags,
 2610                     (struct sctp_sndrcvinfo *)&sinfo, 1);
 2611         if (error) {
 2612                 if (auio.uio_resid != (int)len && (error == ERESTART ||
 2613                     error == EINTR || error == EWOULDBLOCK))
 2614                         error = 0;
 2615         } else {
 2616                 if (uap->sinfo)
 2617                         error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
 2618         }
 2619 #ifdef KTRACE
 2620         if (ktruio != NULL) {
 2621                 ktruio->uio_resid = (int)len - auio.uio_resid;
 2622                 ktrgenio(uap->sd, UIO_READ, ktruio, error);
 2623         }
 2624 #endif /* KTRACE */
 2625         if (error)
 2626                 goto out;
 2627         td->td_retval[0] = (int)len - auio.uio_resid;
 2628 
 2629         if (fromlen && uap->from) {
 2630                 len = fromlen;
 2631                 if (len <= 0 || fromsa == 0)
 2632                         len = 0;
 2633                 else {
 2634                         len = MIN(len, fromsa->sa_len);
 2635                         error = copyout(fromsa, uap->from, (unsigned)len);
 2636                         if (error)
 2637                                 goto out;
 2638                 }
 2639                 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t));
 2640                 if (error) {
 2641                         goto out;
 2642                 }
 2643         }
 2644         if (uap->msg_flags) {
 2645                 error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
 2646                 if (error) {
 2647                         goto out;
 2648                 }
 2649         }
 2650 out:
 2651         free(iov, M_IOV);
 2652 out1:
 2653         if (fp) 
 2654                 fdrop(fp, td);
 2655 
 2656         return (error);
 2657 #else  /* SCTP */
 2658         return (EOPNOTSUPP);
 2659 #endif /* SCTP */
 2660 }

Cache object: 04301925fdcd1b6b0b621666bf6affd7


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.