[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_syscalls.c

Version: -  FREEBSD  -  FREEBSD10  -  FREEBSD9  -  FREEBSD92  -  FREEBSD91  -  FREEBSD90  -  FREEBSD8  -  FREEBSD82  -  FREEBSD81  -  FREEBSD80  -  FREEBSD7  -  FREEBSD74  -  FREEBSD73  -  FREEBSD72  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  cheribsd  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1  -  FREEBSD-LIBC  -  FREEBSD8-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * sendfile(2) and related extensions:
    6  * Copyright (c) 1998, David Greenman. All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.221.2.1.2.1 2008/02/14 11:47:39 simon Exp $");
   37 
   38 #include "opt_compat.h"
   39 #include "opt_ktrace.h"
   40 #include "opt_mac.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/kernel.h>
   45 #include <sys/lock.h>
   46 #include <sys/mac.h>
   47 #include <sys/mutex.h>
   48 #include <sys/sysproto.h>
   49 #include <sys/malloc.h>
   50 #include <sys/filedesc.h>
   51 #include <sys/event.h>
   52 #include <sys/proc.h>
   53 #include <sys/fcntl.h>
   54 #include <sys/file.h>
   55 #include <sys/filio.h>
   56 #include <sys/mount.h>
   57 #include <sys/mbuf.h>
   58 #include <sys/protosw.h>
   59 #include <sys/sf_buf.h>
   60 #include <sys/socket.h>
   61 #include <sys/socketvar.h>
   62 #include <sys/signalvar.h>
   63 #include <sys/syscallsubr.h>
   64 #include <sys/sysctl.h>
   65 #include <sys/uio.h>
   66 #include <sys/vnode.h>
   67 #ifdef KTRACE
   68 #include <sys/ktrace.h>
   69 #endif
   70 
   71 #include <vm/vm.h>
   72 #include <vm/vm_object.h>
   73 #include <vm/vm_page.h>
   74 #include <vm/vm_pageout.h>
   75 #include <vm/vm_kern.h>
   76 #include <vm/vm_extern.h>
   77 
   78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
   79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
   80 
   81 static int accept1(struct thread *td, struct accept_args *uap, int compat);
   82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
   83 static int getsockname1(struct thread *td, struct getsockname_args *uap,
   84                         int compat);
   85 static int getpeername1(struct thread *td, struct getpeername_args *uap,
   86                         int compat);
   87 
   88 /*
   89  * NSFBUFS-related variables and associated sysctls
   90  */
   91 int nsfbufs;
   92 int nsfbufspeak;
   93 int nsfbufsused;
   94 
   95 SYSCTL_DECL(_kern_ipc);
   96 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
   97     "Maximum number of sendfile(2) sf_bufs available");
   98 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
   99     "Number of sendfile(2) sf_bufs at peak usage");
  100 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
  101     "Number of sendfile(2) sf_bufs in use");
  102 
  103 /*
  104  * Convert a user file descriptor to a kernel file entry.  A reference on the
  105  * file entry is held upon returning.  This is lighter weight than
  106  * fgetsock(), which bumps the socket reference drops the file reference
  107  * count instead, as this approach avoids several additional mutex operations
  108  * associated with the additional reference count.
  109  */
  110 static int
  111 getsock(struct filedesc *fdp, int fd, struct file **fpp)
  112 {
  113         struct file *fp;
  114         int error;
  115 
  116         fp = NULL;
  117         if (fdp == NULL)
  118                 error = EBADF;
  119         else {
  120                 FILEDESC_LOCK_FAST(fdp);
  121                 fp = fget_locked(fdp, fd);
  122                 if (fp == NULL)
  123                         error = EBADF;
  124                 else if (fp->f_type != DTYPE_SOCKET) {
  125                         fp = NULL;
  126                         error = ENOTSOCK;
  127                 } else {
  128                         fhold(fp);
  129                         error = 0;
  130                 }
  131                 FILEDESC_UNLOCK_FAST(fdp);
  132         }
  133         *fpp = fp;
  134         return (error);
  135 }
  136 
  137 /*
  138  * System call interface to the socket abstraction.
  139  */
  140 #if defined(COMPAT_43)
  141 #define COMPAT_OLDSOCK
  142 #endif
  143 
  144 /*
  145  * MPSAFE
  146  */
  147 int
  148 socket(td, uap)
  149         struct thread *td;
  150         register struct socket_args /* {
  151                 int     domain;
  152                 int     type;
  153                 int     protocol;
  154         } */ *uap;
  155 {
  156         struct filedesc *fdp;
  157         struct socket *so;
  158         struct file *fp;
  159         int fd, error;
  160 
  161 #ifdef MAC
  162         error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
  163             uap->protocol);
  164         if (error)
  165                 return (error);
  166 #endif
  167         fdp = td->td_proc->p_fd;
  168         error = falloc(td, &fp, &fd);
  169         if (error)
  170                 return (error);
  171         /* An extra reference on `fp' has been held for us by falloc(). */
  172         NET_LOCK_GIANT();
  173         error = socreate(uap->domain, &so, uap->type, uap->protocol,
  174             td->td_ucred, td);
  175         NET_UNLOCK_GIANT();
  176         if (error) {
  177                 fdclose(fdp, fp, fd, td);
  178         } else {
  179                 FILEDESC_LOCK_FAST(fdp);
  180                 fp->f_data = so;        /* already has ref count */
  181                 fp->f_flag = FREAD|FWRITE;
  182                 fp->f_ops = &socketops;
  183                 fp->f_type = DTYPE_SOCKET;
  184                 FILEDESC_UNLOCK_FAST(fdp);
  185                 td->td_retval[0] = fd;
  186         }
  187         fdrop(fp, td);
  188         return (error);
  189 }
  190 
  191 /*
  192  * MPSAFE
  193  */
  194 /* ARGSUSED */
  195 int
  196 bind(td, uap)
  197         struct thread *td;
  198         register struct bind_args /* {
  199                 int     s;
  200                 caddr_t name;
  201                 int     namelen;
  202         } */ *uap;
  203 {
  204         struct sockaddr *sa;
  205         int error;
  206 
  207         if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
  208                 return (error);
  209 
  210         return (kern_bind(td, uap->s, sa));
  211 }
  212 
  213 int
  214 kern_bind(td, fd, sa)
  215         struct thread *td;
  216         int fd;
  217         struct sockaddr *sa;
  218 {
  219         struct socket *so;
  220         struct file *fp;
  221         int error;
  222 
  223         NET_LOCK_GIANT();
  224         error = getsock(td->td_proc->p_fd, fd, &fp);
  225         if (error)
  226                 goto done2;
  227         so = fp->f_data;
  228 #ifdef MAC
  229         SOCK_LOCK(so);
  230         error = mac_check_socket_bind(td->td_ucred, so, sa);
  231         SOCK_UNLOCK(so);
  232         if (error)
  233                 goto done1;
  234 #endif
  235         error = sobind(so, sa, td);
  236 #ifdef MAC
  237 done1:
  238 #endif
  239         fdrop(fp, td);
  240 done2:
  241         NET_UNLOCK_GIANT();
  242         FREE(sa, M_SONAME);
  243         return (error);
  244 }
  245 
  246 /*
  247  * MPSAFE
  248  */
  249 /* ARGSUSED */
  250 int
  251 listen(td, uap)
  252         struct thread *td;
  253         register struct listen_args /* {
  254                 int     s;
  255                 int     backlog;
  256         } */ *uap;
  257 {
  258         struct socket *so;
  259         struct file *fp;
  260         int error;
  261 
  262         NET_LOCK_GIANT();
  263         error = getsock(td->td_proc->p_fd, uap->s, &fp);
  264         if (error == 0) {
  265                 so = fp->f_data;
  266 #ifdef MAC
  267                 SOCK_LOCK(so);
  268                 error = mac_check_socket_listen(td->td_ucred, so);
  269                 SOCK_UNLOCK(so);
  270                 if (error)
  271                         goto done;
  272 #endif
  273                 error = solisten(so, uap->backlog, td);
  274 #ifdef MAC
  275 done:
  276 #endif
  277                 fdrop(fp, td);
  278         }
  279         NET_UNLOCK_GIANT();
  280         return(error);
  281 }
  282 
  283 /*
  284  * accept1()
  285  * MPSAFE
  286  */
  287 static int
  288 accept1(td, uap, compat)
  289         struct thread *td;
  290         register struct accept_args /* {
  291                 int     s;
  292                 struct sockaddr * __restrict name;
  293                 socklen_t       * __restrict anamelen;
  294         } */ *uap;
  295         int compat;
  296 {
  297         struct filedesc *fdp;
  298         struct file *nfp = NULL;
  299         struct sockaddr *sa = NULL;
  300         socklen_t namelen;
  301         int error;
  302         struct socket *head, *so;
  303         int fd;
  304         u_int fflag;
  305         pid_t pgid;
  306         int tmp;
  307 
  308         fdp = td->td_proc->p_fd;
  309         if (uap->name) {
  310                 error = copyin(uap->anamelen, &namelen, sizeof (namelen));
  311                 if(error)
  312                         return (error);
  313                 if (namelen < 0)
  314                         return (EINVAL);
  315         }
  316         NET_LOCK_GIANT();
  317         error = fgetsock(td, uap->s, &head, &fflag);
  318         if (error)
  319                 goto done2;
  320         if ((head->so_options & SO_ACCEPTCONN) == 0) {
  321                 error = EINVAL;
  322                 goto done;
  323         }
  324 #ifdef MAC
  325         SOCK_LOCK(head);
  326         error = mac_check_socket_accept(td->td_ucred, head);
  327         SOCK_UNLOCK(head);
  328         if (error != 0)
  329                 goto done;
  330 #endif
  331         error = falloc(td, &nfp, &fd);
  332         if (error)
  333                 goto done;
  334         ACCEPT_LOCK();
  335         if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
  336                 ACCEPT_UNLOCK();
  337                 error = EWOULDBLOCK;
  338                 goto noconnection;
  339         }
  340         while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
  341                 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
  342                         head->so_error = ECONNABORTED;
  343                         break;
  344                 }
  345                 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
  346                     "accept", 0);
  347                 if (error) {
  348                         ACCEPT_UNLOCK();
  349                         goto noconnection;
  350                 }
  351         }
  352         if (head->so_error) {
  353                 error = head->so_error;
  354                 head->so_error = 0;
  355                 ACCEPT_UNLOCK();
  356                 goto noconnection;
  357         }
  358         so = TAILQ_FIRST(&head->so_comp);
  359         KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
  360         KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
  361 
  362         /*
  363          * Before changing the flags on the socket, we have to bump the
  364          * reference count.  Otherwise, if the protocol calls sofree(),
  365          * the socket will be released due to a zero refcount.
  366          */
  367         SOCK_LOCK(so);                  /* soref() and so_state update */
  368         soref(so);                      /* file descriptor reference */
  369 
  370         TAILQ_REMOVE(&head->so_comp, so, so_list);
  371         head->so_qlen--;
  372         so->so_state |= (head->so_state & SS_NBIO);
  373         so->so_qstate &= ~SQ_COMP;
  374         so->so_head = NULL;
  375 
  376         SOCK_UNLOCK(so);
  377         ACCEPT_UNLOCK();
  378 
  379         /* An extra reference on `nfp' has been held for us by falloc(). */
  380         td->td_retval[0] = fd;
  381 
  382         /* connection has been removed from the listen queue */
  383         KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
  384 
  385         pgid = fgetown(&head->so_sigio);
  386         if (pgid != 0)
  387                 fsetown(pgid, &so->so_sigio);
  388 
  389         FILE_LOCK(nfp);
  390         nfp->f_data = so;       /* nfp has ref count from falloc */
  391         nfp->f_flag = fflag;
  392         nfp->f_ops = &socketops;
  393         nfp->f_type = DTYPE_SOCKET;
  394         FILE_UNLOCK(nfp);
  395         /* Sync socket nonblocking/async state with file flags */
  396         tmp = fflag & FNONBLOCK;
  397         (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
  398         tmp = fflag & FASYNC;
  399         (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
  400         sa = 0;
  401         error = soaccept(so, &sa);
  402         if (error) {
  403                 /*
  404                  * return a namelen of zero for older code which might
  405                  * ignore the return value from accept.
  406                  */
  407                 if (uap->name != NULL) {
  408                         namelen = 0;
  409                         (void) copyout(&namelen,
  410                             uap->anamelen, sizeof(*uap->anamelen));
  411                 }
  412                 goto noconnection;
  413         }
  414         if (sa == NULL) {
  415                 namelen = 0;
  416                 if (uap->name)
  417                         goto gotnoname;
  418                 error = 0;
  419                 goto done;
  420         }
  421         if (uap->name) {
  422                 /* check sa_len before it is destroyed */
  423                 if (namelen > sa->sa_len)
  424                         namelen = sa->sa_len;
  425 #ifdef COMPAT_OLDSOCK
  426                 if (compat)
  427                         ((struct osockaddr *)sa)->sa_family =
  428                             sa->sa_family;
  429 #endif
  430                 error = copyout(sa, uap->name, (u_int)namelen);
  431                 if (!error)
  432 gotnoname:
  433                         error = copyout(&namelen,
  434                             uap->anamelen, sizeof (*uap->anamelen));
  435         }
  436 noconnection:
  437         if (sa)
  438                 FREE(sa, M_SONAME);
  439 
  440         /*
  441          * close the new descriptor, assuming someone hasn't ripped it
  442          * out from under us.
  443          */
  444         if (error)
  445                 fdclose(fdp, nfp, fd, td);
  446 
  447         /*
  448          * Release explicitly held references before returning.
  449          */
  450 done:
  451         if (nfp != NULL)
  452                 fdrop(nfp, td);
  453         fputsock(head);
  454 done2:
  455         NET_UNLOCK_GIANT();
  456         return (error);
  457 }
  458 
  459 /*
  460  * MPSAFE (accept1() is MPSAFE)
  461  */
  462 int
  463 accept(td, uap)
  464         struct thread *td;
  465         struct accept_args *uap;
  466 {
  467 
  468         return (accept1(td, uap, 0));
  469 }
  470 
  471 #ifdef COMPAT_OLDSOCK
  472 /*
  473  * MPSAFE (accept1() is MPSAFE)
  474  */
  475 int
  476 oaccept(td, uap)
  477         struct thread *td;
  478         struct accept_args *uap;
  479 {
  480 
  481         return (accept1(td, uap, 1));
  482 }
  483 #endif /* COMPAT_OLDSOCK */
  484 
  485 /*
  486  * MPSAFE
  487  */
  488 /* ARGSUSED */
  489 int
  490 connect(td, uap)
  491         struct thread *td;
  492         register struct connect_args /* {
  493                 int     s;
  494                 caddr_t name;
  495                 int     namelen;
  496         } */ *uap;
  497 {
  498         struct sockaddr *sa;
  499         int error;
  500 
  501         error = getsockaddr(&sa, uap->name, uap->namelen);
  502         if (error)
  503                 return (error);
  504 
  505         return (kern_connect(td, uap->s, sa));
  506 }
  507 
  508 
  509 int
  510 kern_connect(td, fd, sa)
  511         struct thread *td;
  512         int fd;
  513         struct sockaddr *sa;
  514 {
  515         struct socket *so;
  516         struct file *fp;
  517         int error;
  518         int interrupted = 0;
  519 
  520         NET_LOCK_GIANT();
  521         error = getsock(td->td_proc->p_fd, fd, &fp);
  522         if (error)
  523                 goto done2;
  524         so = fp->f_data;
  525         if (so->so_state & SS_ISCONNECTING) {
  526                 error = EALREADY;
  527                 goto done1;
  528         }
  529 #ifdef MAC
  530         SOCK_LOCK(so);
  531         error = mac_check_socket_connect(td->td_ucred, so, sa);
  532         SOCK_UNLOCK(so);
  533         if (error)
  534                 goto bad;
  535 #endif
  536         error = soconnect(so, sa, td);
  537         if (error)
  538                 goto bad;
  539         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
  540                 error = EINPROGRESS;
  541                 goto done1;
  542         }
  543         SOCK_LOCK(so);
  544         while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  545                 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
  546                     "connec", 0);
  547                 if (error) {
  548                         if (error == EINTR || error == ERESTART)
  549                                 interrupted = 1;
  550                         break;
  551                 }
  552         }
  553         if (error == 0) {
  554                 error = so->so_error;
  555                 so->so_error = 0;
  556         }
  557         SOCK_UNLOCK(so);
  558 bad:
  559         if (!interrupted)
  560                 so->so_state &= ~SS_ISCONNECTING;
  561         if (error == ERESTART)
  562                 error = EINTR;
  563 done1:
  564         fdrop(fp, td);
  565 done2:
  566         NET_UNLOCK_GIANT();
  567         FREE(sa, M_SONAME);
  568         return (error);
  569 }
  570 
  571 /*
  572  * MPSAFE
  573  */
  574 int
  575 socketpair(td, uap)
  576         struct thread *td;
  577         register struct socketpair_args /* {
  578                 int     domain;
  579                 int     type;
  580                 int     protocol;
  581                 int     *rsv;
  582         } */ *uap;
  583 {
  584         register struct filedesc *fdp = td->td_proc->p_fd;
  585         struct file *fp1, *fp2;
  586         struct socket *so1, *so2;
  587         int fd, error, sv[2];
  588 
  589 #ifdef MAC
  590         /* We might want to have a separate check for socket pairs. */
  591         error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
  592             uap->protocol);
  593         if (error)
  594                 return (error);
  595 #endif
  596 
  597         NET_LOCK_GIANT();
  598         error = socreate(uap->domain, &so1, uap->type, uap->protocol,
  599             td->td_ucred, td);
  600         if (error)
  601                 goto done2;
  602         error = socreate(uap->domain, &so2, uap->type, uap->protocol,
  603             td->td_ucred, td);
  604         if (error)
  605                 goto free1;
  606         /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
  607         error = falloc(td, &fp1, &fd);
  608         if (error)
  609                 goto free2;
  610         sv[0] = fd;
  611         fp1->f_data = so1;      /* so1 already has ref count */
  612         error = falloc(td, &fp2, &fd);
  613         if (error)
  614                 goto free3;
  615         fp2->f_data = so2;      /* so2 already has ref count */
  616         sv[1] = fd;
  617         error = soconnect2(so1, so2);
  618         if (error)
  619                 goto free4;
  620         if (uap->type == SOCK_DGRAM) {
  621                 /*
  622                  * Datagram socket connection is asymmetric.
  623                  */
  624                  error = soconnect2(so2, so1);
  625                  if (error)
  626                         goto free4;
  627         }
  628         FILE_LOCK(fp1);
  629         fp1->f_flag = FREAD|FWRITE;
  630         fp1->f_ops = &socketops;
  631         fp1->f_type = DTYPE_SOCKET;
  632         FILE_UNLOCK(fp1);
  633         FILE_LOCK(fp2);
  634         fp2->f_flag = FREAD|FWRITE;
  635         fp2->f_ops = &socketops;
  636         fp2->f_type = DTYPE_SOCKET;
  637         FILE_UNLOCK(fp2);
  638         error = copyout(sv, uap->rsv, 2 * sizeof (int));
  639         fdrop(fp1, td);
  640         fdrop(fp2, td);
  641         goto done2;
  642 free4:
  643         fdclose(fdp, fp2, sv[1], td);
  644         fdrop(fp2, td);
  645 free3:
  646         fdclose(fdp, fp1, sv[0], td);
  647         fdrop(fp1, td);
  648 free2:
  649         (void)soclose(so2);
  650 free1:
  651         (void)soclose(so1);
  652 done2:
  653         NET_UNLOCK_GIANT();
  654         return (error);
  655 }
  656 
  657 static int
  658 sendit(td, s, mp, flags)
  659         register struct thread *td;
  660         int s;
  661         register struct msghdr *mp;
  662         int flags;
  663 {
  664         struct mbuf *control;
  665         struct sockaddr *to;
  666         int error;
  667 
  668         if (mp->msg_name != NULL) {
  669                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
  670                 if (error) {
  671                         to = NULL;
  672                         goto bad;
  673                 }
  674                 mp->msg_name = to;
  675         } else {
  676                 to = NULL;
  677         }
  678 
  679         if (mp->msg_control) {
  680                 if (mp->msg_controllen < sizeof(struct cmsghdr)
  681 #ifdef COMPAT_OLDSOCK
  682                     && mp->msg_flags != MSG_COMPAT
  683 #endif
  684                 ) {
  685                         error = EINVAL;
  686                         goto bad;
  687                 }
  688                 error = sockargs(&control, mp->msg_control,
  689                     mp->msg_controllen, MT_CONTROL);
  690                 if (error)
  691                         goto bad;
  692 #ifdef COMPAT_OLDSOCK
  693                 if (mp->msg_flags == MSG_COMPAT) {
  694                         register struct cmsghdr *cm;
  695 
  696                         M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
  697                         if (control == 0) {
  698                                 error = ENOBUFS;
  699                                 goto bad;
  700                         } else {
  701                                 cm = mtod(control, struct cmsghdr *);
  702                                 cm->cmsg_len = control->m_len;
  703                                 cm->cmsg_level = SOL_SOCKET;
  704                                 cm->cmsg_type = SCM_RIGHTS;
  705                         }
  706                 }
  707 #endif
  708         } else {
  709                 control = NULL;
  710         }
  711 
  712         error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
  713 
  714 bad:
  715         if (to)
  716                 FREE(to, M_SONAME);
  717         return (error);
  718 }
  719 
  720 int
  721 kern_sendit(td, s, mp, flags, control, segflg)
  722         struct thread *td;
  723         int s;
  724         struct msghdr *mp;
  725         int flags;
  726         struct mbuf *control;
  727         enum uio_seg segflg;
  728 {
  729         struct file *fp;
  730         struct uio auio;
  731         struct iovec *iov;
  732         struct socket *so;
  733         int i;
  734         int len, error;
  735 #ifdef KTRACE
  736         struct uio *ktruio = NULL;
  737 #endif
  738 
  739         NET_LOCK_GIANT();
  740         error = getsock(td->td_proc->p_fd, s, &fp);
  741         if (error)
  742                 goto bad2;
  743         so = (struct socket *)fp->f_data;
  744 
  745 #ifdef MAC
  746         SOCK_LOCK(so);
  747         error = mac_check_socket_send(td->td_ucred, so);
  748         SOCK_UNLOCK(so);
  749         if (error)
  750                 goto bad;
  751 #endif
  752 
  753         auio.uio_iov = mp->msg_iov;
  754         auio.uio_iovcnt = mp->msg_iovlen;
  755         auio.uio_segflg = segflg;
  756         auio.uio_rw = UIO_WRITE;
  757         auio.uio_td = td;
  758         auio.uio_offset = 0;                    /* XXX */
  759         auio.uio_resid = 0;
  760         iov = mp->msg_iov;
  761         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  762                 if ((auio.uio_resid += iov->iov_len) < 0) {
  763                         error = EINVAL;
  764                         goto bad;
  765                 }
  766         }
  767 #ifdef KTRACE
  768         if (KTRPOINT(td, KTR_GENIO))
  769                 ktruio = cloneuio(&auio);
  770 #endif
  771         len = auio.uio_resid;
  772         error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
  773             0, control, flags, td);
  774         if (error) {
  775                 if (auio.uio_resid != len && (error == ERESTART ||
  776                     error == EINTR || error == EWOULDBLOCK))
  777                         error = 0;
  778                 /* Generation of SIGPIPE can be controlled per socket */
  779                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
  780                     !(flags & MSG_NOSIGNAL)) {
  781                         PROC_LOCK(td->td_proc);
  782                         psignal(td->td_proc, SIGPIPE);
  783                         PROC_UNLOCK(td->td_proc);
  784                 }
  785         }
  786         if (error == 0)
  787                 td->td_retval[0] = len - auio.uio_resid;
  788 #ifdef KTRACE
  789         if (ktruio != NULL) {
  790                 ktruio->uio_resid = td->td_retval[0];
  791                 ktrgenio(s, UIO_WRITE, ktruio, error);
  792         }
  793 #endif
  794 bad:
  795         fdrop(fp, td);
  796 bad2:
  797         NET_UNLOCK_GIANT();
  798         return (error);
  799 }
  800 
  801 /*
  802  * MPSAFE
  803  */
  804 int
  805 sendto(td, uap)
  806         struct thread *td;
  807         register struct sendto_args /* {
  808                 int     s;
  809                 caddr_t buf;
  810                 size_t  len;
  811                 int     flags;
  812                 caddr_t to;
  813                 int     tolen;
  814         } */ *uap;
  815 {
  816         struct msghdr msg;
  817         struct iovec aiov;
  818         int error;
  819 
  820         msg.msg_name = uap->to;
  821         msg.msg_namelen = uap->tolen;
  822         msg.msg_iov = &aiov;
  823         msg.msg_iovlen = 1;
  824         msg.msg_control = 0;
  825 #ifdef COMPAT_OLDSOCK
  826         msg.msg_flags = 0;
  827 #endif
  828         aiov.iov_base = uap->buf;
  829         aiov.iov_len = uap->len;
  830         error = sendit(td, uap->s, &msg, uap->flags);
  831         return (error);
  832 }
  833 
  834 #ifdef COMPAT_OLDSOCK
  835 /*
  836  * MPSAFE
  837  */
  838 int
  839 osend(td, uap)
  840         struct thread *td;
  841         register struct osend_args /* {
  842                 int     s;
  843                 caddr_t buf;
  844                 int     len;
  845                 int     flags;
  846         } */ *uap;
  847 {
  848         struct msghdr msg;
  849         struct iovec aiov;
  850         int error;
  851 
  852         msg.msg_name = 0;
  853         msg.msg_namelen = 0;
  854         msg.msg_iov = &aiov;
  855         msg.msg_iovlen = 1;
  856         aiov.iov_base = uap->buf;
  857         aiov.iov_len = uap->len;
  858         msg.msg_control = 0;
  859         msg.msg_flags = 0;
  860         error = sendit(td, uap->s, &msg, uap->flags);
  861         return (error);
  862 }
  863 
  864 /*
  865  * MPSAFE
  866  */
  867 int
  868 osendmsg(td, uap)
  869         struct thread *td;
  870         struct osendmsg_args /* {
  871                 int     s;
  872                 caddr_t msg;
  873                 int     flags;
  874         } */ *uap;
  875 {
  876         struct msghdr msg;
  877         struct iovec *iov;
  878         int error;
  879 
  880         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
  881         if (error)
  882                 return (error);
  883         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  884         if (error)
  885                 return (error);
  886         msg.msg_iov = iov;
  887         msg.msg_flags = MSG_COMPAT;
  888         error = sendit(td, uap->s, &msg, uap->flags);
  889         free(iov, M_IOV);
  890         return (error);
  891 }
  892 #endif
  893 
  894 /*
  895  * MPSAFE
       *
       * sendmsg: copy in the caller's struct msghdr and its iovec array,
       * then hand the message to sendit().
       *
       * Returns the first non-zero error from copyin() or copyiniov(),
       * otherwise whatever sendit() returns.
  896  */
  897 int
  898 sendmsg(td, uap)
  899         struct thread *td;
  900         struct sendmsg_args /* {
  901                 int     s;
  902                 caddr_t msg;
  903                 int     flags;
  904         } */ *uap;
  905 {
  906         struct msghdr msg;
  907         struct iovec *iov;
  908         int error;
  909 
  910         error = copyin(uap->msg, &msg, sizeof (msg));
  911         if (error)
  912                 return (error);
              /* Obtain a copy of the iovec array; released with free() below. */
  913         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  914         if (error)
  915                 return (error);
  916         msg.msg_iov = iov;
  917 #ifdef COMPAT_OLDSOCK
              /* Discard the caller-supplied msg_flags so MSG_COMPAT cannot leak in. */
  918         msg.msg_flags = 0;
  919 #endif
  920         error = sendit(td, uap->s, &msg, uap->flags);
  921         free(iov, M_IOV);
  922         return (error);
  923 }
  924 
      /*
       * kern_recvit: receive one message on descriptor `s' into the buffers
       * described by mp->msg_iov / mp->msg_iovlen.
       *
       *   td        calling thread; td->td_retval[0] is set to the number of
       *             bytes received on success.
       *   s         descriptor handed to getsock().
       *   mp        message header.  msg_flags is passed by address to the
       *             protocol's pru_soreceive; msg_namelen and msg_controllen
       *             are updated with the lengths actually produced.
       *   namelenp  if non-NULL, the resulting name length is copyout()'d
       *             to this address.
       *   segflg    stored in uio_segflg for the data buffers.
       *   controlp  if non-NULL, *controlp is cleared on entry and receives
       *             the control mbuf chain on success.  If NULL and
       *             mp->msg_control is set, control data is copied out to
       *             mp->msg_control instead and the chain is freed.
       *
       * Returns 0 or an errno value.  ERESTART, EINTR and EWOULDBLOCK are
       * converted to success when some data had already been transferred.
       */
  925 int
  926 kern_recvit(td, s, mp, namelenp, segflg, controlp)
  927         struct thread *td;
  928         int s;
  929         struct msghdr *mp;
  930         void *namelenp;
  931         enum uio_seg segflg;
  932         struct mbuf **controlp;
  933 {
  934         struct uio auio;
  935         struct iovec *iov;
  936         int i;
  937         socklen_t len;
  938         int error;
  939         struct mbuf *m, *control = 0;
  940         caddr_t ctlbuf;
  941         struct file *fp;
  942         struct socket *so;
  943         struct sockaddr *fromsa = 0;
  944 #ifdef KTRACE
  945         struct uio *ktruio = NULL;
  946 #endif
  947 
  948         if(controlp != NULL)
  949                 *controlp = 0;
  950 
  951         NET_LOCK_GIANT();
  952         error = getsock(td->td_proc->p_fd, s, &fp);
  953         if (error) {
  954                 NET_UNLOCK_GIANT();
  955                 return (error);
  956         }
  957         so = fp->f_data;
  958 
  959 #ifdef MAC
  960         SOCK_LOCK(so);
  961         error = mac_check_socket_receive(td->td_ucred, so);
  962         SOCK_UNLOCK(so);
  963         if (error) {
  964                 fdrop(fp, td);
  965                 NET_UNLOCK_GIANT();
  966                 return (error);
  967         }
  968 #endif
  969 
              /* Describe the destination buffers to the receive routine. */
  970         auio.uio_iov = mp->msg_iov;
  971         auio.uio_iovcnt = mp->msg_iovlen;
  972         auio.uio_segflg = segflg;
  973         auio.uio_rw = UIO_READ;
  974         auio.uio_td = td;
  975         auio.uio_offset = 0;                    /* XXX */
  976         auio.uio_resid = 0;
  977         iov = mp->msg_iov;
              /*
               * Total the iovec lengths into uio_resid; a running total that
               * goes negative is rejected with EINVAL.
               */
  978         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  979                 if ((auio.uio_resid += iov->iov_len) < 0) {
  980                         fdrop(fp, td);
  981                         NET_UNLOCK_GIANT();
  982                         return (EINVAL);
  983                 }
  984         }
  985 #ifdef KTRACE
  986         if (KTRPOINT(td, KTR_GENIO))
  987                 ktruio = cloneuio(&auio);
  988 #endif
              /* Remember the requested size; bytes moved = len - uio_resid. */
  989         len = auio.uio_resid;
              /* Control data is requested only if someone can consume it. */
  990         error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
  991             (struct mbuf **)0, (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
  992             &mp->msg_flags);
  993         if (error) {
                      /* Partial transfer before an interruption counts as success. */
  994                 if (auio.uio_resid != (int)len && (error == ERESTART ||
  995                     error == EINTR || error == EWOULDBLOCK))
  996                         error = 0;
  997         }
  998 #ifdef KTRACE
  999         if (ktruio != NULL) {
 1000                 ktruio->uio_resid = (int)len - auio.uio_resid;
 1001                 ktrgenio(s, UIO_READ, ktruio, error);
 1002         }
 1003 #endif
 1004         if (error)
 1005                 goto out;
 1006         td->td_retval[0] = (int)len - auio.uio_resid;
              /* Source address: copy out at most msg_namelen bytes of it. */
 1007         if (mp->msg_name) {
 1008                 len = mp->msg_namelen;
                      /*
                       * NOTE(review): len is a socklen_t; if that type is
                       * unsigned, "<= 0" only ever matches zero -- confirm.
                       */
 1009                 if (len <= 0 || fromsa == 0)
 1010                         len = 0;
 1011                 else {
 1012                         /* save sa_len before it is destroyed by MSG_COMPAT */
 1013                         len = MIN(len, fromsa->sa_len);
 1014 #ifdef COMPAT_OLDSOCK
 1015                         if (mp->msg_flags & MSG_COMPAT)
 1016                                 ((struct osockaddr *)fromsa)->sa_family =
 1017                                     fromsa->sa_family;
 1018 #endif
 1019                         error = copyout(fromsa, mp->msg_name, (unsigned)len);
 1020                         if (error)
 1021                                 goto out;
 1022                 }
 1023                 mp->msg_namelen = len;
                      /*
                       * Report the name length.  Under COMPAT_OLDSOCK the
                       * "else" below pairs with the "goto out" that follows
                       * the #endif, so a failed copyout is ignored for
                       * MSG_COMPAT callers and fatal for everyone else.
                       */
 1024                 if (namelenp &&
 1025                     (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
 1026 #ifdef COMPAT_OLDSOCK
 1027                         if (mp->msg_flags & MSG_COMPAT)
 1028                                 error = 0;      /* old recvfrom didn't check */
 1029                         else
 1030 #endif
 1031                         goto out;
 1032                 }
 1033         }
              /* Copy control data out only when the caller did not ask for the chain. */
 1034         if (mp->msg_control && controlp == NULL) {
 1035 #ifdef COMPAT_OLDSOCK
 1036                 /*
 1037                  * We assume that old recvmsg calls won't receive access
 1038                  * rights and other control info, esp. as control info
 1039                  * is always optional and those options didn't exist in 4.3.
 1040                  * If we receive rights, trim the cmsghdr; anything else
 1041                  * is tossed.
 1042                  */
 1043                 if (control && mp->msg_flags & MSG_COMPAT) {
 1044                         if (mtod(control, struct cmsghdr *)->cmsg_level !=
 1045                             SOL_SOCKET ||
 1046                             mtod(control, struct cmsghdr *)->cmsg_type !=
 1047                             SCM_RIGHTS) {
 1048                                 mp->msg_controllen = 0;
 1049                                 goto out;
 1050                         }
                              /* Skip the cmsghdr in the first mbuf of the chain. */
 1051                         control->m_len -= sizeof (struct cmsghdr);
 1052                         control->m_data += sizeof (struct cmsghdr);
 1053                 }
 1054 #endif
 1055                 len = mp->msg_controllen;
 1056                 m = control;
 1057                 mp->msg_controllen = 0;
 1058                 ctlbuf = mp->msg_control;
 1059 
                      /*
                       * Walk the control chain, copying each mbuf out until
                       * the chain ends or the caller's buffer is full.  A
                       * short buffer sets MSG_CTRUNC and copies what fits.
                       */
 1060                 while (m && len > 0) {
 1061                         unsigned int tocopy;
 1062 
 1063                         if (len >= m->m_len)
 1064                                 tocopy = m->m_len;
 1065                         else {
 1066                                 mp->msg_flags |= MSG_CTRUNC;
 1067                                 tocopy = len;
 1068                         }
 1069 
 1070                         if ((error = copyout(mtod(m, caddr_t),
 1071                                         ctlbuf, tocopy)) != 0)
 1072                                 goto out;
 1073 
 1074                         ctlbuf += tocopy;
 1075                         len -= tocopy;
 1076                         m = m->m_next;
 1077                 }
                      /* Bytes of control data actually written to the caller. */
 1078                 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
 1079         }
      /* Common exit: drop the file reference and release what was received. */
 1080 out:
 1081         fdrop(fp, td);
 1082         NET_UNLOCK_GIANT();
 1083         if (fromsa)
 1084                 FREE(fromsa, M_SONAME);
 1085 
              /* On success hand the control chain to the caller, else free it. */
 1086         if (error == 0 && controlp != NULL)  
 1087                 *controlp = control;
 1088         else  if (control)
 1089                 m_freem(control);
 1090 
 1091         return (error);
 1092 }
 1093 
      /*
       * recvit: file-local wrapper around kern_recvit() that fixes the
       * segment flag to UIO_USERSPACE and passes a NULL controlp, so control
       * data (if any) is copied out to mp->msg_control.
       * Returns kern_recvit()'s result unchanged.
       */
 1094 static int
 1095 recvit(td, s, mp, namelenp)
 1096         struct thread *td;
 1097         int s;
 1098         struct msghdr *mp;
 1099         void *namelenp;
 1100 {
 1101 
 1102         return (kern_recvit(td, s, mp, namelenp, UIO_USERSPACE, NULL));
 1103 }
 1104 
 1105 /*
 1106  * MPSAFE
       *
       * recvfrom: receive into a single buffer and optionally report the
       * sender's address.  Builds a one-element iovec and a msghdr on the
       * stack and calls recvit().
       *
       * Returns the copyin() error if the initial length cannot be read,
       * otherwise recvit()'s result.
 1107  */
 1108 int
 1109 recvfrom(td, uap)
 1110         struct thread *td;
 1111         register struct recvfrom_args /* {
 1112                 int     s;
 1113                 caddr_t buf;
 1114                 size_t  len;
 1115                 int     flags;
 1116                 struct sockaddr * __restrict    from;
 1117                 socklen_t * __restrict fromlenaddr;
 1118         } */ *uap;
 1119 {
 1120         struct msghdr msg;
 1121         struct iovec aiov;
 1122         int error;
 1123 
              /* The address-buffer size is read only when a length pointer is given. */
 1124         if (uap->fromlenaddr) {
 1125                 error = copyin(uap->fromlenaddr,
 1126                     &msg.msg_namelen, sizeof (msg.msg_namelen));
 1127                 if (error)
 1128                         goto done2;
 1129         } else {
 1130                 msg.msg_namelen = 0;
 1131         }
 1132         msg.msg_name = uap->from;
 1133         msg.msg_iov = &aiov;
 1134         msg.msg_iovlen = 1;
 1135         aiov.iov_base = uap->buf;
 1136         aiov.iov_len = uap->len;
 1137         msg.msg_control = 0;
 1138         msg.msg_flags = uap->flags;
              /* fromlenaddr doubles as the place the resulting length is written. */
 1139         error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 1140 done2:
 1141         return(error);
 1142 }
 1143 
 1144 #ifdef COMPAT_OLDSOCK
 1145 /*
 1146  * MPSAFE
       *
       * orecvfrom: old-ABI recvfrom.  Sets MSG_COMPAT in the caller's flags
       * (modifying *uap in place) and then runs the regular recvfrom().
 1147  */
 1148 int
 1149 orecvfrom(td, uap)
 1150         struct thread *td;
 1151         struct recvfrom_args *uap;
 1152 {
 1153 
 1154         uap->flags |= MSG_COMPAT;
 1155         return (recvfrom(td, uap));
 1156 }
 1157 #endif
 1158 
 1159 
 1160 #ifdef COMPAT_OLDSOCK
 1161 /*
 1162  * MPSAFE
       *
       * orecv: receive into a single buffer with no source address and no
       * control data.  Builds a one-element iovec and calls recvit() with a
       * NULL namelenp.  Returns recvit()'s result.
 1163  */
 1164 int
 1165 orecv(td, uap)
 1166         struct thread *td;
 1167         register struct orecv_args /* {
 1168                 int     s;
 1169                 caddr_t buf;
 1170                 int     len;
 1171                 int     flags;
 1172         } */ *uap;
 1173 {
 1174         struct msghdr msg;
 1175         struct iovec aiov;
 1176         int error;
 1177 
 1178         msg.msg_name = 0;
 1179         msg.msg_namelen = 0;
 1180         msg.msg_iov = &aiov;
 1181         msg.msg_iovlen = 1;
 1182         aiov.iov_base = uap->buf;
 1183         aiov.iov_len = uap->len;
 1184         msg.msg_control = 0;
 1185         msg.msg_flags = uap->flags;
 1186         error = recvit(td, uap->s, &msg, NULL);
 1187         return (error);
 1188 }
 1189 
 1190 /*
 1191  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 1192  * overlays the new one, missing only the flags, and with the (old) access
 1193  * rights where the control fields are now.
 1194  *
       * The resulting name length is written straight into the user's
       * header by recvit() (via the namelenp argument); a non-zero control
       * length is written back to msg_accrightslen afterwards.
       *
 1195  * MPSAFE
 1196  */
 1197 int
 1198 orecvmsg(td, uap)
 1199         struct thread *td;
 1200         struct orecvmsg_args /* {
 1201                 int     s;
 1202                 struct  omsghdr *msg;
 1203                 int     flags;
 1204         } */ *uap;
 1205 {
 1206         struct msghdr msg;
 1207         struct iovec *iov;
 1208         int error;
 1209 
              /* Read just the old-sized header into the new-style struct. */
 1210         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 1211         if (error)
 1212                 return (error);
 1213         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1214         if (error)
 1215                 return (error);
 1216         msg.msg_flags = uap->flags | MSG_COMPAT;
 1217         msg.msg_iov = iov;
              /* &uap->msg->msg_namelen is only an address computation here. */
 1218         error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
              /* A zero control length is not written back to the caller. */
 1219         if (msg.msg_controllen && error == 0)
 1220                 error = copyout(&msg.msg_controllen,
 1221                     &uap->msg->msg_accrightslen, sizeof (int));
 1222         free(iov, M_IOV);
 1223         return (error);
 1224 }
 1225 #endif
 1226 
 1227 /*
 1228  * MPSAFE
       *
       * recvmsg: copy in the caller's msghdr and iovec array, receive via
       * recvit(), and on success copy the updated msghdr back out.
       *
       * Returns the first non-zero error from copyin(), copyiniov(),
       * recvit() or the final copyout().
 1229  */
 1230 int
 1231 recvmsg(td, uap)
 1232         struct thread *td;
 1233         struct recvmsg_args /* {
 1234                 int     s;
 1235                 struct  msghdr *msg;
 1236                 int     flags;
 1237         } */ *uap;
 1238 {
 1239         struct msghdr msg;
 1240         struct iovec *uiov, *iov;
 1241         int error;
 1242 
 1243         error = copyin(uap->msg, &msg, sizeof (msg));
 1244         if (error)
 1245                 return (error);
 1246         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1247         if (error)
 1248                 return (error);
              /* Flags come from the syscall argument, not from the header. */
 1249         msg.msg_flags = uap->flags;
 1250 #ifdef COMPAT_OLDSOCK
 1251         msg.msg_flags &= ~MSG_COMPAT;
 1252 #endif
              /* Keep the caller's iovec pointer so it can be restored below. */
 1253         uiov = msg.msg_iov;
 1254         msg.msg_iov = iov;
 1255         error = recvit(td, uap->s, &msg, NULL);
 1256         if (error == 0) {
                      /* Put the original pointer back before copying the header out. */
 1257                 msg.msg_iov = uiov;
 1258                 error = copyout(&msg, uap->msg, sizeof(msg));
 1259         }
 1260         free(iov, M_IOV);
 1261         return (error);
 1262 }
 1263 
 1264 /*
 1265  * MPSAFE
       *
       * shutdown: look up the socket for descriptor uap->s and pass uap->how
       * to soshutdown().  Returns the getsock() error if the lookup fails,
       * otherwise soshutdown()'s result.
 1266  */
 1267 /* ARGSUSED */
 1268 int
 1269 shutdown(td, uap)
 1270         struct thread *td;
 1271         register struct shutdown_args /* {
 1272                 int     s;
 1273                 int     how;
 1274         } */ *uap;
 1275 {
 1276         struct socket *so;
 1277         struct file *fp;
 1278         int error;
 1279 
 1280         NET_LOCK_GIANT();
 1281         error = getsock(td->td_proc->p_fd, uap->s, &fp);
 1282         if (error == 0) {
 1283                 so = fp->f_data;
 1284                 error = soshutdown(so, uap->how);
                      /* Release the reference obtained by getsock(). */
 1285                 fdrop(fp, td);
 1286         }
 1287         NET_UNLOCK_GIANT();
 1288         return (error);
 1289 }
 1290 
 1291 /*
 1292  * MPSAFE
       *
       * setsockopt: system-call entry point.  Forwards its arguments to
       * kern_setsockopt() with the value buffer marked UIO_USERSPACE and
       * returns that function's result.
 1293  */
 1294 /* ARGSUSED */
 1295 int
 1296 setsockopt(td, uap)
 1297         struct thread *td;
 1298         register struct setsockopt_args /* {
 1299                 int     s;
 1300                 int     level;
 1301                 int     name;
 1302                 caddr_t val;
 1303                 int     valsize;
 1304         } */ *uap;
 1305 {
 1306 
 1307         return (kern_setsockopt(td, uap->s, uap->level, uap->name,
 1308             uap->val, UIO_USERSPACE, uap->valsize));
 1309 }
 1310 
      /*
       * kern_setsockopt: set a socket option on descriptor `s'.
       *
       *   level, name  stored in sopt_level / sopt_name.
       *   val, valsize option value and its size.
       *   valseg       UIO_USERSPACE sets sopt_td to td, UIO_SYSSPACE sets it
       *                to NULL; any other value panics.
       *
       * Returns EFAULT for a NULL val with non-zero valsize, EINVAL for a
       * negative valsize, the getsock() error if the descriptor lookup
       * fails, otherwise sosetopt()'s result.
       */
 1311 int
 1312 kern_setsockopt(td, s, level, name, val, valseg, valsize)
 1313         struct thread *td;
 1314         int s;
 1315         int level;
 1316         int name;
 1317         void *val;
 1318         enum uio_seg valseg;
 1319         socklen_t valsize;
 1320 {
 1321         int error;
 1322         struct socket *so;
 1323         struct file *fp;
 1324         struct sockopt sopt;
 1325 
 1326         if (val == NULL && valsize != 0)
 1327                 return (EFAULT);
              /*
               * NOTE(review): valsize is a socklen_t; if that type is
               * unsigned this test can never be true -- confirm.
               */
 1328         if (valsize < 0)
 1329                 return (EINVAL);
 1330 
 1331         sopt.sopt_dir = SOPT_SET;
 1332         sopt.sopt_level = level;
 1333         sopt.sopt_name = name;
 1334         sopt.sopt_val = val;
 1335         sopt.sopt_valsize = valsize;
              /* sopt_td records whether val is a user (td) or kernel (NULL) address. */
 1336         switch (valseg) {
 1337         case UIO_USERSPACE:
 1338                 sopt.sopt_td = td;
 1339                 break;
 1340         case UIO_SYSSPACE:
 1341                 sopt.sopt_td = NULL;
 1342                 break;
 1343         default:
 1344                 panic("kern_setsockopt called with bad valseg");
 1345         }
 1346 
 1347         NET_LOCK_GIANT();
 1348         error = getsock(td->td_proc->p_fd, s, &fp);
 1349         if (error == 0) {
 1350                 so = fp->f_data;
 1351                 error = sosetopt(so, &sopt);
 1352                 fdrop(fp, td);
 1353         }
 1354         NET_UNLOCK_GIANT();
 1355         return(error);
 1356 }
 1357 
 1358 /*
 1359  * MPSAFE
       *
       * getsockopt: system-call entry point.  Reads the caller's buffer size
       * (only when a value buffer is supplied), calls kern_getsockopt(), and
       * on success writes the resulting size back to uap->avalsize.
       *
       * Returns the first non-zero error from copyin(), kern_getsockopt()
       * or copyout().
 1360  */
 1361 /* ARGSUSED */
 1362 int
 1363 getsockopt(td, uap)
 1364         struct thread *td;
 1365         register struct getsockopt_args /* {
 1366                 int     s;
 1367                 int     level;
 1368                 int     name;
 1369                 void * __restrict       val;
 1370                 socklen_t * __restrict avalsize;
 1371         } */ *uap;
 1372 {
 1373         socklen_t valsize;
 1374         int     error;
 1375 
              /*
               * With a NULL val, valsize is left unset here; kern_getsockopt()
               * stores 0 through the pointer before reading it in that case.
               */
 1376         if (uap->val) {
 1377                 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 1378                 if (error)
 1379                         return (error);
 1380         }
 1381 
 1382         error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 1383             uap->val, UIO_USERSPACE, &valsize);
 1384 
 1385         if (error == 0)
 1386                 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 1387         return (error);
 1388 }
 1389 
 1390 /*
 1391  * Kernel version of getsockopt.
 1392  * optval can be a user-space or kernel-space pointer, as selected by valseg.
       * optlen is always a kernel pointer.
       *
       * On entry *valsize is the size of the val buffer (forced to 0 when
       * val is NULL).  Once the descriptor lookup succeeds, *valsize is
       * overwritten with sopt.sopt_valsize, whether or not sogetopt()
       * itself succeeded.
       *
       * Returns EINVAL for a negative *valsize, the getsock() error if the
       * lookup fails, otherwise sogetopt()'s result.  A valseg other than
       * UIO_USERSPACE or UIO_SYSSPACE panics.
 1393  */
 1394 int
 1395 kern_getsockopt(td, s, level, name, val, valseg, valsize)
 1396         struct thread *td;
 1397         int s;
 1398         int level;
 1399         int name;
 1400         void *val;
 1401         enum uio_seg valseg;
 1402         socklen_t *valsize;
 1403 {
 1404         int error;
 1405         struct  socket *so;
 1406         struct file *fp;
 1407         struct  sockopt sopt;
 1408 
 1409         if (val == NULL)
 1410                 *valsize = 0;
              /*
               * NOTE(review): *valsize is a socklen_t; if that type is
               * unsigned this test can never be true -- confirm.
               */
 1411         if (*valsize < 0)
 1412                 return (EINVAL);
 1413 
 1414         sopt.sopt_dir = SOPT_GET;
 1415         sopt.sopt_level = level;
 1416         sopt.sopt_name = name;
 1417         sopt.sopt_val = val;
 1418         sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
              /* sopt_td records whether val is a user (td) or kernel (NULL) address. */
 1419         switch (valseg) {
 1420         case UIO_USERSPACE:
 1421                 sopt.sopt_td = td;
 1422                 break;
 1423         case UIO_SYSSPACE:
 1424                 sopt.sopt_td = NULL;
 1425                 break;
 1426         default:
 1427                 panic("kern_getsockopt called with bad valseg");
 1428         }
 1429 
 1430         NET_LOCK_GIANT();
 1431         error = getsock(td->td_proc->p_fd, s, &fp);
 1432         if (error == 0) {
 1433                 so = fp->f_data;
 1434                 error = sogetopt(so, &sopt);
                      /* Written back unconditionally, even if sogetopt() failed. */
 1435                 *valsize = sopt.sopt_valsize;
 1436                 fdrop(fp, td);
 1437         }
 1438         NET_UNLOCK_GIANT();
 1439         return (error);
 1440 }
 1441 
 1442 /*
 1443  * getsockname1() - Get socket name.
 1444  *
       * Shared implementation for getsockname() and ogetsockname().  Asks
       * the protocol's pru_sockaddr for the address, copies out at most
       * *alen bytes of it to uap->asa, and writes the length actually used
       * back to uap->alen.  If no address is returned, a length of 0 is
       * written and nothing is copied to uap->asa.
       *
       * `compat' only has an effect under COMPAT_OLDSOCK, where a non-zero
       * value rewrites the family field through a struct osockaddr overlay
       * before the copyout.
       *
       * Returns 0 or an errno value from getsock(), copyin(), pru_sockaddr
       * or copyout(); EINVAL for a negative length.
       *
 1445  * MPSAFE
 1446  */
 1447 /* ARGSUSED */
 1448 static int
 1449 getsockname1(td, uap, compat)
 1450         struct thread *td;
 1451         register struct getsockname_args /* {
 1452                 int     fdes;
 1453                 struct sockaddr * __restrict asa;
 1454                 socklen_t * __restrict alen;
 1455         } */ *uap;
 1456         int compat;
 1457 {
 1458         struct socket *so;
 1459         struct sockaddr *sa;
 1460         struct file *fp;
 1461         socklen_t len;
 1462         int error;
 1463 
 1464         NET_LOCK_GIANT();
 1465         error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
 1466         if (error)
 1467                 goto done2;
 1468         so = fp->f_data;
 1469         error = copyin(uap->alen, &len, sizeof (len));
 1470         if (error)
 1471                 goto done1;
              /*
               * NOTE(review): len is a socklen_t; if that type is unsigned
               * this test can never be true -- confirm.
               */
 1472         if (len < 0) {
 1473                 error = EINVAL;
 1474                 goto done1;
 1475         }
              /* Cleared first so the `bad' path can tell whether to free it. */
 1476         sa = 0;
 1477         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
 1478         if (error)
 1479                 goto bad;
 1480         if (sa == 0) {
 1481                 len = 0;
 1482                 goto gotnothing;
 1483         }
 1484 
              /* Never copy out more than the caller's buffer or the address. */
 1485         len = MIN(len, sa->sa_len);
 1486 #ifdef COMPAT_OLDSOCK
 1487         if (compat)
 1488                 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1489 #endif
 1490         error = copyout(sa, uap->asa, (u_int)len);
              /*
               * The `gotnothing' label sits on the statement controlled by
               * this `if', so the sa == 0 path jumps straight to the length
               * copyout while a failed address copyout skips it.
               */
 1491         if (error == 0)
 1492 gotnothing:
 1493                 error = copyout(&len, uap->alen, sizeof (len));
 1494 bad:
 1495         if (sa)
 1496                 FREE(sa, M_SONAME);
 1497 done1:
 1498         fdrop(fp, td);
 1499 done2:
 1500         NET_UNLOCK_GIANT();
 1501         return (error);
 1502 }
 1503 
 1504 /*
 1505  * MPSAFE
       *
       * getsockname: calls getsockname1() with compat == 0 and returns its
       * result.
 1506  */
 1507 int
 1508 getsockname(td, uap)
 1509         struct thread *td;
 1510         struct getsockname_args *uap;
 1511 {
 1512 
 1513         return (getsockname1(td, uap, 0));
 1514 }
 1515 
 1516 #ifdef COMPAT_OLDSOCK
 1517 /*
 1518  * MPSAFE
       *
       * ogetsockname: old-ABI variant; calls getsockname1() with compat == 1
       * and returns its result.
 1519  */
 1520 int
 1521 ogetsockname(td, uap)
 1522         struct thread *td;
 1523         struct getsockname_args *uap;
 1524 {
 1525 
 1526         return (getsockname1(td, uap, 1));
 1527 }
 1528 #endif /* COMPAT_OLDSOCK */
 1529 
 1530 /*
 1531  * getpeername1() - Get name of peer for connected socket.
 1532  *
       * Shared implementation for getpeername() and ogetpeername().  Fails
       * with ENOTCONN unless the socket state has SS_ISCONNECTED or
       * SS_ISCONFIRMING set.  Otherwise asks the protocol's pru_peeraddr for
       * the address, copies out at most *alen bytes of it to uap->asa, and
       * writes the length actually used back to uap->alen.  If no address
       * is returned, a length of 0 is written and nothing is copied to
       * uap->asa.
       *
       * `compat' only has an effect under COMPAT_OLDSOCK, where a non-zero
       * value rewrites the family field through a struct osockaddr overlay
       * before the copyout.
       *
       * Returns 0 or an errno value from getsock(), copyin(), pru_peeraddr
       * or copyout(); EINVAL for a negative length.
       *
 1533  * MPSAFE
 1534  */
 1535 /* ARGSUSED */
 1536 static int
 1537 getpeername1(td, uap, compat)
 1538         struct thread *td;
 1539         register struct getpeername_args /* {
 1540                 int     fdes;
 1541                 struct sockaddr * __restrict    asa;
 1542                 socklen_t * __restrict  alen;
 1543         } */ *uap;
 1544         int compat;
 1545 {
 1546         struct socket *so;
 1547         struct sockaddr *sa;
 1548         struct file *fp;
 1549         socklen_t len;
 1550         int error;
 1551 
 1552         NET_LOCK_GIANT();
 1553         error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
 1554         if (error)
 1555                 goto done2;
 1556         so = fp->f_data;
 1557         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 1558                 error = ENOTCONN;
 1559                 goto done1;
 1560         }
 1561         error = copyin(uap->alen, &len, sizeof (len));
 1562         if (error)
 1563                 goto done1;
              /*
               * NOTE(review): len is a socklen_t; if that type is unsigned
               * this test can never be true -- confirm.
               */
 1564         if (len < 0) {
 1565                 error = EINVAL;
 1566                 goto done1;
 1567         }
              /* Cleared first so the `bad' path can tell whether to free it. */
 1568         sa = 0;
 1569         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
 1570         if (error)
 1571                 goto bad;
 1572         if (sa == 0) {
 1573                 len = 0;
 1574                 goto gotnothing;
 1575         }
              /* Never copy out more than the caller's buffer or the address. */
 1576         len = MIN(len, sa->sa_len);
 1577 #ifdef COMPAT_OLDSOCK
 1578         if (compat)
 1579                 ((struct osockaddr *)sa)->sa_family =
 1580                     sa->sa_family;
 1581 #endif
 1582         error = copyout(sa, uap->asa, (u_int)len);
 1583         if (error)
 1584                 goto bad;
      /* Reached both after a successful copyout and when there is no address. */
 1585 gotnothing:
 1586         error = copyout(&len, uap->alen, sizeof (len));
 1587 bad:
 1588         if (sa)
 1589                 FREE(sa, M_SONAME);
 1590 done1:
 1591         fdrop(fp, td);
 1592 done2:
 1593         NET_UNLOCK_GIANT();
 1594         return (error);
 1595 }
 1596 
 1597 /*
 1598  * MPSAFE
       *
       * getpeername: calls getpeername1() with compat == 0 and returns its
       * result.
 1599  */
 1600 int
 1601 getpeername(td, uap)
 1602         struct thread *td;
 1603         struct getpeername_args *uap;
 1604 {
 1605 
 1606         return (getpeername1(td, uap, 0));
 1607 }
 1608 
 1609 #ifdef COMPAT_OLDSOCK
 1610 /*
 1611  * MPSAFE
       *
       * ogetpeername: old-ABI variant; casts its argument structure to
       * struct getpeername_args * and calls getpeername1() with compat == 1.
 1612  */
 1613 int
 1614 ogetpeername(td, uap)
 1615         struct thread *td;
 1616         struct ogetpeername_args *uap;
 1617 {
 1618 
 1619         /* XXX uap should have type `getpeername_args *' to begin with. */
 1620         return (getpeername1(td, (struct getpeername_args *)uap, 1));
 1621 }
 1622 #endif /* COMPAT_OLDSOCK */
 1623 
      /*
       * sockargs: copy `buflen' bytes from `buf' (via copyin) into a newly
       * allocated mbuf of the given type and return it through *mp.
       *
       * Sizes above MLEN get a cluster attached; sizes above MCLBYTES are
       * rejected with EINVAL.  Under COMPAT_OLDSOCK an MT_SONAME request of
       * up to 112 bytes is instead clamped to MLEN.
       *
       * For MT_SONAME the copied data is treated as a struct sockaddr and
       * its sa_len is set to buflen.
       *
       * Returns 0 on success (with *mp set), EINVAL, ENOBUFS when the mbuf
       * or cluster cannot be obtained, or the copyin() error.  On failure
       * the mbuf is freed and *mp is not written.
       */
 1624 int
 1625 sockargs(mp, buf, buflen, type)
 1626         struct mbuf **mp;
 1627         caddr_t buf;
 1628         int buflen, type;
 1629 {
 1630         register struct sockaddr *sa;
 1631         register struct mbuf *m;
 1632         int error;
 1633 
              /* The unsigned casts make a negative buflen compare as very large. */
 1634         if ((u_int)buflen > MLEN) {
 1635 #ifdef COMPAT_OLDSOCK
 1636                 if (type == MT_SONAME && (u_int)buflen <= 112)
 1637                         buflen = MLEN;          /* unix domain compat. hack */
 1638                 else
 1639 #endif
 1640                         if ((u_int)buflen > MCLBYTES)
 1641                                 return (EINVAL);
 1642         }
 1643         m = m_get(M_TRYWAIT, type);
 1644         if (m == NULL)
 1645                 return (ENOBUFS);
 1646         if ((u_int)buflen > MLEN) {
 1647                 MCLGET(m, M_TRYWAIT);
                      /* M_EXT still clear means no cluster was attached. */
 1648                 if ((m->m_flags & M_EXT) == 0) {
 1649                         m_free(m);
 1650                         return (ENOBUFS);
 1651                 }
 1652         }
 1653         m->m_len = buflen;
 1654         error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 1655         if (error)
 1656                 (void) m_free(m);
 1657         else {
 1658                 *mp = m;
 1659                 if (type == MT_SONAME) {
 1660                         sa = mtod(m, struct sockaddr *);
 1661 
                              /*
                               * If the family byte is 0 and the length byte
                               * holds a value below AF_MAX, take the length
                               * byte as the family.  NOTE(review): presumably
                               * this accepts old-layout sockaddrs whose
                               * family field covered both bytes -- confirm.
                               */
 1662 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1663                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1664                                 sa->sa_family = sa->sa_len;
 1665 #endif
 1666                         sa->sa_len = buflen;
 1667                 }
 1668         }
 1669         return (error);
 1670 }
 1671 
      /*
       * getsockaddr: allocate a `len'-byte sockaddr with MALLOC(M_SONAME),
       * fill it from `uaddr' via copyin, set its sa_len to `len', and return
       * it through *namp.
       *
       * Returns ENAMETOOLONG if len exceeds SOCK_MAXADDRLEN, EINVAL if len
       * is smaller than the offset of sa_data, or the copyin() error (in
       * which case the allocation is freed and *namp is not written).
       * On success this function does not free the allocation; the caller
       * is presumably expected to release it with FREE(..., M_SONAME).
       */
 1672 int
 1673 getsockaddr(namp, uaddr, len)
 1674         struct sockaddr **namp;
 1675         caddr_t uaddr;
 1676         size_t len;
 1677 {
 1678         struct sockaddr *sa;
 1679         int error;
 1680 
 1681         if (len > SOCK_MAXADDRLEN)
 1682                 return (ENAMETOOLONG);
              /* Must be long enough to contain the fields preceding sa_data. */
 1683         if (len < offsetof(struct sockaddr, sa_data[0]))
 1684                 return (EINVAL);
 1685         MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
 1686         error = copyin(uaddr, sa, len);
 1687         if (error) {
 1688                 FREE(sa, M_SONAME);
 1689         } else {
                      /*
                       * If the family byte is 0 and the length byte holds a
                       * value below AF_MAX, take the length byte as the
                       * family.  NOTE(review): presumably this accepts
                       * old-layout sockaddrs -- confirm.
                       */
 1690 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1691                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1692                         sa->sa_family = sa->sa_len;
 1693 #endif
                      /* The caller-supplied sa_len is overwritten with len. */
 1694                 sa->sa_len = len;
 1695                 *namp = sa;
 1696         }
 1697         return (error);
 1698 }
 1699 
 1700 /*
 1701  * Detach mapped page and release resources back to the system.
       *
       * `addr' is not used.  `args' is passed to sf_buf_page() and
       * sf_buf_free(); presumably it is the struct sf_buf * that was
       * registered with this callback -- confirm against the caller.
 1702  */
 1703 void
 1704 sf_buf_mext(void *addr, void *args)
 1705 {
 1706         vm_page_t m;
 1707 
              /* Fetch the page before the sf_buf is released. */
 1708         m = sf_buf_page(args);
 1709         sf_buf_free(args);
 1710         vm_page_lock_queues();
 1711         vm_page_unwire(m, 0);
 1712         /*
 1713          * Check for the object going away on us. This can
 1714          * happen since we don't hold a reference to it.
 1715          * If so, we're responsible for freeing the page.
 1716          */
 1717         if (m->wire_count == 0 && m->object == NULL)
 1718                 vm_page_free(m);
 1719         vm_page_unlock_queues();
 1720 }
 1721 
 1722 /*
 1723  * sendfile(2)
 1724  *
 1725  * MPSAFE
 1726  *
 1727  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 1728  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 1729  *
 1730  * Send a file specified by 'fd' and starting at 'offset' to a socket
 1731  * specified by 's'. Send only 'nbytes' of the file or until EOF if
 1732  * nbytes == 0. Optionally add a header and/or trailer to the socket
 1733  * output. If specified, write the total number of bytes sent into *sbytes.
 1734  *
       * This entry point only forwards to do_sendfile() with compat == 0
       * and returns its result.
 1735  */
 1736 int
 1737 sendfile(struct thread *td, struct sendfile_args *uap)
 1738 {
 1739 
 1740         return (do_sendfile(td, uap, 0));
 1741 }
 1742 
 1743 #ifdef COMPAT_FREEBSD4
      /*
       * freebsd4_sendfile: compatibility entry point.  Copies each field of
       * its argument structure into a struct sendfile_args and calls
       * do_sendfile() with compat == 1, returning its result.
       */
 1744 int
 1745 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
 1746 {
 1747         struct sendfile_args args;
 1748 
 1749         args.fd = uap->fd;
 1750         args.s = uap->s;
 1751         args.offset = uap->offset;
 1752         args.nbytes = uap->nbytes;
 1753         args.hdtr = uap->hdtr;
 1754         args.sbytes = uap->sbytes;
 1755         args.flags = uap->flags;
 1756 
 1757         return (do_sendfile(td, &args, 1));
 1758 }
 1759 #endif /* COMPAT_FREEBSD4 */
 1760 
 1761 static int
 1762 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
 1763 {
 1764         struct vnode *vp;
 1765         struct vm_object *obj = NULL;
 1766         struct socket *so = NULL;
 1767         struct mbuf *m, *m_header = NULL;
 1768         struct sf_buf *sf;
 1769         struct vm_page *pg;
 1770         struct writev_args nuap;
 1771         struct sf_hdtr hdtr;
 1772         struct uio *hdr_uio = NULL;
 1773         off_t off, xfsize, hdtr_size, sbytes = 0;
 1774         int error, headersize = 0, headersent = 0;
 1775 
 1776         mtx_lock(&Giant);
 1777 
 1778         hdtr_size = 0;
 1779 
 1780         /*
 1781          * The descriptor must be a regular file and have a backing VM object.
 1782          */
 1783         if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
 1784                 goto done;
 1785         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1786         if (vp->v_type == VREG)
 1787                 obj = vp->v_object;
 1788         VOP_UNLOCK(vp, 0, td);
 1789         if (obj == NULL) {
 1790                 error = EINVAL;
 1791                 goto done;
 1792         }
 1793         if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
 1794                 goto done;
 1795         if (so->so_type != SOCK_STREAM) {
 1796                 error = EINVAL;
 1797                 goto done;
 1798         }
 1799         if ((so->so_state & SS_ISCONNECTED) == 0) {
 1800                 error = ENOTCONN;
 1801                 goto done;
 1802         }
 1803         if (uap->offset < 0) {
 1804                 error = EINVAL;
 1805                 goto done;
 1806         }
 1807 
 1808 #ifdef MAC
 1809         SOCK_LOCK(so);
 1810         error = mac_check_socket_send(td->td_ucred, so);
 1811         SOCK_UNLOCK(so);
 1812         if (error)
 1813                 goto done;
 1814 #endif
 1815 
 1816         /*
 1817          * If specified, get the pointer to the sf_hdtr struct for
 1818          * any headers/trailers.
 1819          */
 1820         if (uap->hdtr != NULL) {
 1821                 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 1822                 if (error)
 1823                         goto done;
 1824                 /*
 1825                  * Send any headers.
 1826                  */
 1827                 if (hdtr.headers != NULL) {
 1828                         error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
 1829                         if (error)
 1830                                 goto done;
 1831                         hdr_uio->uio_td = td;
 1832                         hdr_uio->uio_rw = UIO_WRITE;
 1833                         if (hdr_uio->uio_resid > 0) {
 1834                                 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0);
 1835                                 if (m_header == NULL)
 1836                                         goto done;
 1837                                 headersize = m_header->m_pkthdr.len;
 1838                                 if (compat)
 1839                                         sbytes += headersize;
 1840                         }
 1841                 }
 1842         }
 1843 
 1844         /*
 1845          * Protect against multiple writers to the socket.
 1846          */
 1847         SOCKBUF_LOCK(&so->so_snd);
 1848         (void) sblock(&so->so_snd, M_WAITOK);
 1849         SOCKBUF_UNLOCK(&so->so_snd);
 1850 
 1851         /*
 1852          * Loop through the pages in the file, starting with the requested
 1853          * offset. Get a file page (do I/O if necessary), map the file page
 1854          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 1855          * it on the socket.
 1856          */
 1857         for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
 1858                 vm_pindex_t pindex;
 1859                 vm_offset_t pgoff;
 1860 
 1861                 pindex = OFF_TO_IDX(off);
 1862                 VM_OBJECT_LOCK(obj);
 1863 retry_lookup:
 1864                 /*
 1865                  * Calculate the amount to transfer. Not to exceed a page,
 1866                  * the EOF, or the passed in nbytes.
 1867                  */
 1868                 xfsize = obj->un_pager.vnp.vnp_size - off;
 1869                 VM_OBJECT_UNLOCK(obj);
 1870                 if (xfsize > PAGE_SIZE)
 1871                         xfsize = PAGE_SIZE;
 1872                 pgoff = (vm_offset_t)(off & PAGE_MASK);
 1873                 if (PAGE_SIZE - pgoff < xfsize)
 1874                         xfsize = PAGE_SIZE - pgoff;
 1875                 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
 1876                         xfsize = uap->nbytes - sbytes;
 1877                 if (xfsize <= 0) {
 1878                         if (m_header != NULL) {
 1879                                 m = m_header;
 1880                                 m_header = NULL;
 1881                                 SOCKBUF_LOCK(&so->so_snd);
 1882                                 goto retry_space;
 1883                         } else
 1884                                 break;
 1885                 }
 1886                 /*
 1887                  * Optimize the non-blocking case by looking at the socket space
 1888                  * before going to the extra work of constituting the sf_buf.
 1889                  */
 1890                 SOCKBUF_LOCK(&so->so_snd);
 1891                 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
 1892                         if (so->so_snd.sb_state & SBS_CANTSENDMORE)
 1893                                 error = EPIPE;
 1894                         else
 1895                                 error = EAGAIN;
 1896                         sbunlock(&so->so_snd);
 1897                         SOCKBUF_UNLOCK(&so->so_snd);
 1898                         goto done;
 1899                 }
 1900                 SOCKBUF_UNLOCK(&so->so_snd);
 1901                 VM_OBJECT_LOCK(obj);
 1902                 /*
 1903                  * Attempt to look up the page.
 1904                  *
 1905                  *      Allocate if not found
 1906                  *
 1907                  *      Wait and loop if busy.
 1908                  */
 1909                 pg = vm_page_lookup(obj, pindex);
 1910 
 1911                 if (pg == NULL) {
 1912                         pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
 1913                             VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
 1914                         if (pg == NULL) {
 1915                                 VM_OBJECT_UNLOCK(obj);
 1916                                 VM_WAIT;
 1917                                 VM_OBJECT_LOCK(obj);
 1918                                 goto retry_lookup;
 1919                         }
 1920                         vm_page_lock_queues();
 1921                 } else {
 1922                         vm_page_lock_queues();
 1923                         if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
 1924                                 goto retry_lookup;
 1925                         /*
 1926                          * Wire the page so it does not get ripped out from
 1927                          * under us.
 1928                          */
 1929                         vm_page_wire(pg);
 1930                 }
 1931 
 1932                 /*
 1933                  * If page is not valid for what we need, initiate I/O
 1934                  */
 1935 
 1936                 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
 1937                         VM_OBJECT_UNLOCK(obj);
 1938                 } else if (uap->flags & SF_NODISKIO) {
 1939                         error = EBUSY;
 1940                 } else {
 1941                         int bsize, resid;
 1942 
 1943                         /*
 1944                          * Ensure that our page is still around when the I/O
 1945                          * completes.
 1946                          */
 1947                         vm_page_io_start(pg);
 1948                         vm_page_unlock_queues();
 1949                         VM_OBJECT_UNLOCK(obj);
 1950 
 1951                         /*
 1952                          * Get the page from backing store.
 1953                          */
 1954                         bsize = vp->v_mount->mnt_stat.f_iosize;
 1955                         vn_lock(vp, LK_SHARED | LK_RETRY, td);
 1956                         /*
 1957                          * XXXMAC: Because we don't have fp->f_cred here,
 1958                          * we pass in NOCRED.  This is probably wrong, but
 1959                          * is consistent with our original implementation.
 1960                          */
 1961                         error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
 1962                             trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
 1963                             IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 1964                             td->td_ucred, NOCRED, &resid, td);
 1965                         VOP_UNLOCK(vp, 0, td);
 1966                         VM_OBJECT_LOCK(obj);
 1967                         vm_page_lock_queues();
 1968                         vm_page_io_finish(pg);
 1969                         if (!error)
 1970                                 VM_OBJECT_UNLOCK(obj);
 1971                         mbstat.sf_iocnt++;
 1972                 }
 1973         
 1974                 if (error) {
 1975                         vm_page_unwire(pg, 0);
 1976                         /*
 1977                          * See if anyone else might know about this page.
 1978                          * If not and it is not valid, then free it.
 1979                          */
 1980                         if (pg->wire_count == 0 && pg->valid == 0 &&
 1981                             pg->busy == 0 && !(pg->flags & PG_BUSY) &&
 1982                             pg->hold_count == 0) {
 1983                                 vm_page_free(pg);
 1984                         }
 1985                         vm_page_unlock_queues();
 1986                         VM_OBJECT_UNLOCK(obj);
 1987                         SOCKBUF_LOCK(&so->so_snd);
 1988                         sbunlock(&so->so_snd);
 1989                         SOCKBUF_UNLOCK(&so->so_snd);
 1990                         goto done;
 1991                 }
 1992                 vm_page_unlock_queues();
 1993 
 1994                 /*
 1995                  * Get a sendfile buf. We usually wait as long as necessary,
 1996                  * but this wait can be interrupted.
 1997                  */
 1998                 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
 1999                         mbstat.sf_allocfail++;
 2000                         vm_page_lock_queues();
 2001                         vm_page_unwire(pg, 0);
 2002                         if (pg->wire_count == 0 && pg->object == NULL)
 2003                                 vm_page_free(pg);
 2004                         vm_page_unlock_queues();
 2005                         SOCKBUF_LOCK(&so->so_snd);
 2006                         sbunlock(&so->so_snd);
 2007                         SOCKBUF_UNLOCK(&so->so_snd);
 2008                         error = EINTR;
 2009                         goto done;
 2010                 }
 2011 
 2012                 /*
 2013                  * Get an mbuf header and set it up as having external storage.
 2014                  */
 2015                 if (m_header)
 2016                         MGET(m, M_TRYWAIT, MT_DATA);
 2017                 else
 2018                         MGETHDR(m, M_TRYWAIT, MT_DATA);
 2019                 if (m == NULL) {
 2020                         error = ENOBUFS;
 2021                         sf_buf_mext((void *)sf_buf_kva(sf), sf);
 2022                         SOCKBUF_LOCK(&so->so_snd);
 2023                         sbunlock(&so->so_snd);
 2024                         SOCKBUF_UNLOCK(&so->so_snd);
 2025                         goto done;
 2026                 }
 2027                 /*
 2028                  * Setup external storage for mbuf.
 2029                  */
 2030                 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
 2031                     EXT_SFBUF);
 2032                 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
 2033                 m->m_pkthdr.len = m->m_len = xfsize;
 2034 
 2035                 if (m_header) {
 2036                         m_cat(m_header, m);
 2037                         m = m_header;
 2038                         m_header = NULL;
 2039                         m_fixhdr(m);
 2040                 }
 2041 
 2042                 /*
 2043                  * Add the buffer to the socket buffer chain.
 2044                  */
 2045                 SOCKBUF_LOCK(&so->so_snd);
 2046 retry_space:
 2047                 /*
 2048                  * Make sure that the socket is still able to take more data.
 2049                  * CANTSENDMORE being true usually means that the connection
 2050                  * was closed. so_error is true when an error was sensed after
 2051                  * a previous send.
 2052                  * The state is checked after the page mapping and buffer
 2053                  * allocation above since those operations may block and make
 2054                  * any socket checks stale. From this point forward, nothing
 2055                  * blocks before the pru_send (or more accurately, any blocking
 2056                  * results in a loop back to here to re-check).
 2057                  */
 2058                 SOCKBUF_LOCK_ASSERT(&so->so_snd);
 2059                 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
 2060                         if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 2061                                 error = EPIPE;
 2062                         } else {
 2063                                 error = so->so_error;
 2064                                 so->so_error = 0;
 2065                         }
 2066                         m_freem(m);
 2067                         sbunlock(&so->so_snd);
 2068                         SOCKBUF_UNLOCK(&so->so_snd);
 2069                         goto done;
 2070                 }
 2071                 /*
 2072                  * Wait for socket space to become available. We do this just
 2073                  * after checking the connection state above in order to avoid
 2074                  * a race condition with sbwait().
 2075                  */
 2076                 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
 2077                         if (so->so_state & SS_NBIO) {
 2078                                 m_freem(m);
 2079                                 sbunlock(&so->so_snd);
 2080                                 SOCKBUF_UNLOCK(&so->so_snd);
 2081                                 error = EAGAIN;
 2082                                 goto done;
 2083                         }
 2084                         error = sbwait(&so->so_snd);
 2085                         /*
 2086                          * An error from sbwait usually indicates that we've
 2087                          * been interrupted by a signal. If we've sent anything
 2088                          * then return bytes sent, otherwise return the error.
 2089                          */
 2090                         if (error) {
 2091                                 m_freem(m);
 2092                                 sbunlock(&so->so_snd);
 2093                                 SOCKBUF_UNLOCK(&so->so_snd);
 2094                                 goto done;
 2095                         }
 2096                         goto retry_space;
 2097                 }
 2098                 SOCKBUF_UNLOCK(&so->so_snd);
 2099                 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
 2100                 if (error) {
 2101                         SOCKBUF_LOCK(&so->so_snd);
 2102                         sbunlock(&so->so_snd);
 2103                         SOCKBUF_UNLOCK(&so->so_snd);
 2104                         goto done;
 2105                 }
 2106                 headersent = 1;
 2107         }
 2108         SOCKBUF_LOCK(&so->so_snd);
 2109         sbunlock(&so->so_snd);
 2110         SOCKBUF_UNLOCK(&so->so_snd);
 2111 
 2112         /*
 2113          * Send trailers. Wimp out and use writev(2).
 2114          */
 2115         if (uap->hdtr != NULL && hdtr.trailers != NULL) {
 2116                         nuap.fd = uap->s;
 2117                         nuap.iovp = hdtr.trailers;
 2118                         nuap.iovcnt = hdtr.trl_cnt;
 2119                         error = writev(td, &nuap);
 2120                         if (error)
 2121                                 goto done;
 2122                         if (compat)
 2123                                 sbytes += td->td_retval[0];
 2124                         else
 2125                                 hdtr_size += td->td_retval[0];
 2126         }
 2127 
 2128 done:
 2129         if (headersent) {
 2130                 if (!compat)
 2131                         hdtr_size += headersize;
 2132         } else {
 2133                 if (compat)
 2134                         sbytes -= headersize;
 2135         }
 2136         /*
 2137          * If there was no error we have to clear td->td_retval[0]
 2138          * because it may have been set by writev.
 2139          */
 2140         if (error == 0) {
 2141                 td->td_retval[0] = 0;
 2142         }
 2143         if (uap->sbytes != NULL) {
 2144                 if (!compat)
 2145                         sbytes += hdtr_size;
 2146                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
 2147         }
 2148         if (vp)
 2149                 vrele(vp);
 2150         if (so)
 2151                 fputsock(so);
 2152         if (hdr_uio != NULL)
 2153                 free(hdr_uio, M_IOV);
 2154         if (m_header)
 2155                 m_freem(m_header);
 2156 
 2157         mtx_unlock(&Giant);
 2158 
 2159         if (error == ERESTART)
 2160                 error = EINTR;
 2161 
 2162         return (error);
 2163 }

Cache object: dfd18e98b75747b644ed4bce68cb325f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.