The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * sendfile(2) and related extensions:
    6  * Copyright (c) 1998, David Greenman. All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/6.4/sys/kern/uipc_syscalls.c 176271 2008-02-14 11:46:08Z simon $");
   37 
   38 #include "opt_compat.h"
   39 #include "opt_ktrace.h"
   40 #include "opt_mac.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/kernel.h>
   45 #include <sys/lock.h>
   46 #include <sys/mac.h>
   47 #include <sys/mutex.h>
   48 #include <sys/sysproto.h>
   49 #include <sys/malloc.h>
   50 #include <sys/filedesc.h>
   51 #include <sys/event.h>
   52 #include <sys/proc.h>
   53 #include <sys/fcntl.h>
   54 #include <sys/file.h>
   55 #include <sys/filio.h>
   56 #include <sys/mount.h>
   57 #include <sys/mbuf.h>
   58 #include <sys/protosw.h>
   59 #include <sys/sf_buf.h>
   60 #include <sys/socket.h>
   61 #include <sys/socketvar.h>
   62 #include <sys/signalvar.h>
   63 #include <sys/syscallsubr.h>
   64 #include <sys/sysctl.h>
   65 #include <sys/uio.h>
   66 #include <sys/vnode.h>
   67 #ifdef KTRACE
   68 #include <sys/ktrace.h>
   69 #endif
   70 
   71 #include <vm/vm.h>
   72 #include <vm/vm_object.h>
   73 #include <vm/vm_page.h>
   74 #include <vm/vm_pageout.h>
   75 #include <vm/vm_kern.h>
   76 #include <vm/vm_extern.h>
   77 
   78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
   79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
   80 
   81 static int accept1(struct thread *td, struct accept_args *uap, int compat);
   82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
   83 static int getsockname1(struct thread *td, struct getsockname_args *uap,
   84                         int compat);
   85 static int getpeername1(struct thread *td, struct getpeername_args *uap,
   86                         int compat);
   87 
   88 /*
   89  * NSFBUFS-related variables and associated sysctls
       *
       * All three counters are published under the _kern_ipc sysctl node:
       * nsfbufs with CTLFLAG_RDTUN, the other two with CTLFLAG_RD.
   90  */
   91 int nsfbufs;             /* maximum number of sendfile(2) sf_bufs available */
   92 int nsfbufspeak;         /* number of sendfile(2) sf_bufs at peak usage */
   93 int nsfbufsused;         /* number of sendfile(2) sf_bufs in use */
   94 
   95 SYSCTL_DECL(_kern_ipc);
   96 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
   97     "Maximum number of sendfile(2) sf_bufs available");
   98 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
   99     "Number of sendfile(2) sf_bufs at peak usage");
  100 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
  101     "Number of sendfile(2) sf_bufs in use");
  102 
  103 /*
  104  * Convert a user file descriptor to a kernel file entry.  A reference on the
  105  * file entry is held upon returning.  This is lighter weight than
  106  * fgetsock(), which bumps the socket reference drops the file reference
  107  * count instead, as this approach avoids several additional mutex operations
  108  * associated with the additional reference count.
  109  */
  110 static int
  111 getsock(struct filedesc *fdp, int fd, struct file **fpp)
  112 {
  113         struct file *fp;
  114         int error;
  115 
  116         fp = NULL;
  117         if (fdp == NULL)
  118                 error = EBADF;
  119         else {
  120                 FILEDESC_LOCK_FAST(fdp);
  121                 fp = fget_locked(fdp, fd);
  122                 if (fp == NULL)
  123                         error = EBADF;
  124                 else if (fp->f_type != DTYPE_SOCKET) {
  125                         fp = NULL;
  126                         error = ENOTSOCK;
  127                 } else {
  128                         fhold(fp);
  129                         error = 0;
  130                 }
  131                 FILEDESC_UNLOCK_FAST(fdp);
  132         }
  133         *fpp = fp;
  134         return (error);
  135 }
  136 
  137 /*
  138  * System call interface to the socket abstraction.
  139  */
  140 #if defined(COMPAT_43)
  141 #define COMPAT_OLDSOCK
  142 #endif
  143 
  144 /*
  145  * MPSAFE
  146  */
  147 int
  148 socket(td, uap)
  149         struct thread *td;
  150         register struct socket_args /* {
  151                 int     domain;
  152                 int     type;
  153                 int     protocol;
  154         } */ *uap;
  155 {
  156         struct filedesc *fdp;
  157         struct socket *so;
  158         struct file *fp;
  159         int fd, error;
  160 
  161 #ifdef MAC
  162         error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
  163             uap->protocol);
  164         if (error)
  165                 return (error);
  166 #endif
  167         fdp = td->td_proc->p_fd;
  168         error = falloc(td, &fp, &fd);
  169         if (error)
  170                 return (error);
  171         /* An extra reference on `fp' has been held for us by falloc(). */
  172         NET_LOCK_GIANT();
  173         error = socreate(uap->domain, &so, uap->type, uap->protocol,
  174             td->td_ucred, td);
  175         NET_UNLOCK_GIANT();
  176         if (error) {
  177                 fdclose(fdp, fp, fd, td);
  178         } else {
  179                 FILEDESC_LOCK_FAST(fdp);
  180                 fp->f_data = so;        /* already has ref count */
  181                 fp->f_flag = FREAD|FWRITE;
  182                 fp->f_ops = &socketops;
  183                 fp->f_type = DTYPE_SOCKET;
  184                 FILEDESC_UNLOCK_FAST(fdp);
  185                 td->td_retval[0] = fd;
  186         }
  187         fdrop(fp, td);
  188         return (error);
  189 }
  190 
  191 /*
  192  * MPSAFE
  193  */
  194 /* ARGSUSED */
  195 int
  196 bind(td, uap)
  197         struct thread *td;
  198         register struct bind_args /* {
  199                 int     s;
  200                 caddr_t name;
  201                 int     namelen;
  202         } */ *uap;
  203 {
  204         struct sockaddr *sa;
  205         int error;
  206 
  207         if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
  208                 return (error);
  209 
  210         return (kern_bind(td, uap->s, sa));
  211 }
  212 
  213 int
  214 kern_bind(td, fd, sa)
  215         struct thread *td;
  216         int fd;
  217         struct sockaddr *sa;
  218 {
  219         struct socket *so;
  220         struct file *fp;
  221         int error;
  222 
  223         NET_LOCK_GIANT();
  224         error = getsock(td->td_proc->p_fd, fd, &fp);
  225         if (error)
  226                 goto done2;
  227         so = fp->f_data;
  228 #ifdef MAC
  229         SOCK_LOCK(so);
  230         error = mac_check_socket_bind(td->td_ucred, so, sa);
  231         SOCK_UNLOCK(so);
  232         if (error)
  233                 goto done1;
  234 #endif
  235         error = sobind(so, sa, td);
  236 #ifdef MAC
  237 done1:
  238 #endif
  239         fdrop(fp, td);
  240 done2:
  241         NET_UNLOCK_GIANT();
  242         FREE(sa, M_SONAME);
  243         return (error);
  244 }
  245 
  246 /*
  247  * MPSAFE
  248  */
  249 /* ARGSUSED */
  250 int
  251 listen(td, uap)
  252         struct thread *td;
  253         register struct listen_args /* {
  254                 int     s;
  255                 int     backlog;
  256         } */ *uap;
  257 {
  258         struct socket *so;
  259         struct file *fp;
  260         int error;
  261 
  262         NET_LOCK_GIANT();
  263         error = getsock(td->td_proc->p_fd, uap->s, &fp);
  264         if (error == 0) {
  265                 so = fp->f_data;
  266 #ifdef MAC
  267                 SOCK_LOCK(so);
  268                 error = mac_check_socket_listen(td->td_ucred, so);
  269                 SOCK_UNLOCK(so);
  270                 if (error)
  271                         goto done;
  272 #endif
  273                 error = solisten(so, uap->backlog, td);
  274 #ifdef MAC
  275 done:
  276 #endif
  277                 fdrop(fp, td);
  278         }
  279         NET_UNLOCK_GIANT();
  280         return(error);
  281 }
  282 
  283 /*
  284  * accept1()
       *
       * Common body of accept() and, when COMPAT_OLDSOCK is defined,
       * oaccept(); `compat' is non-zero for the latter and makes the address
       * copied out use the osockaddr family layout.
       *
       * Takes the first socket off the completed-connection queue (so_comp)
       * of the listening socket uap->s, attaches it to a newly allocated
       * descriptor (returned in td->td_retval[0]) and, if uap->name is
       * non-NULL, copies out the address returned by soaccept(), truncated to
       * the caller-supplied length, followed by the resulting length.
       *
       * Sleeps until a connection is queued unless the listening socket has
       * SS_NBIO set, in which case EWOULDBLOCK is returned.  Returns EINVAL
       * if the socket is not in the SO_ACCEPTCONN state.  Every failure after
       * the new descriptor was allocated closes it again via fdclose().
       *
  285  * MPSAFE
  286  */
  287 static int
  288 accept1(td, uap, compat)
  289         struct thread *td;
  290         register struct accept_args /* {
  291                 int     s;
  292                 struct sockaddr * __restrict name;
  293                 socklen_t       * __restrict anamelen;
  294         } */ *uap;
  295         int compat;
  296 {
  297         struct filedesc *fdp;
  298         struct file *nfp = NULL;
  299         struct sockaddr *sa = NULL;
  300         socklen_t namelen;
  301         int error;
  302         struct socket *head, *so;
  303         int fd;
  304         u_int fflag;
  305         pid_t pgid;
  306         int tmp;
  307 
  308         fdp = td->td_proc->p_fd;
              /* namelen is only fetched (and only used) when a name buffer is given. */
  309         if (uap->name) {
  310                 error = copyin(uap->anamelen, &namelen, sizeof (namelen));
  311                 if(error)
  312                         return (error);
                      /*
                       * NOTE(review): if socklen_t is an unsigned type this test
                       * can never be true -- confirm against the type definition.
                       */
  313                 if (namelen < 0)
  314                         return (EINVAL);
  315         }
  316         NET_LOCK_GIANT();
              /* Reference on `head' is released with fputsock() at `done'. */
  317         error = fgetsock(td, uap->s, &head, &fflag);
  318         if (error)
  319                 goto done2;
  320         if ((head->so_options & SO_ACCEPTCONN) == 0) {
  321                 error = EINVAL;
  322                 goto done;
  323         }
  324 #ifdef MAC
  325         SOCK_LOCK(head);
  326         error = mac_check_socket_accept(td->td_ucred, head);
  327         SOCK_UNLOCK(head);
  328         if (error != 0)
  329                 goto done;
  330 #endif
  331         error = falloc(td, &nfp, &fd);
  332         if (error)
  333                 goto done;
  334         ACCEPT_LOCK();
              /* Non-blocking listener with nothing queued: fail immediately. */
  335         if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
  336                 ACCEPT_UNLOCK();
  337                 error = EWOULDBLOCK;
  338                 goto noconnection;
  339         }
              /*
               * Wait for a completed connection or a pending socket error.  The
               * sleep is interruptible (PCATCH); a non-zero msleep() result
               * aborts the accept.
               */
  340         while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
  341                 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
  342                         head->so_error = ECONNABORTED;
  343                         break;
  344                 }
  345                 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
  346                     "accept", 0);
  347                 if (error) {
  348                         ACCEPT_UNLOCK();
  349                         goto noconnection;
  350                 }
  351         }
              /* A pending error on the listener is reported once and cleared. */
  352         if (head->so_error) {
  353                 error = head->so_error;
  354                 head->so_error = 0;
  355                 ACCEPT_UNLOCK();
  356                 goto noconnection;
  357         }
  358         so = TAILQ_FIRST(&head->so_comp);
  359         KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
  360         KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
  361 
  362         /*
  363          * Before changing the flags on the socket, we have to bump the
  364          * reference count.  Otherwise, if the protocol calls sofree(),
  365          * the socket will be released due to a zero refcount.
  366          */
  367         SOCK_LOCK(so);                  /* soref() and so_state update */
  368         soref(so);                      /* file descriptor reference */
  369 
              /* Detach `so' from the listener; it inherits the SS_NBIO setting. */
  370         TAILQ_REMOVE(&head->so_comp, so, so_list);
  371         head->so_qlen--;
  372         so->so_state |= (head->so_state & SS_NBIO);
  373         so->so_qstate &= ~SQ_COMP;
  374         so->so_head = NULL;
  375 
  376         SOCK_UNLOCK(so);
  377         ACCEPT_UNLOCK();
  378 
  379         /* An extra reference on `nfp' has been held for us by falloc(). */
  380         td->td_retval[0] = fd;
  381 
  382         /* connection has been removed from the listen queue */
  383         KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
  384 
              /* Propagate the listener's SIGIO owner, if any, to the new socket. */
  385         pgid = fgetown(&head->so_sigio);
  386         if (pgid != 0)
  387                 fsetown(pgid, &so->so_sigio);
  388 
  389         FILE_LOCK(nfp);
  390         nfp->f_data = so;       /* nfp has ref count from falloc */
  391         nfp->f_flag = fflag;
  392         nfp->f_ops = &socketops;
  393         nfp->f_type = DTYPE_SOCKET;
  394         FILE_UNLOCK(nfp);
  395         /* Sync socket nonblocking/async state with file flags */
  396         tmp = fflag & FNONBLOCK;
  397         (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
  398         tmp = fflag & FASYNC;
  399         (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
  400         sa = 0;
  401         error = soaccept(so, &sa);
  402         if (error) {
  403                 /*
  404                  * return a namelen of zero for older code which might
  405                  * ignore the return value from accept.
  406                  */
  407                 if (uap->name != NULL) {
  408                         namelen = 0;
  409                         (void) copyout(&namelen,
  410                             uap->anamelen, sizeof(*uap->anamelen));
  411                 }
  412                 goto noconnection;
  413         }
              /*
               * No address came back: report a zero length if the caller asked
               * for a name, otherwise succeed without touching user memory.
               */
  414         if (sa == NULL) {
  415                 namelen = 0;
  416                 if (uap->name)
  417                         goto gotnoname;
  418                 error = 0;
  419                 goto done;
  420         }
  421         if (uap->name) {
  422                 /* check sa_len before it is destroyed */
  423                 if (namelen > sa->sa_len)
  424                         namelen = sa->sa_len;
  425 #ifdef COMPAT_OLDSOCK
  426                 if (compat)
  427                         ((struct osockaddr *)sa)->sa_family =
  428                             sa->sa_family;
  429 #endif
  430                 error = copyout(sa, uap->name, (u_int)namelen);
                      /*
                       * The `gotnoname' label sits inside this if so that the
                       * sa == NULL case above can jump straight to the length
                       * copyout.
                       */
  431                 if (!error)
  432 gotnoname:
  433                         error = copyout(&namelen,
  434                             uap->anamelen, sizeof (*uap->anamelen));
  435         }
              /* Reached on success as well; `error' decides whether fd is closed. */
  436 noconnection:
  437         if (sa)
  438                 FREE(sa, M_SONAME);
  439 
  440         /*
  441          * close the new descriptor, assuming someone hasn't ripped it
  442          * out from under us.
  443          */
  444         if (error)
  445                 fdclose(fdp, nfp, fd, td);
  446 
  447         /*
  448          * Release explicitly held references before returning.
  449          */
  450 done:
  451         if (nfp != NULL)
  452                 fdrop(nfp, td);
  453         fputsock(head);
  454 done2:
  455         NET_UNLOCK_GIANT();
  456         return (error);
  457 }
  458 
  /*
   * accept(2) entry point: accept1() with `compat' set to 0.
   *
   * MPSAFE (accept1() is MPSAFE)
   */
  int
  accept(struct thread *td, struct accept_args *uap)
  {

          return (accept1(td, uap, 0));
  }
  470 
  471 #ifdef COMPAT_OLDSOCK
  /*
   * Old-socket-API accept entry point: accept1() with `compat' set to 1.
   *
   * MPSAFE (accept1() is MPSAFE)
   */
  int
  oaccept(struct thread *td, struct accept_args *uap)
  {

          return (accept1(td, uap, 1));
  }
  483 #endif /* COMPAT_OLDSOCK */
  484 
  485 /*
  486  * MPSAFE
  487  */
  488 /* ARGSUSED */
  489 int
  490 connect(td, uap)
  491         struct thread *td;
  492         register struct connect_args /* {
  493                 int     s;
  494                 caddr_t name;
  495                 int     namelen;
  496         } */ *uap;
  497 {
  498         struct sockaddr *sa;
  499         int error;
  500 
  501         error = getsockaddr(&sa, uap->name, uap->namelen);
  502         if (error)
  503                 return (error);
  504 
  505         return (kern_connect(td, uap->s, sa));
  506 }
  507 
  508 
      /*
       * Connect the socket behind descriptor `fd' to the kernel-space address
       * `sa'.  `sa' is consumed: it is freed (M_SONAME) on every path.
       *
       * Returns EALREADY if a connect is already in progress on the socket,
       * and EINPROGRESS if the socket is non-blocking (SS_NBIO) and still
       * connecting after soconnect().  Otherwise sleeps until the socket
       * leaves SS_ISCONNECTING or so_error is set, then returns (and clears)
       * so_error.  ERESTART is reported to the caller as EINTR.
       */
  509 int
  510 kern_connect(td, fd, sa)
  511         struct thread *td;
  512         int fd;
  513         struct sockaddr *sa;
  514 {
  515         struct socket *so;
  516         struct file *fp;
  517         int error;
  518         int interrupted = 0;
  519 
  520         NET_LOCK_GIANT();
  521         error = getsock(td->td_proc->p_fd, fd, &fp);
  522         if (error)
  523                 goto done2;
  524         so = fp->f_data;
  525         if (so->so_state & SS_ISCONNECTING) {
  526                 error = EALREADY;
  527                 goto done1;
  528         }
  529 #ifdef MAC
  530         SOCK_LOCK(so);
  531         error = mac_check_socket_connect(td->td_ucred, so, sa);
  532         SOCK_UNLOCK(so);
  533         if (error)
  534                 goto bad;
  535 #endif
  536         error = soconnect(so, sa, td);
  537         if (error)
  538                 goto bad;
              /* Non-blocking socket: skip `bad' so SS_ISCONNECTING stays set. */
  539         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
  540                 error = EINPROGRESS;
  541                 goto done1;
  542         }
  543         SOCK_LOCK(so);
              /* Interruptible wait (PCATCH) for the connection attempt to finish. */
  544         while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  545                 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
  546                     "connec", 0);
  547                 if (error) {
  548                         if (error == EINTR || error == ERESTART)
  549                                 interrupted = 1;
  550                         break;
  551                 }
  552         }
              /* The wait was not cut short: report and clear the socket error. */
  553         if (error == 0) {
  554                 error = so->so_error;
  555                 so->so_error = 0;
  556         }
  557         SOCK_UNLOCK(so);
  558 bad:
              /*
               * When the sleep was interrupted by a signal SS_ISCONNECTING is
               * left set; in every other case reaching this label it is cleared.
               */
  559         if (!interrupted)
  560                 so->so_state &= ~SS_ISCONNECTING;
  561         if (error == ERESTART)
  562                 error = EINTR;
  563 done1:
  564         fdrop(fp, td);
  565 done2:
  566         NET_UNLOCK_GIANT();
  567         FREE(sa, M_SONAME);
  568         return (error);
  569 }
  570 
  571 /*
  572  * MPSAFE
  573  */
  574 int
  575 socketpair(td, uap)
  576         struct thread *td;
  577         register struct socketpair_args /* {
  578                 int     domain;
  579                 int     type;
  580                 int     protocol;
  581                 int     *rsv;
  582         } */ *uap;
  583 {
  584         register struct filedesc *fdp = td->td_proc->p_fd;
  585         struct file *fp1, *fp2;
  586         struct socket *so1, *so2;
  587         int fd, error, sv[2];
  588 
  589 #ifdef MAC
  590         /* We might want to have a separate check for socket pairs. */
  591         error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
  592             uap->protocol);
  593         if (error)
  594                 return (error);
  595 #endif
  596 
  597         NET_LOCK_GIANT();
  598         error = socreate(uap->domain, &so1, uap->type, uap->protocol,
  599             td->td_ucred, td);
  600         if (error)
  601                 goto done2;
  602         error = socreate(uap->domain, &so2, uap->type, uap->protocol,
  603             td->td_ucred, td);
  604         if (error)
  605                 goto free1;
  606         /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
  607         error = falloc(td, &fp1, &fd);
  608         if (error)
  609                 goto free2;
  610         sv[0] = fd;
  611         fp1->f_data = so1;      /* so1 already has ref count */
  612         error = falloc(td, &fp2, &fd);
  613         if (error)
  614                 goto free3;
  615         fp2->f_data = so2;      /* so2 already has ref count */
  616         sv[1] = fd;
  617         error = soconnect2(so1, so2);
  618         if (error)
  619                 goto free4;
  620         if (uap->type == SOCK_DGRAM) {
  621                 /*
  622                  * Datagram socket connection is asymmetric.
  623                  */
  624                  error = soconnect2(so2, so1);
  625                  if (error)
  626                         goto free4;
  627         }
  628         FILE_LOCK(fp1);
  629         fp1->f_flag = FREAD|FWRITE;
  630         fp1->f_ops = &socketops;
  631         fp1->f_type = DTYPE_SOCKET;
  632         FILE_UNLOCK(fp1);
  633         FILE_LOCK(fp2);
  634         fp2->f_flag = FREAD|FWRITE;
  635         fp2->f_ops = &socketops;
  636         fp2->f_type = DTYPE_SOCKET;
  637         FILE_UNLOCK(fp2);
  638         error = copyout(sv, uap->rsv, 2 * sizeof (int));
  639         fdrop(fp1, td);
  640         fdrop(fp2, td);
  641         goto done2;
  642 free4:
  643         fdclose(fdp, fp2, sv[1], td);
  644         fdrop(fp2, td);
  645 free3:
  646         fdclose(fdp, fp1, sv[0], td);
  647         fdrop(fp1, td);
  648 free2:
  649         (void)soclose(so2);
  650 free1:
  651         (void)soclose(so1);
  652 done2:
  653         NET_UNLOCK_GIANT();
  654         return (error);
  655 }
  656 
  657 static int
  658 sendit(td, s, mp, flags)
  659         register struct thread *td;
  660         int s;
  661         register struct msghdr *mp;
  662         int flags;
  663 {
  664         struct mbuf *control;
  665         struct sockaddr *to;
  666         int error;
  667 
  668         if (mp->msg_name != NULL) {
  669                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
  670                 if (error) {
  671                         to = NULL;
  672                         goto bad;
  673                 }
  674                 mp->msg_name = to;
  675         } else {
  676                 to = NULL;
  677         }
  678 
  679         if (mp->msg_control) {
  680                 if (mp->msg_controllen < sizeof(struct cmsghdr)
  681 #ifdef COMPAT_OLDSOCK
  682                     && mp->msg_flags != MSG_COMPAT
  683 #endif
  684                 ) {
  685                         error = EINVAL;
  686                         goto bad;
  687                 }
  688                 error = sockargs(&control, mp->msg_control,
  689                     mp->msg_controllen, MT_CONTROL);
  690                 if (error)
  691                         goto bad;
  692 #ifdef COMPAT_OLDSOCK
  693                 if (mp->msg_flags == MSG_COMPAT) {
  694                         register struct cmsghdr *cm;
  695 
  696                         M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
  697                         if (control == 0) {
  698                                 error = ENOBUFS;
  699                                 goto bad;
  700                         } else {
  701                                 cm = mtod(control, struct cmsghdr *);
  702                                 cm->cmsg_len = control->m_len;
  703                                 cm->cmsg_level = SOL_SOCKET;
  704                                 cm->cmsg_type = SCM_RIGHTS;
  705                         }
  706                 }
  707 #endif
  708         } else {
  709                 control = NULL;
  710         }
  711 
  712         error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
  713 
  714 bad:
  715         if (to)
  716                 FREE(to, M_SONAME);
  717         return (error);
  718 }
  719 
  720 int
  721 kern_sendit(td, s, mp, flags, control, segflg)
  722         struct thread *td;
  723         int s;
  724         struct msghdr *mp;
  725         int flags;
  726         struct mbuf *control;
  727         enum uio_seg segflg;
  728 {
  729         struct file *fp;
  730         struct uio auio;
  731         struct iovec *iov;
  732         struct socket *so;
  733         int i;
  734         int len, error;
  735 #ifdef KTRACE
  736         struct uio *ktruio = NULL;
  737 #endif
  738 
  739         NET_LOCK_GIANT();
  740         error = getsock(td->td_proc->p_fd, s, &fp);
  741         if (error)
  742                 goto bad2;
  743         so = (struct socket *)fp->f_data;
  744 
  745 #ifdef MAC
  746         SOCK_LOCK(so);
  747         error = mac_check_socket_send(td->td_ucred, so);
  748         SOCK_UNLOCK(so);
  749         if (error)
  750                 goto bad;
  751 #endif
  752 
  753         auio.uio_iov = mp->msg_iov;
  754         auio.uio_iovcnt = mp->msg_iovlen;
  755         auio.uio_segflg = segflg;
  756         auio.uio_rw = UIO_WRITE;
  757         auio.uio_td = td;
  758         auio.uio_offset = 0;                    /* XXX */
  759         auio.uio_resid = 0;
  760         iov = mp->msg_iov;
  761         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  762                 if ((auio.uio_resid += iov->iov_len) < 0) {
  763                         error = EINVAL;
  764                         goto bad;
  765                 }
  766         }
  767 #ifdef KTRACE
  768         if (KTRPOINT(td, KTR_GENIO))
  769                 ktruio = cloneuio(&auio);
  770 #endif
  771         len = auio.uio_resid;
  772         error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
  773             0, control, flags, td);
  774         if (error) {
  775                 if (auio.uio_resid != len && (error == ERESTART ||
  776                     error == EINTR || error == EWOULDBLOCK))
  777                         error = 0;
  778                 /* Generation of SIGPIPE can be controlled per socket */
  779                 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
  780                     !(flags & MSG_NOSIGNAL)) {
  781                         PROC_LOCK(td->td_proc);
  782                         psignal(td->td_proc, SIGPIPE);
  783                         PROC_UNLOCK(td->td_proc);
  784                 }
  785         }
  786         if (error == 0)
  787                 td->td_retval[0] = len - auio.uio_resid;
  788 #ifdef KTRACE
  789         if (ktruio != NULL) {
  790                 ktruio->uio_resid = td->td_retval[0];
  791                 ktrgenio(s, UIO_WRITE, ktruio, error);
  792         }
  793 #endif
  794 bad:
  795         fdrop(fp, td);
  796 bad2:
  797         NET_UNLOCK_GIANT();
  798         return (error);
  799 }
  800 
  801 /*
  802  * MPSAFE
  803  */
  804 int
  805 sendto(td, uap)
  806         struct thread *td;
  807         register struct sendto_args /* {
  808                 int     s;
  809                 caddr_t buf;
  810                 size_t  len;
  811                 int     flags;
  812                 caddr_t to;
  813                 int     tolen;
  814         } */ *uap;
  815 {
  816         struct msghdr msg;
  817         struct iovec aiov;
  818         int error;
  819 
  820         msg.msg_name = uap->to;
  821         msg.msg_namelen = uap->tolen;
  822         msg.msg_iov = &aiov;
  823         msg.msg_iovlen = 1;
  824         msg.msg_control = 0;
  825 #ifdef COMPAT_OLDSOCK
  826         msg.msg_flags = 0;
  827 #endif
  828         aiov.iov_base = uap->buf;
  829         aiov.iov_len = uap->len;
  830         error = sendit(td, uap->s, &msg, uap->flags);
  831         return (error);
  832 }
  833 
  834 #ifdef COMPAT_OLDSOCK
  835 /*
  836  * MPSAFE
  837  */
  838 int
  839 osend(td, uap)
  840         struct thread *td;
  841         register struct osend_args /* {
  842                 int     s;
  843                 caddr_t buf;
  844                 int     len;
  845                 int     flags;
  846         } */ *uap;
  847 {
  848         struct msghdr msg;
  849         struct iovec aiov;
  850         int error;
  851 
  852         msg.msg_name = 0;
  853         msg.msg_namelen = 0;
  854         msg.msg_iov = &aiov;
  855         msg.msg_iovlen = 1;
  856         aiov.iov_base = uap->buf;
  857         aiov.iov_len = uap->len;
  858         msg.msg_control = 0;
  859         msg.msg_flags = 0;
  860         error = sendit(td, uap->s, &msg, uap->flags);
  861         return (error);
  862 }
  863 
  864 /*
  865  * MPSAFE
  866  */
  867 int
  868 osendmsg(td, uap)
  869         struct thread *td;
  870         struct osendmsg_args /* {
  871                 int     s;
  872                 caddr_t msg;
  873                 int     flags;
  874         } */ *uap;
  875 {
  876         struct msghdr msg;
  877         struct iovec *iov;
  878         int error;
  879 
  880         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
  881         if (error)
  882                 return (error);
  883         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  884         if (error)
  885                 return (error);
  886         msg.msg_iov = iov;
  887         msg.msg_flags = MSG_COMPAT;
  888         error = sendit(td, uap->s, &msg, uap->flags);
  889         free(iov, M_IOV);
  890         return (error);
  891 }
  892 #endif
  893 
  894 /*
  895  * MPSAFE
  896  */
  897 int
  898 sendmsg(td, uap)
  899         struct thread *td;
  900         struct sendmsg_args /* {
  901                 int     s;
  902                 caddr_t msg;
  903                 int     flags;
  904         } */ *uap;
  905 {
  906         struct msghdr msg;
  907         struct iovec *iov;
  908         int error;
  909 
  910         error = copyin(uap->msg, &msg, sizeof (msg));
  911         if (error)
  912                 return (error);
  913         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
  914         if (error)
  915                 return (error);
  916         msg.msg_iov = iov;
  917 #ifdef COMPAT_OLDSOCK
  918         msg.msg_flags = 0;
  919 #endif
  920         error = sendit(td, uap->s, &msg, uap->flags);
  921         free(iov, M_IOV);
  922         return (error);
  923 }
  924 
  925 int
  926 kern_recvit(td, s, mp, namelenp, segflg, controlp)
  927         struct thread *td;
  928         int s;
  929         struct msghdr *mp;
  930         void *namelenp;
  931         enum uio_seg segflg;
  932         struct mbuf **controlp;
  933 {
  934         struct uio auio;
  935         struct iovec *iov;
  936         int i;
  937         socklen_t len;
  938         int error;
  939         struct mbuf *m, *control = 0;
  940         caddr_t ctlbuf;
  941         struct file *fp;
  942         struct socket *so;
  943         struct sockaddr *fromsa = 0;
  944 #ifdef KTRACE
  945         struct uio *ktruio = NULL;
  946 #endif
  947 
  948         if(controlp != NULL)
  949                 *controlp = 0;
  950 
  951         NET_LOCK_GIANT();
  952         error = getsock(td->td_proc->p_fd, s, &fp);
  953         if (error) {
  954                 NET_UNLOCK_GIANT();
  955                 return (error);
  956         }
  957         so = fp->f_data;
  958 
  959 #ifdef MAC
  960         SOCK_LOCK(so);
  961         error = mac_check_socket_receive(td->td_ucred, so);
  962         SOCK_UNLOCK(so);
  963         if (error) {
  964                 fdrop(fp, td);
  965                 NET_UNLOCK_GIANT();
  966                 return (error);
  967         }
  968 #endif
  969 
  970         auio.uio_iov = mp->msg_iov;
  971         auio.uio_iovcnt = mp->msg_iovlen;
  972         auio.uio_segflg = segflg;
  973         auio.uio_rw = UIO_READ;
  974         auio.uio_td = td;
  975         auio.uio_offset = 0;                    /* XXX */
  976         auio.uio_resid = 0;
  977         iov = mp->msg_iov;
  978         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  979                 if ((auio.uio_resid += iov->iov_len) < 0) {
  980                         fdrop(fp, td);
  981                         NET_UNLOCK_GIANT();
  982                         return (EINVAL);
  983                 }
  984         }
  985 #ifdef KTRACE
  986         if (KTRPOINT(td, KTR_GENIO))
  987                 ktruio = cloneuio(&auio);
  988 #endif
  989         len = auio.uio_resid;
  990         error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
  991             (struct mbuf **)0, (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
  992             &mp->msg_flags);
  993         if (error) {
  994                 if (auio.uio_resid != (int)len && (error == ERESTART ||
  995                     error == EINTR || error == EWOULDBLOCK))
  996                         error = 0;
  997         }
  998 #ifdef KTRACE
  999         if (ktruio != NULL) {
 1000                 ktruio->uio_resid = (int)len - auio.uio_resid;
 1001                 ktrgenio(s, UIO_READ, ktruio, error);
 1002         }
 1003 #endif
 1004         if (error)
 1005                 goto out;
 1006         td->td_retval[0] = (int)len - auio.uio_resid;
 1007         if (mp->msg_name) {
 1008                 len = mp->msg_namelen;
 1009                 if (len <= 0 || fromsa == 0)
 1010                         len = 0;
 1011                 else {
 1012                         /* save sa_len before it is destroyed by MSG_COMPAT */
 1013                         len = MIN(len, fromsa->sa_len);
 1014 #ifdef COMPAT_OLDSOCK
 1015                         if (mp->msg_flags & MSG_COMPAT)
 1016                                 ((struct osockaddr *)fromsa)->sa_family =
 1017                                     fromsa->sa_family;
 1018 #endif
 1019                         error = copyout(fromsa, mp->msg_name, (unsigned)len);
 1020                         if (error)
 1021                                 goto out;
 1022                 }
 1023                 mp->msg_namelen = len;
 1024                 if (namelenp &&
 1025                     (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
 1026 #ifdef COMPAT_OLDSOCK
 1027                         if (mp->msg_flags & MSG_COMPAT)
 1028                                 error = 0;      /* old recvfrom didn't check */
 1029                         else
 1030 #endif
 1031                         goto out;
 1032                 }
 1033         }
 1034         if (mp->msg_control && controlp == NULL) {
 1035 #ifdef COMPAT_OLDSOCK
 1036                 /*
 1037                  * We assume that old recvmsg calls won't receive access
 1038                  * rights and other control info, esp. as control info
 1039                  * is always optional and those options didn't exist in 4.3.
 1040                  * If we receive rights, trim the cmsghdr; anything else
 1041                  * is tossed.
 1042                  */
 1043                 if (control && mp->msg_flags & MSG_COMPAT) {
 1044                         if (mtod(control, struct cmsghdr *)->cmsg_level !=
 1045                             SOL_SOCKET ||
 1046                             mtod(control, struct cmsghdr *)->cmsg_type !=
 1047                             SCM_RIGHTS) {
 1048                                 mp->msg_controllen = 0;
 1049                                 goto out;
 1050                         }
 1051                         control->m_len -= sizeof (struct cmsghdr);
 1052                         control->m_data += sizeof (struct cmsghdr);
 1053                 }
 1054 #endif
 1055                 len = mp->msg_controllen;
 1056                 m = control;
 1057                 mp->msg_controllen = 0;
 1058                 ctlbuf = mp->msg_control;
 1059 
 1060                 while (m && len > 0) {
 1061                         unsigned int tocopy;
 1062 
 1063                         if (len >= m->m_len)
 1064                                 tocopy = m->m_len;
 1065                         else {
 1066                                 mp->msg_flags |= MSG_CTRUNC;
 1067                                 tocopy = len;
 1068                         }
 1069 
 1070                         if ((error = copyout(mtod(m, caddr_t),
 1071                                         ctlbuf, tocopy)) != 0)
 1072                                 goto out;
 1073 
 1074                         ctlbuf += tocopy;
 1075                         len -= tocopy;
 1076                         m = m->m_next;
 1077                 }
 1078                 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
 1079         }
 1080 out:
 1081         fdrop(fp, td);
 1082         NET_UNLOCK_GIANT();
 1083         if (fromsa)
 1084                 FREE(fromsa, M_SONAME);
 1085 
 1086         if (error == 0 && controlp != NULL)  
 1087                 *controlp = control;
 1088         else  if (control)
 1089                 m_freem(control);
 1090 
 1091         return (error);
 1092 }
 1093 
 1094 static int
 1095 recvit(td, s, mp, namelenp)
 1096         struct thread *td;
 1097         int s;
 1098         struct msghdr *mp;
 1099         void *namelenp;
 1100 {
 1101 
 1102         return (kern_recvit(td, s, mp, namelenp, UIO_USERSPACE, NULL));
 1103 }
 1104 
 1105 /*
 1106  * MPSAFE
 1107  */
 1108 int
 1109 recvfrom(td, uap)
 1110         struct thread *td;
 1111         register struct recvfrom_args /* {
 1112                 int     s;
 1113                 caddr_t buf;
 1114                 size_t  len;
 1115                 int     flags;
 1116                 struct sockaddr * __restrict    from;
 1117                 socklen_t * __restrict fromlenaddr;
 1118         } */ *uap;
 1119 {
 1120         struct msghdr msg;
 1121         struct iovec aiov;
 1122         int error;
 1123 
 1124         if (uap->fromlenaddr) {
 1125                 error = copyin(uap->fromlenaddr,
 1126                     &msg.msg_namelen, sizeof (msg.msg_namelen));
 1127                 if (error)
 1128                         goto done2;
 1129         } else {
 1130                 msg.msg_namelen = 0;
 1131         }
 1132         msg.msg_name = uap->from;
 1133         msg.msg_iov = &aiov;
 1134         msg.msg_iovlen = 1;
 1135         aiov.iov_base = uap->buf;
 1136         aiov.iov_len = uap->len;
 1137         msg.msg_control = 0;
 1138         msg.msg_flags = uap->flags;
 1139         error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 1140 done2:
 1141         return(error);
 1142 }
 1143 
 1144 #ifdef COMPAT_OLDSOCK
 1145 /*
 1146  * MPSAFE
 1147  */
 1148 int
 1149 orecvfrom(td, uap)
 1150         struct thread *td;
 1151         struct recvfrom_args *uap;
 1152 {
 1153 
 1154         uap->flags |= MSG_COMPAT;
 1155         return (recvfrom(td, uap));
 1156 }
 1157 #endif
 1158 
 1159 
 1160 #ifdef COMPAT_OLDSOCK
 1161 /*
 1162  * MPSAFE
 1163  */
 1164 int
 1165 orecv(td, uap)
 1166         struct thread *td;
 1167         register struct orecv_args /* {
 1168                 int     s;
 1169                 caddr_t buf;
 1170                 int     len;
 1171                 int     flags;
 1172         } */ *uap;
 1173 {
 1174         struct msghdr msg;
 1175         struct iovec aiov;
 1176         int error;
 1177 
 1178         msg.msg_name = 0;
 1179         msg.msg_namelen = 0;
 1180         msg.msg_iov = &aiov;
 1181         msg.msg_iovlen = 1;
 1182         aiov.iov_base = uap->buf;
 1183         aiov.iov_len = uap->len;
 1184         msg.msg_control = 0;
 1185         msg.msg_flags = uap->flags;
 1186         error = recvit(td, uap->s, &msg, NULL);
 1187         return (error);
 1188 }
 1189 
 1190 /*
 1191  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 1192  * overlays the new one, missing only the flags, and with the (old) access
 1193  * rights where the control fields are now.
 1194  *
 1195  * MPSAFE
 1196  */
 1197 int
 1198 orecvmsg(td, uap)
 1199         struct thread *td;
 1200         struct orecvmsg_args /* {
 1201                 int     s;
 1202                 struct  omsghdr *msg;
 1203                 int     flags;
 1204         } */ *uap;
 1205 {
 1206         struct msghdr msg;
 1207         struct iovec *iov;
 1208         int error;
 1209 
 1210         error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 1211         if (error)
 1212                 return (error);
 1213         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1214         if (error)
 1215                 return (error);
 1216         msg.msg_flags = uap->flags | MSG_COMPAT;
 1217         msg.msg_iov = iov;
 1218         error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
 1219         if (msg.msg_controllen && error == 0)
 1220                 error = copyout(&msg.msg_controllen,
 1221                     &uap->msg->msg_accrightslen, sizeof (int));
 1222         free(iov, M_IOV);
 1223         return (error);
 1224 }
 1225 #endif
 1226 
 1227 /*
 1228  * MPSAFE
 1229  */
 1230 int
 1231 recvmsg(td, uap)
 1232         struct thread *td;
 1233         struct recvmsg_args /* {
 1234                 int     s;
 1235                 struct  msghdr *msg;
 1236                 int     flags;
 1237         } */ *uap;
 1238 {
 1239         struct msghdr msg;
 1240         struct iovec *uiov, *iov;
 1241         int error;
 1242 
 1243         error = copyin(uap->msg, &msg, sizeof (msg));
 1244         if (error)
 1245                 return (error);
 1246         error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 1247         if (error)
 1248                 return (error);
 1249         msg.msg_flags = uap->flags;
 1250 #ifdef COMPAT_OLDSOCK
 1251         msg.msg_flags &= ~MSG_COMPAT;
 1252 #endif
 1253         uiov = msg.msg_iov;
 1254         msg.msg_iov = iov;
 1255         error = recvit(td, uap->s, &msg, NULL);
 1256         if (error == 0) {
 1257                 msg.msg_iov = uiov;
 1258                 error = copyout(&msg, uap->msg, sizeof(msg));
 1259         }
 1260         free(iov, M_IOV);
 1261         return (error);
 1262 }
 1263 
 1264 /*
 1265  * MPSAFE
 1266  */
 1267 /* ARGSUSED */
 1268 int
 1269 shutdown(td, uap)
 1270         struct thread *td;
 1271         register struct shutdown_args /* {
 1272                 int     s;
 1273                 int     how;
 1274         } */ *uap;
 1275 {
 1276         struct socket *so;
 1277         struct file *fp;
 1278         int error;
 1279 
 1280         NET_LOCK_GIANT();
 1281         error = getsock(td->td_proc->p_fd, uap->s, &fp);
 1282         if (error == 0) {
 1283                 so = fp->f_data;
 1284                 error = soshutdown(so, uap->how);
 1285                 fdrop(fp, td);
 1286         }
 1287         NET_UNLOCK_GIANT();
 1288         return (error);
 1289 }
 1290 
 1291 /*
 1292  * MPSAFE
 1293  */
 1294 /* ARGSUSED */
 1295 int
 1296 setsockopt(td, uap)
 1297         struct thread *td;
 1298         register struct setsockopt_args /* {
 1299                 int     s;
 1300                 int     level;
 1301                 int     name;
 1302                 caddr_t val;
 1303                 int     valsize;
 1304         } */ *uap;
 1305 {
 1306 
 1307         return (kern_setsockopt(td, uap->s, uap->level, uap->name,
 1308             uap->val, UIO_USERSPACE, uap->valsize));
 1309 }
 1310 
 1311 int
 1312 kern_setsockopt(td, s, level, name, val, valseg, valsize)
 1313         struct thread *td;
 1314         int s;
 1315         int level;
 1316         int name;
 1317         void *val;
 1318         enum uio_seg valseg;
 1319         socklen_t valsize;
 1320 {
 1321         int error;
 1322         struct socket *so;
 1323         struct file *fp;
 1324         struct sockopt sopt;
 1325 
 1326         if (val == NULL && valsize != 0)
 1327                 return (EFAULT);
 1328         if ((int)valsize < 0)
 1329                 return (EINVAL);
 1330 
 1331         sopt.sopt_dir = SOPT_SET;
 1332         sopt.sopt_level = level;
 1333         sopt.sopt_name = name;
 1334         sopt.sopt_val = val;
 1335         sopt.sopt_valsize = valsize;
 1336         switch (valseg) {
 1337         case UIO_USERSPACE:
 1338                 sopt.sopt_td = td;
 1339                 break;
 1340         case UIO_SYSSPACE:
 1341                 sopt.sopt_td = NULL;
 1342                 break;
 1343         default:
 1344                 panic("kern_setsockopt called with bad valseg");
 1345         }
 1346 
 1347         NET_LOCK_GIANT();
 1348         error = getsock(td->td_proc->p_fd, s, &fp);
 1349         if (error == 0) {
 1350                 so = fp->f_data;
 1351                 error = sosetopt(so, &sopt);
 1352                 fdrop(fp, td);
 1353         }
 1354         NET_UNLOCK_GIANT();
 1355         return(error);
 1356 }
 1357 
 1358 /*
 1359  * MPSAFE
 1360  */
 1361 /* ARGSUSED */
 1362 int
 1363 getsockopt(td, uap)
 1364         struct thread *td;
 1365         register struct getsockopt_args /* {
 1366                 int     s;
 1367                 int     level;
 1368                 int     name;
 1369                 void * __restrict       val;
 1370                 socklen_t * __restrict avalsize;
 1371         } */ *uap;
 1372 {
 1373         socklen_t valsize;
 1374         int     error;
 1375 
 1376         if (uap->val) {
 1377                 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 1378                 if (error)
 1379                         return (error);
 1380         }
 1381 
 1382         error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 1383             uap->val, UIO_USERSPACE, &valsize);
 1384 
 1385         if (error == 0)
 1386                 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 1387         return (error);
 1388 }
 1389 
 1390 /*
 1391  * Kernel version of getsockopt.
 1392  * optval can be a userland or userspace. optlen is always a kernel pointer.
 1393  */
 1394 int
 1395 kern_getsockopt(td, s, level, name, val, valseg, valsize)
 1396         struct thread *td;
 1397         int s;
 1398         int level;
 1399         int name;
 1400         void *val;
 1401         enum uio_seg valseg;
 1402         socklen_t *valsize;
 1403 {
 1404         int error;
 1405         struct  socket *so;
 1406         struct file *fp;
 1407         struct  sockopt sopt;
 1408 
 1409         if (val == NULL)
 1410                 *valsize = 0;
 1411         if ((int)*valsize < 0)
 1412                 return (EINVAL);
 1413 
 1414         sopt.sopt_dir = SOPT_GET;
 1415         sopt.sopt_level = level;
 1416         sopt.sopt_name = name;
 1417         sopt.sopt_val = val;
 1418         sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
 1419         switch (valseg) {
 1420         case UIO_USERSPACE:
 1421                 sopt.sopt_td = td;
 1422                 break;
 1423         case UIO_SYSSPACE:
 1424                 sopt.sopt_td = NULL;
 1425                 break;
 1426         default:
 1427                 panic("kern_getsockopt called with bad valseg");
 1428         }
 1429 
 1430         NET_LOCK_GIANT();
 1431         error = getsock(td->td_proc->p_fd, s, &fp);
 1432         if (error == 0) {
 1433                 so = fp->f_data;
 1434                 error = sogetopt(so, &sopt);
 1435                 *valsize = sopt.sopt_valsize;
 1436                 fdrop(fp, td);
 1437         }
 1438         NET_UNLOCK_GIANT();
 1439         return (error);
 1440 }
 1441 
 1442 /*
 1443  * getsockname1() - Get socket name.
 1444  *
 1445  * MPSAFE
 1446  */
 1447 /* ARGSUSED */
 1448 static int
 1449 getsockname1(td, uap, compat)
 1450         struct thread *td;
 1451         register struct getsockname_args /* {
 1452                 int     fdes;
 1453                 struct sockaddr * __restrict asa;
 1454                 socklen_t * __restrict alen;
 1455         } */ *uap;
 1456         int compat;
 1457 {
 1458         struct socket *so;
 1459         struct sockaddr *sa;
 1460         struct file *fp;
 1461         socklen_t len;
 1462         int error;
 1463 
 1464         NET_LOCK_GIANT();
 1465         error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
 1466         if (error)
 1467                 goto done2;
 1468         so = fp->f_data;
 1469         error = copyin(uap->alen, &len, sizeof (len));
 1470         if (error)
 1471                 goto done1;
 1472         if (len < 0) {
 1473                 error = EINVAL;
 1474                 goto done1;
 1475         }
 1476         sa = 0;
 1477         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
 1478         if (error)
 1479                 goto bad;
 1480         if (sa == 0) {
 1481                 len = 0;
 1482                 goto gotnothing;
 1483         }
 1484 
 1485         len = MIN(len, sa->sa_len);
 1486 #ifdef COMPAT_OLDSOCK
 1487         if (compat)
 1488                 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1489 #endif
 1490         error = copyout(sa, uap->asa, (u_int)len);
 1491         if (error == 0)
 1492 gotnothing:
 1493                 error = copyout(&len, uap->alen, sizeof (len));
 1494 bad:
 1495         if (sa)
 1496                 FREE(sa, M_SONAME);
 1497 done1:
 1498         fdrop(fp, td);
 1499 done2:
 1500         NET_UNLOCK_GIANT();
 1501         return (error);
 1502 }
 1503 
 1504 /*
 1505  * MPSAFE
 1506  */
 1507 int
 1508 getsockname(td, uap)
 1509         struct thread *td;
 1510         struct getsockname_args *uap;
 1511 {
 1512 
 1513         return (getsockname1(td, uap, 0));
 1514 }
 1515 
 1516 #ifdef COMPAT_OLDSOCK
 1517 /*
 1518  * MPSAFE
 1519  */
 1520 int
 1521 ogetsockname(td, uap)
 1522         struct thread *td;
 1523         struct getsockname_args *uap;
 1524 {
 1525 
 1526         return (getsockname1(td, uap, 1));
 1527 }
 1528 #endif /* COMPAT_OLDSOCK */
 1529 
 1530 /*
 1531  * getpeername1() - Get name of peer for connected socket.
 1532  *
 1533  * MPSAFE
 1534  */
 1535 /* ARGSUSED */
 1536 static int
 1537 getpeername1(td, uap, compat)
 1538         struct thread *td;
 1539         register struct getpeername_args /* {
 1540                 int     fdes;
 1541                 struct sockaddr * __restrict    asa;
 1542                 socklen_t * __restrict  alen;
 1543         } */ *uap;
 1544         int compat;
 1545 {
 1546         struct socket *so;
 1547         struct sockaddr *sa;
 1548         struct file *fp;
 1549         socklen_t len;
 1550         int error;
 1551 
 1552         NET_LOCK_GIANT();
 1553         error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
 1554         if (error)
 1555                 goto done2;
 1556         so = fp->f_data;
 1557         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 1558                 error = ENOTCONN;
 1559                 goto done1;
 1560         }
 1561         error = copyin(uap->alen, &len, sizeof (len));
 1562         if (error)
 1563                 goto done1;
 1564         if (len < 0) {
 1565                 error = EINVAL;
 1566                 goto done1;
 1567         }
 1568         sa = 0;
 1569         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
 1570         if (error)
 1571                 goto bad;
 1572         if (sa == 0) {
 1573                 len = 0;
 1574                 goto gotnothing;
 1575         }
 1576         len = MIN(len, sa->sa_len);
 1577 #ifdef COMPAT_OLDSOCK
 1578         if (compat)
 1579                 ((struct osockaddr *)sa)->sa_family =
 1580                     sa->sa_family;
 1581 #endif
 1582         error = copyout(sa, uap->asa, (u_int)len);
 1583         if (error)
 1584                 goto bad;
 1585 gotnothing:
 1586         error = copyout(&len, uap->alen, sizeof (len));
 1587 bad:
 1588         if (sa)
 1589                 FREE(sa, M_SONAME);
 1590 done1:
 1591         fdrop(fp, td);
 1592 done2:
 1593         NET_UNLOCK_GIANT();
 1594         return (error);
 1595 }
 1596 
 1597 /*
 1598  * MPSAFE
 1599  */
 1600 int
 1601 getpeername(td, uap)
 1602         struct thread *td;
 1603         struct getpeername_args *uap;
 1604 {
 1605 
 1606         return (getpeername1(td, uap, 0));
 1607 }
 1608 
 1609 #ifdef COMPAT_OLDSOCK
 1610 /*
 1611  * MPSAFE
 1612  */
 1613 int
 1614 ogetpeername(td, uap)
 1615         struct thread *td;
 1616         struct ogetpeername_args *uap;
 1617 {
 1618 
 1619         /* XXX uap should have type `getpeername_args *' to begin with. */
 1620         return (getpeername1(td, (struct getpeername_args *)uap, 1));
 1621 }
 1622 #endif /* COMPAT_OLDSOCK */
 1623 
 1624 int
 1625 sockargs(mp, buf, buflen, type)
 1626         struct mbuf **mp;
 1627         caddr_t buf;
 1628         int buflen, type;
 1629 {
 1630         register struct sockaddr *sa;
 1631         register struct mbuf *m;
 1632         int error;
 1633 
 1634         if ((u_int)buflen > MLEN) {
 1635 #ifdef COMPAT_OLDSOCK
 1636                 if (type == MT_SONAME && (u_int)buflen <= 112)
 1637                         buflen = MLEN;          /* unix domain compat. hack */
 1638                 else
 1639 #endif
 1640                         if ((u_int)buflen > MCLBYTES)
 1641                                 return (EINVAL);
 1642         }
 1643         m = m_get(M_TRYWAIT, type);
 1644         if (m == NULL)
 1645                 return (ENOBUFS);
 1646         if ((u_int)buflen > MLEN) {
 1647                 MCLGET(m, M_TRYWAIT);
 1648                 if ((m->m_flags & M_EXT) == 0) {
 1649                         m_free(m);
 1650                         return (ENOBUFS);
 1651                 }
 1652         }
 1653         m->m_len = buflen;
 1654         error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 1655         if (error)
 1656                 (void) m_free(m);
 1657         else {
 1658                 *mp = m;
 1659                 if (type == MT_SONAME) {
 1660                         sa = mtod(m, struct sockaddr *);
 1661 
 1662 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1663                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1664                                 sa->sa_family = sa->sa_len;
 1665 #endif
 1666                         sa->sa_len = buflen;
 1667                 }
 1668         }
 1669         return (error);
 1670 }
 1671 
 1672 int
 1673 getsockaddr(namp, uaddr, len)
 1674         struct sockaddr **namp;
 1675         caddr_t uaddr;
 1676         size_t len;
 1677 {
 1678         struct sockaddr *sa;
 1679         int error;
 1680 
 1681         if (len > SOCK_MAXADDRLEN)
 1682                 return (ENAMETOOLONG);
 1683         if (len < offsetof(struct sockaddr, sa_data[0]))
 1684                 return (EINVAL);
 1685         MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
 1686         error = copyin(uaddr, sa, len);
 1687         if (error) {
 1688                 FREE(sa, M_SONAME);
 1689         } else {
 1690 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1691                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1692                         sa->sa_family = sa->sa_len;
 1693 #endif
 1694                 sa->sa_len = len;
 1695                 *namp = sa;
 1696         }
 1697         return (error);
 1698 }
 1699 
 1700 /*
 1701  * Detach mapped page and release resources back to the system.
 1702  */
 1703 void
 1704 sf_buf_mext(void *addr, void *args)
 1705 {
 1706         vm_page_t m;
 1707 
 1708         m = sf_buf_page(args);
 1709         sf_buf_free(args);
 1710         vm_page_lock_queues();
 1711         vm_page_unwire(m, 0);
 1712         /*
 1713          * Check for the object going away on us. This can
 1714          * happen since we don't hold a reference to it.
 1715          * If so, we're responsible for freeing the page.
 1716          */
 1717         if (m->wire_count == 0 && m->object == NULL)
 1718                 vm_page_free(m);
 1719         vm_page_unlock_queues();
 1720 }
 1721 
 1722 /*
 1723  * sendfile(2)
 1724  *
 1725  * MPSAFE
 1726  *
 1727  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 1728  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 1729  *
 1730  * Send a file specified by 'fd' and starting at 'offset' to a socket
 1731  * specified by 's'. Send only 'nbytes' of the file or until EOF if
 1732  * nbytes == 0. Optionally add a header and/or trailer to the socket
 1733  * output. If specified, write the total number of bytes sent into *sbytes.
 1734  *
 1735  */
 1736 int
 1737 sendfile(struct thread *td, struct sendfile_args *uap)
 1738 {
 1739 
 1740         return (do_sendfile(td, uap, 0));
 1741 }
 1742 
 1743 static int
 1744 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
 1745 {
 1746         struct sf_hdtr hdtr;
 1747         struct uio *hdr_uio, *trl_uio;
 1748         int error;
 1749 
 1750         hdr_uio = trl_uio = NULL;
 1751 
 1752         if (uap->hdtr != NULL) {
 1753                 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 1754                 if (error)
 1755                         goto out;
 1756                 if (hdtr.headers != NULL) {
 1757                         error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
 1758                         if (error)
 1759                                 goto out;
 1760                 }
 1761                 if (hdtr.trailers != NULL) {
 1762                         error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
 1763                         if (error)
 1764                                 goto out;
 1765 
 1766                 }
 1767         }
 1768 
 1769         error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
 1770 out:
 1771         if (hdr_uio)
 1772                 free(hdr_uio, M_IOV);
 1773         if (trl_uio)
 1774                 free(trl_uio, M_IOV);
 1775         return (error);
 1776 }
 1777 
 1778 #ifdef COMPAT_FREEBSD4
 1779 int
 1780 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
 1781 {
 1782         struct sendfile_args args;
 1783 
 1784         args.fd = uap->fd;
 1785         args.s = uap->s;
 1786         args.offset = uap->offset;
 1787         args.nbytes = uap->nbytes;
 1788         args.hdtr = uap->hdtr;
 1789         args.sbytes = uap->sbytes;
 1790         args.flags = uap->flags;
 1791 
 1792         return (do_sendfile(td, &args, 1));
 1793 }
 1794 #endif /* COMPAT_FREEBSD4 */
 1795 
 1796 int
 1797 kern_sendfile(struct thread *td, struct sendfile_args *uap,
 1798     struct uio *hdr_uio, struct uio *trl_uio, int compat)
 1799 {
 1800         struct vnode *vp;
 1801         struct vm_object *obj = NULL;
 1802         struct socket *so = NULL;
 1803         struct mbuf *m, *m_header = NULL;
 1804         struct sf_buf *sf;
 1805         struct vm_page *pg;
 1806         off_t off, xfsize, hdtr_size, sbytes = 0;
 1807         int error, headersize = 0, headersent = 0;
 1808         int vfslocked;
 1809 
 1810         NET_LOCK_GIANT();
 1811 
 1812         hdtr_size = 0;
 1813 
 1814         /*
 1815          * The descriptor must be a regular file and have a backing VM object.
 1816          */
 1817         if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
 1818                 goto done;
 1819         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1820         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1821         if (vp->v_type == VREG) {
 1822                 obj = vp->v_object;
 1823                 if (obj != NULL) {
 1824                         /*
 1825                          * Temporarily increase the backing VM
 1826                          * object's reference count so that a forced
 1827                          * reclamation of its vnode does not
 1828                          * immediately destroy it.
 1829                          */
 1830                         VM_OBJECT_LOCK(obj);
 1831                         if ((obj->flags & OBJ_DEAD) == 0) {
 1832                                 vm_object_reference_locked(obj);
 1833                                 VM_OBJECT_UNLOCK(obj);
 1834                         } else {
 1835                                 VM_OBJECT_UNLOCK(obj);
 1836                                 obj = NULL;
 1837                         }
 1838                 }
 1839         }
 1840         VOP_UNLOCK(vp, 0, td);
 1841         VFS_UNLOCK_GIANT(vfslocked);
 1842         if (obj == NULL) {
 1843                 error = EINVAL;
 1844                 goto done;
 1845         }
 1846         if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
 1847                 goto done;
 1848         if (so->so_type != SOCK_STREAM) {
 1849                 error = EINVAL;
 1850                 goto done;
 1851         }
 1852         if ((so->so_state & SS_ISCONNECTED) == 0) {
 1853                 error = ENOTCONN;
 1854                 goto done;
 1855         }
 1856         if (uap->offset < 0) {
 1857                 error = EINVAL;
 1858                 goto done;
 1859         }
 1860 
 1861 #ifdef MAC
 1862         SOCK_LOCK(so);
 1863         error = mac_check_socket_send(td->td_ucred, so);
 1864         SOCK_UNLOCK(so);
 1865         if (error)
 1866                 goto done;
 1867 #endif
 1868 
 1869         /*
 1870          * If specified, get the pointer to the sf_hdtr struct for
 1871          * any headers/trailers.
 1872          */
 1873         if (hdr_uio != NULL) {
 1874                 hdr_uio->uio_td = td;
 1875                 hdr_uio->uio_rw = UIO_WRITE;
 1876                 if (hdr_uio->uio_resid > 0) {
 1877                         m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0);
 1878                         if (m_header == NULL)
 1879                                 goto done;
 1880                         headersize = m_header->m_pkthdr.len;
 1881                         if (compat)
 1882                                 sbytes += headersize;
 1883                 }
 1884         }
 1885 
 1886         /*
 1887          * Protect against multiple writers to the socket.
 1888          */
 1889         SOCKBUF_LOCK(&so->so_snd);
 1890         (void) sblock(&so->so_snd, M_WAITOK);
 1891         SOCKBUF_UNLOCK(&so->so_snd);
 1892 
 1893         /*
 1894          * Loop through the pages in the file, starting with the requested
 1895          * offset. Get a file page (do I/O if necessary), map the file page
 1896          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 1897          * it on the socket.
 1898          */
 1899         for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
 1900                 vm_pindex_t pindex;
 1901                 vm_offset_t pgoff;
 1902 
 1903                 pindex = OFF_TO_IDX(off);
 1904                 VM_OBJECT_LOCK(obj);
 1905 retry_lookup:
 1906                 /*
 1907                  * Calculate the amount to transfer. Not to exceed a page,
 1908                  * the EOF, or the passed in nbytes.
 1909                  */
 1910                 xfsize = obj->un_pager.vnp.vnp_size - off;
 1911                 VM_OBJECT_UNLOCK(obj);
 1912                 if (xfsize > PAGE_SIZE)
 1913                         xfsize = PAGE_SIZE;
 1914                 pgoff = (vm_offset_t)(off & PAGE_MASK);
 1915                 if (PAGE_SIZE - pgoff < xfsize)
 1916                         xfsize = PAGE_SIZE - pgoff;
 1917                 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
 1918                         xfsize = uap->nbytes - sbytes;
 1919                 if (xfsize <= 0) {
 1920                         if (m_header != NULL) {
 1921                                 m = m_header;
 1922                                 m_header = NULL;
 1923                                 SOCKBUF_LOCK(&so->so_snd);
 1924                                 goto retry_space;
 1925                         } else
 1926                                 break;
 1927                 }
 1928                 /*
 1929                  * Optimize the non-blocking case by looking at the socket space
 1930                  * before going to the extra work of constituting the sf_buf.
 1931                  */
 1932                 SOCKBUF_LOCK(&so->so_snd);
 1933                 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
 1934                         if (so->so_snd.sb_state & SBS_CANTSENDMORE)
 1935                                 error = EPIPE;
 1936                         else
 1937                                 error = EAGAIN;
 1938                         sbunlock(&so->so_snd);
 1939                         SOCKBUF_UNLOCK(&so->so_snd);
 1940                         goto done;
 1941                 }
 1942                 SOCKBUF_UNLOCK(&so->so_snd);
 1943                 VM_OBJECT_LOCK(obj);
 1944                 /*
 1945                  * Attempt to look up the page.
 1946                  *
 1947                  *      Allocate if not found
 1948                  *
 1949                  *      Wait and loop if busy.
 1950                  */
 1951                 pg = vm_page_lookup(obj, pindex);
 1952 
 1953                 if (pg == NULL) {
 1954                         pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
 1955                             VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
 1956                         if (pg == NULL) {
 1957                                 VM_OBJECT_UNLOCK(obj);
 1958                                 VM_WAIT;
 1959                                 VM_OBJECT_LOCK(obj);
 1960                                 goto retry_lookup;
 1961                         }
 1962                         vm_page_lock_queues();
 1963                 } else {
 1964                         vm_page_lock_queues();
 1965                         if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
 1966                                 goto retry_lookup;
 1967                         /*
 1968                          * Wire the page so it does not get ripped out from
 1969                          * under us.
 1970                          */
 1971                         vm_page_wire(pg);
 1972                 }
 1973 
 1974                 /*
 1975                  * If page is not valid for what we need, initiate I/O
 1976                  */
 1977 
 1978                 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
 1979                         VM_OBJECT_UNLOCK(obj);
 1980                 } else if (uap->flags & SF_NODISKIO) {
 1981                         error = EBUSY;
 1982                 } else {
 1983                         int bsize, resid;
 1984 
 1985                         /*
 1986                          * Ensure that our page is still around when the I/O
 1987                          * completes.
 1988                          */
 1989                         vm_page_io_start(pg);
 1990                         vm_page_unlock_queues();
 1991                         VM_OBJECT_UNLOCK(obj);
 1992 
 1993                         /*
 1994                          * Get the page from backing store.
 1995                          */
 1996                         bsize = vp->v_mount->mnt_stat.f_iosize;
 1997                         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1998                         vn_lock(vp, LK_SHARED | LK_RETRY, td);
 1999                         /*
 2000                          * XXXMAC: Because we don't have fp->f_cred here,
 2001                          * we pass in NOCRED.  This is probably wrong, but
 2002                          * is consistent with our original implementation.
 2003                          */
 2004                         error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
 2005                             trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
 2006                             IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
 2007                             td->td_ucred, NOCRED, &resid, td);
 2008                         VOP_UNLOCK(vp, 0, td);
 2009                         VFS_UNLOCK_GIANT(vfslocked);
 2010                         VM_OBJECT_LOCK(obj);
 2011                         vm_page_lock_queues();
 2012                         vm_page_io_finish(pg);
 2013                         if (!error)
 2014                                 VM_OBJECT_UNLOCK(obj);
 2015                         mbstat.sf_iocnt++;
 2016                 }
 2017         
 2018                 if (error) {
 2019                         vm_page_unwire(pg, 0);
 2020                         /*
 2021                          * See if anyone else might know about this page.
 2022                          * If not and it is not valid, then free it.
 2023                          */
 2024                         if (pg->wire_count == 0 && pg->valid == 0 &&
 2025                             pg->busy == 0 && !(pg->flags & PG_BUSY) &&
 2026                             pg->hold_count == 0) {
 2027                                 vm_page_free(pg);
 2028                         }
 2029                         vm_page_unlock_queues();
 2030                         VM_OBJECT_UNLOCK(obj);
 2031                         SOCKBUF_LOCK(&so->so_snd);
 2032                         sbunlock(&so->so_snd);
 2033                         SOCKBUF_UNLOCK(&so->so_snd);
 2034                         goto done;
 2035                 }
 2036                 vm_page_unlock_queues();
 2037 
 2038                 /*
 2039                  * Get a sendfile buf. We usually wait as long as necessary,
 2040                  * but this wait can be interrupted.
 2041                  */
 2042                 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
 2043                         mbstat.sf_allocfail++;
 2044                         vm_page_lock_queues();
 2045                         vm_page_unwire(pg, 0);
 2046                         if (pg->wire_count == 0 && pg->object == NULL)
 2047                                 vm_page_free(pg);
 2048                         vm_page_unlock_queues();
 2049                         SOCKBUF_LOCK(&so->so_snd);
 2050                         sbunlock(&so->so_snd);
 2051                         SOCKBUF_UNLOCK(&so->so_snd);
 2052                         error = EINTR;
 2053                         goto done;
 2054                 }
 2055 
 2056                 /*
 2057                  * Get an mbuf header and set it up as having external storage.
 2058                  */
 2059                 if (m_header)
 2060                         MGET(m, M_TRYWAIT, MT_DATA);
 2061                 else
 2062                         MGETHDR(m, M_TRYWAIT, MT_DATA);
 2063                 if (m == NULL) {
 2064                         error = ENOBUFS;
 2065                         sf_buf_mext((void *)sf_buf_kva(sf), sf);
 2066                         SOCKBUF_LOCK(&so->so_snd);
 2067                         sbunlock(&so->so_snd);
 2068                         SOCKBUF_UNLOCK(&so->so_snd);
 2069                         goto done;
 2070                 }
 2071                 /*
 2072                  * Setup external storage for mbuf.
 2073                  */
 2074                 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
 2075                     EXT_SFBUF);
 2076                 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
 2077                 m->m_pkthdr.len = m->m_len = xfsize;
 2078 
 2079                 if (m_header) {
 2080                         m_cat(m_header, m);
 2081                         m = m_header;
 2082                         m_header = NULL;
 2083                         m_fixhdr(m);
 2084                 }
 2085 
 2086                 /*
 2087                  * Add the buffer to the socket buffer chain.
 2088                  */
 2089                 SOCKBUF_LOCK(&so->so_snd);
 2090 retry_space:
 2091                 /*
 2092                  * Make sure that the socket is still able to take more data.
 2093                  * CANTSENDMORE being true usually means that the connection
 2094                  * was closed. so_error is true when an error was sensed after
 2095                  * a previous send.
 2096                  * The state is checked after the page mapping and buffer
 2097                  * allocation above since those operations may block and make
 2098                  * any socket checks stale. From this point forward, nothing
 2099                  * blocks before the pru_send (or more accurately, any blocking
 2100                  * results in a loop back to here to re-check).
 2101                  */
 2102                 SOCKBUF_LOCK_ASSERT(&so->so_snd);
 2103                 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
 2104                         if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 2105                                 error = EPIPE;
 2106                         } else {
 2107                                 error = so->so_error;
 2108                                 so->so_error = 0;
 2109                         }
 2110                         m_freem(m);
 2111                         sbunlock(&so->so_snd);
 2112                         SOCKBUF_UNLOCK(&so->so_snd);
 2113                         goto done;
 2114                 }
 2115                 /*
 2116                  * Wait for socket space to become available. We do this just
 2117                  * after checking the connection state above in order to avoid
 2118                  * a race condition with sbwait().
 2119                  */
 2120                 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
 2121                         if (so->so_state & SS_NBIO) {
 2122                                 m_freem(m);
 2123                                 sbunlock(&so->so_snd);
 2124                                 SOCKBUF_UNLOCK(&so->so_snd);
 2125                                 error = EAGAIN;
 2126                                 goto done;
 2127                         }
 2128                         error = sbwait(&so->so_snd);
 2129                         /*
 2130                          * An error from sbwait usually indicates that we've
 2131                          * been interrupted by a signal. If we've sent anything
 2132                          * then return bytes sent, otherwise return the error.
 2133                          */
 2134                         if (error) {
 2135                                 m_freem(m);
 2136                                 sbunlock(&so->so_snd);
 2137                                 SOCKBUF_UNLOCK(&so->so_snd);
 2138                                 goto done;
 2139                         }
 2140                         goto retry_space;
 2141                 }
 2142                 SOCKBUF_UNLOCK(&so->so_snd);
 2143                 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
 2144                 if (error) {
 2145                         SOCKBUF_LOCK(&so->so_snd);
 2146                         sbunlock(&so->so_snd);
 2147                         SOCKBUF_UNLOCK(&so->so_snd);
 2148                         goto done;
 2149                 }
 2150                 headersent = 1;
 2151         }
 2152         SOCKBUF_LOCK(&so->so_snd);
 2153         sbunlock(&so->so_snd);
 2154         SOCKBUF_UNLOCK(&so->so_snd);
 2155 
 2156         /*
 2157          * Send trailers. Wimp out and use writev(2).
 2158          */
 2159         if (trl_uio != NULL) {
 2160                 error = kern_writev(td, uap->s, trl_uio);
 2161                 if (error)
 2162                         goto done;
 2163                 if (compat)
 2164                         sbytes += td->td_retval[0];
 2165                 else
 2166                         hdtr_size += td->td_retval[0];
 2167         }
 2168 
 2169 done:
 2170         if (headersent) {
 2171                 if (!compat)
 2172                         hdtr_size += headersize;
 2173         } else {
 2174                 if (compat)
 2175                         sbytes -= headersize;
 2176         }
 2177         /*
 2178          * If there was no error we have to clear td->td_retval[0]
 2179          * because it may have been set by writev.
 2180          */
 2181         if (error == 0) {
 2182                 td->td_retval[0] = 0;
 2183         }
 2184         if (uap->sbytes != NULL) {
 2185                 if (!compat)
 2186                         sbytes += hdtr_size;
 2187                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
 2188         }
 2189         if (obj != NULL)
 2190                 vm_object_deallocate(obj);
 2191         if (vp != NULL) {
 2192                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 2193                 vrele(vp);
 2194                 VFS_UNLOCK_GIANT(vfslocked);
 2195         }
 2196         if (so)
 2197                 fputsock(so);
 2198         if (m_header)
 2199                 m_freem(m_header);
 2200 
 2201         NET_UNLOCK_GIANT();
 2202 
 2203         if (error == ERESTART)
 2204                 error = EINTR;
 2205 
 2206         return (error);
 2207 }

Cache object: 627112e79075c9cc6da5ec364ef2ce29


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.