The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * sendfile(2) and related extensions:
    6  * Copyright (c) 1998, David Greenman. All rights reserved. 
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by the University of
   19  *      California, Berkeley and its contributors.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
   37  * $FreeBSD$
   38  */
   39 
   40 #include "opt_compat.h"
   41 #include "opt_ktrace.h"
   42 
   43 #include <sys/param.h>
   44 #include <sys/systm.h>
   45 #include <sys/kernel.h>
   46 #include <sys/sysproto.h>
   47 #include <sys/malloc.h>
   48 #include <sys/filedesc.h>
   49 #include <sys/proc.h>
   50 #include <sys/fcntl.h>
   51 #include <sys/file.h>
   52 #include <sys/mbuf.h>
   53 #include <sys/protosw.h>
   54 #include <sys/socket.h>
   55 #include <sys/socketvar.h>
   56 #include <sys/signalvar.h>
   57 #include <sys/uio.h>
   58 #include <sys/vnode.h>
   59 #include <sys/lock.h>
   60 #include <sys/mount.h>
   61 #ifdef KTRACE
   62 #include <sys/ktrace.h>
   63 #endif
   64 #include <vm/vm.h>
   65 #include <vm/vm_prot.h>
   66 #include <vm/vm_object.h>
   67 #include <vm/vm_page.h>
   68 #include <vm/vm_pager.h>
   69 #include <vm/vm_pageout.h>
   70 #include <vm/vm_kern.h>
   71 #include <vm/vm_extern.h>
   72 #include <machine/limits.h>
   73 
   74 static void sf_buf_init(void *arg);
   75 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
   76 static struct sf_buf *sf_buf_alloc(void);
   77 static void sf_buf_ref(caddr_t addr, u_int size);
   78 static void sf_buf_free(caddr_t addr, u_int size);
   79 
   80 static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags));
   81 static int recvit __P((struct proc *p, int s, struct msghdr *mp,
   82                        caddr_t namelenp));
   83   
   84 static int accept1 __P((struct proc *p, struct accept_args *uap, int compat));
   85 static int getsockname1 __P((struct proc *p, struct getsockname_args *uap,
   86                              int compat));
   87 static int getpeername1 __P((struct proc *p, struct getpeername_args *uap,
   88                              int compat));
   89 
   90 static SLIST_HEAD(, sf_buf) sf_freelist;
   91 static vm_offset_t sf_base;
   92 static struct sf_buf *sf_bufs;
   93 static int sf_buf_alloc_want;
   94 
   95 /*
   96  * System call interface to the socket abstraction.
   97  */
   98 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
   99 #define COMPAT_OLDSOCK
  100 #endif
  101 
  102 extern  struct fileops socketops;
  103 
  104 int
  105 socket(p, uap)
  106         struct proc *p;
  107         register struct socket_args /* {
  108                 int     domain;
  109                 int     type;
  110                 int     protocol;
  111         } */ *uap;
  112 {
  113         struct filedesc *fdp = p->p_fd;
  114         struct socket *so;
  115         struct file *fp;
  116         int fd, error;
  117 
  118         error = falloc(p, &fp, &fd);
  119         if (error)
  120                 return (error);
  121         fp->f_flag = FREAD|FWRITE;
  122         fp->f_type = DTYPE_SOCKET;
  123         fp->f_ops = &socketops;
  124         error = socreate(uap->domain, &so, uap->type, uap->protocol, p);
  125         if (error) {
  126                 fdp->fd_ofiles[fd] = 0;
  127                 ffree(fp);
  128         } else {
  129                 fp->f_data = (caddr_t)so;
  130                 p->p_retval[0] = fd;
  131         }
  132         return (error);
  133 }
  134 
  135 /* ARGSUSED */
  136 int
  137 bind(p, uap)
  138         struct proc *p;
  139         register struct bind_args /* {
  140                 int     s;
  141                 caddr_t name;
  142                 int     namelen;
  143         } */ *uap;
  144 {
  145         struct file *fp;
  146         struct sockaddr *sa;
  147         int error;
  148 
  149         error = getsock(p->p_fd, uap->s, &fp);
  150         if (error)
  151                 return (error);
  152         error = getsockaddr(&sa, uap->name, uap->namelen);
  153         if (error)
  154                 return (error);
  155         error = sobind((struct socket *)fp->f_data, sa, p);
  156         FREE(sa, M_SONAME);
  157         return (error);
  158 }
  159 
  160 /* ARGSUSED */
  161 int
  162 listen(p, uap)
  163         struct proc *p;
  164         register struct listen_args /* {
  165                 int     s;
  166                 int     backlog;
  167         } */ *uap;
  168 {
  169         struct file *fp;
  170         int error;
  171 
  172         error = getsock(p->p_fd, uap->s, &fp);
  173         if (error)
  174                 return (error);
  175         return (solisten((struct socket *)fp->f_data, uap->backlog, p));
  176 }
  177 
  178 static int
  179 accept1(p, uap, compat)
  180         struct proc *p;
  181         register struct accept_args /* {
  182                 int     s;
  183                 caddr_t name;
  184                 int     *anamelen;
  185         } */ *uap;
  186         int compat;
  187 {
  188         struct file *fp;
  189         struct sockaddr *sa;
  190         int namelen, error, s;
  191         struct socket *head, *so;
  192         int fd;
  193         short fflag;            /* type must match fp->f_flag */
  194 
  195         if (uap->name) {
  196                 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
  197                         sizeof (namelen));
  198                 if(error)
  199                         return (error);
  200         }
  201         error = getsock(p->p_fd, uap->s, &fp);
  202         if (error)
  203                 return (error);
  204         s = splnet();
  205         head = (struct socket *)fp->f_data;
  206         if ((head->so_options & SO_ACCEPTCONN) == 0) {
  207                 splx(s);
  208                 return (EINVAL);
  209         }
  210         if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) {
  211                 splx(s);
  212                 return (EWOULDBLOCK);
  213         }
  214         while (head->so_comp.tqh_first == NULL && head->so_error == 0) {
  215                 if (head->so_state & SS_CANTRCVMORE) {
  216                         head->so_error = ECONNABORTED;
  217                         break;
  218                 }
  219                 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
  220                     "accept", 0);
  221                 if (error) {
  222                         splx(s);
  223                         return (error);
  224                 }
  225         }
  226         if (head->so_error) {
  227                 error = head->so_error;
  228                 head->so_error = 0;
  229                 splx(s);
  230                 return (error);
  231         }
  232 
  233         /*
  234          * At this point we know that there is at least one connection
  235          * ready to be accepted. Remove it from the queue prior to
  236          * allocating the file descriptor for it since falloc() may
  237          * block allowing another process to accept the connection
  238          * instead.
  239          */
  240         so = head->so_comp.tqh_first;
  241         TAILQ_REMOVE(&head->so_comp, so, so_list);
  242         head->so_qlen--;
  243 
  244         fflag = fp->f_flag;
  245         error = falloc(p, &fp, &fd);
  246         if (error) {
  247                 /*
  248                  * Probably ran out of file descriptors. Put the
  249                  * unaccepted connection back onto the queue and
  250                  * do another wakeup so some other process might
  251                  * have a chance at it.
  252                  */
  253                 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
  254                 head->so_qlen++;
  255                 wakeup_one(&head->so_timeo);
  256                 splx(s);
  257                 return (error);
  258         } else
  259                 p->p_retval[0] = fd;
  260 
  261         so->so_state &= ~SS_COMP;
  262         so->so_head = NULL;
  263         if (head->so_sigio != NULL)
  264                 fsetown(fgetown(head->so_sigio), &so->so_sigio);
  265 
  266         fp->f_type = DTYPE_SOCKET;
  267         fp->f_flag = fflag;
  268         fp->f_ops = &socketops;
  269         fp->f_data = (caddr_t)so;
  270         sa = 0;
  271         (void) soaccept(so, &sa);
  272         if (sa == 0) {
  273                 namelen = 0;
  274                 if (uap->name)
  275                         goto gotnoname;
  276                 return 0;
  277         }
  278         if (uap->name) {
  279                 /* check sa_len before it is destroyed */
  280                 if (namelen > sa->sa_len)
  281                         namelen = sa->sa_len;
  282 #ifdef COMPAT_OLDSOCK
  283                 if (compat)
  284                         ((struct osockaddr *)sa)->sa_family =
  285                             sa->sa_family;
  286 #endif
  287                 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
  288                 if (!error)
  289 gotnoname:
  290                         error = copyout((caddr_t)&namelen,
  291                             (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
  292         }
  293         if (sa)
  294                 FREE(sa, M_SONAME);
  295         splx(s);
  296         return (error);
  297 }
  298 
  299 int
  300 accept(p, uap)
  301         struct proc *p;
  302         struct accept_args *uap;
  303 {
  304 
  305         return (accept1(p, uap, 0));
  306 }
  307 
  308 #ifdef COMPAT_OLDSOCK
  309 int
  310 oaccept(p, uap)
  311         struct proc *p;
  312         struct accept_args *uap;
  313 {
  314 
  315         return (accept1(p, uap, 1));
  316 }
  317 #endif /* COMPAT_OLDSOCK */
  318 
  319 /* ARGSUSED */
  320 int
  321 connect(p, uap)
  322         struct proc *p;
  323         register struct connect_args /* {
  324                 int     s;
  325                 caddr_t name;
  326                 int     namelen;
  327         } */ *uap;
  328 {
  329         struct file *fp;
  330         register struct socket *so;
  331         struct sockaddr *sa;
  332         int error, s;
  333 
  334         error = getsock(p->p_fd, uap->s, &fp);
  335         if (error)
  336                 return (error);
  337         so = (struct socket *)fp->f_data;
  338         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING))
  339                 return (EALREADY);
  340         error = getsockaddr(&sa, uap->name, uap->namelen);
  341         if (error)
  342                 return (error);
  343         error = soconnect(so, sa, p);
  344         if (error)
  345                 goto bad;
  346         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
  347                 FREE(sa, M_SONAME);
  348                 return (EINPROGRESS);
  349         }
  350         s = splnet();
  351         while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  352                 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
  353                     "connec", 0);
  354                 if (error)
  355                         break;
  356         }
  357         if (error == 0) {
  358                 error = so->so_error;
  359                 so->so_error = 0;
  360         }
  361         splx(s);
  362 bad:
  363         so->so_state &= ~SS_ISCONNECTING;
  364         FREE(sa, M_SONAME);
  365         if (error == ERESTART)
  366                 error = EINTR;
  367         return (error);
  368 }
  369 
  370 int
  371 socketpair(p, uap)
  372         struct proc *p;
  373         register struct socketpair_args /* {
  374                 int     domain;
  375                 int     type;
  376                 int     protocol;
  377                 int     *rsv;
  378         } */ *uap;
  379 {
  380         register struct filedesc *fdp = p->p_fd;
  381         struct file *fp1, *fp2;
  382         struct socket *so1, *so2;
  383         int fd, error, sv[2];
  384 
  385         error = socreate(uap->domain, &so1, uap->type, uap->protocol, p);
  386         if (error)
  387                 return (error);
  388         error = socreate(uap->domain, &so2, uap->type, uap->protocol, p);
  389         if (error)
  390                 goto free1;
  391         error = falloc(p, &fp1, &fd);
  392         if (error)
  393                 goto free2;
  394         sv[0] = fd;
  395         fp1->f_flag = FREAD|FWRITE;
  396         fp1->f_type = DTYPE_SOCKET;
  397         fp1->f_ops = &socketops;
  398         fp1->f_data = (caddr_t)so1;
  399         error = falloc(p, &fp2, &fd);
  400         if (error)
  401                 goto free3;
  402         fp2->f_flag = FREAD|FWRITE;
  403         fp2->f_type = DTYPE_SOCKET;
  404         fp2->f_ops = &socketops;
  405         fp2->f_data = (caddr_t)so2;
  406         sv[1] = fd;
  407         error = soconnect2(so1, so2);
  408         if (error)
  409                 goto free4;
  410         if (uap->type == SOCK_DGRAM) {
  411                 /*
  412                  * Datagram socket connection is asymmetric.
  413                  */
  414                  error = soconnect2(so2, so1);
  415                  if (error)
  416                         goto free4;
  417         }
  418         error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
  419         return (error);
  420 free4:
  421         ffree(fp2);
  422         fdp->fd_ofiles[sv[1]] = 0;
  423 free3:
  424         ffree(fp1);
  425         fdp->fd_ofiles[sv[0]] = 0;
  426 free2:
  427         (void)soclose(so2);
  428 free1:
  429         (void)soclose(so1);
  430         return (error);
  431 }
  432 
  433 static int
  434 sendit(p, s, mp, flags)
  435         register struct proc *p;
  436         int s;
  437         register struct msghdr *mp;
  438         int flags;
  439 {
  440         struct file *fp;
  441         struct uio auio;
  442         register struct iovec *iov;
  443         register int i;
  444         struct mbuf *control;
  445         struct sockaddr *to;
  446         int len, error;
  447         struct socket *so;
  448 #ifdef KTRACE
  449         struct iovec *ktriov = NULL;
  450         struct uio ktruio;
  451 #endif
  452 
  453         error = getsock(p->p_fd, s, &fp);
  454         if (error)
  455                 return (error);
  456         auio.uio_iov = mp->msg_iov;
  457         auio.uio_iovcnt = mp->msg_iovlen;
  458         auio.uio_segflg = UIO_USERSPACE;
  459         auio.uio_rw = UIO_WRITE;
  460         auio.uio_procp = p;
  461         auio.uio_offset = 0;                    /* XXX */
  462         auio.uio_resid = 0;
  463         iov = mp->msg_iov;
  464         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  465                 if ((auio.uio_resid += iov->iov_len) < 0)
  466                         return (EINVAL);
  467         }
  468         if (mp->msg_name) {
  469                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
  470                 if (error)
  471                         return (error);
  472         } else
  473                 to = 0;
  474         if (mp->msg_control) {
  475                 if (mp->msg_controllen < sizeof(struct cmsghdr)
  476 #ifdef COMPAT_OLDSOCK
  477                     && mp->msg_flags != MSG_COMPAT
  478 #endif
  479                 ) {
  480                         error = EINVAL;
  481                         goto bad;
  482                 }
  483                 error = sockargs(&control, mp->msg_control,
  484                     mp->msg_controllen, MT_CONTROL);
  485                 if (error)
  486                         goto bad;
  487 #ifdef COMPAT_OLDSOCK
  488                 if (mp->msg_flags == MSG_COMPAT) {
  489                         register struct cmsghdr *cm;
  490 
  491                         M_PREPEND(control, sizeof(*cm), M_WAIT);
  492                         if (control == 0) {
  493                                 error = ENOBUFS;
  494                                 goto bad;
  495                         } else {
  496                                 cm = mtod(control, struct cmsghdr *);
  497                                 cm->cmsg_len = control->m_len;
  498                                 cm->cmsg_level = SOL_SOCKET;
  499                                 cm->cmsg_type = SCM_RIGHTS;
  500                         }
  501                 }
  502 #endif
  503         } else
  504                 control = 0;
  505 #ifdef KTRACE
  506         if (KTRPOINT(p, KTR_GENIO)) {
  507                 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
  508 
  509                 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
  510                 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
  511                 ktruio = auio;
  512         }
  513 #endif
  514         len = auio.uio_resid;
  515         so = (struct socket *)fp->f_data;
  516         error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
  517                                                      flags, p);
  518         if (error) {
  519                 if (auio.uio_resid != len && (error == ERESTART ||
  520                     error == EINTR || error == EWOULDBLOCK))
  521                         error = 0;
  522                 if (error == EPIPE)
  523                         psignal(p, SIGPIPE);
  524         }
  525         if (error == 0)
  526                 p->p_retval[0] = len - auio.uio_resid;
  527 #ifdef KTRACE
  528         if (ktriov != NULL) {
  529                 if (error == 0) {
  530                         ktruio.uio_iov = ktriov;
  531                         ktruio.uio_resid = p->p_retval[0];
  532                         ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error);
  533                 }
  534                 FREE(ktriov, M_TEMP);
  535         }
  536 #endif
  537 bad:
  538         if (to)
  539                 FREE(to, M_SONAME);
  540         return (error);
  541 }
  542 
  543 int
  544 sendto(p, uap)
  545         struct proc *p;
  546         register struct sendto_args /* {
  547                 int     s;
  548                 caddr_t buf;
  549                 size_t  len;
  550                 int     flags;
  551                 caddr_t to;
  552                 int     tolen;
  553         } */ *uap;
  554 {
  555         struct msghdr msg;
  556         struct iovec aiov;
  557 
  558         msg.msg_name = uap->to;
  559         msg.msg_namelen = uap->tolen;
  560         msg.msg_iov = &aiov;
  561         msg.msg_iovlen = 1;
  562         msg.msg_control = 0;
  563 #ifdef COMPAT_OLDSOCK
  564         msg.msg_flags = 0;
  565 #endif
  566         aiov.iov_base = uap->buf;
  567         aiov.iov_len = uap->len;
  568         return (sendit(p, uap->s, &msg, uap->flags));
  569 }
  570 
  571 #ifdef COMPAT_OLDSOCK
  572 int
  573 osend(p, uap)
  574         struct proc *p;
  575         register struct osend_args /* {
  576                 int     s;
  577                 caddr_t buf;
  578                 int     len;
  579                 int     flags;
  580         } */ *uap;
  581 {
  582         struct msghdr msg;
  583         struct iovec aiov;
  584 
  585         msg.msg_name = 0;
  586         msg.msg_namelen = 0;
  587         msg.msg_iov = &aiov;
  588         msg.msg_iovlen = 1;
  589         aiov.iov_base = uap->buf;
  590         aiov.iov_len = uap->len;
  591         msg.msg_control = 0;
  592         msg.msg_flags = 0;
  593         return (sendit(p, uap->s, &msg, uap->flags));
  594 }
  595 
  596 int
  597 osendmsg(p, uap)
  598         struct proc *p;
  599         register struct osendmsg_args /* {
  600                 int     s;
  601                 caddr_t msg;
  602                 int     flags;
  603         } */ *uap;
  604 {
  605         struct msghdr msg;
  606         struct iovec aiov[UIO_SMALLIOV], *iov;
  607         int error;
  608 
  609         error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
  610         if (error)
  611                 return (error);
  612         if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
  613                 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
  614                         return (EMSGSIZE);
  615                 MALLOC(iov, struct iovec *,
  616                       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
  617                       M_WAITOK);
  618         } else
  619                 iov = aiov;
  620         error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
  621             (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
  622         if (error)
  623                 goto done;
  624         msg.msg_flags = MSG_COMPAT;
  625         msg.msg_iov = iov;
  626         error = sendit(p, uap->s, &msg, uap->flags);
  627 done:
  628         if (iov != aiov)
  629                 FREE(iov, M_IOV);
  630         return (error);
  631 }
  632 #endif
  633 
  634 int
  635 sendmsg(p, uap)
  636         struct proc *p;
  637         register struct sendmsg_args /* {
  638                 int     s;
  639                 caddr_t msg;
  640                 int     flags;
  641         } */ *uap;
  642 {
  643         struct msghdr msg;
  644         struct iovec aiov[UIO_SMALLIOV], *iov;
  645         int error;
  646 
  647         error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
  648         if (error)
  649                 return (error);
  650         if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
  651                 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
  652                         return (EMSGSIZE);
  653                 MALLOC(iov, struct iovec *,
  654                        sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
  655                        M_WAITOK);
  656         } else
  657                 iov = aiov;
  658         if (msg.msg_iovlen &&
  659             (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
  660             (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
  661                 goto done;
  662         msg.msg_iov = iov;
  663 #ifdef COMPAT_OLDSOCK
  664         msg.msg_flags = 0;
  665 #endif
  666         error = sendit(p, uap->s, &msg, uap->flags);
  667 done:
  668         if (iov != aiov)
  669                 FREE(iov, M_IOV);
  670         return (error);
  671 }
  672 
  673 static int
  674 recvit(p, s, mp, namelenp)
  675         register struct proc *p;
  676         int s;
  677         register struct msghdr *mp;
  678         caddr_t namelenp;
  679 {
  680         struct file *fp;
  681         struct uio auio;
  682         register struct iovec *iov;
  683         register int i;
  684         int len, error;
  685         struct mbuf *m, *control = 0;
  686         caddr_t ctlbuf;
  687         struct socket *so;
  688         struct sockaddr *fromsa = 0;
  689 #ifdef KTRACE
  690         struct iovec *ktriov = NULL;
  691         struct uio ktruio;
  692 #endif
  693 
  694         error = getsock(p->p_fd, s, &fp);
  695         if (error)
  696                 return (error);
  697         auio.uio_iov = mp->msg_iov;
  698         auio.uio_iovcnt = mp->msg_iovlen;
  699         auio.uio_segflg = UIO_USERSPACE;
  700         auio.uio_rw = UIO_READ;
  701         auio.uio_procp = p;
  702         auio.uio_offset = 0;                    /* XXX */
  703         auio.uio_resid = 0;
  704         iov = mp->msg_iov;
  705         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  706                 if ((auio.uio_resid += iov->iov_len) < 0)
  707                         return (EINVAL);
  708         }
  709 #ifdef KTRACE
  710         if (KTRPOINT(p, KTR_GENIO)) {
  711                 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
  712 
  713                 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
  714                 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
  715                 ktruio = auio;
  716         }
  717 #endif
  718         len = auio.uio_resid;
  719         so = (struct socket *)fp->f_data;
  720         error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
  721             (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
  722             &mp->msg_flags);
  723         if (error) {
  724                 if (auio.uio_resid != len && (error == ERESTART ||
  725                     error == EINTR || error == EWOULDBLOCK))
  726                         error = 0;
  727         }
  728 #ifdef KTRACE
  729         if (ktriov != NULL) {
  730                 if (error == 0) {
  731                         ktruio.uio_iov = ktriov;
  732                         ktruio.uio_resid = len - auio.uio_resid;
  733                         ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error);
  734                 }
  735                 FREE(ktriov, M_TEMP);
  736         }
  737 #endif
  738         if (error)
  739                 goto out;
  740         p->p_retval[0] = len - auio.uio_resid;
  741         if (mp->msg_name) {
  742                 len = mp->msg_namelen;
  743                 if (len <= 0 || fromsa == 0)
  744                         len = 0;
  745                 else {
  746 #ifndef MIN
  747 #define MIN(a,b) ((a)>(b)?(b):(a))
  748 #endif
  749                         /* save sa_len before it is destroyed by MSG_COMPAT */
  750                         len = MIN(len, fromsa->sa_len);
  751 #ifdef COMPAT_OLDSOCK
  752                         if (mp->msg_flags & MSG_COMPAT)
  753                                 ((struct osockaddr *)fromsa)->sa_family =
  754                                     fromsa->sa_family;
  755 #endif
  756                         error = copyout(fromsa,
  757                             (caddr_t)mp->msg_name, (unsigned)len);
  758                         if (error)
  759                                 goto out;
  760                 }
  761                 mp->msg_namelen = len;
  762                 if (namelenp &&
  763                     (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
  764 #ifdef COMPAT_OLDSOCK
  765                         if (mp->msg_flags & MSG_COMPAT)
  766                                 error = 0;      /* old recvfrom didn't check */
  767                         else
  768 #endif
  769                         goto out;
  770                 }
  771         }
  772         if (mp->msg_control) {
  773 #ifdef COMPAT_OLDSOCK
  774                 /*
  775                  * We assume that old recvmsg calls won't receive access
  776                  * rights and other control info, esp. as control info
  777                  * is always optional and those options didn't exist in 4.3.
  778                  * If we receive rights, trim the cmsghdr; anything else
  779                  * is tossed.
  780                  */
  781                 if (control && mp->msg_flags & MSG_COMPAT) {
  782                         if (mtod(control, struct cmsghdr *)->cmsg_level !=
  783                             SOL_SOCKET ||
  784                             mtod(control, struct cmsghdr *)->cmsg_type !=
  785                             SCM_RIGHTS) {
  786                                 mp->msg_controllen = 0;
  787                                 goto out;
  788                         }
  789                         control->m_len -= sizeof (struct cmsghdr);
  790                         control->m_data += sizeof (struct cmsghdr);
  791                 }
  792 #endif
  793                 len = mp->msg_controllen;
  794                 m = control;
  795                 mp->msg_controllen = 0;
  796                 ctlbuf = (caddr_t) mp->msg_control;
  797 
  798                 while (m && len > 0) {
  799                         unsigned int tocopy;
  800 
  801                         if (len >= m->m_len) 
  802                                 tocopy = m->m_len;
  803                         else {
  804                                 mp->msg_flags |= MSG_CTRUNC;
  805                                 tocopy = len;
  806                         }
  807                 
  808                         if (error = copyout((caddr_t)mtod(m, caddr_t),
  809                                         ctlbuf, tocopy))
  810                                 goto out;
  811 
  812                         ctlbuf += tocopy;
  813                         len -= tocopy;
  814                         m = m->m_next;
  815                 }
  816                 mp->msg_controllen = ctlbuf - mp->msg_control;
  817         }
  818 out:
  819         if (fromsa)
  820                 FREE(fromsa, M_SONAME);
  821         if (control)
  822                 m_freem(control);
  823         return (error);
  824 }
  825 
  826 int
  827 recvfrom(p, uap)
  828         struct proc *p;
  829         register struct recvfrom_args /* {
  830                 int     s;
  831                 caddr_t buf;
  832                 size_t  len;
  833                 int     flags;
  834                 caddr_t from;
  835                 int     *fromlenaddr;
  836         } */ *uap;
  837 {
  838         struct msghdr msg;
  839         struct iovec aiov;
  840         int error;
  841 
  842         if (uap->fromlenaddr) {
  843                 error = copyin((caddr_t)uap->fromlenaddr,
  844                     (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
  845                 if (error)
  846                         return (error);
  847         } else
  848                 msg.msg_namelen = 0;
  849         msg.msg_name = uap->from;
  850         msg.msg_iov = &aiov;
  851         msg.msg_iovlen = 1;
  852         aiov.iov_base = uap->buf;
  853         aiov.iov_len = uap->len;
  854         msg.msg_control = 0;
  855         msg.msg_flags = uap->flags;
  856         return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr));
  857 }
  858 
  859 #ifdef COMPAT_OLDSOCK
  860 int
  861 orecvfrom(p, uap)
  862         struct proc *p;
  863         struct recvfrom_args *uap;
  864 {
  865 
  866         uap->flags |= MSG_COMPAT;
  867         return (recvfrom(p, uap));
  868 }
  869 #endif
  870 
  871 
  872 #ifdef COMPAT_OLDSOCK
  873 int
  874 orecv(p, uap)
  875         struct proc *p;
  876         register struct orecv_args /* {
  877                 int     s;
  878                 caddr_t buf;
  879                 int     len;
  880                 int     flags;
  881         } */ *uap;
  882 {
  883         struct msghdr msg;
  884         struct iovec aiov;
  885 
  886         msg.msg_name = 0;
  887         msg.msg_namelen = 0;
  888         msg.msg_iov = &aiov;
  889         msg.msg_iovlen = 1;
  890         aiov.iov_base = uap->buf;
  891         aiov.iov_len = uap->len;
  892         msg.msg_control = 0;
  893         msg.msg_flags = uap->flags;
  894         return (recvit(p, uap->s, &msg, (caddr_t)0));
  895 }
  896 
  897 /*
  898  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
  899  * overlays the new one, missing only the flags, and with the (old) access
  900  * rights where the control fields are now.
  901  */
  902 int
  903 orecvmsg(p, uap)
  904         struct proc *p;
  905         register struct orecvmsg_args /* {
  906                 int     s;
  907                 struct  omsghdr *msg;
  908                 int     flags;
  909         } */ *uap;
  910 {
  911         struct msghdr msg;
  912         struct iovec aiov[UIO_SMALLIOV], *iov;
  913         int error;
  914 
  915         error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
  916             sizeof (struct omsghdr));
  917         if (error)
  918                 return (error);
  919         if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
  920                 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
  921                         return (EMSGSIZE);
  922                 MALLOC(iov, struct iovec *,
  923                       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
  924                       M_WAITOK);
  925         } else
  926                 iov = aiov;
  927         msg.msg_flags = uap->flags | MSG_COMPAT;
  928         error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
  929             (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
  930         if (error)
  931                 goto done;
  932         msg.msg_iov = iov;
  933         error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);
  934 
  935         if (msg.msg_controllen && error == 0)
  936                 error = copyout((caddr_t)&msg.msg_controllen,
  937                     (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
  938 done:
  939         if (iov != aiov)
  940                 FREE(iov, M_IOV);
  941         return (error);
  942 }
  943 #endif
  944 
  945 int
  946 recvmsg(p, uap)
  947         struct proc *p;
  948         register struct recvmsg_args /* {
  949                 int     s;
  950                 struct  msghdr *msg;
  951                 int     flags;
  952         } */ *uap;
  953 {
  954         struct msghdr msg;
  955         struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
  956         register int error;
  957 
  958         error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
  959         if (error)
  960                 return (error);
  961         if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
  962                 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
  963                         return (EMSGSIZE);
  964                 MALLOC(iov, struct iovec *,
  965                        sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
  966                        M_WAITOK);
  967         } else
  968                 iov = aiov;
  969 #ifdef COMPAT_OLDSOCK
  970         msg.msg_flags = uap->flags &~ MSG_COMPAT;
  971 #else
  972         msg.msg_flags = uap->flags;
  973 #endif
  974         uiov = msg.msg_iov;
  975         msg.msg_iov = iov;
  976         error = copyin((caddr_t)uiov, (caddr_t)iov,
  977             (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
  978         if (error)
  979                 goto done;
  980         error = recvit(p, uap->s, &msg, (caddr_t)0);
  981         if (!error) {
  982                 msg.msg_iov = uiov;
  983                 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
  984         }
  985 done:
  986         if (iov != aiov)
  987                 FREE(iov, M_IOV);
  988         return (error);
  989 }
  990 
  991 /* ARGSUSED */
  992 int
  993 shutdown(p, uap)
  994         struct proc *p;
  995         register struct shutdown_args /* {
  996                 int     s;
  997                 int     how;
  998         } */ *uap;
  999 {
 1000         struct file *fp;
 1001         int error;
 1002 
 1003         error = getsock(p->p_fd, uap->s, &fp);
 1004         if (error)
 1005                 return (error);
 1006         return (soshutdown((struct socket *)fp->f_data, uap->how));
 1007 }
 1008 
 1009 /* ARGSUSED */
 1010 int
 1011 setsockopt(p, uap)
 1012         struct proc *p;
 1013         register struct setsockopt_args /* {
 1014                 int     s;
 1015                 int     level;
 1016                 int     name;
 1017                 caddr_t val;
 1018                 int     valsize;
 1019         } */ *uap;
 1020 {
 1021         struct file *fp;
 1022         struct sockopt sopt;
 1023         int error;
 1024 
 1025         if (uap->val == 0 && uap->valsize != 0)
 1026                 return (EFAULT);
 1027         if (uap->valsize < 0)
 1028                 return (EINVAL);
 1029 
 1030         error = getsock(p->p_fd, uap->s, &fp);
 1031         if (error)
 1032                 return (error);
 1033 
 1034         sopt.sopt_dir = SOPT_SET;
 1035         sopt.sopt_level = uap->level;
 1036         sopt.sopt_name = uap->name;
 1037         sopt.sopt_val = uap->val;
 1038         sopt.sopt_valsize = uap->valsize;
 1039         sopt.sopt_p = p;
 1040 
 1041         return (sosetopt((struct socket *)fp->f_data, &sopt));
 1042 }
 1043 
 1044 /* ARGSUSED */
 1045 int
 1046 getsockopt(p, uap)
 1047         struct proc *p;
 1048         register struct getsockopt_args /* {
 1049                 int     s;
 1050                 int     level;
 1051                 int     name;
 1052                 caddr_t val;
 1053                 int     *avalsize;
 1054         } */ *uap;
 1055 {
 1056         int     valsize, error;
 1057         struct  file *fp;
 1058         struct  sockopt sopt;
 1059 
 1060         error = getsock(p->p_fd, uap->s, &fp);
 1061         if (error)
 1062                 return (error);
 1063         if (uap->val) {
 1064                 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
 1065                     sizeof (valsize));
 1066                 if (error)
 1067                         return (error);
 1068                 if (valsize < 0)
 1069                         return (EINVAL);
 1070         } else
 1071                 valsize = 0;
 1072 
 1073         sopt.sopt_dir = SOPT_GET;
 1074         sopt.sopt_level = uap->level;
 1075         sopt.sopt_name = uap->name;
 1076         sopt.sopt_val = uap->val;
 1077         sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
 1078         sopt.sopt_p = p;
 1079 
 1080         error = sogetopt((struct socket *)fp->f_data, &sopt);
 1081         if (error == 0) {
 1082                 valsize = sopt.sopt_valsize;
 1083                 error = copyout((caddr_t)&valsize,
 1084                                 (caddr_t)uap->avalsize, sizeof (valsize));
 1085         }
 1086         return (error);
 1087 }
 1088 
 1089 /*
 1090  * Get socket name.
 1091  */
 1092 /* ARGSUSED */
 1093 static int
 1094 getsockname1(p, uap, compat)
 1095         struct proc *p;
 1096         register struct getsockname_args /* {
 1097                 int     fdes;
 1098                 caddr_t asa;
 1099                 int     *alen;
 1100         } */ *uap;
 1101         int compat;
 1102 {
 1103         struct file *fp;
 1104         register struct socket *so;
 1105         struct sockaddr *sa;
 1106         int len, error;
 1107 
 1108         error = getsock(p->p_fd, uap->fdes, &fp);
 1109         if (error)
 1110                 return (error);
 1111         error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
 1112         if (error)
 1113                 return (error);
 1114         so = (struct socket *)fp->f_data;
 1115         sa = 0;
 1116         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
 1117         if (error)
 1118                 goto bad;
 1119         if (sa == 0) {
 1120                 len = 0;
 1121                 goto gotnothing;
 1122         }
 1123 
 1124         len = MIN(len, sa->sa_len);
 1125 #ifdef COMPAT_OLDSOCK
 1126         if (compat)
 1127                 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1128 #endif
 1129         error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
 1130         if (error == 0)
 1131 gotnothing:
 1132                 error = copyout((caddr_t)&len, (caddr_t)uap->alen,
 1133                     sizeof (len));
 1134 bad:
 1135         if (sa)
 1136                 FREE(sa, M_SONAME);
 1137         return (error);
 1138 }
 1139 
 1140 int
 1141 getsockname(p, uap)
 1142         struct proc *p;
 1143         struct getsockname_args *uap;
 1144 {
 1145 
 1146         return (getsockname1(p, uap, 0));
 1147 }
 1148 
 1149 #ifdef COMPAT_OLDSOCK
 1150 int
 1151 ogetsockname(p, uap)
 1152         struct proc *p;
 1153         struct getsockname_args *uap;
 1154 {
 1155 
 1156         return (getsockname1(p, uap, 1));
 1157 }
 1158 #endif /* COMPAT_OLDSOCK */
 1159 
 1160 /*
 1161  * Get name of peer for connected socket.
 1162  */
 1163 /* ARGSUSED */
 1164 static int
 1165 getpeername1(p, uap, compat)
 1166         struct proc *p;
 1167         register struct getpeername_args /* {
 1168                 int     fdes;
 1169                 caddr_t asa;
 1170                 int     *alen;
 1171         } */ *uap;
 1172         int compat;
 1173 {
 1174         struct file *fp;
 1175         register struct socket *so;
 1176         struct sockaddr *sa;
 1177         int len, error;
 1178 
 1179         error = getsock(p->p_fd, uap->fdes, &fp);
 1180         if (error)
 1181                 return (error);
 1182         so = (struct socket *)fp->f_data;
 1183         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
 1184                 return (ENOTCONN);
 1185         error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
 1186         if (error)
 1187                 return (error);
 1188         sa = 0;
 1189         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
 1190         if (error)
 1191                 goto bad;
 1192         if (sa == 0) {
 1193                 len = 0;
 1194                 goto gotnothing;
 1195         }
 1196         len = MIN(len, sa->sa_len);
 1197 #ifdef COMPAT_OLDSOCK
 1198         if (compat)
 1199                 ((struct osockaddr *)sa)->sa_family =
 1200                     sa->sa_family;
 1201 #endif
 1202         error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
 1203         if (error)
 1204                 goto bad;
 1205 gotnothing:
 1206         error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
 1207 bad:
 1208         if (sa) FREE(sa, M_SONAME);
 1209         return (error);
 1210 }
 1211 
 1212 int
 1213 getpeername(p, uap)
 1214         struct proc *p;
 1215         struct getpeername_args *uap;
 1216 {
 1217 
 1218         return (getpeername1(p, uap, 0));
 1219 }
 1220 
 1221 #ifdef COMPAT_OLDSOCK
 1222 int
 1223 ogetpeername(p, uap)
 1224         struct proc *p;
 1225         struct ogetpeername_args *uap;
 1226 {
 1227 
 1228         /* XXX uap should have type `getpeername_args *' to begin with. */
 1229         return (getpeername1(p, (struct getpeername_args *)uap, 1));
 1230 }
 1231 #endif /* COMPAT_OLDSOCK */
 1232 
 1233 int
 1234 sockargs(mp, buf, buflen, type)
 1235         struct mbuf **mp;
 1236         caddr_t buf;
 1237         int buflen, type;
 1238 {
 1239         register struct sockaddr *sa;
 1240         register struct mbuf *m;
 1241         int error;
 1242 
 1243         if ((u_int)buflen > MLEN) {
 1244 #ifdef COMPAT_OLDSOCK
 1245                 if (type == MT_SONAME && (u_int)buflen <= 112)
 1246                         buflen = MLEN;          /* unix domain compat. hack */
 1247                 else
 1248 #endif
 1249                 return (EINVAL);
 1250         }
 1251         m = m_get(M_WAIT, type);
 1252         if (m == NULL)
 1253                 return (ENOBUFS);
 1254         m->m_len = buflen;
 1255         error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 1256         if (error)
 1257                 (void) m_free(m);
 1258         else {
 1259                 *mp = m;
 1260                 if (type == MT_SONAME) {
 1261                         sa = mtod(m, struct sockaddr *);
 1262 
 1263 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1264                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1265                                 sa->sa_family = sa->sa_len;
 1266 #endif
 1267                         sa->sa_len = buflen;
 1268                 }
 1269         }
 1270         return (error);
 1271 }
 1272 
 1273 int
 1274 getsockaddr(namp, uaddr, len)
 1275         struct sockaddr **namp;
 1276         caddr_t uaddr;
 1277         size_t len;
 1278 {
 1279         struct sockaddr *sa;
 1280         int error;
 1281 
 1282         if (len > SOCK_MAXADDRLEN)
 1283                 return ENAMETOOLONG;
 1284         MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
 1285         error = copyin(uaddr, sa, len);
 1286         if (error) {
 1287                 FREE(sa, M_SONAME);
 1288         } else {
 1289 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1290                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1291                         sa->sa_family = sa->sa_len;
 1292 #endif
 1293                 sa->sa_len = len;
 1294                 *namp = sa;
 1295         }
 1296         return error;
 1297 }
 1298 
 1299 int
 1300 getsock(fdp, fdes, fpp)
 1301         struct filedesc *fdp;
 1302         int fdes;
 1303         struct file **fpp;
 1304 {
 1305         register struct file *fp;
 1306 
 1307         if ((unsigned)fdes >= fdp->fd_nfiles ||
 1308             (fp = fdp->fd_ofiles[fdes]) == NULL)
 1309                 return (EBADF);
 1310         if (fp->f_type != DTYPE_SOCKET)
 1311                 return (ENOTSOCK);
 1312         *fpp = fp;
 1313         return (0);
 1314 }
 1315 
 1316 /*
 1317  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
 1318  * XXX - The sf_buf functions are currently private to sendfile(2), so have
 1319  * been made static, but may be useful in the future for doing zero-copy in
 1320  * other parts of the networking code. 
 1321  */
 1322 static void
 1323 sf_buf_init(void *arg)
 1324 {
 1325         int i;
 1326 
 1327         SLIST_INIT(&sf_freelist);
 1328         sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
 1329         sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT);
 1330         bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf));
 1331         for (i = 0; i < nsfbufs; i++) {
 1332                 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
 1333                 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
 1334         }
 1335 }
 1336 
 1337 /*
 1338  * Get an sf_buf from the freelist. Will block if none are available.
 1339  */
 1340 static struct sf_buf *
 1341 sf_buf_alloc()
 1342 {
 1343         struct sf_buf *sf;
 1344         int s;
 1345 
 1346         s = splimp();
 1347         while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
 1348                 sf_buf_alloc_want = 1;
 1349                 tsleep(&sf_freelist, PVM, "sfbufa", 0);
 1350         }
 1351         SLIST_REMOVE_HEAD(&sf_freelist, free_list);
 1352         splx(s);
 1353         sf->refcnt = 1;
 1354         return (sf);
 1355 }
 1356 
 1357 #define dtosf(x)        (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
 1358 static void
 1359 sf_buf_ref(caddr_t addr, u_int size)
 1360 {
 1361         struct sf_buf *sf;
 1362 
 1363         sf = dtosf(addr);
 1364         if (sf->refcnt == 0)
 1365                 panic("sf_buf_ref: referencing a free sf_buf");
 1366         sf->refcnt++;
 1367 }
 1368 
 1369 /*
 1370  * Lose a reference to an sf_buf. When none left, detach mapped page
 1371  * and release resources back to the system.
 1372  *
 1373  * Must be called at splimp.
 1374  */
 1375 static void
 1376 sf_buf_free(caddr_t addr, u_int size)
 1377 {
 1378         struct sf_buf *sf;
 1379         struct vm_page *m;
 1380         int s;
 1381 
 1382         sf = dtosf(addr);
 1383         if (sf->refcnt == 0)
 1384                 panic("sf_buf_free: freeing free sf_buf");
 1385         sf->refcnt--;
 1386         if (sf->refcnt == 0) {
 1387                 pmap_qremove((vm_offset_t)addr, 1);
 1388                 m = sf->m;
 1389                 s = splvm();
 1390                 vm_page_unwire(m, 0);
 1391                 /*
 1392                  * Check for the object going away on us. This can
 1393                  * happen since we don't hold a reference to it.
 1394                  * If so, we're responsible for freeing the page.
 1395                  */
 1396                 if (m->wire_count == 0 && m->object == NULL)
 1397                         vm_page_free(m);
 1398                 splx(s);
 1399                 sf->m = NULL;
 1400                 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
 1401                 if (sf_buf_alloc_want) {
 1402                         sf_buf_alloc_want = 0;
 1403                         wakeup(&sf_freelist);
 1404                 }
 1405         }
 1406 }
 1407 
 1408 /*
 1409  * sendfile(2).
 1410  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 1411  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 1412  *
 1413  * Send a file specified by 'fd' and starting at 'offset' to a socket
 1414  * specified by 's'. Send only 'nbytes' of the file or until EOF if
 1415  * nbytes == 0. Optionally add a header and/or trailer to the socket
 1416  * output. If specified, write the total number of bytes sent into *sbytes.
 1417  */
 1418 int
 1419 sendfile(struct proc *p, struct sendfile_args *uap)
 1420 {
 1421         struct file *fp;
 1422         struct filedesc *fdp = p->p_fd;
 1423         struct vnode *vp;
 1424         struct vm_object *obj;
 1425         struct socket *so;
 1426         struct mbuf *m;
 1427         struct sf_buf *sf;
 1428         struct vm_page *pg;
 1429         struct writev_args nuap;
 1430         struct sf_hdtr hdtr;
 1431         off_t off, xfsize, sbytes = 0;
 1432         int error = 0, s;
 1433 
 1434         /*
 1435          * Do argument checking. Must be a regular file in, stream
 1436          * type and connected socket out, positive offset.
 1437          */
 1438         if (((u_int)uap->fd) >= fdp->fd_nfiles ||
 1439             (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
 1440             (fp->f_flag & FREAD) == 0) {
 1441                 error = EBADF;
 1442                 goto done;
 1443         }
 1444         if (fp->f_type != DTYPE_VNODE) {
 1445                 error = EINVAL;
 1446                 goto done;
 1447         }
 1448         vp = (struct vnode *)fp->f_data;
 1449         obj = vp->v_object;
 1450         if (vp->v_type != VREG || obj == NULL) {
 1451                 error = EINVAL;
 1452                 goto done;
 1453         }
 1454         error = getsock(p->p_fd, uap->s, &fp);
 1455         if (error)
 1456                 goto done;
 1457         so = (struct socket *)fp->f_data;
 1458         if (so->so_type != SOCK_STREAM) {
 1459                 error = EINVAL;
 1460                 goto done;
 1461         }
 1462         if ((so->so_state & SS_ISCONNECTED) == 0) {
 1463                 error = ENOTCONN;
 1464                 goto done;
 1465         }
 1466         if (uap->offset < 0) {
 1467                 error = EINVAL;
 1468                 goto done;
 1469         }
 1470 
 1471         /*
 1472          * If specified, get the pointer to the sf_hdtr struct for
 1473          * any headers/trailers.
 1474          */
 1475         if (uap->hdtr != NULL) {
 1476                 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 1477                 if (error)
 1478                         goto done;
 1479                 /*
 1480                  * Send any headers. Wimp out and use writev(2).
 1481                  */
 1482                 if (hdtr.headers != NULL) {
 1483                         nuap.fd = uap->s;
 1484                         nuap.iovp = hdtr.headers;
 1485                         nuap.iovcnt = hdtr.hdr_cnt;
 1486                         error = writev(p, &nuap);
 1487                         if (error)
 1488                                 goto done;
 1489                         sbytes += p->p_retval[0];
 1490                 }
 1491         }
 1492 
 1493         /*
 1494          * Protect against multiple writers to the socket.
 1495          */
 1496         (void) sblock(&so->so_snd, M_WAITOK);
 1497 
 1498         /*
 1499          * Loop through the pages in the file, starting with the requested
 1500          * offset. Get a file page (do I/O if necessary), map the file page
 1501          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 1502          * it on the socket.
 1503          */
 1504         for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
 1505                 vm_pindex_t pindex;
 1506                 vm_offset_t pgoff;
 1507 
 1508                 pindex = OFF_TO_IDX(off);
 1509 retry_lookup:
 1510                 /*
 1511                  * Calculate the amount to transfer. Not to exceed a page,
 1512                  * the EOF, or the passed in nbytes.
 1513                  */
 1514                 xfsize = obj->un_pager.vnp.vnp_size - off;
 1515                 if (xfsize > PAGE_SIZE)
 1516                         xfsize = PAGE_SIZE;
 1517                 pgoff = (vm_offset_t)(off & PAGE_MASK);
 1518                 if (PAGE_SIZE - pgoff < xfsize)
 1519                         xfsize = PAGE_SIZE - pgoff;
 1520                 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
 1521                         xfsize = uap->nbytes - sbytes;
 1522                 if (xfsize <= 0)
 1523                         break;
 1524                 /*
 1525                  * Optimize the non-blocking case by looking at the socket space
 1526                  * before going to the extra work of constituting the sf_buf.
 1527                  */
 1528                 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
 1529                         if (so->so_state & SS_CANTSENDMORE)
 1530                                 error = EPIPE;
 1531                         else
 1532                                 error = EAGAIN;
 1533                         sbunlock(&so->so_snd);
 1534                         goto done;
 1535                 }
 1536                 /*
 1537                  * Attempt to look up the page. If the page doesn't exist or the
 1538                  * part we're interested in isn't valid, then read it from disk.
 1539                  * If some other part of the kernel has this page (i.e. it's busy),
 1540                  * then disk I/O may be occurring on it, so wait and retry.
 1541                  */
 1542                 pg = vm_page_lookup(obj, pindex);
 1543                 if (pg == NULL || (!(pg->flags & PG_BUSY) && !pg->busy &&
 1544                     !vm_page_is_valid(pg, pgoff, xfsize))) {
 1545                         struct uio auio;
 1546                         struct iovec aiov;
 1547                         int bsize;
 1548 
 1549                         if (pg == NULL) {
 1550                                 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
 1551                                 if (pg == NULL) {
 1552                                         VM_WAIT;
 1553                                         goto retry_lookup;
 1554                                 }
 1555                                 /*
 1556                                  * don't just clear PG_BUSY manually -
 1557                                  * vm_page_alloc() should be considered opaque,
 1558                                  * use the VM routine provided to clear
 1559                                  * PG_BUSY.
 1560                                  */
 1561                                 vm_page_wakeup(pg);
 1562 
 1563                         }
 1564                         /*
 1565                          * Ensure that our page is still around when the I/O completes.
 1566                          */
 1567                         vm_page_io_start(pg);
 1568                         vm_page_wire(pg);
 1569                         /*
 1570                          * Get the page from backing store.
 1571                          */
 1572                         bsize = vp->v_mount->mnt_stat.f_iosize;
 1573                         auio.uio_iov = &aiov;
 1574                         auio.uio_iovcnt = 1;
 1575                         aiov.iov_base = 0;
 1576                         aiov.iov_len = MAXBSIZE;
 1577                         auio.uio_resid = MAXBSIZE;
 1578                         auio.uio_offset = trunc_page(off);
 1579                         auio.uio_segflg = UIO_NOCOPY;
 1580                         auio.uio_rw = UIO_READ;
 1581                         auio.uio_procp = p;
 1582                         vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
 1583                         error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
 1584                                 p->p_ucred);
 1585                         VOP_UNLOCK(vp, 0, p);
 1586                         vm_page_flag_clear(pg, PG_ZERO);
 1587                         vm_page_io_finish(pg);
 1588                         if (error) {
 1589                                 vm_page_unwire(pg, 0);
 1590                                 /*
 1591                                  * See if anyone else might know about this page.
 1592                                  * If not and it is not valid, then free it.
 1593                                  */
 1594                                 if (pg->wire_count == 0 && pg->valid == 0 &&
 1595                                     pg->busy == 0 && !(pg->flags & PG_BUSY) &&
 1596                                     pg->hold_count == 0)
 1597                                         vm_page_free(pg);
 1598                                 sbunlock(&so->so_snd);
 1599                                 goto done;
 1600                         }
 1601                 } else {
 1602                         if ((pg->flags & PG_BUSY) || pg->busy)  {
 1603                                 s = splvm();
 1604                                 if ((pg->flags & PG_BUSY) || pg->busy) {
 1605                                         /*
 1606                                          * Page is busy. Wait and retry.
 1607                                          */
 1608                                         vm_page_flag_set(pg, PG_WANTED);
 1609                                         tsleep(pg, PVM, "sfpbsy", 0);
 1610                                         splx(s);
 1611                                         goto retry_lookup;
 1612                                 }
 1613                                 splx(s);
 1614                         }
 1615                         /*
 1616                          * Protect from having the page ripped out from beneath us.
 1617                          */
 1618                         vm_page_wire(pg);
 1619                 }
 1620                 /*
 1621                  * Allocate a kernel virtual page and insert the physical page
 1622                  * into it.
 1623                  */
 1624                 sf = sf_buf_alloc();
 1625                 sf->m = pg;
 1626                 pmap_qenter(sf->kva, &pg, 1);
 1627                 /*
 1628                  * Get an mbuf header and set it up as having external storage.
 1629                  */
 1630                 MGETHDR(m, M_WAIT, MT_DATA);
 1631                 if (m == NULL) {
 1632                         error = ENOBUFS;
 1633                         goto done;
 1634                 }
 1635                 m->m_ext.ext_free = sf_buf_free;
 1636                 m->m_ext.ext_ref = sf_buf_ref;
 1637                 m->m_ext.ext_buf = (void *)sf->kva;
 1638                 m->m_ext.ext_size = PAGE_SIZE;
 1639                 m->m_data = (char *) sf->kva + pgoff;
 1640                 m->m_flags |= M_EXT;
 1641                 m->m_pkthdr.len = m->m_len = xfsize;
 1642                 /*
 1643                  * Add the buffer to the socket buffer chain.
 1644                  */
 1645                 s = splnet();
 1646 retry_space:
 1647                 /*
 1648                  * Make sure that the socket is still able to take more data.
 1649                  * CANTSENDMORE being true usually means that the connection
 1650                  * was closed. so_error is true when an error was sensed after
 1651                  * a previous send.
 1652                  * The state is checked after the page mapping and buffer
 1653                  * allocation above since those operations may block and make
 1654                  * any socket checks stale. From this point forward, nothing
 1655                  * blocks before the pru_send (or more accurately, any blocking
 1656                  * results in a loop back to here to re-check).
 1657                  */
 1658                 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
 1659                         if (so->so_state & SS_CANTSENDMORE) {
 1660                                 error = EPIPE;
 1661                         } else {
 1662                                 error = so->so_error;
 1663                                 so->so_error = 0;
 1664                         }
 1665                         m_freem(m);
 1666                         sbunlock(&so->so_snd);
 1667                         splx(s);
 1668                         goto done;
 1669                 }
 1670                 /*
 1671                  * Wait for socket space to become available. We do this just
 1672                  * after checking the connection state above in order to avoid
 1673                  * a race condition with sbwait().
 1674                  */
 1675                 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
 1676                         if (so->so_state & SS_NBIO) {
 1677                                 m_freem(m);
 1678                                 sbunlock(&so->so_snd);
 1679                                 splx(s);
 1680                                 error = EAGAIN;
 1681                                 goto done;
 1682                         }
 1683                         error = sbwait(&so->so_snd);
 1684                         /*
 1685                          * An error from sbwait usually indicates that we've
 1686                          * been interrupted by a signal. If we've sent anything
 1687                          * then return bytes sent, otherwise return the error.
 1688                          */
 1689                         if (error) {
 1690                                 m_freem(m);
 1691                                 sbunlock(&so->so_snd);
 1692                                 splx(s);
 1693                                 goto done;
 1694                         }
 1695                         goto retry_space;
 1696                 }
 1697                 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
 1698                 splx(s);
 1699                 if (error) {
 1700                         sbunlock(&so->so_snd);
 1701                         goto done;
 1702                 }
 1703         }
 1704         sbunlock(&so->so_snd);
 1705 
 1706         /*
 1707          * Send trailers. Wimp out and use writev(2).
 1708          */
 1709         if (uap->hdtr != NULL && hdtr.trailers != NULL) {
 1710                         nuap.fd = uap->s;
 1711                         nuap.iovp = hdtr.trailers;
 1712                         nuap.iovcnt = hdtr.trl_cnt;
 1713                         error = writev(p, &nuap);
 1714                         if (error)
 1715                                 goto done;
 1716                         sbytes += p->p_retval[0];
 1717         }
 1718 
 1719 done:
 1720         if (uap->sbytes != NULL) {
 1721                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
 1722         }
 1723         return (error);
 1724 }

Cache object: e2b4b0f328341b8f38a3e0eaf21b7f9b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.