The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_syscalls.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1989, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * sendfile(2) and related extensions:
    6  * Copyright (c) 1998, David Greenman. All rights reserved. 
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by the University of
   19  *      California, Berkeley and its contributors.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)uipc_syscalls.c     8.4 (Berkeley) 2/21/94
   37  * $FreeBSD$
   38  */
   39 
   40 #include "opt_compat.h"
   41 #include "opt_ktrace.h"
   42 
   43 #include <sys/param.h>
   44 #include <sys/systm.h>
   45 #include <sys/kernel.h>
   46 #include <sys/sysproto.h>
   47 #include <sys/malloc.h>
   48 #include <sys/filedesc.h>
   49 #include <sys/event.h>
   50 #include <sys/proc.h>
   51 #include <sys/fcntl.h>
   52 #include <sys/file.h>
   53 #include <sys/filio.h>
   54 #include <sys/mbuf.h>
   55 #include <sys/protosw.h>
   56 #include <sys/socket.h>
   57 #include <sys/socketvar.h>
   58 #include <sys/signalvar.h>
   59 #include <sys/uio.h>
   60 #include <sys/vnode.h>
   61 #include <sys/lock.h>
   62 #include <sys/mount.h>
   63 #ifdef KTRACE
   64 #include <sys/ktrace.h>
   65 #endif
   66 #include <vm/vm.h>
   67 #include <vm/vm_object.h>
   68 #include <vm/vm_page.h>
   69 #include <vm/vm_pageout.h>
   70 #include <vm/vm_kern.h>
   71 #include <vm/vm_extern.h>
   72 
   73 static void sf_buf_init(void *arg);
   74 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
   75 
   76 static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags));
   77 static int recvit __P((struct proc *p, int s, struct msghdr *mp,
   78                        caddr_t namelenp));
   79   
   80 static int accept1 __P((struct proc *p, struct accept_args *uap, int compat));
   81 static int do_sendfile __P((struct proc *p, struct sendfile_args *uap,
   82                             int compat));
   83 static int getsockname1 __P((struct proc *p, struct getsockname_args *uap,
   84                              int compat));
   85 static int getpeername1 __P((struct proc *p, struct getpeername_args *uap,
   86                              int compat));
   87 
   88 static SLIST_HEAD(, sf_buf) sf_freelist;
   89 static vm_offset_t sf_base;
   90 static struct sf_buf *sf_bufs;
   91 static int sf_buf_alloc_want;
   92 
   93 /*
   94  * System call interface to the socket abstraction.
   95  */
   96 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
   97 #define COMPAT_OLDSOCK
   98 #endif
   99 
  100 extern  struct fileops socketops;
  101 
  102 int
  103 socket(p, uap)
  104         struct proc *p;
  105         register struct socket_args /* {
  106                 int     domain;
  107                 int     type;
  108                 int     protocol;
  109         } */ *uap;
  110 {
  111         struct filedesc *fdp = p->p_fd;
  112         struct socket *so;
  113         struct file *fp;
  114         int fd, error;
  115 
  116         error = falloc(p, &fp, &fd);
  117         if (error)
  118                 return (error);
  119         fhold(fp);
  120         error = socreate(uap->domain, &so, uap->type, uap->protocol, p);
  121         if (error) {
  122                 if (fdp->fd_ofiles[fd] == fp) {
  123                         fdp->fd_ofiles[fd] = NULL;
  124                         fdrop(fp, p);
  125                 }
  126         } else {
  127                 fp->f_data = (caddr_t)so;
  128                 fp->f_flag = FREAD|FWRITE;
  129                 fp->f_ops = &socketops;
  130                 fp->f_type = DTYPE_SOCKET;
  131                 p->p_retval[0] = fd;
  132         }
  133         fdrop(fp, p);
  134         return (error);
  135 }
  136 
  137 /* ARGSUSED */
  138 int
  139 bind(p, uap)
  140         struct proc *p;
  141         register struct bind_args /* {
  142                 int     s;
  143                 caddr_t name;
  144                 int     namelen;
  145         } */ *uap;
  146 {
  147         struct file *fp;
  148         struct sockaddr *sa;
  149         int error;
  150 
  151         error = holdsock(p->p_fd, uap->s, &fp);
  152         if (error)
  153                 return (error);
  154         error = getsockaddr(&sa, uap->name, uap->namelen);
  155         if (error) {
  156                 fdrop(fp, p);
  157                 return (error);
  158         }
  159         error = sobind((struct socket *)fp->f_data, sa, p);
  160         FREE(sa, M_SONAME);
  161         fdrop(fp, p);
  162         return (error);
  163 }
  164 
  165 /* ARGSUSED */
  166 int
  167 listen(p, uap)
  168         struct proc *p;
  169         register struct listen_args /* {
  170                 int     s;
  171                 int     backlog;
  172         } */ *uap;
  173 {
  174         struct file *fp;
  175         int error;
  176 
  177         error = holdsock(p->p_fd, uap->s, &fp);
  178         if (error)
  179                 return (error);
  180         error = solisten((struct socket *)fp->f_data, uap->backlog, p);
  181         fdrop(fp, p);
  182         return(error);
  183 }
  184 
  185 static int
  186 accept1(p, uap, compat)
  187         struct proc *p;
  188         register struct accept_args /* {
  189                 int     s;
  190                 caddr_t name;
  191                 int     *anamelen;
  192         } */ *uap;
  193         int compat;
  194 {
  195         struct filedesc *fdp = p->p_fd;
  196         struct file *lfp = NULL;
  197         struct file *nfp = NULL;
  198         struct sockaddr *sa;
  199         int namelen, error, s;
  200         struct socket *head, *so;
  201         int fd;
  202         u_int fflag;            /* type must match fp->f_flag */
  203         int tmp;
  204 
  205         if (uap->name) {
  206                 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
  207                         sizeof (namelen));
  208                 if(error)
  209                         return (error);
  210                 if (namelen < 0)
  211                         return (EINVAL);
  212         }
  213         error = holdsock(fdp, uap->s, &lfp);
  214         if (error)
  215                 return (error);
  216         s = splnet();
  217         head = (struct socket *)lfp->f_data;
  218         if ((head->so_options & SO_ACCEPTCONN) == 0) {
  219                 splx(s);
  220                 error = EINVAL;
  221                 goto done;
  222         }
  223         while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
  224                 if (head->so_state & SS_CANTRCVMORE) {
  225                         head->so_error = ECONNABORTED;
  226                         break;
  227                 }
  228                 if ((head->so_state & SS_NBIO) != 0) {
  229                         head->so_error = EWOULDBLOCK;
  230                         break;
  231                 }
  232                 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
  233                     "accept", 0);
  234                 if (error) {
  235                         splx(s);
  236                         goto done;
  237                 }
  238         }
  239         if (head->so_error) {
  240                 error = head->so_error;
  241                 head->so_error = 0;
  242                 splx(s);
  243                 goto done;
  244         }
  245 
  246         /*
  247          * At this point we know that there is at least one connection
  248          * ready to be accepted. Remove it from the queue prior to
  249          * allocating the file descriptor for it since falloc() may
  250          * block allowing another process to accept the connection
  251          * instead.
  252          */
  253         so = TAILQ_FIRST(&head->so_comp);
  254         TAILQ_REMOVE(&head->so_comp, so, so_list);
  255         head->so_qlen--;
  256 
  257         fflag = lfp->f_flag;
  258         error = falloc(p, &nfp, &fd);
  259         if (error) {
  260                 /*
  261                  * Probably ran out of file descriptors. Put the
  262                  * unaccepted connection back onto the queue and
  263                  * do another wakeup so some other process might
  264                  * have a chance at it.
  265                  */
  266                 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
  267                 head->so_qlen++;
  268                 wakeup_one(&head->so_timeo);
  269                 splx(s);
  270                 goto done;
  271         }
  272         fhold(nfp);
  273         p->p_retval[0] = fd;
  274 
  275         /* connection has been removed from the listen queue */
  276         KNOTE(&head->so_rcv.sb_sel.si_note, 0);
  277 
  278         so->so_state &= ~SS_COMP;
  279         so->so_head = NULL;
  280         if (head->so_sigio != NULL)
  281                 fsetown(fgetown(head->so_sigio), &so->so_sigio);
  282 
  283         nfp->f_data = (caddr_t)so;
  284         nfp->f_flag = fflag;
  285         nfp->f_ops = &socketops;
  286         nfp->f_type = DTYPE_SOCKET;
  287         /* Sync socket nonblocking/async state with file flags */
  288         tmp = fflag & FNONBLOCK;
  289         (void) fo_ioctl(nfp, FIONBIO, (caddr_t)&tmp, p);
  290         tmp = fflag & FASYNC;
  291         (void) fo_ioctl(nfp, FIOASYNC, (caddr_t)&tmp, p);
  292         sa = 0;
  293         error = soaccept(so, &sa);
  294         if (error) {
  295                 /*
  296                  * return a namelen of zero for older code which might
  297                  * ignore the return value from accept.
  298                  */     
  299                 if (uap->name != NULL) {
  300                         namelen = 0;
  301                         (void) copyout((caddr_t)&namelen,
  302                             (caddr_t)uap->anamelen, sizeof(*uap->anamelen));
  303                 }
  304                 goto noconnection;
  305         }
  306         if (sa == NULL) {
  307                 namelen = 0;
  308                 if (uap->name)
  309                         goto gotnoname;
  310                 splx(s);
  311                 error = 0;
  312                 goto done;
  313         }
  314         if (uap->name) {
  315                 /* check sa_len before it is destroyed */
  316                 if (namelen > sa->sa_len)
  317                         namelen = sa->sa_len;
  318 #ifdef COMPAT_OLDSOCK
  319                 if (compat)
  320                         ((struct osockaddr *)sa)->sa_family =
  321                             sa->sa_family;
  322 #endif
  323                 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
  324                 if (!error)
  325 gotnoname:
  326                         error = copyout((caddr_t)&namelen,
  327                             (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
  328         }
  329 noconnection:
  330         if (sa)
  331                 FREE(sa, M_SONAME);
  332 
  333         /*
  334          * close the new descriptor, assuming someone hasn't ripped it
  335          * out from under us.
  336          */
  337         if (error) {
  338                 if (fdp->fd_ofiles[fd] == nfp) {
  339                         fdp->fd_ofiles[fd] = NULL;
  340                         fdrop(nfp, p);
  341                 }
  342         }
  343         splx(s);
  344 
  345         /*
  346          * Release explicitly held references before returning.
  347          */
  348 done:
  349         if (nfp != NULL)
  350                 fdrop(nfp, p);
  351         fdrop(lfp, p);
  352         return (error);
  353 }
  354 
  355 int
  356 accept(p, uap)
  357         struct proc *p;
  358         struct accept_args *uap;
  359 {
  360 
  361         return (accept1(p, uap, 0));
  362 }
  363 
  364 #ifdef COMPAT_OLDSOCK
  365 int
  366 oaccept(p, uap)
  367         struct proc *p;
  368         struct accept_args *uap;
  369 {
  370 
  371         return (accept1(p, uap, 1));
  372 }
  373 #endif /* COMPAT_OLDSOCK */
  374 
  375 /* ARGSUSED */
  376 int
  377 connect(p, uap)
  378         struct proc *p;
  379         register struct connect_args /* {
  380                 int     s;
  381                 caddr_t name;
  382                 int     namelen;
  383         } */ *uap;
  384 {
  385         struct file *fp;
  386         register struct socket *so;
  387         struct sockaddr *sa;
  388         int error, s;
  389 
  390         error = holdsock(p->p_fd, uap->s, &fp);
  391         if (error)
  392                 return (error);
  393         so = (struct socket *)fp->f_data;
  394         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
  395                 error = EALREADY;
  396                 goto done;
  397         }
  398         error = getsockaddr(&sa, uap->name, uap->namelen);
  399         if (error)
  400                 goto done;
  401         error = soconnect(so, sa, p);
  402         if (error)
  403                 goto bad;
  404         if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
  405                 FREE(sa, M_SONAME);
  406                 error = EINPROGRESS;
  407                 goto done;
  408         }
  409         s = splnet();
  410         while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  411                 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
  412                     "connec", 0);
  413                 if (error)
  414                         break;
  415         }
  416         if (error == 0) {
  417                 error = so->so_error;
  418                 so->so_error = 0;
  419         }
  420         splx(s);
  421 bad:
  422         so->so_state &= ~SS_ISCONNECTING;
  423         FREE(sa, M_SONAME);
  424         if (error == ERESTART)
  425                 error = EINTR;
  426 done:
  427         fdrop(fp, p);
  428         return (error);
  429 }
  430 
  431 int
  432 socketpair(p, uap)
  433         struct proc *p;
  434         register struct socketpair_args /* {
  435                 int     domain;
  436                 int     type;
  437                 int     protocol;
  438                 int     *rsv;
  439         } */ *uap;
  440 {
  441         register struct filedesc *fdp = p->p_fd;
  442         struct file *fp1, *fp2;
  443         struct socket *so1, *so2;
  444         int fd, error, sv[2];
  445 
  446         error = socreate(uap->domain, &so1, uap->type, uap->protocol, p);
  447         if (error)
  448                 return (error);
  449         error = socreate(uap->domain, &so2, uap->type, uap->protocol, p);
  450         if (error)
  451                 goto free1;
  452         error = falloc(p, &fp1, &fd);
  453         if (error)
  454                 goto free2;
  455         fhold(fp1);
  456         sv[0] = fd;
  457         fp1->f_data = (caddr_t)so1;
  458         error = falloc(p, &fp2, &fd);
  459         if (error)
  460                 goto free3;
  461         fhold(fp2);
  462         fp2->f_data = (caddr_t)so2;
  463         sv[1] = fd;
  464         error = soconnect2(so1, so2);
  465         if (error)
  466                 goto free4;
  467         if (uap->type == SOCK_DGRAM) {
  468                 /*
  469                  * Datagram socket connection is asymmetric.
  470                  */
  471                  error = soconnect2(so2, so1);
  472                  if (error)
  473                         goto free4;
  474         }
  475         fp1->f_flag = fp2->f_flag = FREAD|FWRITE;
  476         fp1->f_ops = fp2->f_ops = &socketops;
  477         fp1->f_type = fp2->f_type = DTYPE_SOCKET;
  478         error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
  479         fdrop(fp1, p);
  480         fdrop(fp2, p);
  481         return (error);
  482 free4:
  483         if (fdp->fd_ofiles[sv[1]] == fp2) {
  484                 fdp->fd_ofiles[sv[1]] = NULL;
  485                 fdrop(fp2, p);
  486         }
  487         fdrop(fp2, p);
  488 free3:
  489         if (fdp->fd_ofiles[sv[0]] == fp1) {
  490                 fdp->fd_ofiles[sv[0]] = NULL;
  491                 fdrop(fp1, p);
  492         }
  493         fdrop(fp1, p);
  494 free2:
  495         (void)soclose(so2);
  496 free1:
  497         (void)soclose(so1);
  498         return (error);
  499 }
  500 
  501 static int
  502 sendit(p, s, mp, flags)
  503         register struct proc *p;
  504         int s;
  505         register struct msghdr *mp;
  506         int flags;
  507 {
  508         struct file *fp;
  509         struct uio auio;
  510         register struct iovec *iov;
  511         register int i;
  512         struct mbuf *control;
  513         struct sockaddr *to;
  514         int len, error;
  515         struct socket *so;
  516 #ifdef KTRACE
  517         struct iovec *ktriov = NULL;
  518         struct uio ktruio;
  519 #endif
  520 
  521         error = holdsock(p->p_fd, s, &fp);
  522         if (error)
  523                 return (error);
  524         auio.uio_iov = mp->msg_iov;
  525         auio.uio_iovcnt = mp->msg_iovlen;
  526         auio.uio_segflg = UIO_USERSPACE;
  527         auio.uio_rw = UIO_WRITE;
  528         auio.uio_procp = p;
  529         auio.uio_offset = 0;                    /* XXX */
  530         auio.uio_resid = 0;
  531         iov = mp->msg_iov;
  532         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  533                 if ((auio.uio_resid += iov->iov_len) < 0) {
  534                         fdrop(fp, p);
  535                         return (EINVAL);
  536                 }
  537         }
  538         if (mp->msg_name) {
  539                 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
  540                 if (error) {
  541                         fdrop(fp, p);
  542                         return (error);
  543                 }
  544         } else {
  545                 to = 0;
  546         }
  547         if (mp->msg_control) {
  548                 if (mp->msg_controllen < sizeof(struct cmsghdr)
  549 #ifdef COMPAT_OLDSOCK
  550                     && mp->msg_flags != MSG_COMPAT
  551 #endif
  552                 ) {
  553                         error = EINVAL;
  554                         goto bad;
  555                 }
  556                 error = sockargs(&control, mp->msg_control,
  557                     mp->msg_controllen, MT_CONTROL);
  558                 if (error)
  559                         goto bad;
  560 #ifdef COMPAT_OLDSOCK
  561                 if (mp->msg_flags == MSG_COMPAT) {
  562                         register struct cmsghdr *cm;
  563 
  564                         M_PREPEND(control, sizeof(*cm), M_WAIT);
  565                         if (control == 0) {
  566                                 error = ENOBUFS;
  567                                 goto bad;
  568                         } else {
  569                                 cm = mtod(control, struct cmsghdr *);
  570                                 cm->cmsg_len = control->m_len;
  571                                 cm->cmsg_level = SOL_SOCKET;
  572                                 cm->cmsg_type = SCM_RIGHTS;
  573                         }
  574                 }
  575 #endif
  576         } else {
  577                 control = 0;
  578         }
  579 #ifdef KTRACE
  580         if (KTRPOINT(p, KTR_GENIO)) {
  581                 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
  582 
  583                 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
  584                 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
  585                 ktruio = auio;
  586         }
  587 #endif
  588         len = auio.uio_resid;
  589         so = (struct socket *)fp->f_data;
  590         error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
  591                                                      flags, p);
  592         if (error) {
  593                 if (auio.uio_resid != len && (error == ERESTART ||
  594                     error == EINTR || error == EWOULDBLOCK))
  595                         error = 0;
  596                 if (error == EPIPE)
  597                         psignal(p, SIGPIPE);
  598         }
  599         if (error == 0)
  600                 p->p_retval[0] = len - auio.uio_resid;
  601 #ifdef KTRACE
  602         if (ktriov != NULL) {
  603                 if (error == 0) {
  604                         ktruio.uio_iov = ktriov;
  605                         ktruio.uio_resid = p->p_retval[0];
  606                         ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error);
  607                 }
  608                 FREE(ktriov, M_TEMP);
  609         }
  610 #endif
  611 bad:
  612         fdrop(fp, p);
  613         if (to)
  614                 FREE(to, M_SONAME);
  615         return (error);
  616 }
  617 
  618 int
  619 sendto(p, uap)
  620         struct proc *p;
  621         register struct sendto_args /* {
  622                 int     s;
  623                 caddr_t buf;
  624                 size_t  len;
  625                 int     flags;
  626                 caddr_t to;
  627                 int     tolen;
  628         } */ *uap;
  629 {
  630         struct msghdr msg;
  631         struct iovec aiov;
  632 
  633         msg.msg_name = uap->to;
  634         msg.msg_namelen = uap->tolen;
  635         msg.msg_iov = &aiov;
  636         msg.msg_iovlen = 1;
  637         msg.msg_control = 0;
  638 #ifdef COMPAT_OLDSOCK
  639         msg.msg_flags = 0;
  640 #endif
  641         aiov.iov_base = uap->buf;
  642         aiov.iov_len = uap->len;
  643         return (sendit(p, uap->s, &msg, uap->flags));
  644 }
  645 
  646 #ifdef COMPAT_OLDSOCK
  647 int
  648 osend(p, uap)
  649         struct proc *p;
  650         register struct osend_args /* {
  651                 int     s;
  652                 caddr_t buf;
  653                 int     len;
  654                 int     flags;
  655         } */ *uap;
  656 {
  657         struct msghdr msg;
  658         struct iovec aiov;
  659 
  660         msg.msg_name = 0;
  661         msg.msg_namelen = 0;
  662         msg.msg_iov = &aiov;
  663         msg.msg_iovlen = 1;
  664         aiov.iov_base = uap->buf;
  665         aiov.iov_len = uap->len;
  666         msg.msg_control = 0;
  667         msg.msg_flags = 0;
  668         return (sendit(p, uap->s, &msg, uap->flags));
  669 }
  670 
  671 int
  672 osendmsg(p, uap)
  673         struct proc *p;
  674         register struct osendmsg_args /* {
  675                 int     s;
  676                 caddr_t msg;
  677                 int     flags;
  678         } */ *uap;
  679 {
  680         struct msghdr msg;
  681         struct iovec aiov[UIO_SMALLIOV], *iov;
  682         int error;
  683 
  684         error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
  685         if (error)
  686                 return (error);
  687         if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
  688                 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
  689                         return (EMSGSIZE);
  690                 MALLOC(iov, struct iovec *,
  691                       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
  692                       M_WAITOK);
  693         } else
  694                 iov = aiov;
  695         error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
  696             (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
  697         if (error)
  698                 goto done;
  699         msg.msg_flags = MSG_COMPAT;
  700         msg.msg_iov = iov;
  701         error = sendit(p, uap->s, &msg, uap->flags);
  702 done:
  703         if (iov != aiov)
  704                 FREE(iov, M_IOV);
  705         return (error);
  706 }
  707 #endif
  708 
  709 int
  710 sendmsg(p, uap)
  711         struct proc *p;
  712         register struct sendmsg_args /* {
  713                 int     s;
  714                 caddr_t msg;
  715                 int     flags;
  716         } */ *uap;
  717 {
  718         struct msghdr msg;
  719         struct iovec aiov[UIO_SMALLIOV], *iov;
  720         int error;
  721 
  722         error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
  723         if (error)
  724                 return (error);
  725         if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
  726                 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
  727                         return (EMSGSIZE);
  728                 MALLOC(iov, struct iovec *,
  729                        sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
  730                        M_WAITOK);
  731         } else
  732                 iov = aiov;
  733         if (msg.msg_iovlen &&
  734             (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
  735             (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
  736                 goto done;
  737         msg.msg_iov = iov;
  738 #ifdef COMPAT_OLDSOCK
  739         msg.msg_flags = 0;
  740 #endif
  741         error = sendit(p, uap->s, &msg, uap->flags);
  742 done:
  743         if (iov != aiov)
  744                 FREE(iov, M_IOV);
  745         return (error);
  746 }
  747 
  748 static int
  749 recvit(p, s, mp, namelenp)
  750         register struct proc *p;
  751         int s;
  752         register struct msghdr *mp;
  753         caddr_t namelenp;
  754 {
  755         struct file *fp;
  756         struct uio auio;
  757         register struct iovec *iov;
  758         register int i;
  759         int len, error;
  760         struct mbuf *m, *control = 0;
  761         caddr_t ctlbuf;
  762         struct socket *so;
  763         struct sockaddr *fromsa = 0;
  764 #ifdef KTRACE
  765         struct iovec *ktriov = NULL;
  766         struct uio ktruio;
  767 #endif
  768 
  769         error = holdsock(p->p_fd, s, &fp);
  770         if (error)
  771                 return (error);
  772         auio.uio_iov = mp->msg_iov;
  773         auio.uio_iovcnt = mp->msg_iovlen;
  774         auio.uio_segflg = UIO_USERSPACE;
  775         auio.uio_rw = UIO_READ;
  776         auio.uio_procp = p;
  777         auio.uio_offset = 0;                    /* XXX */
  778         auio.uio_resid = 0;
  779         iov = mp->msg_iov;
  780         for (i = 0; i < mp->msg_iovlen; i++, iov++) {
  781                 if ((auio.uio_resid += iov->iov_len) < 0) {
  782                         fdrop(fp, p);
  783                         return (EINVAL);
  784                 }
  785         }
  786 #ifdef KTRACE
  787         if (KTRPOINT(p, KTR_GENIO)) {
  788                 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
  789 
  790                 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
  791                 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
  792                 ktruio = auio;
  793         }
  794 #endif
  795         len = auio.uio_resid;
  796         so = (struct socket *)fp->f_data;
  797         error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
  798             (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
  799             &mp->msg_flags);
  800         if (error) {
  801                 if (auio.uio_resid != len && (error == ERESTART ||
  802                     error == EINTR || error == EWOULDBLOCK))
  803                         error = 0;
  804         }
  805 #ifdef KTRACE
  806         if (ktriov != NULL) {
  807                 if (error == 0) {
  808                         ktruio.uio_iov = ktriov;
  809                         ktruio.uio_resid = len - auio.uio_resid;
  810                         ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error);
  811                 }
  812                 FREE(ktriov, M_TEMP);
  813         }
  814 #endif
  815         if (error)
  816                 goto out;
  817         p->p_retval[0] = len - auio.uio_resid;
  818         if (mp->msg_name) {
  819                 len = mp->msg_namelen;
  820                 if (len <= 0 || fromsa == 0)
  821                         len = 0;
  822                 else {
  823 #ifndef MIN
  824 #define MIN(a,b) ((a)>(b)?(b):(a))
  825 #endif
  826                         /* save sa_len before it is destroyed by MSG_COMPAT */
  827                         len = MIN(len, fromsa->sa_len);
  828 #ifdef COMPAT_OLDSOCK
  829                         if (mp->msg_flags & MSG_COMPAT)
  830                                 ((struct osockaddr *)fromsa)->sa_family =
  831                                     fromsa->sa_family;
  832 #endif
  833                         error = copyout(fromsa,
  834                             (caddr_t)mp->msg_name, (unsigned)len);
  835                         if (error)
  836                                 goto out;
  837                 }
  838                 mp->msg_namelen = len;
  839                 if (namelenp &&
  840                     (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
  841 #ifdef COMPAT_OLDSOCK
  842                         if (mp->msg_flags & MSG_COMPAT)
  843                                 error = 0;      /* old recvfrom didn't check */
  844                         else
  845 #endif
  846                         goto out;
  847                 }
  848         }
  849         if (mp->msg_control) {
  850 #ifdef COMPAT_OLDSOCK
  851                 /*
  852                  * We assume that old recvmsg calls won't receive access
  853                  * rights and other control info, esp. as control info
  854                  * is always optional and those options didn't exist in 4.3.
  855                  * If we receive rights, trim the cmsghdr; anything else
  856                  * is tossed.
  857                  */
  858                 if (control && mp->msg_flags & MSG_COMPAT) {
  859                         if (mtod(control, struct cmsghdr *)->cmsg_level !=
  860                             SOL_SOCKET ||
  861                             mtod(control, struct cmsghdr *)->cmsg_type !=
  862                             SCM_RIGHTS) {
  863                                 mp->msg_controllen = 0;
  864                                 goto out;
  865                         }
  866                         control->m_len -= sizeof (struct cmsghdr);
  867                         control->m_data += sizeof (struct cmsghdr);
  868                 }
  869 #endif
  870                 len = mp->msg_controllen;
  871                 m = control;
  872                 mp->msg_controllen = 0;
  873                 ctlbuf = (caddr_t) mp->msg_control;
  874 
  875                 while (m && len > 0) {
  876                         unsigned int tocopy;
  877 
  878                         if (len >= m->m_len) 
  879                                 tocopy = m->m_len;
  880                         else {
  881                                 mp->msg_flags |= MSG_CTRUNC;
  882                                 tocopy = len;
  883                         }
  884                 
  885                         if ((error = copyout((caddr_t)mtod(m, caddr_t),
  886                                         ctlbuf, tocopy)) != 0)
  887                                 goto out;
  888 
  889                         ctlbuf += tocopy;
  890                         len -= tocopy;
  891                         m = m->m_next;
  892                 }
  893                 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
  894         }
  895 out:
  896         fdrop(fp, p);
  897         if (fromsa)
  898                 FREE(fromsa, M_SONAME);
  899         if (control)
  900                 m_freem(control);
  901         return (error);
  902 }
  903 
  904 int
  905 recvfrom(p, uap)
  906         struct proc *p;
  907         register struct recvfrom_args /* {
  908                 int     s;
  909                 caddr_t buf;
  910                 size_t  len;
  911                 int     flags;
  912                 caddr_t from;
  913                 int     *fromlenaddr;
  914         } */ *uap;
  915 {
  916         struct msghdr msg;
  917         struct iovec aiov;
  918         int error;
  919 
  920         if (uap->fromlenaddr) {
  921                 error = copyin((caddr_t)uap->fromlenaddr,
  922                     (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
  923                 if (error)
  924                         return (error);
  925         } else
  926                 msg.msg_namelen = 0;
  927         msg.msg_name = uap->from;
  928         msg.msg_iov = &aiov;
  929         msg.msg_iovlen = 1;
  930         aiov.iov_base = uap->buf;
  931         aiov.iov_len = uap->len;
  932         msg.msg_control = 0;
  933         msg.msg_flags = uap->flags;
  934         return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr));
  935 }
  936 
  937 #ifdef COMPAT_OLDSOCK
  938 int
  939 orecvfrom(p, uap)
  940         struct proc *p;
  941         struct recvfrom_args *uap;
  942 {
  943 
  944         uap->flags |= MSG_COMPAT;
  945         return (recvfrom(p, uap));
  946 }
  947 #endif
  948 
  949 
  950 #ifdef COMPAT_OLDSOCK
  951 int
  952 orecv(p, uap)
  953         struct proc *p;
  954         register struct orecv_args /* {
  955                 int     s;
  956                 caddr_t buf;
  957                 int     len;
  958                 int     flags;
  959         } */ *uap;
  960 {
  961         struct msghdr msg;
  962         struct iovec aiov;
  963 
  964         msg.msg_name = 0;
  965         msg.msg_namelen = 0;
  966         msg.msg_iov = &aiov;
  967         msg.msg_iovlen = 1;
  968         aiov.iov_base = uap->buf;
  969         aiov.iov_len = uap->len;
  970         msg.msg_control = 0;
  971         msg.msg_flags = uap->flags;
  972         return (recvit(p, uap->s, &msg, (caddr_t)0));
  973 }
  974 
  975 /*
  976  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
  977  * overlays the new one, missing only the flags, and with the (old) access
  978  * rights where the control fields are now.
  979  */
  980 int
  981 orecvmsg(p, uap)
  982         struct proc *p;
  983         register struct orecvmsg_args /* {
  984                 int     s;
  985                 struct  omsghdr *msg;
  986                 int     flags;
  987         } */ *uap;
  988 {
  989         struct msghdr msg;
  990         struct iovec aiov[UIO_SMALLIOV], *iov;
  991         int error;
  992 
  993         error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
  994             sizeof (struct omsghdr));
  995         if (error)
  996                 return (error);
  997         if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
  998                 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
  999                         return (EMSGSIZE);
 1000                 MALLOC(iov, struct iovec *,
 1001                       sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
 1002                       M_WAITOK);
 1003         } else
 1004                 iov = aiov;
 1005         msg.msg_flags = uap->flags | MSG_COMPAT;
 1006         error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
 1007             (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
 1008         if (error)
 1009                 goto done;
 1010         msg.msg_iov = iov;
 1011         error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);
 1012 
 1013         if (msg.msg_controllen && error == 0)
 1014                 error = copyout((caddr_t)&msg.msg_controllen,
 1015                     (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
 1016 done:
 1017         if (iov != aiov)
 1018                 FREE(iov, M_IOV);
 1019         return (error);
 1020 }
 1021 #endif
 1022 
 1023 int
 1024 recvmsg(p, uap)
 1025         struct proc *p;
 1026         register struct recvmsg_args /* {
 1027                 int     s;
 1028                 struct  msghdr *msg;
 1029                 int     flags;
 1030         } */ *uap;
 1031 {
 1032         struct msghdr msg;
 1033         struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
 1034         register int error;
 1035 
 1036         error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
 1037         if (error)
 1038                 return (error);
 1039         if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
 1040                 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
 1041                         return (EMSGSIZE);
 1042                 MALLOC(iov, struct iovec *,
 1043                        sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
 1044                        M_WAITOK);
 1045         } else
 1046                 iov = aiov;
 1047 #ifdef COMPAT_OLDSOCK
 1048         msg.msg_flags = uap->flags &~ MSG_COMPAT;
 1049 #else
 1050         msg.msg_flags = uap->flags;
 1051 #endif
 1052         uiov = msg.msg_iov;
 1053         msg.msg_iov = iov;
 1054         error = copyin((caddr_t)uiov, (caddr_t)iov,
 1055             (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
 1056         if (error)
 1057                 goto done;
 1058         error = recvit(p, uap->s, &msg, (caddr_t)0);
 1059         if (!error) {
 1060                 msg.msg_iov = uiov;
 1061                 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
 1062         }
 1063 done:
 1064         if (iov != aiov)
 1065                 FREE(iov, M_IOV);
 1066         return (error);
 1067 }
 1068 
 1069 /* ARGSUSED */
 1070 int
 1071 shutdown(p, uap)
 1072         struct proc *p;
 1073         register struct shutdown_args /* {
 1074                 int     s;
 1075                 int     how;
 1076         } */ *uap;
 1077 {
 1078         struct file *fp;
 1079         int error;
 1080 
 1081         error = holdsock(p->p_fd, uap->s, &fp);
 1082         if (error)
 1083                 return (error);
 1084         error = soshutdown((struct socket *)fp->f_data, uap->how);
 1085         fdrop(fp, p);
 1086         return(error);
 1087 }
 1088 
 1089 /* ARGSUSED */
 1090 int
 1091 setsockopt(p, uap)
 1092         struct proc *p;
 1093         register struct setsockopt_args /* {
 1094                 int     s;
 1095                 int     level;
 1096                 int     name;
 1097                 caddr_t val;
 1098                 int     valsize;
 1099         } */ *uap;
 1100 {
 1101         struct file *fp;
 1102         struct sockopt sopt;
 1103         int error;
 1104 
 1105         if (uap->val == 0 && uap->valsize != 0)
 1106                 return (EFAULT);
 1107         if (uap->valsize < 0)
 1108                 return (EINVAL);
 1109 
 1110         error = holdsock(p->p_fd, uap->s, &fp);
 1111         if (error)
 1112                 return (error);
 1113 
 1114         sopt.sopt_dir = SOPT_SET;
 1115         sopt.sopt_level = uap->level;
 1116         sopt.sopt_name = uap->name;
 1117         sopt.sopt_val = uap->val;
 1118         sopt.sopt_valsize = uap->valsize;
 1119         sopt.sopt_p = p;
 1120         error = sosetopt((struct socket *)fp->f_data, &sopt);
 1121         fdrop(fp, p);
 1122         return(error);
 1123 }
 1124 
 1125 /* ARGSUSED */
 1126 int
 1127 getsockopt(p, uap)
 1128         struct proc *p;
 1129         register struct getsockopt_args /* {
 1130                 int     s;
 1131                 int     level;
 1132                 int     name;
 1133                 caddr_t val;
 1134                 int     *avalsize;
 1135         } */ *uap;
 1136 {
 1137         int     valsize, error;
 1138         struct  file *fp;
 1139         struct  sockopt sopt;
 1140 
 1141         error = holdsock(p->p_fd, uap->s, &fp);
 1142         if (error)
 1143                 return (error);
 1144         if (uap->val) {
 1145                 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
 1146                     sizeof (valsize));
 1147                 if (error) {
 1148                         fdrop(fp, p);
 1149                         return (error);
 1150                 }
 1151                 if (valsize < 0) {
 1152                         fdrop(fp, p);
 1153                         return (EINVAL);
 1154                 }
 1155         } else {
 1156                 valsize = 0;
 1157         }
 1158 
 1159         sopt.sopt_dir = SOPT_GET;
 1160         sopt.sopt_level = uap->level;
 1161         sopt.sopt_name = uap->name;
 1162         sopt.sopt_val = uap->val;
 1163         sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
 1164         sopt.sopt_p = p;
 1165 
 1166         error = sogetopt((struct socket *)fp->f_data, &sopt);
 1167         if (error == 0) {
 1168                 valsize = sopt.sopt_valsize;
 1169                 error = copyout((caddr_t)&valsize,
 1170                                 (caddr_t)uap->avalsize, sizeof (valsize));
 1171         }
 1172         fdrop(fp, p);
 1173         return (error);
 1174 }
 1175 
 1176 /*
 1177  * Get socket name.
 1178  */
 1179 /* ARGSUSED */
 1180 static int
 1181 getsockname1(p, uap, compat)
 1182         struct proc *p;
 1183         register struct getsockname_args /* {
 1184                 int     fdes;
 1185                 caddr_t asa;
 1186                 int     *alen;
 1187         } */ *uap;
 1188         int compat;
 1189 {
 1190         struct file *fp;
 1191         register struct socket *so;
 1192         struct sockaddr *sa;
 1193         int len, error;
 1194 
 1195         error = holdsock(p->p_fd, uap->fdes, &fp);
 1196         if (error)
 1197                 return (error);
 1198         error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
 1199         if (error) {
 1200                 fdrop(fp, p);
 1201                 return (error);
 1202         }
 1203         if (len < 0) {
 1204                 fdrop(fp, p);
 1205                 return (EINVAL);
 1206         }
 1207         so = (struct socket *)fp->f_data;
 1208         sa = 0;
 1209         error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
 1210         if (error)
 1211                 goto bad;
 1212         if (sa == 0) {
 1213                 len = 0;
 1214                 goto gotnothing;
 1215         }
 1216 
 1217         len = MIN(len, sa->sa_len);
 1218 #ifdef COMPAT_OLDSOCK
 1219         if (compat)
 1220                 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
 1221 #endif
 1222         error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
 1223         if (error == 0)
 1224 gotnothing:
 1225                 error = copyout((caddr_t)&len, (caddr_t)uap->alen,
 1226                     sizeof (len));
 1227 bad:
 1228         if (sa)
 1229                 FREE(sa, M_SONAME);
 1230         fdrop(fp, p);
 1231         return (error);
 1232 }
 1233 
 1234 int
 1235 getsockname(p, uap)
 1236         struct proc *p;
 1237         struct getsockname_args *uap;
 1238 {
 1239 
 1240         return (getsockname1(p, uap, 0));
 1241 }
 1242 
 1243 #ifdef COMPAT_OLDSOCK
 1244 int
 1245 ogetsockname(p, uap)
 1246         struct proc *p;
 1247         struct getsockname_args *uap;
 1248 {
 1249 
 1250         return (getsockname1(p, uap, 1));
 1251 }
 1252 #endif /* COMPAT_OLDSOCK */
 1253 
 1254 /*
 1255  * Get name of peer for connected socket.
 1256  */
 1257 /* ARGSUSED */
 1258 static int
 1259 getpeername1(p, uap, compat)
 1260         struct proc *p;
 1261         register struct getpeername_args /* {
 1262                 int     fdes;
 1263                 caddr_t asa;
 1264                 int     *alen;
 1265         } */ *uap;
 1266         int compat;
 1267 {
 1268         struct file *fp;
 1269         register struct socket *so;
 1270         struct sockaddr *sa;
 1271         int len, error;
 1272 
 1273         error = holdsock(p->p_fd, uap->fdes, &fp);
 1274         if (error)
 1275                 return (error);
 1276         so = (struct socket *)fp->f_data;
 1277         if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 1278                 fdrop(fp, p);
 1279                 return (ENOTCONN);
 1280         }
 1281         error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
 1282         if (error) {
 1283                 fdrop(fp, p);
 1284                 return (error);
 1285         }
 1286         if (len < 0) {
 1287                 fdrop(fp, p);
 1288                 return (EINVAL);
 1289         }
 1290         sa = 0;
 1291         error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
 1292         if (error)
 1293                 goto bad;
 1294         if (sa == 0) {
 1295                 len = 0;
 1296                 goto gotnothing;
 1297         }
 1298         len = MIN(len, sa->sa_len);
 1299 #ifdef COMPAT_OLDSOCK
 1300         if (compat)
 1301                 ((struct osockaddr *)sa)->sa_family =
 1302                     sa->sa_family;
 1303 #endif
 1304         error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
 1305         if (error)
 1306                 goto bad;
 1307 gotnothing:
 1308         error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
 1309 bad:
 1310         if (sa)
 1311                 FREE(sa, M_SONAME);
 1312         fdrop(fp, p);
 1313         return (error);
 1314 }
 1315 
 1316 int
 1317 getpeername(p, uap)
 1318         struct proc *p;
 1319         struct getpeername_args *uap;
 1320 {
 1321 
 1322         return (getpeername1(p, uap, 0));
 1323 }
 1324 
 1325 #ifdef COMPAT_OLDSOCK
 1326 int
 1327 ogetpeername(p, uap)
 1328         struct proc *p;
 1329         struct ogetpeername_args *uap;
 1330 {
 1331 
 1332         /* XXX uap should have type `getpeername_args *' to begin with. */
 1333         return (getpeername1(p, (struct getpeername_args *)uap, 1));
 1334 }
 1335 #endif /* COMPAT_OLDSOCK */
 1336 
 1337 int
 1338 sockargs(mp, buf, buflen, type)
 1339         struct mbuf **mp;
 1340         caddr_t buf;
 1341         int buflen, type;
 1342 {
 1343         register struct sockaddr *sa;
 1344         register struct mbuf *m;
 1345         int error;
 1346 
 1347         if ((u_int)buflen > MLEN) {
 1348 #ifdef COMPAT_OLDSOCK
 1349                 if (type == MT_SONAME && (u_int)buflen <= 112)
 1350                         buflen = MLEN;          /* unix domain compat. hack */
 1351                 else
 1352 #endif
 1353                         if ((u_int)buflen > MCLBYTES)
 1354                                 return (EINVAL);
 1355         }
 1356         m = m_get(M_WAIT, type);
 1357         if (m == NULL)
 1358                 return (ENOBUFS);
 1359         if ((u_int)buflen > MLEN)
 1360                 MCLGET(m, M_WAIT);
 1361         m->m_len = buflen;
 1362         error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 1363         if (error)
 1364                 (void) m_free(m);
 1365         else {
 1366                 *mp = m;
 1367                 if (type == MT_SONAME) {
 1368                         sa = mtod(m, struct sockaddr *);
 1369 
 1370 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1371                         if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1372                                 sa->sa_family = sa->sa_len;
 1373 #endif
 1374                         sa->sa_len = buflen;
 1375                 }
 1376         }
 1377         return (error);
 1378 }
 1379 
 1380 int
 1381 getsockaddr(namp, uaddr, len)
 1382         struct sockaddr **namp;
 1383         caddr_t uaddr;
 1384         size_t len;
 1385 {
 1386         struct sockaddr *sa;
 1387         int error;
 1388 
 1389         if (len > SOCK_MAXADDRLEN)
 1390                 return ENAMETOOLONG;
 1391         if (len < offsetof(struct sockaddr, sa_data[0]))
 1392                 return EINVAL;
 1393         MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
 1394         error = copyin(uaddr, sa, len);
 1395         if (error) {
 1396                 FREE(sa, M_SONAME);
 1397         } else {
 1398 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 1399                 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 1400                         sa->sa_family = sa->sa_len;
 1401 #endif
 1402                 sa->sa_len = len;
 1403                 *namp = sa;
 1404         }
 1405         return error;
 1406 }
 1407 
 1408 /*
 1409  * holdsock() - load the struct file pointer associated
 1410  * with a socket into *fpp.  If an error occurs, non-zero
 1411  * will be returned and *fpp will be set to NULL.
 1412  */
 1413 int
 1414 holdsock(fdp, fdes, fpp)
 1415         struct filedesc *fdp;
 1416         int fdes;
 1417         struct file **fpp;
 1418 {
 1419         register struct file *fp = NULL;
 1420         int error = 0;
 1421 
 1422         if ((unsigned)fdes >= fdp->fd_nfiles ||
 1423             (fp = fdp->fd_ofiles[fdes]) == NULL) {
 1424                 error = EBADF;
 1425         } else if (fp->f_type != DTYPE_SOCKET) {
 1426                 error = ENOTSOCK;
 1427                 fp = NULL;
 1428         } else {
 1429                 fhold(fp);
 1430         }
 1431         *fpp = fp;
 1432         return(error);
 1433 }
 1434 
 1435 /*
 1436  * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
 1437  */
 1438 static void
 1439 sf_buf_init(void *arg)
 1440 {
 1441         int i;
 1442 
 1443         SLIST_INIT(&sf_freelist);
 1444         sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
 1445         sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT);
 1446         bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf));
 1447         for (i = 0; i < nsfbufs; i++) {
 1448                 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
 1449                 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
 1450         }
 1451 }
 1452 
 1453 /*
 1454  * Get an sf_buf from the freelist. Will block if none are available.
 1455  */
 1456 struct sf_buf *
 1457 sf_buf_alloc()
 1458 {
 1459         struct sf_buf *sf;
 1460         int s;
 1461         int error;
 1462 
 1463         s = splimp();
 1464         while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
 1465                 sf_buf_alloc_want = 1;
 1466                 mbstat.sf_allocwait++;
 1467                 error = tsleep(&sf_freelist, PVM|PCATCH, "sfbufa", 0);
 1468                 if (error)
 1469                         break;
 1470         }
 1471         if (sf != NULL) {
 1472                 SLIST_REMOVE_HEAD(&sf_freelist, free_list);
 1473                 sf->refcnt = 1;
 1474                 nsfbufsused++;
 1475                 nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
 1476         }
 1477         splx(s);
 1478         return (sf);
 1479 }
 1480 
 1481 #define dtosf(x)        (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
 1482 void
 1483 sf_buf_ref(caddr_t addr, u_int size)
 1484 {
 1485         struct sf_buf *sf;
 1486 
 1487         sf = dtosf(addr);
 1488         if (sf->refcnt == 0)
 1489                 panic("sf_buf_ref: referencing a free sf_buf");
 1490         sf->refcnt++;
 1491 }
 1492 
 1493 /*
 1494  * Lose a reference to an sf_buf. When none left, detach mapped page
 1495  * and release resources back to the system.
 1496  *
 1497  * Must be called at splimp.
 1498  */
 1499 void
 1500 sf_buf_free(caddr_t addr, u_int size)
 1501 {
 1502         struct sf_buf *sf;
 1503         struct vm_page *m;
 1504         int s;
 1505 
 1506         sf = dtosf(addr);
 1507         if (sf->refcnt == 0)
 1508                 panic("sf_buf_free: freeing free sf_buf");
 1509         sf->refcnt--;
 1510         if (sf->refcnt == 0) {
 1511                 nsfbufsused--;
 1512                 pmap_qremove((vm_offset_t)addr, 1);
 1513                 m = sf->m;
 1514                 s = splvm();
 1515                 vm_page_unwire(m, 0);
 1516                 /*
 1517                  * Check for the object going away on us. This can
 1518                  * happen since we don't hold a reference to it.
 1519                  * If so, we're responsible for freeing the page.
 1520                  */
 1521                 if (m->wire_count == 0 && m->object == NULL)
 1522                         vm_page_free(m);
 1523                 splx(s);
 1524                 sf->m = NULL;
 1525                 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
 1526                 if (sf_buf_alloc_want) {
 1527                         sf_buf_alloc_want = 0;
 1528                         wakeup(&sf_freelist);
 1529                 }
 1530         }
 1531 }
 1532 
 1533 /*
 1534  * sendfile(2).
 1535  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 1536  *       struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 1537  *
 1538  * Send a file specified by 'fd' and starting at 'offset' to a socket
 1539  * specified by 's'. Send only 'nbytes' of the file or until EOF if
 1540  * nbytes == 0. Optionally add a header and/or trailer to the socket
 1541  * output. If specified, write the total number of bytes sent into *sbytes.
 1542  */
 1543 int
 1544 sendfile(struct proc *p, struct sendfile_args *uap)
 1545 {
 1546 
 1547         return (do_sendfile(p, uap, 0));
 1548 }
 1549 
 1550 #ifdef COMPAT_43
 1551 int
 1552 osendfile(struct proc *p, struct osendfile_args *uap)
 1553 {
 1554         struct sendfile_args args;
 1555 
 1556         args.fd = uap->fd;
 1557         args.s = uap->s;
 1558         args.offset = uap->offset;
 1559         args.nbytes = uap->nbytes;
 1560         args.hdtr = uap->hdtr;
 1561         args.sbytes = uap->sbytes;
 1562         args.flags = uap->flags;
 1563 
 1564         return (do_sendfile(p, &args, 1));
 1565 }
 1566 #endif
 1567 
 1568 int
 1569 do_sendfile(struct proc *p, struct sendfile_args *uap, int compat)
 1570 {
 1571         struct file *fp;
 1572         struct filedesc *fdp = p->p_fd;
 1573         struct vnode *vp;
 1574         struct vm_object *obj;
 1575         struct socket *so;
 1576         struct mbuf *m;
 1577         struct sf_buf *sf;
 1578         struct vm_page *pg;
 1579         struct writev_args nuap;
 1580         struct sf_hdtr hdtr;
 1581         off_t off, xfsize, hdtr_size, sbytes = 0;
 1582         int error = 0, s;
 1583 
 1584         vp = NULL;
 1585         hdtr_size = 0;
 1586         /*
 1587          * Do argument checking. Must be a regular file in, stream
 1588          * type and connected socket out, positive offset.
 1589          */
 1590         fp = holdfp(fdp, uap->fd, FREAD);
 1591         if (fp == NULL) {
 1592                 error = EBADF;
 1593                 goto done;
 1594         }
 1595         if (fp->f_type != DTYPE_VNODE) {
 1596                 error = EINVAL;
 1597                 goto done;
 1598         }
 1599         vp = (struct vnode *)fp->f_data;
 1600         vref(vp);
 1601         if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
 1602                 error = EINVAL;
 1603                 goto done;
 1604         }
 1605         fdrop(fp, p);
 1606         error = holdsock(p->p_fd, uap->s, &fp);
 1607         if (error)
 1608                 goto done;
 1609         so = (struct socket *)fp->f_data;
 1610         if (so->so_type != SOCK_STREAM) {
 1611                 error = EINVAL;
 1612                 goto done;
 1613         }
 1614         if ((so->so_state & SS_ISCONNECTED) == 0) {
 1615                 error = ENOTCONN;
 1616                 goto done;
 1617         }
 1618         if (uap->offset < 0) {
 1619                 error = EINVAL;
 1620                 goto done;
 1621         }
 1622 
 1623         /*
 1624          * If specified, get the pointer to the sf_hdtr struct for
 1625          * any headers/trailers.
 1626          */
 1627         if (uap->hdtr != NULL) {
 1628                 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 1629                 if (error)
 1630                         goto done;
 1631                 /*
 1632                  * Send any headers. Wimp out and use writev(2).
 1633                  */
 1634                 if (hdtr.headers != NULL) {
 1635                         nuap.fd = uap->s;
 1636                         nuap.iovp = hdtr.headers;
 1637                         nuap.iovcnt = hdtr.hdr_cnt;
 1638                         error = writev(p, &nuap);
 1639                         if (error)
 1640                                 goto done;
 1641                         if (compat)
 1642                                 sbytes += p->p_retval[0];
 1643                         else
 1644                                 hdtr_size += p->p_retval[0];
 1645                 }
 1646         }
 1647 
 1648         /*
 1649          * Protect against multiple writers to the socket.
 1650          */
 1651         (void) sblock(&so->so_snd, M_WAITOK);
 1652 
 1653         /*
 1654          * Loop through the pages in the file, starting with the requested
 1655          * offset. Get a file page (do I/O if necessary), map the file page
 1656          * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 1657          * it on the socket.
 1658          */
 1659         for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
 1660                 vm_pindex_t pindex;
 1661                 vm_offset_t pgoff;
 1662 
 1663                 pindex = OFF_TO_IDX(off);
 1664 retry_lookup:
 1665                 /*
 1666                  * Calculate the amount to transfer. Not to exceed a page,
 1667                  * the EOF, or the passed in nbytes.
 1668                  */
 1669                 xfsize = obj->un_pager.vnp.vnp_size - off;
 1670                 if (xfsize > PAGE_SIZE)
 1671                         xfsize = PAGE_SIZE;
 1672                 pgoff = (vm_offset_t)(off & PAGE_MASK);
 1673                 if (PAGE_SIZE - pgoff < xfsize)
 1674                         xfsize = PAGE_SIZE - pgoff;
 1675                 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
 1676                         xfsize = uap->nbytes - sbytes;
 1677                 if (xfsize <= 0)
 1678                         break;
 1679                 /*
 1680                  * Optimize the non-blocking case by looking at the socket space
 1681                  * before going to the extra work of constituting the sf_buf.
 1682                  */
 1683                 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
 1684                         if (so->so_state & SS_CANTSENDMORE)
 1685                                 error = EPIPE;
 1686                         else
 1687                                 error = EAGAIN;
 1688                         sbunlock(&so->so_snd);
 1689                         goto done;
 1690                 }
 1691                 /*
 1692                  * Attempt to look up the page.  
 1693                  *
 1694                  *      Allocate if not found
 1695                  *
 1696                  *      Wait and loop if busy.
 1697                  */
 1698                 pg = vm_page_lookup(obj, pindex);
 1699 
 1700                 if (pg == NULL) {
 1701                         pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
 1702                         if (pg == NULL) {
 1703                                 VM_WAIT;
 1704                                 goto retry_lookup;
 1705                         }
 1706                         vm_page_wakeup(pg);
 1707                 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
 1708                         goto retry_lookup;
 1709                 }
 1710 
 1711                 /*
 1712                  * Wire the page so it does not get ripped out from under
 1713                  * us. 
 1714                  */
 1715 
 1716                 vm_page_wire(pg);
 1717 
 1718                 /*
 1719                  * If page is not valid for what we need, initiate I/O
 1720                  */
 1721 
 1722                 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
 1723                         struct uio auio;
 1724                         struct iovec aiov;
 1725                         int bsize;
 1726 
 1727                         /*
 1728                          * Ensure that our page is still around when the I/O 
 1729                          * completes.
 1730                          */
 1731                         vm_page_io_start(pg);
 1732 
 1733                         /*
 1734                          * Get the page from backing store.
 1735                          */
 1736                         bsize = vp->v_mount->mnt_stat.f_iosize;
 1737                         auio.uio_iov = &aiov;
 1738                         auio.uio_iovcnt = 1;
 1739                         aiov.iov_base = 0;
 1740                         aiov.iov_len = MAXBSIZE;
 1741                         auio.uio_resid = MAXBSIZE;
 1742                         auio.uio_offset = trunc_page(off);
 1743                         auio.uio_segflg = UIO_NOCOPY;
 1744                         auio.uio_rw = UIO_READ;
 1745                         auio.uio_procp = p;
 1746                         vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
 1747                         error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
 1748                                 p->p_ucred);
 1749                         VOP_UNLOCK(vp, 0, p);
 1750                         vm_page_flag_clear(pg, PG_ZERO);
 1751                         vm_page_io_finish(pg);
 1752                         if (error) {
 1753                                 vm_page_unwire(pg, 0);
 1754                                 /*
 1755                                  * See if anyone else might know about this page.
 1756                                  * If not and it is not valid, then free it.
 1757                                  */
 1758                                 if (pg->wire_count == 0 && pg->valid == 0 &&
 1759                                     pg->busy == 0 && !(pg->flags & PG_BUSY) &&
 1760                                     pg->hold_count == 0) {
 1761                                         vm_page_busy(pg);
 1762                                         vm_page_free(pg);
 1763                                 }
 1764                                 sbunlock(&so->so_snd);
 1765                                 goto done;
 1766                         }
 1767                         mbstat.sf_iocnt++;
 1768                 }
 1769 
 1770 
 1771                 /*
 1772                  * Get a sendfile buf. We usually wait as long as necessary,
 1773                  * but this wait can be interrupted.
 1774                  */
 1775                 if ((sf = sf_buf_alloc()) == NULL) {
 1776                         mbstat.sf_allocfail++;
 1777                         s = splvm();
 1778                         vm_page_unwire(pg, 0);
 1779                         if (pg->wire_count == 0 && pg->object == NULL)
 1780                                 vm_page_free(pg);
 1781                         splx(s);
 1782                         sbunlock(&so->so_snd);
 1783                         error = EINTR;
 1784                         goto done;
 1785                 }
 1786 
 1787 
 1788                 /*
 1789                  * Allocate a kernel virtual page and insert the physical page
 1790                  * into it.
 1791                  */
 1792 
 1793                 sf->m = pg;
 1794                 pmap_qenter(sf->kva, &pg, 1);
 1795                 /*
 1796                  * Get an mbuf header and set it up as having external storage.
 1797                  */
 1798                 MGETHDR(m, M_WAIT, MT_DATA);
 1799                 if (m == NULL) {
 1800                         error = ENOBUFS;
 1801                         sf_buf_free((void *)sf->kva, PAGE_SIZE);
 1802                         sbunlock(&so->so_snd);
 1803                         goto done;
 1804                 }
 1805                 m->m_ext.ext_free = sf_buf_free;
 1806                 m->m_ext.ext_ref = sf_buf_ref;
 1807                 m->m_ext.ext_buf = (void *)sf->kva;
 1808                 m->m_ext.ext_size = PAGE_SIZE;
 1809                 m->m_data = (char *) sf->kva + pgoff;
 1810                 m->m_flags |= M_EXT;
 1811                 m->m_pkthdr.len = m->m_len = xfsize;
 1812                 /*
 1813                  * Add the buffer to the socket buffer chain.
 1814                  */
 1815                 s = splnet();
 1816 retry_space:
 1817                 /*
 1818                  * Make sure that the socket is still able to take more data.
 1819                  * CANTSENDMORE being true usually means that the connection
 1820                  * was closed. so_error is true when an error was sensed after
 1821                  * a previous send.
 1822                  * The state is checked after the page mapping and buffer
 1823                  * allocation above since those operations may block and make
 1824                  * any socket checks stale. From this point forward, nothing
 1825                  * blocks before the pru_send (or more accurately, any blocking
 1826                  * results in a loop back to here to re-check).
 1827                  */
 1828                 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
 1829                         if (so->so_state & SS_CANTSENDMORE) {
 1830                                 error = EPIPE;
 1831                         } else {
 1832                                 error = so->so_error;
 1833                                 so->so_error = 0;
 1834                         }
 1835                         m_freem(m);
 1836                         sbunlock(&so->so_snd);
 1837                         splx(s);
 1838                         goto done;
 1839                 }
 1840                 /*
 1841                  * Wait for socket space to become available. We do this just
 1842                  * after checking the connection state above in order to avoid
 1843                  * a race condition with sbwait().
 1844                  */
 1845                 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
 1846                         if (so->so_state & SS_NBIO) {
 1847                                 m_freem(m);
 1848                                 sbunlock(&so->so_snd);
 1849                                 splx(s);
 1850                                 error = EAGAIN;
 1851                                 goto done;
 1852                         }
 1853                         error = sbwait(&so->so_snd);
 1854                         /*
 1855                          * An error from sbwait usually indicates that we've
 1856                          * been interrupted by a signal. If we've sent anything
 1857                          * then return bytes sent, otherwise return the error.
 1858                          */
 1859                         if (error) {
 1860                                 m_freem(m);
 1861                                 sbunlock(&so->so_snd);
 1862                                 splx(s);
 1863                                 goto done;
 1864                         }
 1865                         goto retry_space;
 1866                 }
 1867                 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
 1868                 splx(s);
 1869                 if (error) {
 1870                         sbunlock(&so->so_snd);
 1871                         goto done;
 1872                 }
 1873         }
 1874         sbunlock(&so->so_snd);
 1875 
 1876         /*
 1877          * Send trailers. Wimp out and use writev(2).
 1878          */
 1879         if (uap->hdtr != NULL && hdtr.trailers != NULL) {
 1880                         nuap.fd = uap->s;
 1881                         nuap.iovp = hdtr.trailers;
 1882                         nuap.iovcnt = hdtr.trl_cnt;
 1883                         error = writev(p, &nuap);
 1884                         if (error)
 1885                                 goto done;
 1886                         if (compat)
 1887                                 sbytes += p->p_retval[0];
 1888                         else
 1889                                 hdtr_size += p->p_retval[0];
 1890         }
 1891 
 1892 done:
 1893         if (uap->sbytes != NULL) {
 1894                 if (compat == 0)
 1895                         sbytes += hdtr_size;
 1896                 copyout(&sbytes, uap->sbytes, sizeof(off_t));
 1897         }
 1898         if (vp)
 1899                 vrele(vp);
 1900         if (fp)
 1901                 fdrop(fp, p);
 1902         /*
 1903          * sendfile cannot be restarted.
 1904          */
 1905         if (error == ERESTART)
 1906                 error = EINTR;
 1907         return (error);
 1908 }

Cache object: 6a7e1eeb4fc4f0e591698f78d0b45f24


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.