The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_socket.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      @(#)uipc_socket.c       8.3 (Berkeley) 4/15/94
   34  * $FreeBSD$
   35  */
   36 
   37 #include "opt_inet.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/systm.h>
   41 #include <sys/fcntl.h>
   42 #include <sys/malloc.h>
   43 #include <sys/mbuf.h>
   44 #include <sys/domain.h>
   45 #include <sys/file.h>                   /* for struct knote */
   46 #include <sys/kernel.h>
   47 #include <sys/malloc.h>
   48 #include <sys/event.h>
   49 #include <sys/poll.h>
   50 #include <sys/proc.h>
   51 #include <sys/protosw.h>
   52 #include <sys/socket.h>
   53 #include <sys/socketvar.h>
   54 #include <sys/resourcevar.h>
   55 #include <sys/signalvar.h>
   56 #include <sys/sysctl.h>
   57 #include <sys/uio.h>
   58 #include <sys/jail.h>
   59 #include <vm/vm_zone.h>
   60 
   61 #include <machine/limits.h>
   62 
   63 #ifdef INET
      /* Accept-filter option handler; only declared when INET is configured. */
   64 static int       do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
   65 #endif /* INET */
   66 
      /*
       * Forward declarations of the knote filter routines that the filterops
       * tables below refer to.
       */
   67 static void     filt_sordetach(struct knote *kn);
   68 static int      filt_soread(struct knote *kn, long hint);
   69 static void     filt_sowdetach(struct knote *kn);
   70 static int      filt_sowrite(struct knote *kn, long hint);
   71 static int      filt_solisten(struct knote *kn, long hint);
   72 
      /*
       * Filter operation tables for the listen, read and write cases.  The
       * listen and read tables share filt_sordetach as their third member.
       * NOTE(review): the initializers are positional; the meaning of the
       * leading 1 and of the NULL slot is not visible in this file -- confirm
       * against the declaration of struct filterops.
       */
   73 static struct filterops solisten_filtops = 
   74         { 1, NULL, filt_sordetach, filt_solisten };
   75 static struct filterops soread_filtops =
   76         { 1, NULL, filt_sordetach, filt_soread };
   77 static struct filterops sowrite_filtops = 
   78         { 1, NULL, filt_sowdetach, filt_sowrite };
   79 
      /* Zone that soalloc() allocates sockets from and sodealloc() frees them to. */
   80 struct  vm_zone *socket_zone;
   81 so_gen_t        so_gencnt;      /* generation count for sockets; bumped by soalloc() and sodealloc() */
   82 
      /* Malloc types defined by this file. */
   83 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
   84 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
   85 
   86 SYSCTL_DECL(_kern_ipc);
   87 
      /*
       * Ceiling that solisten() applies to the requested backlog.  Registered
       * under _kern_ipc with CTLFLAG_RW, so it can be changed at run time.
       */
   88 static int somaxconn = SOMAXCONN;
   89 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
   90     &somaxconn, 0, "Maximum pending socket connection queue size");
   91 
   92 /*
   93  * Socket operation routines.
   94  * These routines are called by the routines in
   95  * sys_socket.c or from a system process, and
   96  * implement the semantics of socket operations by
   97  * switching out to the protocol specific routines.
   98  */
   99 
  100 /*
  101  * Get a socket structure from our zone, and initialize it.
  102  * We don't implement `waitok' yet (see comments in uipc_domain.c).
  103  * Note that it would probably be better to allocate socket
  104  * and PCB at the same time, but I'm not convinced that all
  105  * the protocols can be easily modified to do this.
       *
       * On success the structure is zeroed, stamped with a fresh generation
       * number and given an empty so_aiojobq.  The result of zalloci() is
       * returned unchanged, so a failed allocation yields a null pointer.
       * The `waitok' argument is accepted but never consulted.
  106  */
  107 struct socket *
  108 soalloc(waitok)
  109         int waitok;
  110 {
  111         struct socket *so;
  112 
  113         so = zalloci(socket_zone);
  114         if (so) {
  115                 /* XXX race condition for reentrant kernel */
  116                 bzero(so, sizeof *so);
  117                 so->so_gencnt = ++so_gencnt;
  118                 TAILQ_INIT(&so->so_aiojobq);
  119         }
  120         return so;
  121 }
  122 
      /*
       * Create a socket of type `type' in domain `dom' and attach it to its
       * protocol.
       *
       * The protocol switch entry is looked up with pffindproto(dom, proto,
       * type) when `proto' is nonzero and with pffindtype(dom, type) otherwise.
       * On success the new socket is stored through `aso' and 0 is returned.
       *
       * Errors produced directly by this routine:
       *   EPROTONOSUPPORT  no protocol entry was found, it has no pru_attach
       *                    routine, or p->p_prison is set while
       *                    jail_socket_unixiproute_only is nonzero and the
       *                    family is not PF_LOCAL, PF_INET or PF_ROUTE;
       *   EPROTOTYPE       the entry found has a pr_type different from `type';
       *   ENOBUFS          soalloc() returned a null pointer.
       * An error from pru_attach is returned as-is after the socket is freed.
       *
       * NOTE(review): `p' is dereferenced unconditionally (p->p_prison,
       * p->p_ucred) although soalloc() is passed `p != 0'; callers presumably
       * never pass a null proc -- confirm.
       */
  123 int
  124 socreate(dom, aso, type, proto, p)
  125         int dom;
  126         struct socket **aso;
  127         register int type;
  128         int proto;
  129         struct proc *p;
  130 {
  131         register struct protosw *prp;
  132         register struct socket *so;
  133         register int error;
  134 
  135         if (proto)
  136                 prp = pffindproto(dom, proto, type);
  137         else
  138                 prp = pffindtype(dom, type);
  139 
  140         if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
  141                 return (EPROTONOSUPPORT);
  142 
  143         if (p->p_prison && jail_socket_unixiproute_only &&
  144             prp->pr_domain->dom_family != PF_LOCAL &&
  145             prp->pr_domain->dom_family != PF_INET &&
  146             prp->pr_domain->dom_family != PF_ROUTE) {
  147                 return (EPROTONOSUPPORT);
  148         }
  149 
  150         if (prp->pr_type != type)
  151                 return (EPROTOTYPE);
  152         so = soalloc(p != 0);
  153         if (so == 0)
  154                 return (ENOBUFS);
  155 
  156         TAILQ_INIT(&so->so_incomp);
  157         TAILQ_INIT(&so->so_comp);
  158         so->so_type = type;
              /* The crhold() below is paired with the crfree() in sodealloc(). */
  159         so->so_cred = p->p_ucred;
  160         crhold(so->so_cred);
  161         so->so_proto = prp;
  162         error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
  163         if (error) {
                      /*
                       * sofree() returns early unless SS_NOFDREF is set, so
                       * set the flag before asking it to free the socket.
                       */
  164                 so->so_state |= SS_NOFDREF;
  165                 sofree(so);
  166                 return (error);
  167         }
  168         *aso = so;
  169         return (0);
  170 }
  171 
      /*
       * Hand a bind request to the socket's protocol.
       *
       * Calls the protocol's pru_bind routine with (so, nam, p) between
       * splnet() and splx(), and returns whatever that routine returns.
       */
  172 int
  173 sobind(so, nam, p)
  174         struct socket *so;
  175         struct sockaddr *nam;
  176         struct proc *p;
  177 {
  178         int s = splnet();
  179         int error;
  180 
  181         error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
  182         splx(s);
  183         return (error);
  184 }
  185 
      /*
       * Tear down a socket structure and return it to socket_zone.
       *
       * Steps, in order: advance the generation count; for each of the receive
       * and send buffers whose sb_hiwat is nonzero, call chgsbsize() with a
       * new size of 0; under INET, dispose of any accept-filter state; drop
       * the credential with crfree(); free the structure with zfreei().
       *
       * NOTE(review): the chgsbsize() calls presumably give back the buffer
       * space accounted to the credential's uidinfo -- confirm against
       * chgsbsize().
       */
  186 void
  187 sodealloc(so)
  188         struct socket *so;
  189 {
  190 
  191         so->so_gencnt = ++so_gencnt;
  192         if (so->so_rcv.sb_hiwat)
  193                 (void)chgsbsize(so->so_cred->cr_uidinfo,
  194                     &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
  195         if (so->so_snd.sb_hiwat)
  196                 (void)chgsbsize(so->so_cred->cr_uidinfo,
  197                     &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
  198 #ifdef INET
  199         if (so->so_accf != NULL) {
                      /* Let the filter clean up first, if it has a destroy hook. */
  200                 if (so->so_accf->so_accept_filter != NULL && 
  201                         so->so_accf->so_accept_filter->accf_destroy != NULL) {
  202                         so->so_accf->so_accept_filter->accf_destroy(so);
  203                 }
                      /* Then free the filter string (if any) and the container. */
  204                 if (so->so_accf->so_accept_filter_str != NULL)
  205                         FREE(so->so_accf->so_accept_filter_str, M_ACCF);
  206                 FREE(so->so_accf, M_ACCF);
  207         }
  208 #endif /* INET */
  209         crfree(so->so_cred);
  210         zfreei(socket_zone, so);
  211 }
  212 
      /*
       * Prepare a socket to accept connections.
       *
       * Returns EINVAL if the socket is already connected or connecting, or
       * the error from the protocol's pru_listen routine if that fails;
       * otherwise 0.  A backlog that is negative or larger than somaxconn is
       * replaced by somaxconn before being stored in so_qlimit.  The whole
       * operation runs between splnet() and splx().
       */
  213 int
  214 solisten(so, backlog, p)
  215         register struct socket *so;
  216         int backlog;
  217         struct proc *p;
  218 {
  219         int s, error;
  220 
  221         s = splnet();
  222         if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) {
  223                 splx(s);
  224                 return (EINVAL);
  225         }
  226         error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
  227         if (error) {
  228                 splx(s);
  229                 return (error);
  230         }
              /* SO_ACCEPTCONN is only set while the completed queue is empty. */
  231         if (TAILQ_EMPTY(&so->so_comp))
  232                 so->so_options |= SO_ACCEPTCONN;
  233         if (backlog < 0 || backlog > somaxconn)
  234                 backlog = somaxconn;
  235         so->so_qlimit = backlog;
  236         splx(s);
  237         return (0);
  238 }
  239 
      /*
       * Free a socket, but only once nothing refers to it any more.
       *
       * Nothing happens while the socket still has a protocol control block
       * (so_pcb) or while SS_NOFDREF is clear.  If the socket hangs off a
       * listening socket (so_head):
       *   - SS_INCOMP: it is unlinked from the head's so_incomp queue and the
       *     head's so_incqlen is decremented;
       *   - SS_COMP: it is left alone and the routine returns (see below);
       *   - neither flag: panic.
       * After that the send buffer is released, the receive side is flushed
       * and the structure is handed to sodealloc().
       */
  240 void
  241 sofree(so)
  242         register struct socket *so;
  243 {
  244         struct socket *head = so->so_head;
  245 
  246         if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
  247                 return;
  248         if (head != NULL) {
  249                 if (so->so_state & SS_INCOMP) {
  250                         TAILQ_REMOVE(&head->so_incomp, so, so_list);
  251                         head->so_incqlen--;
  252                 } else if (so->so_state & SS_COMP) {
  253                         /*
  254                          * We must not decommission a socket that's
  255                          * on the accept(2) queue.  If we do, then
  256                          * accept(2) may hang after select(2) indicated
  257                          * that the listening socket was ready.
  258                          */
  259                         return;
  260                 } else {
  261                         panic("sofree: not queued");
  262                 }
                      /* Only the SS_INCOMP case reaches this point. */
  263                 so->so_state &= ~SS_INCOMP;
  264                 so->so_head = NULL;
  265         }
  266         sbrelease(&so->so_snd, so);
  267         sorflush(so);
  268         sodealloc(so);
  269 }
  270 
  271 /*
  272  * Close a socket on last file table reference removal.
  273  * Initiate disconnect if connected.
  274  * Free socket when disconnect complete.
       *
       * For a socket with SO_ACCEPTCONN set, every socket on its incomplete
       * and completed queues is aborted first.  If SO_LINGER is set and the
       * socket is still connected, the caller sleeps on so_timeo (timeout
       * so_linger * hz) until SS_ISCONNECTED clears or tsleep() returns
       * nonzero; a non-blocking socket that is already disconnecting skips
       * the wait.
       *
       * Returns the first error seen from sodisconnect() or tsleep(); when
       * there was none, the result of the protocol's pru_detach routine (or 0
       * if there was no PCB to detach).  Panics if SS_NOFDREF is already set.
  275  */
  276 int
  277 soclose(so)
  278         register struct socket *so;
  279 {
  280         int s = splnet();               /* conservative */
  281         int error = 0;
  282 
  283         funsetown(so->so_sigio);
  284         if (so->so_options & SO_ACCEPTCONN) {
  285                 struct socket *sp, *sonext;
  286 
                      /*
                       * In both loops the successor is fetched before sp is
                       * aborted, since soabort() may end up freeing sp.
                       */
  287                 sp = TAILQ_FIRST(&so->so_incomp);
  288                 for (; sp != NULL; sp = sonext) {
  289                         sonext = TAILQ_NEXT(sp, so_list);
  290                         (void) soabort(sp);
  291                 }
  292                 for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) {
  293                         sonext = TAILQ_NEXT(sp, so_list);
  294                         /* Dequeue from so_comp since sofree() won't do it */
  295                         TAILQ_REMOVE(&so->so_comp, sp, so_list);
  296                         so->so_qlen--;
  297                         sp->so_state &= ~SS_COMP;
  298                         sp->so_head = NULL;
  299                         (void) soabort(sp);
  300                 }
  301         }
  302         if (so->so_pcb == 0)
  303                 goto discard;
  304         if (so->so_state & SS_ISCONNECTED) {
  305                 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
  306                         error = sodisconnect(so);
  307                         if (error)
  308                                 goto drop;
  309                 }
  310                 if (so->so_options & SO_LINGER) {
  311                         if ((so->so_state & SS_ISDISCONNECTING) &&
  312                             (so->so_state & SS_NBIO))
  313                                 goto drop;
  314                         while (so->so_state & SS_ISCONNECTED) {
  315                                 error = tsleep((caddr_t)&so->so_timeo,
  316                                     PSOCK | PCATCH, "soclos", so->so_linger * hz);
  317                                 if (error)
  318                                         break;
  319                         }
  320                 }
  321         }
  322 drop:
  323         if (so->so_pcb) {
                      /* An earlier error takes precedence over the detach result. */
  324                 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
  325                 if (error == 0)
  326                         error = error2;
  327         }
  328 discard:
  329         if (so->so_state & SS_NOFDREF)
  330                 panic("soclose: NOFDREF");
              /* sofree() only acts once SS_NOFDREF is set. */
  331         so->so_state |= SS_NOFDREF;
  332         sofree(so);
  333         splx(s);
  334         return (error);
  335 }
  336 
  337 /*
  338  * Must be called at splnet...
       *
       * Ask the protocol to abort the socket via its pru_abort routine.  If
       * that routine reports an error, sofree() is called on the socket and
       * the error is returned; otherwise 0 is returned and no further action
       * is taken here.
  339  */
  340 int
  341 soabort(so)
  342         struct socket *so;
  343 {
  344         int error;
  345 
  346         error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
  347         if (error) {
  348                 sofree(so);
  349                 return error;
  350         }
  351         return (0);
  352 }
  353 
      /*
       * Accept a connection on `so'.
       *
       * The socket must still have SS_NOFDREF set; the routine panics if it
       * does not.  The flag is cleared and the protocol's pru_accept routine
       * is called with (so, nam); its result is returned.  Runs between
       * splnet() and splx().
       */
  354 int
  355 soaccept(so, nam)
  356         register struct socket *so;
  357         struct sockaddr **nam;
  358 {
  359         int s = splnet();
  360         int error;
  361 
  362         if ((so->so_state & SS_NOFDREF) == 0)
  363                 panic("soaccept: !NOFDREF");
  364         so->so_state &= ~SS_NOFDREF;
  365         error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
  366         splx(s);
  367         return (error);
  368 }
  369 
      /*
       * Connect socket `so' to address `nam'.
       *
       * Returns EOPNOTSUPP for a socket with SO_ACCEPTCONN set.  Returns
       * EISCONN when the socket is already connected or connecting and either
       * the protocol has PR_CONNREQUIRED set or the implicit sodisconnect()
       * fails; note that in the latter case the specific error from
       * sodisconnect() is replaced by EISCONN.  Otherwise returns the result
       * of the protocol's pru_connect routine.
       */
  370 int
  371 soconnect(so, nam, p)
  372         register struct socket *so;
  373         struct sockaddr *nam;
  374         struct proc *p;
  375 {
  376         int s;
  377         int error;
  378 
  379         if (so->so_options & SO_ACCEPTCONN)
  380                 return (EOPNOTSUPP);
  381         s = splnet();
  382         /*
  383          * If protocol is connection-based, can only connect once.
  384          * Otherwise, if connected, try to disconnect first.
  385          * This allows user to disconnect by connecting to, e.g.,
  386          * a null address.
  387          */
  388         if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
  389             ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
  390             (error = sodisconnect(so))))
  391                 error = EISCONN;
  392         else
  393                 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
  394         splx(s);
  395         return (error);
  396 }
  397 
      /*
       * Connect two sockets to each other.
       *
       * Calls the pru_connect2 routine of so1's protocol with (so1, so2)
       * between splnet() and splx(), and returns its result.
       */
  398 int
  399 soconnect2(so1, so2)
  400         register struct socket *so1;
  401         struct socket *so2;
  402 {
  403         int s = splnet();
  404         int error;
  405 
  406         error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
  407         splx(s);
  408         return (error);
  409 }
  410 
      /*
       * Start disconnecting a socket.
       *
       * Returns ENOTCONN if SS_ISCONNECTED is clear, EALREADY if
       * SS_ISDISCONNECTING is already set, and otherwise the result of the
       * protocol's pru_disconnect routine.  Runs between splnet() and splx().
       */
  411 int
  412 sodisconnect(so)
  413         register struct socket *so;
  414 {
  415         int s = splnet();
  416         int error;
  417 
  418         if ((so->so_state & SS_ISCONNECTED) == 0) {
  419                 error = ENOTCONN;
  420                 goto bad;
  421         }
  422         if (so->so_state & SS_ISDISCONNECTING) {
  423                 error = EALREADY;
  424                 goto bad;
  425         }
  426         error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
  427 bad:
  428         splx(s);
  429         return (error);
  430 }
  431 
  432 #define SBLOCKWAIT(f)   (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)     /* wait argument for sblock(), chosen from the MSG_* flags */
  433 /*
  434  * Send on a socket.
  435  * If send must go all at once and message is larger than
  436  * send buffering, then hard error.
  437  * Lock against other senders.
  438  * If must go all at once and not enough room now, then
  439  * inform user that this would block and do nothing.
  440  * Otherwise, if nonblocking, send as much as possible.
  441  * The data to be sent is described by "uio" if nonzero,
  442  * otherwise by the mbuf chain "top" (which must be null
  443  * if uio is not).  Data provided in mbuf chain must be small
  444  * enough to send all at once.
  445  *
  446  * Returns nonzero on error, timeout or signal; callers
  447  * must check for short counts if EINTR/ERESTART are returned.
  448  * Data and control buffers are freed on return.
       *
       * If "p" is non-null its ru_msgsnd counter is incremented.  Errors set
       * directly in this routine are EINVAL, EPIPE, ENOTCONN, EDESTADDRREQ,
       * EMSGSIZE, EWOULDBLOCK and ENOBUFS; a pending so_error is returned and
       * cleared, and errors from sblock(), sbwait(), uiomove() and the
       * protocol's pru_send routine are passed through.
  449  */
  450 int
  451 sosend(so, addr, uio, top, control, flags, p)
  452         register struct socket *so;
  453         struct sockaddr *addr;
  454         struct uio *uio;
  455         struct mbuf *top;
  456         struct mbuf *control;
  457         int flags;
  458         struct proc *p;
  459 {
  460         struct mbuf **mp;
  461         register struct mbuf *m;
  462         register long space, len, resid;
  463         int clen = 0, error, s, dontroute, mlen;
              /* Nonzero when sosendallatonce(so) is true or data arrives prepackaged in top. */
  464         int atomic = sosendallatonce(so) || top;
  465 
              /*
               * NOTE(review): top is dereferenced whenever uio is null; this
               * relies on the caller supplying exactly one of the two, as the
               * header comment requires.
               */
  466         if (uio)
  467                 resid = uio->uio_resid;
  468         else
  469                 resid = top->m_pkthdr.len;
  470         /*
  471          * In theory resid should be unsigned.
  472          * However, space must be signed, as it might be less than 0
  473          * if we over-committed, and we must use a signed comparison
  474          * of space and resid.  On the other hand, a negative resid
  475          * causes us to loop sending 0-length segments to the protocol.
  476          *
  477          * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
  478          * type sockets since that's an error.
  479          */
  480         if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
  481                 error = EINVAL;
  482                 goto out;
  483         }
  484 
              /*
               * dontroute is set when MSG_DONTROUTE was requested, the socket
               * does not already have SO_DONTROUTE, and the protocol is
               * PR_ATOMIC.  In that case SO_DONTROUTE is switched on around
               * each pru_send call below and switched off again afterwards.
               */
  485         dontroute =
  486             (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
  487             (so->so_proto->pr_flags & PR_ATOMIC);
  488         if (p)
  489                 p->p_stats->p_ru.ru_msgsnd++;
  490         if (control)
  491                 clen = control->m_len;
      /*
       * snderr: record the error, restore the priority level saved in s and
       * leave through the release label.  Only usable where s holds the value
       * returned by splnet().
       */
  492 #define snderr(errno)   { error = errno; splx(s); goto release; }
  493 
  494 restart:
  495         error = sblock(&so->so_snd, SBLOCKWAIT(flags));
  496         if (error)
  497                 goto out;
  498         do {
  499                 s = splnet();
  500                 if (so->so_state & SS_CANTSENDMORE)
  501                         snderr(EPIPE);
  502                 if (so->so_error) {
  503                         error = so->so_error;
  504                         so->so_error = 0;
  505                         splx(s);
  506                         goto release;
  507                 }
  508                 if ((so->so_state & SS_ISCONNECTED) == 0) {
  509                         /*
  510                          * `sendto' and `sendmsg' are allowed on a connection-
  511                          * based socket if it supports implied connect.
  512                          * Return ENOTCONN if not connected and no address is
  513                          * supplied.
  514                          */
  515                         if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
  516                             (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
  517                                 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
  518                                     !(resid == 0 && clen != 0))
  519                                         snderr(ENOTCONN);
  520                         } else if (addr == 0)
  521                             snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
  522                                    ENOTCONN : EDESTADDRREQ);
  523                 }
  524                 space = sbspace(&so->so_snd);
                      /*
                       * NOTE(review): out-of-band sends are granted 1024 bytes
                       * of extra room; the origin of that constant is not
                       * visible in this file.
                       */
  525                 if (flags & MSG_OOB)
  526                         space += 1024;
  527                 if ((atomic && resid > so->so_snd.sb_hiwat) ||
  528                     clen > so->so_snd.sb_hiwat)
  529                         snderr(EMSGSIZE);
                      /*
                       * Not enough room: fail with EWOULDBLOCK on a non-blocking
                       * socket, otherwise drop the sockbuf lock, wait for space
                       * and start over from restart.
                       */
  530                 if (space < resid + clen &&
  531                     (atomic || space < so->so_snd.sb_lowat || space < clen)) {
  532                         if (so->so_state & SS_NBIO)
  533                                 snderr(EWOULDBLOCK);
  534                         sbunlock(&so->so_snd);
  535                         error = sbwait(&so->so_snd);
  536                         splx(s);
  537                         if (error)
  538                                 goto out;
  539                         goto restart;
  540                 }
  541                 splx(s);
  542                 mp = &top;
  543                 space -= clen;
  544                 do {
  545                     if (uio == NULL) {
  546                         /*
  547                          * Data is prepackaged in "top".
  548                          */
  549                         resid = 0;
  550                         if (flags & MSG_EOR)
  551                                 top->m_flags |= M_EOR;
  552                     } else do {
                              /*
                               * Build the chain one mbuf at a time: a packet
                               * header mbuf for the first one, plain mbufs
                               * after that, with a cluster attached when at
                               * least MINCLSIZE bytes remain.
                               */
  553                         if (top == 0) {
  554                                 MGETHDR(m, M_WAIT, MT_DATA);
  555                                 if (m == NULL) {
  556                                         error = ENOBUFS;
  557                                         goto release;
  558                                 }
  559                                 mlen = MHLEN;
  560                                 m->m_pkthdr.len = 0;
  561                                 m->m_pkthdr.rcvif = (struct ifnet *)0;
  562                         } else {
  563                                 MGET(m, M_WAIT, MT_DATA);
  564                                 if (m == NULL) {
  565                                         error = ENOBUFS;
  566                                         goto release;
  567                                 }
  568                                 mlen = MLEN;
  569                         }
  570                         if (resid >= MINCLSIZE) {
  571                                 MCLGET(m, M_WAIT);
                                      /* No cluster attached: fall back to the mbuf's own storage. */
  572                                 if ((m->m_flags & M_EXT) == 0)
  573                                         goto nopages;
  574                                 mlen = MCLBYTES;
  575                                 len = min(min(mlen, resid), space);
  576                         } else {
  577 nopages:
  578                                 len = min(min(mlen, resid), space);
  579                                 /*
  580                                  * For datagram protocols, leave room
  581                                  * for protocol headers in first mbuf.
  582                                  */
  583                                 if (atomic && top == 0 && len < mlen)
  584                                         MH_ALIGN(m, len);
  585                         }
  586                         space -= len;
  587                         error = uiomove(mtod(m, caddr_t), (int)len, uio);
  588                         resid = uio->uio_resid;
  589                         m->m_len = len;
                              /*
                               * The mbuf is linked into the chain before the
                               * uiomove() result is examined, so that on error
                               * it is freed together with top at the out label.
                               */
  590                         *mp = m;
  591                         top->m_pkthdr.len += len;
  592                         if (error)
  593                                 goto release;
  594                         mp = &m->m_next;
  595                         if (resid <= 0) {
  596                                 if (flags & MSG_EOR)
  597                                         top->m_flags |= M_EOR;
  598                                 break;
  599                         }
  600                     } while (space > 0 && atomic);
  601                     if (dontroute)
  602                             so->so_options |= SO_DONTROUTE;
  603                     s = splnet();                               /* XXX */
  604                     /*
  605                      * XXX all the SS_CANTSENDMORE checks previously
  606                      * done could be out of date.  We could have received
  607                      * a reset packet in an interrupt or maybe we slept
  608                      * while doing page faults in uiomove() etc. We could
  609                      * probably recheck again inside the splnet() protection
  610                      * here, but there are probably other places that this
  611                      * also happens.  We must rethink this.
  612                      */
  613                     error = (*so->so_proto->pr_usrreqs->pru_send)(so,
  614                         (flags & MSG_OOB) ? PRUS_OOB :
  615                         /*
  616                          * If the user set MSG_EOF, the protocol
  617                          * understands this flag and nothing left to
  618                          * send then use PRU_SEND_EOF instead of PRU_SEND.
  619                          */
  620                         ((flags & MSG_EOF) &&
  621                          (so->so_proto->pr_flags & PR_IMPLOPCL) &&
  622                          (resid <= 0)) ?
  623                                 PRUS_EOF :
  624                         /* If there is more to send set PRUS_MORETOCOME */
  625                         (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
  626                         top, addr, control, p);
  627                     splx(s);
  628                     if (dontroute)
  629                             so->so_options &= ~SO_DONTROUTE;
                          /*
                           * top and control have been handed to pru_send
                           * (presumably it now owns them -- confirm); clear
                           * the local pointers so the cleanup at out does not
                           * free them a second time.
                           */
  630                     clen = 0;
  631                     control = 0;
  632                     top = 0;
  633                     mp = &top;
  634                     if (error)
  635                         goto release;
  636                 } while (resid && space > 0);
  637         } while (resid);
  638 
  639 release:
  640         sbunlock(&so->so_snd);
  641 out:
  642         if (top)
  643                 m_freem(top);
  644         if (control)
  645                 m_freem(control);
  646         return (error);
  647 }
  648 
  649 /*
  650  * Implement receive operations on a socket.
  651  * We depend on the way that records are added to the sockbuf
  652  * by sbappend*.  In particular, each record (mbufs linked through m_next)
  653  * must begin with an address if the protocol so specifies,
  654  * followed by an optional mbuf or mbufs containing ancillary data,
  655  * and then zero or more mbufs of data.
  656  * In order to avoid blocking network interrupts for the entire time here,
  657  * we splx() while doing the actual copy to user space.
  658  * Although the sockbuf is locked, new data may still be appended,
  659  * and thus we must maintain consistency of the sockbuf during that time.
  660  *
  661  * The caller may receive the data as a single mbuf chain by supplying
  662  * an mbuf **mp0 for use in returning the chain.  The uio is then used
  663  * only for the count in uio_resid.
  664  */
  665 int
  666 soreceive(so, psa, uio, mp0, controlp, flagsp)
  667         register struct socket *so;
  668         struct sockaddr **psa;
  669         struct uio *uio;
  670         struct mbuf **mp0;
  671         struct mbuf **controlp;
  672         int *flagsp;
  673 {
  674         register struct mbuf *m, **mp;
  675         register int flags, len, error, s, offset;
  676         struct protosw *pr = so->so_proto;
  677         struct mbuf *nextrecord;
  678         int moff, type = 0;
  679         int orig_resid = uio->uio_resid;
  680 
  681         mp = mp0;
  682         if (psa)
  683                 *psa = 0;
  684         if (controlp)
  685                 *controlp = 0;
  686         if (flagsp)
  687                 flags = *flagsp &~ MSG_EOR;
  688         else
  689                 flags = 0;
  690         if (flags & MSG_OOB) {
  691                 m = m_get(M_WAIT, MT_DATA);
  692                 if (m == NULL)
  693                         return (ENOBUFS);
  694                 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
  695                 if (error)
  696                         goto bad;
  697                 do {
  698                         error = uiomove(mtod(m, caddr_t),
  699                             (int) min(uio->uio_resid, m->m_len), uio);
  700                         m = m_free(m);
  701                 } while (uio->uio_resid && error == 0 && m);
  702 bad:
  703                 if (m)
  704                         m_freem(m);
  705                 return (error);
  706         }
  707         if (mp)
  708                 *mp = (struct mbuf *)0;
  709         if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
  710                 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
  711 
  712 restart:
  713         error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
  714         if (error)
  715                 return (error);
  716         s = splnet();
  717 
  718         m = so->so_rcv.sb_mb;
  719         /*
  720          * If we have less data than requested, block awaiting more
  721          * (subject to any timeout) if:
  722          *   1. the current count is less than the low water mark, or
  723          *   2. MSG_WAITALL is set, and it is possible to do the entire
  724          *      receive operation at once if we block (resid <= hiwat).
  725          *   3. MSG_DONTWAIT is not set
  726          * If MSG_WAITALL is set but resid is larger than the receive buffer,
  727          * we have to do the receive in sections, and thus risk returning
  728          * a short count if a timeout or signal occurs after we start.
  729          */
  730         if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
  731             so->so_rcv.sb_cc < uio->uio_resid) &&
  732             (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
  733             ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
  734             m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
  735                 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
  736                 if (so->so_error) {
  737                         if (m)
  738                                 goto dontblock;
  739                         error = so->so_error;
  740                         if ((flags & MSG_PEEK) == 0)
  741                                 so->so_error = 0;
  742                         goto release;
  743                 }
  744                 if (so->so_state & SS_CANTRCVMORE) {
  745                         if (m)
  746                                 goto dontblock;
  747                         else
  748                                 goto release;
  749                 }
  750                 for (; m; m = m->m_next)
  751                         if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
  752                                 m = so->so_rcv.sb_mb;
  753                                 goto dontblock;
  754                         }
  755                 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
  756                     (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
  757                         error = ENOTCONN;
  758                         goto release;
  759                 }
  760                 if (uio->uio_resid == 0)
  761                         goto release;
  762                 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
  763                         error = EWOULDBLOCK;
  764                         goto release;
  765                 }
  766                 SBLASTRECORDCHK(&so->so_rcv);
  767                 SBLASTMBUFCHK(&so->so_rcv);
  768                 sbunlock(&so->so_rcv);
  769                 error = sbwait(&so->so_rcv);
  770                 splx(s);
  771                 if (error)
  772                         return (error);
  773                 goto restart;
  774         }
  775 dontblock:
  776         if (uio->uio_procp)
  777                 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
  778         SBLASTRECORDCHK(&so->so_rcv);
  779         SBLASTMBUFCHK(&so->so_rcv);
  780         nextrecord = m->m_nextpkt;
  781         if (pr->pr_flags & PR_ADDR) {
  782                 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
  783                 orig_resid = 0;
  784                 if (psa)
  785                         *psa = dup_sockaddr(mtod(m, struct sockaddr *),
  786                                             mp0 == 0);
  787                 if (flags & MSG_PEEK) {
  788                         m = m->m_next;
  789                 } else {
  790                         sbfree(&so->so_rcv, m);
  791                         so->so_rcv.sb_mb = m_free(m);
  792                         m = so->so_rcv.sb_mb;
  793                 }
  794         }
  795         while (m && m->m_type == MT_CONTROL && error == 0) {
  796                 if (flags & MSG_PEEK) {
  797                         if (controlp)
  798                                 *controlp = m_copy(m, 0, m->m_len);
  799                         m = m->m_next;
  800                 } else {
  801                         sbfree(&so->so_rcv, m);
  802                         if (controlp) {
  803                                 if (pr->pr_domain->dom_externalize &&
  804                                     mtod(m, struct cmsghdr *)->cmsg_type ==
  805                                     SCM_RIGHTS)
  806                                    error = (*pr->pr_domain->dom_externalize)(m);
  807                                 *controlp = m;
  808                                 so->so_rcv.sb_mb = m->m_next;
  809                                 m->m_next = 0;
  810                                 m = so->so_rcv.sb_mb;
  811                         } else {
  812                                 so->so_rcv.sb_mb = m_free(m);
  813                                 m = so->so_rcv.sb_mb;
  814                         }
  815                 }
  816                 if (controlp) {
  817                         orig_resid = 0;
  818                         controlp = &(*controlp)->m_next;
  819                 }
  820         }
  821         if (m) {
  822                 if ((flags & MSG_PEEK) == 0) {
  823                         m->m_nextpkt = nextrecord;
  824                         /*
  825                          * If nextrecord == NULL (this is a single chain),
  826                          * then sb_lastrecord may not be valid here if m
  827                          * was changed earlier.
  828                          */
  829                         if (nextrecord == NULL) {
  830                                 KASSERT(so->so_rcv.sb_mb == m,
  831                                         ("receive tailq 1"));
  832                                 so->so_rcv.sb_lastrecord = m;
  833                         }
  834                 }
  835                 type = m->m_type;
  836                 if (type == MT_OOBDATA)
  837                         flags |= MSG_OOB;
  838         } else {
  839                 if ((flags & MSG_PEEK) == 0) {
  840                         KASSERT(so->so_rcv.sb_mb == m,("receive tailq 2"));
  841                         so->so_rcv.sb_mb = nextrecord;
  842                         SB_EMPTY_FIXUP(&so->so_rcv);
  843                 }
  844         }
  845         SBLASTRECORDCHK(&so->so_rcv);
  846         SBLASTMBUFCHK(&so->so_rcv);
  847 
  848         moff = 0;
  849         offset = 0;
  850         while (m && uio->uio_resid > 0 && error == 0) {
  851                 if (m->m_type == MT_OOBDATA) {
  852                         if (type != MT_OOBDATA)
  853                                 break;
  854                 } else if (type == MT_OOBDATA)
  855                         break;
  856                 else
  857                     KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
  858                         ("receive 3"));
  859                 so->so_state &= ~SS_RCVATMARK;
  860                 len = uio->uio_resid;
  861                 if (so->so_oobmark && len > so->so_oobmark - offset)
  862                         len = so->so_oobmark - offset;
  863                 if (len > m->m_len - moff)
  864                         len = m->m_len - moff;
  865                 /*
  866                  * If mp is set, just pass back the mbufs.
  867                  * Otherwise copy them out via the uio, then free.
  868                  * Sockbuf must be consistent here (points to current mbuf,
  869                  * it points to next record) when we drop priority;
  870                  * we must note any additions to the sockbuf when we
  871                  * block interrupts again.
  872                  */
  873                 if (mp == 0) {
  874                         SBLASTRECORDCHK(&so->so_rcv);
  875                         SBLASTMBUFCHK(&so->so_rcv);
  876                         splx(s);
  877                         error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
  878                         s = splnet();
  879                         if (error)
  880                                 goto release;
  881                 } else
  882                         uio->uio_resid -= len;
  883                 if (len == m->m_len - moff) {
  884                         if (m->m_flags & M_EOR)
  885                                 flags |= MSG_EOR;
  886                         if (flags & MSG_PEEK) {
  887                                 m = m->m_next;
  888                                 moff = 0;
  889                         } else {
  890                                 nextrecord = m->m_nextpkt;
  891                                 sbfree(&so->so_rcv, m);
  892                                 if (mp) {
  893                                         *mp = m;
  894                                         mp = &m->m_next;
  895                                         so->so_rcv.sb_mb = m = m->m_next;
  896                                         *mp = (struct mbuf *)0;
  897                                 } else {
  898                                         so->so_rcv.sb_mb = m = m_free(m);
  899                                 }
  900                                 if (m) {
  901                                         m->m_nextpkt = nextrecord;
  902                                         if (nextrecord == NULL)
  903                                                 so->so_rcv.sb_lastrecord = m;
  904                                 } else {
  905                                         so->so_rcv.sb_mb = nextrecord;
  906                                         SB_EMPTY_FIXUP(&so->so_rcv);
  907                                 }
  908                                 SBLASTRECORDCHK(&so->so_rcv);
  909                                 SBLASTMBUFCHK(&so->so_rcv);
  910                         }
  911                 } else {
  912                         if (flags & MSG_PEEK)
  913                                 moff += len;
  914                         else {
  915                                 if (mp)
  916                                         *mp = m_copym(m, 0, len, M_WAIT);
  917                                 m->m_data += len;
  918                                 m->m_len -= len;
  919                                 so->so_rcv.sb_cc -= len;
  920                         }
  921                 }
  922                 if (so->so_oobmark) {
  923                         if ((flags & MSG_PEEK) == 0) {
  924                                 so->so_oobmark -= len;
  925                                 if (so->so_oobmark == 0) {
  926                                         so->so_state |= SS_RCVATMARK;
  927                                         break;
  928                                 }
  929                         } else {
  930                                 offset += len;
  931                                 if (offset == so->so_oobmark)
  932                                         break;
  933                         }
  934                 }
  935                 if (flags & MSG_EOR)
  936                         break;
  937                 /*
  938                  * If the MSG_WAITALL flag is set (for non-atomic socket),
  939                  * we must not quit until "uio->uio_resid == 0" or an error
  940                  * termination.  If a signal/timeout occurs, return
  941                  * with a short count but without error.
  942                  * Keep sockbuf locked against other readers.
  943                  */
  944                 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
  945                     !sosendallatonce(so) && !nextrecord) {
  946                         if (so->so_error || so->so_state & SS_CANTRCVMORE)
  947                                 break;
  948                         /*
  949                          * The window might have closed to zero, make
  950                          * sure we send an ack now that we've drained
  951                          * the buffer or we might end up blocking until
  952                          * the idle takes over (5 seconds).
  953                          */
  954                         if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
  955                                 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
  956                         SBLASTRECORDCHK(&so->so_rcv);
  957                         SBLASTMBUFCHK(&so->so_rcv);
  958                         error = sbwait(&so->so_rcv);
  959                         if (error) {
  960                                 sbunlock(&so->so_rcv);
  961                                 splx(s);
  962                                 return (0);
  963                         }
  964                         m = so->so_rcv.sb_mb;
  965                         if (m)
  966                                 nextrecord = m->m_nextpkt;
  967                 }
  968         }
  969 
  970         if (m && pr->pr_flags & PR_ATOMIC) {
  971                 flags |= MSG_TRUNC;
  972                 if ((flags & MSG_PEEK) == 0)
  973                         (void) sbdroprecord(&so->so_rcv);
  974         }
  975         if ((flags & MSG_PEEK) == 0) {
  976                 if (m == 0) {
  977                         /*
  978                          * First part is an inline SB_EMPTY_FIXUP().  Second
  979                          * part makes sure sb_lastrecord is up-to-date if
  980                          * there is still data in the socket buffer.
  981                          */
  982                         so->so_rcv.sb_mb = nextrecord;
  983                         if (so->so_rcv.sb_mb == NULL) {
  984                                 so->so_rcv.sb_mbtail = NULL;
  985                                 so->so_rcv.sb_lastrecord = NULL;
  986                         } else if (nextrecord->m_nextpkt == NULL)
  987                                 so->so_rcv.sb_lastrecord = nextrecord;
  988                 }
  989                 SBLASTRECORDCHK(&so->so_rcv);
  990                 SBLASTMBUFCHK(&so->so_rcv);
  991                 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
  992                         (*pr->pr_usrreqs->pru_rcvd)(so, flags);
  993         }
  994         if (orig_resid == uio->uio_resid && orig_resid &&
  995             (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
  996                 sbunlock(&so->so_rcv);
  997                 splx(s);
  998                 goto restart;
  999         }
 1000 
 1001         if (flagsp)
 1002                 *flagsp |= flags;
 1003 release:
 1004         sbunlock(&so->so_rcv);
 1005         splx(s);
 1006         return (error);
 1007 }
 1008 
 1009 int
 1010 soshutdown(so, how)
 1011         register struct socket *so;
 1012         register int how;
 1013 {
 1014         register struct protosw *pr = so->so_proto;
 1015 
 1016         if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
 1017                 return (EINVAL);
 1018 
 1019         if (how != SHUT_WR)
 1020                 sorflush(so);
 1021         if (how != SHUT_RD)
 1022                 return ((*pr->pr_usrreqs->pru_shutdown)(so));
 1023         return (0);
 1024 }
 1025 
 1026 void
 1027 sorflush(so)
 1028         register struct socket *so;
 1029 {
 1030         register struct sockbuf *sb = &so->so_rcv;
 1031         register struct protosw *pr = so->so_proto;
 1032         register int s;
 1033         struct sockbuf asb;
 1034 
 1035         sb->sb_flags |= SB_NOINTR;
 1036         (void) sblock(sb, M_WAITOK);
 1037         s = splimp();
 1038         socantrcvmore(so);
 1039         sbunlock(sb);
 1040         asb = *sb;
 1041         bzero((caddr_t)sb, sizeof (*sb));
 1042         if (asb.sb_flags & SB_KNOTE) {
 1043                 sb->sb_sel.si_note = asb.sb_sel.si_note;
 1044                 sb->sb_flags = SB_KNOTE;
 1045         }
 1046         splx(s);
 1047         if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
 1048                 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
 1049         sbrelease(&asb, so);
 1050 }
 1051 
 1052 #ifdef INET
 1053 static int
 1054 do_setopt_accept_filter(so, sopt)
 1055         struct  socket *so;
 1056         struct  sockopt *sopt;
 1057 {
 1058         struct accept_filter_arg        *afap = NULL;
 1059         struct accept_filter    *afp;
 1060         struct so_accf  *af = so->so_accf;
 1061         int     error = 0;
 1062 
 1063         /* do not set/remove accept filters on non listen sockets */
 1064         if ((so->so_options & SO_ACCEPTCONN) == 0) {
 1065                 error = EINVAL;
 1066                 goto out;
 1067         }
 1068 
 1069         /* removing the filter */
 1070         if (sopt == NULL || sopt->sopt_val == NULL) {
 1071                 if (af != NULL) {
 1072                         if (af->so_accept_filter != NULL && 
 1073                                 af->so_accept_filter->accf_destroy != NULL) {
 1074                                 af->so_accept_filter->accf_destroy(so);
 1075                         }
 1076                         if (af->so_accept_filter_str != NULL) {
 1077                                 FREE(af->so_accept_filter_str, M_ACCF);
 1078                         }
 1079                         FREE(af, M_ACCF);
 1080                         so->so_accf = NULL;
 1081                 }
 1082                 so->so_options &= ~SO_ACCEPTFILTER;
 1083                 return (0);
 1084         }
 1085         /* adding a filter */
 1086         /* must remove previous filter first */
 1087         if (af != NULL) {
 1088                 error = EINVAL;
 1089                 goto out;
 1090         }
 1091         /* don't put large objects on the kernel stack */
 1092         MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP, M_WAITOK);
 1093         error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
 1094         afap->af_name[sizeof(afap->af_name)-1] = '\0';
 1095         afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
 1096         if (error)
 1097                 goto out;
 1098         afp = accept_filt_get(afap->af_name);
 1099         if (afp == NULL) {
 1100                 error = ENOENT;
 1101                 goto out;
 1102         }
 1103         MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK);
 1104         bzero(af, sizeof(*af));
 1105         if (afp->accf_create != NULL) {
 1106                 if (afap->af_name[0] != '\0') {
 1107                         int len = strlen(afap->af_name) + 1;
 1108 
 1109                         MALLOC(af->so_accept_filter_str, char *, len, M_ACCF, M_WAITOK);
 1110                         strcpy(af->so_accept_filter_str, afap->af_name);
 1111                 }
 1112                 af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg);
 1113                 if (af->so_accept_filter_arg == NULL) {
 1114                         FREE(af->so_accept_filter_str, M_ACCF);
 1115                         FREE(af, M_ACCF);
 1116                         so->so_accf = NULL;
 1117                         error = EINVAL;
 1118                         goto out;
 1119                 }
 1120         }
 1121         af->so_accept_filter = afp;
 1122         so->so_accf = af;
 1123         so->so_options |= SO_ACCEPTFILTER;
 1124 out:
 1125         if (afap != NULL)
 1126                 FREE(afap, M_TEMP);
 1127         return (error);
 1128 }
 1129 #endif /* INET */
 1130 
 1131 /*
 1132  * Perhaps this routine, and sooptcopyout(), below, ought to come in
 1133  * an additional variant to handle the case where the option value needs
 1134  * to be some kind of integer, but not a specific size.
 1135  * In addition to their use here, these functions are also called by the
 1136  * protocol-level pr_ctloutput() routines.
 1137  */
 1138 int
 1139 sooptcopyin(sopt, buf, len, minlen)
 1140         struct  sockopt *sopt;
 1141         void    *buf;
 1142         size_t  len;
 1143         size_t  minlen;
 1144 {
 1145         size_t  valsize;
 1146 
 1147         /*
 1148          * If the user gives us more than we wanted, we ignore it,
 1149          * but if we don't get the minimum length the caller
 1150          * wants, we return EINVAL.  On success, sopt->sopt_valsize
 1151          * is set to however much we actually retrieved.
 1152          */
 1153         if ((valsize = sopt->sopt_valsize) < minlen)
 1154                 return EINVAL;
 1155         if (valsize > len)
 1156                 sopt->sopt_valsize = valsize = len;
 1157 
 1158         if (sopt->sopt_p != 0)
 1159                 return (copyin(sopt->sopt_val, buf, valsize));
 1160 
 1161         bcopy(sopt->sopt_val, buf, valsize);
 1162         return 0;
 1163 }
 1164 
 1165 int
 1166 sosetopt(so, sopt)
 1167         struct socket *so;
 1168         struct sockopt *sopt;
 1169 {
 1170         int     error, optval;
 1171         struct  linger l;
 1172         struct  timeval tv;
 1173         u_long  val;
 1174 
 1175         error = 0;
 1176         if (sopt->sopt_level != SOL_SOCKET) {
 1177                 if (so->so_proto && so->so_proto->pr_ctloutput)
 1178                         return ((*so->so_proto->pr_ctloutput)
 1179                                   (so, sopt));
 1180                 error = ENOPROTOOPT;
 1181         } else {
 1182                 switch (sopt->sopt_name) {
 1183 #ifdef INET
 1184                 case SO_ACCEPTFILTER:
 1185                         error = do_setopt_accept_filter(so, sopt);
 1186                         if (error)
 1187                                 goto bad;
 1188                         break;
 1189 #endif /* INET */
 1190                 case SO_LINGER:
 1191                         error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
 1192                         if (error)
 1193                                 goto bad;
 1194 
 1195                         so->so_linger = l.l_linger;
 1196                         if (l.l_onoff)
 1197                                 so->so_options |= SO_LINGER;
 1198                         else
 1199                                 so->so_options &= ~SO_LINGER;
 1200                         break;
 1201 
 1202                 case SO_DEBUG:
 1203                 case SO_KEEPALIVE:
 1204                 case SO_DONTROUTE:
 1205                 case SO_USELOOPBACK:
 1206                 case SO_BROADCAST:
 1207                 case SO_REUSEADDR:
 1208                 case SO_REUSEPORT:
 1209                 case SO_OOBINLINE:
 1210                 case SO_TIMESTAMP:
 1211                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1212                                             sizeof optval);
 1213                         if (error)
 1214                                 goto bad;
 1215                         if (optval)
 1216                                 so->so_options |= sopt->sopt_name;
 1217                         else
 1218                                 so->so_options &= ~sopt->sopt_name;
 1219                         break;
 1220 
 1221                 case SO_SNDBUF:
 1222                 case SO_RCVBUF:
 1223                 case SO_SNDLOWAT:
 1224                 case SO_RCVLOWAT:
 1225                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1226                                             sizeof optval);
 1227                         if (error)
 1228                                 goto bad;
 1229 
 1230                         /*
 1231                          * Values < 1 make no sense for any of these
 1232                          * options, so disallow them.
 1233                          */
 1234                         if (optval < 1) {
 1235                                 error = EINVAL;
 1236                                 goto bad;
 1237                         }
 1238 
 1239                         switch (sopt->sopt_name) {
 1240                         case SO_SNDBUF:
 1241                         case SO_RCVBUF:
 1242                                 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
 1243                                     &so->so_snd : &so->so_rcv, (u_long)optval,
 1244                                     so, curproc) == 0) {
 1245                                         error = ENOBUFS;
 1246                                         goto bad;
 1247                                 }
 1248                                 break;
 1249 
 1250                         /*
 1251                          * Make sure the low-water is never greater than
 1252                          * the high-water.
 1253                          */
 1254                         case SO_SNDLOWAT:
 1255                                 so->so_snd.sb_lowat =
 1256                                     (optval > so->so_snd.sb_hiwat) ?
 1257                                     so->so_snd.sb_hiwat : optval;
 1258                                 break;
 1259                         case SO_RCVLOWAT:
 1260                                 so->so_rcv.sb_lowat =
 1261                                     (optval > so->so_rcv.sb_hiwat) ?
 1262                                     so->so_rcv.sb_hiwat : optval;
 1263                                 break;
 1264                         }
 1265                         break;
 1266 
 1267                 case SO_SNDTIMEO:
 1268                 case SO_RCVTIMEO:
 1269                         error = sooptcopyin(sopt, &tv, sizeof tv,
 1270                                             sizeof tv);
 1271                         if (error)
 1272                                 goto bad;
 1273 
 1274                         /* assert(hz > 0); */
 1275                         if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
 1276                             tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
 1277                                 error = EDOM;
 1278                                 goto bad;
 1279                         }
 1280                         /* assert(tick > 0); */
 1281                         /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
 1282                         val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
 1283                         if (val > SHRT_MAX) {
 1284                                 error = EDOM;
 1285                                 goto bad;
 1286                         }
 1287                         if (val == 0 && tv.tv_usec != 0)
 1288                                 val = 1;
 1289 
 1290                         switch (sopt->sopt_name) {
 1291                         case SO_SNDTIMEO:
 1292                                 so->so_snd.sb_timeo = val;
 1293                                 break;
 1294                         case SO_RCVTIMEO:
 1295                                 so->so_rcv.sb_timeo = val;
 1296                                 break;
 1297                         }
 1298                         break;
 1299                 default:
 1300                         error = ENOPROTOOPT;
 1301                         break;
 1302                 }
 1303                 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
 1304                         (void) ((*so->so_proto->pr_ctloutput)
 1305                                   (so, sopt));
 1306                 }
 1307         }
 1308 bad:
 1309         return (error);
 1310 }
 1311 
 1312 /* Helper routine for getsockopt */
 1313 int
 1314 sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
 1315 {
 1316         int     error;
 1317         size_t  valsize;
 1318 
 1319         error = 0;
 1320 
 1321         /*
 1322          * Documented get behavior is that we always return a value,
 1323          * possibly truncated to fit in the user's buffer.
 1324          * Traditional behavior is that we always tell the user
 1325          * precisely how much we copied, rather than something useful
 1326          * like the total amount we had available for her.
 1327          * Note that this interface is not idempotent; the entire answer must
 1328          * generated ahead of time.
 1329          */
 1330         valsize = min(len, sopt->sopt_valsize);
 1331         sopt->sopt_valsize = valsize;
 1332         if (sopt->sopt_val != 0) {
 1333                 if (sopt->sopt_p != 0)
 1334                         error = copyout(buf, sopt->sopt_val, valsize);
 1335                 else
 1336                         bcopy(buf, sopt->sopt_val, valsize);
 1337         }
 1338         return error;
 1339 }
 1340 
 1341 int
 1342 sogetopt(so, sopt)
 1343         struct socket *so;
 1344         struct sockopt *sopt;
 1345 {
 1346         int     error, optval;
 1347         struct  linger l;
 1348         struct  timeval tv;
 1349         struct accept_filter_arg *afap;
 1350 
 1351         error = 0;
 1352         if (sopt->sopt_level != SOL_SOCKET) {
 1353                 if (so->so_proto && so->so_proto->pr_ctloutput) {
 1354                         return ((*so->so_proto->pr_ctloutput)
 1355                                   (so, sopt));
 1356                 } else
 1357                         return (ENOPROTOOPT);
 1358         } else {
 1359                 switch (sopt->sopt_name) {
 1360 #ifdef INET
 1361                 case SO_ACCEPTFILTER:
 1362                         if ((so->so_options & SO_ACCEPTCONN) == 0)
 1363                                 return (EINVAL);
 1364                         if ((so->so_options & SO_ACCEPTFILTER) == 0)
 1365                                 return (EINVAL);
 1366                         MALLOC(afap, struct accept_filter_arg *, sizeof(*afap),
 1367                                 M_TEMP, M_WAITOK);
 1368                         bzero(afap, sizeof(*afap));
 1369                         if ((so->so_options & SO_ACCEPTFILTER) != 0) {
 1370                                 strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
 1371                                 if (so->so_accf->so_accept_filter_str != NULL)
 1372                                         strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
 1373                         }
 1374                         error = sooptcopyout(sopt, afap, sizeof(*afap));
 1375                         FREE(afap, M_TEMP);
 1376                         break;
 1377 #endif /* INET */
 1378                         
 1379                 case SO_LINGER:
 1380                         l.l_onoff = so->so_options & SO_LINGER;
 1381                         l.l_linger = so->so_linger;
 1382                         error = sooptcopyout(sopt, &l, sizeof l);
 1383                         break;
 1384 
 1385                 case SO_USELOOPBACK:
 1386                 case SO_DONTROUTE:
 1387                 case SO_DEBUG:
 1388                 case SO_KEEPALIVE:
 1389                 case SO_REUSEADDR:
 1390                 case SO_REUSEPORT:
 1391                 case SO_BROADCAST:
 1392                 case SO_OOBINLINE:
 1393                 case SO_TIMESTAMP:
 1394                         optval = so->so_options & sopt->sopt_name;
 1395 integer:
 1396                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1397                         break;
 1398 
 1399                 case SO_TYPE:
 1400                         optval = so->so_type;
 1401                         goto integer;
 1402 
 1403                 case SO_ERROR:
 1404                         optval = so->so_error;
 1405                         so->so_error = 0;
 1406                         goto integer;
 1407 
 1408                 case SO_SNDBUF:
 1409                         optval = so->so_snd.sb_hiwat;
 1410                         goto integer;
 1411 
 1412                 case SO_RCVBUF:
 1413                         optval = so->so_rcv.sb_hiwat;
 1414                         goto integer;
 1415 
 1416                 case SO_SNDLOWAT:
 1417                         optval = so->so_snd.sb_lowat;
 1418                         goto integer;
 1419 
 1420                 case SO_RCVLOWAT:
 1421                         optval = so->so_rcv.sb_lowat;
 1422                         goto integer;
 1423 
 1424                 case SO_SNDTIMEO:
 1425                 case SO_RCVTIMEO:
 1426                         optval = (sopt->sopt_name == SO_SNDTIMEO ?
 1427                                   so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
 1428 
 1429                         tv.tv_sec = optval / hz;
 1430                         tv.tv_usec = (optval % hz) * tick;
 1431                         error = sooptcopyout(sopt, &tv, sizeof tv);
 1432                         break;                  
 1433 
 1434                 default:
 1435                         error = ENOPROTOOPT;
 1436                         break;
 1437                 }
 1438                 return (error);
 1439         }
 1440 }
 1441 
 1442 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
 1443 int
 1444 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 1445 {
 1446         struct mbuf *m, *m_prev;
 1447         int sopt_size = sopt->sopt_valsize;
 1448 
 1449         MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
 1450         if (m == 0)
 1451                 return ENOBUFS;
 1452         if (sopt_size > MLEN) {
 1453                 MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
 1454                 if ((m->m_flags & M_EXT) == 0) {
 1455                         m_free(m);
 1456                         return ENOBUFS;
 1457                 }
 1458                 m->m_len = min(MCLBYTES, sopt_size);
 1459         } else {
 1460                 m->m_len = min(MLEN, sopt_size);
 1461         }
 1462         sopt_size -= m->m_len;
 1463         *mp = m;
 1464         m_prev = m;
 1465 
 1466         while (sopt_size) {
 1467                 MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
 1468                 if (m == 0) {
 1469                         m_freem(*mp);
 1470                         return ENOBUFS;
 1471                 }
 1472                 if (sopt_size > MLEN) {
 1473                         MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
 1474                         if ((m->m_flags & M_EXT) == 0) {
 1475                                 m_freem(*mp);
 1476                                 return ENOBUFS;
 1477                         }
 1478                         m->m_len = min(MCLBYTES, sopt_size);
 1479                 } else {
 1480                         m->m_len = min(MLEN, sopt_size);
 1481                 }
 1482                 sopt_size -= m->m_len;
 1483                 m_prev->m_next = m;
 1484                 m_prev = m;
 1485         }
 1486         return 0;
 1487 }
 1488 
 1489 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
 1490 int
 1491 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
 1492 {
 1493         struct mbuf *m0 = m;
 1494 
 1495         if (sopt->sopt_val == NULL)
 1496                 return 0;
 1497         while (m != NULL && sopt->sopt_valsize >= m->m_len) {
 1498                 if (sopt->sopt_p != NULL) {
 1499                         int error;
 1500 
 1501                         error = copyin(sopt->sopt_val, mtod(m, char *),
 1502                                        m->m_len);
 1503                         if (error != 0) {
 1504                                 m_freem(m0);
 1505                                 return(error);
 1506                         }
 1507                 } else
 1508                         bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
 1509                 sopt->sopt_valsize -= m->m_len;
 1510                 (caddr_t)sopt->sopt_val += m->m_len;
 1511                 m = m->m_next;
 1512         }
 1513         if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */
 1514                 panic("ip6_sooptmcopyin");
 1515         return 0;
 1516 }
 1517 
 1518 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
 1519 int
 1520 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
 1521 {
 1522         struct mbuf *m0 = m;
 1523         size_t valsize = 0;
 1524 
 1525         if (sopt->sopt_val == NULL)
 1526                 return 0;
 1527         while (m != NULL && sopt->sopt_valsize >= m->m_len) {
 1528                 if (sopt->sopt_p != NULL) {
 1529                         int error;
 1530 
 1531                         error = copyout(mtod(m, char *), sopt->sopt_val,
 1532                                        m->m_len);
 1533                         if (error != 0) {
 1534                                 m_freem(m0);
 1535                                 return(error);
 1536                         }
 1537                 } else
 1538                         bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
 1539                sopt->sopt_valsize -= m->m_len;
 1540                (caddr_t)sopt->sopt_val += m->m_len;
 1541                valsize += m->m_len;
 1542                m = m->m_next;
 1543         }
 1544         if (m != NULL) {
 1545                 /* enough soopt buffer should be given from user-land */
 1546                 m_freem(m0);
 1547                 return(EINVAL);
 1548         }
 1549         sopt->sopt_valsize = valsize;
 1550         return 0;
 1551 }
 1552 
 1553 void
 1554 sohasoutofband(so)
 1555         register struct socket *so;
 1556 {
 1557         if (so->so_sigio != NULL)
 1558                 pgsigio(so->so_sigio, SIGURG, 0);
 1559         selwakeup(&so->so_rcv.sb_sel);
 1560 }
 1561 
 1562 int
 1563 sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
 1564 {
 1565         int revents = 0;
 1566         int s = splnet();
 1567 
 1568         if (events & (POLLIN | POLLRDNORM))
 1569                 if (soreadable(so))
 1570                         revents |= events & (POLLIN | POLLRDNORM);
 1571 
 1572         if (events & (POLLOUT | POLLWRNORM))
 1573                 if (sowriteable(so))
 1574                         revents |= events & (POLLOUT | POLLWRNORM);
 1575 
 1576         if (events & (POLLPRI | POLLRDBAND))
 1577                 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
 1578                         revents |= events & (POLLPRI | POLLRDBAND);
 1579 
 1580         if (revents == 0) {
 1581                 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
 1582                         selrecord(p, &so->so_rcv.sb_sel);
 1583                         so->so_rcv.sb_flags |= SB_SEL;
 1584                 }
 1585 
 1586                 if (events & (POLLOUT | POLLWRNORM)) {
 1587                         selrecord(p, &so->so_snd.sb_sel);
 1588                         so->so_snd.sb_flags |= SB_SEL;
 1589                 }
 1590         }
 1591 
 1592         splx(s);
 1593         return (revents);
 1594 }
 1595 
 1596 int
 1597 sokqfilter(struct file *fp, struct knote *kn)
 1598 {
 1599         struct socket *so = (struct socket *)kn->kn_fp->f_data;
 1600         struct sockbuf *sb;
 1601         int s;
 1602 
 1603         switch (kn->kn_filter) {
 1604         case EVFILT_READ:
 1605                 if (so->so_options & SO_ACCEPTCONN)
 1606                         kn->kn_fop = &solisten_filtops;
 1607                 else
 1608                         kn->kn_fop = &soread_filtops;
 1609                 sb = &so->so_rcv;
 1610                 break;
 1611         case EVFILT_WRITE:
 1612                 kn->kn_fop = &sowrite_filtops;
 1613                 sb = &so->so_snd;
 1614                 break;
 1615         default:
 1616                 return (1);
 1617         }
 1618 
 1619         s = splnet();
 1620         SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
 1621         sb->sb_flags |= SB_KNOTE;
 1622         splx(s);
 1623         return (0);
 1624 }
 1625 
 1626 static void
 1627 filt_sordetach(struct knote *kn)
 1628 {
 1629         struct socket *so = (struct socket *)kn->kn_fp->f_data;
 1630         int s = splnet();
 1631 
 1632         SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
 1633         if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
 1634                 so->so_rcv.sb_flags &= ~SB_KNOTE;
 1635         splx(s);
 1636 }
 1637 
 1638 /*ARGSUSED*/
 1639 static int
 1640 filt_soread(struct knote *kn, long hint)
 1641 {
 1642         struct socket *so = (struct socket *)kn->kn_fp->f_data;
 1643 
 1644         kn->kn_data = so->so_rcv.sb_cc;
 1645         if (so->so_state & SS_CANTRCVMORE) {
 1646                 kn->kn_flags |= EV_EOF; 
 1647                 kn->kn_fflags = so->so_error;
 1648                 return (1);
 1649         }
 1650         if (so->so_error)       /* temporary udp error */
 1651                 return (1);
 1652         if (kn->kn_sfflags & NOTE_LOWAT)
 1653                 return (kn->kn_data >= kn->kn_sdata);
 1654         return (kn->kn_data >= so->so_rcv.sb_lowat);
 1655 }
 1656 
 1657 static void
 1658 filt_sowdetach(struct knote *kn)
 1659 {
 1660         struct socket *so = (struct socket *)kn->kn_fp->f_data;
 1661         int s = splnet();
 1662 
 1663         SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
 1664         if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
 1665                 so->so_snd.sb_flags &= ~SB_KNOTE;
 1666         splx(s);
 1667 }
 1668 
 1669 /*ARGSUSED*/
 1670 static int
 1671 filt_sowrite(struct knote *kn, long hint)
 1672 {
 1673         struct socket *so = (struct socket *)kn->kn_fp->f_data;
 1674 
 1675         kn->kn_data = sbspace(&so->so_snd);
 1676         if (so->so_state & SS_CANTSENDMORE) {
 1677                 kn->kn_flags |= EV_EOF; 
 1678                 kn->kn_fflags = so->so_error;
 1679                 return (1);
 1680         }
 1681         if (so->so_error)       /* temporary udp error */
 1682                 return (1);
 1683         if (((so->so_state & SS_ISCONNECTED) == 0) &&
 1684             (so->so_proto->pr_flags & PR_CONNREQUIRED))
 1685                 return (0);
 1686         if (kn->kn_sfflags & NOTE_LOWAT)
 1687                 return (kn->kn_data >= kn->kn_sdata);
 1688         return (kn->kn_data >= so->so_snd.sb_lowat);
 1689 }
 1690 
 1691 /*ARGSUSED*/
 1692 static int
 1693 filt_solisten(struct knote *kn, long hint)
 1694 {
 1695         struct socket *so = (struct socket *)kn->kn_fp->f_data;
 1696 
 1697         kn->kn_data = so->so_qlen;
 1698         return (! TAILQ_EMPTY(&so->so_comp));
 1699 }

Cache object: 439cd0c0442b43ce44401e31809cfab1


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.