The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_socket.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      @(#)uipc_socket.c       8.3 (Berkeley) 4/15/94
   34  */
   35 
   36 #include <sys/cdefs.h>
   37 __FBSDID("$FreeBSD: releng/5.2/sys/kern/uipc_socket.c 122807 2003-11-16 18:25:20Z rwatson $");
   38 
   39 #include "opt_inet.h"
   40 #include "opt_mac.h"
   41 #include "opt_zero.h"
   42 
   43 #include <sys/param.h>
   44 #include <sys/systm.h>
   45 #include <sys/fcntl.h>
   46 #include <sys/limits.h>
   47 #include <sys/lock.h>
   48 #include <sys/mac.h>
   49 #include <sys/malloc.h>
   50 #include <sys/mbuf.h>
   51 #include <sys/mutex.h>
   52 #include <sys/domain.h>
   53 #include <sys/file.h>                   /* for struct knote */
   54 #include <sys/kernel.h>
   55 #include <sys/event.h>
   56 #include <sys/poll.h>
   57 #include <sys/proc.h>
   58 #include <sys/protosw.h>
   59 #include <sys/socket.h>
   60 #include <sys/socketvar.h>
   61 #include <sys/resourcevar.h>
   62 #include <sys/signalvar.h>
   63 #include <sys/sysctl.h>
   64 #include <sys/uio.h>
   65 #include <sys/jail.h>
   66 
   67 #include <vm/uma.h>
   68 
   69 
#ifdef INET
/*
 * Accept-filter socket option handler (defined outside this view).
 * sodealloc() calls it with a NULL sockopt when so_accf is set.
 */
static int       do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
#endif

/* knote filter callbacks for sockets; the definitions are outside this view. */
static void     filt_sordetach(struct knote *kn);
static int      filt_soread(struct knote *kn, long hint);
static void     filt_sowdetach(struct knote *kn);
static int      filt_sowrite(struct knote *kn, long hint);
static int      filt_solisten(struct knote *kn, long hint);

/*
 * Filter operation tables.  The read and listen tables share the same
 * detach routine (filt_sordetach) and differ only in the event routine.
 * NOTE(review): positional initializers; presumably the fields are
 * { f_isfd, f_attach, f_detach, f_event } -- confirm against sys/event.h.
 */
static struct filterops solisten_filtops =
        { 1, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
        { 1, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
        { 1, NULL, filt_sowdetach, filt_sowrite };

/* UMA zone that soalloc() allocates sockets from and sodealloc() frees to. */
uma_zone_t socket_zone;
/* Incremented by both soalloc() and sodealloc(). */
so_gen_t        so_gencnt;      /* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

SYSCTL_DECL(_kern_ipc);

/*
 * Upper bound on the listen backlog: solisten() substitutes this value
 * when the requested backlog is negative or larger.  Writable via sysctl.
 */
static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
    &somaxconn, 0, "Maximum pending socket connection queue size");
/*
 * Count of live sockets: incremented in soalloc(), decremented in
 * sodealloc().  Exported read-only.
 */
static int numopensockets;
SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
    &numopensockets, 0, "Number of open sockets");
#ifdef ZERO_COPY_SOCKETS
/* These aren't static because they're used in other files. */
/* Run-time switches for the zero-copy paths; both default to enabled. */
int so_zero_copy_send = 1;
int so_zero_copy_receive = 1;
SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0,
    "Zero copy controls");
SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW,
    &so_zero_copy_receive, 0, "Enable zero copy receive");
SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW,
    &so_zero_copy_send, 0, "Enable zero copy send");
#endif /* ZERO_COPY_SOCKETS */
  112 
  113 
  114 /*
  115  * Socket operation routines.
  116  * These routines are called by the routines in
  117  * sys_socket.c or from a system process, and
  118  * implement the semantics of socket operations by
  119  * switching out to the protocol specific routines.
  120  */
  121 
  122 /*
  123  * Get a socket structure from our zone, and initialize it.
  124  * Note that it would probably be better to allocate socket
  125  * and PCB at the same time, but I'm not convinced that all
  126  * the protocols can be easily modified to do this.
  127  *
  128  * soalloc() returns a socket with a ref count of 0.
  129  */
  130 struct socket *
  131 soalloc(waitok)
  132         int waitok;
  133 {
  134         struct socket *so;
  135 #ifdef MAC
  136         int error;
  137 #endif
  138         int flag;
  139 
  140         if (waitok == 1)
  141                 flag = M_WAITOK;
  142         else
  143                 flag = M_NOWAIT;
  144         flag |= M_ZERO;
  145         so = uma_zalloc(socket_zone, flag);
  146         if (so) {
  147 #ifdef MAC
  148                 error = mac_init_socket(so, flag);
  149                 if (error != 0) {
  150                         uma_zfree(socket_zone, so);
  151                         so = NULL;
  152                         return so;
  153                 }
  154 #endif
  155                 /* XXX race condition for reentrant kernel */
  156                 so->so_gencnt = ++so_gencnt;
  157                 /* sx_init(&so->so_sxlock, "socket sxlock"); */
  158                 TAILQ_INIT(&so->so_aiojobq);
  159                 ++numopensockets;
  160         }
  161         return so;
  162 }
  163 
  164 /*
  165  * socreate returns a socket with a ref count of 1.  The socket should be
  166  * closed with soclose().
  167  */
  168 int
  169 socreate(dom, aso, type, proto, cred, td)
  170         int dom;
  171         struct socket **aso;
  172         int type;
  173         int proto;
  174         struct ucred *cred;
  175         struct thread *td;
  176 {
  177         struct protosw *prp;
  178         struct socket *so;
  179         int error;
  180 
  181         if (proto)
  182                 prp = pffindproto(dom, proto, type);
  183         else
  184                 prp = pffindtype(dom, type);
  185 
  186         if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
  187                 return (EPROTONOSUPPORT);
  188 
  189         if (jailed(cred) && jail_socket_unixiproute_only &&
  190             prp->pr_domain->dom_family != PF_LOCAL &&
  191             prp->pr_domain->dom_family != PF_INET &&
  192             prp->pr_domain->dom_family != PF_ROUTE) {
  193                 return (EPROTONOSUPPORT);
  194         }
  195 
  196         if (prp->pr_type != type)
  197                 return (EPROTOTYPE);
  198         so = soalloc(1);
  199         if (so == NULL)
  200                 return (ENOBUFS);
  201 
  202         TAILQ_INIT(&so->so_incomp);
  203         TAILQ_INIT(&so->so_comp);
  204         so->so_type = type;
  205         so->so_cred = crhold(cred);
  206         so->so_proto = prp;
  207 #ifdef MAC
  208         mac_create_socket(cred, so);
  209 #endif
  210         soref(so);
  211         error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
  212         if (error) {
  213                 so->so_state |= SS_NOFDREF;
  214                 sorele(so);
  215                 return (error);
  216         }
  217         *aso = so;
  218         return (0);
  219 }
  220 
  221 int
  222 sobind(so, nam, td)
  223         struct socket *so;
  224         struct sockaddr *nam;
  225         struct thread *td;
  226 {
  227         int s = splnet();
  228         int error;
  229 
  230         error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td);
  231         splx(s);
  232         return (error);
  233 }
  234 
  235 void
  236 sodealloc(struct socket *so)
  237 {
  238 
  239         KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count));
  240         so->so_gencnt = ++so_gencnt;
  241         if (so->so_rcv.sb_hiwat)
  242                 (void)chgsbsize(so->so_cred->cr_uidinfo,
  243                     &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
  244         if (so->so_snd.sb_hiwat)
  245                 (void)chgsbsize(so->so_cred->cr_uidinfo,
  246                     &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
  247 #ifdef INET
  248         /* remove acccept filter if one is present. */
  249         if (so->so_accf != NULL)
  250                 do_setopt_accept_filter(so, NULL);
  251 #endif
  252 #ifdef MAC
  253         mac_destroy_socket(so);
  254 #endif
  255         crfree(so->so_cred);
  256         /* sx_destroy(&so->so_sxlock); */
  257         uma_zfree(socket_zone, so);
  258         --numopensockets;
  259 }
  260 
  261 int
  262 solisten(so, backlog, td)
  263         struct socket *so;
  264         int backlog;
  265         struct thread *td;
  266 {
  267         int s, error;
  268 
  269         s = splnet();
  270         if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
  271                             SS_ISDISCONNECTING)) {
  272                 splx(s);
  273                 return (EINVAL);
  274         }
  275         error = (*so->so_proto->pr_usrreqs->pru_listen)(so, td);
  276         if (error) {
  277                 splx(s);
  278                 return (error);
  279         }
  280         if (TAILQ_EMPTY(&so->so_comp))
  281                 so->so_options |= SO_ACCEPTCONN;
  282         if (backlog < 0 || backlog > somaxconn)
  283                 backlog = somaxconn;
  284         so->so_qlimit = backlog;
  285         splx(s);
  286         return (0);
  287 }
  288 
  289 void
  290 sofree(so)
  291         struct socket *so;
  292 {
  293         struct socket *head = so->so_head;
  294 
  295         KASSERT(so->so_count == 0, ("socket %p so_count not 0", so));
  296 
  297         if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
  298                 return;
  299         if (head != NULL) {
  300                 if (so->so_state & SS_INCOMP) {
  301                         TAILQ_REMOVE(&head->so_incomp, so, so_list);
  302                         head->so_incqlen--;
  303                 } else if (so->so_state & SS_COMP) {
  304                         /*
  305                          * We must not decommission a socket that's
  306                          * on the accept(2) queue.  If we do, then
  307                          * accept(2) may hang after select(2) indicated
  308                          * that the listening socket was ready.
  309                          */
  310                         return;
  311                 } else {
  312                         panic("sofree: not queued");
  313                 }
  314                 so->so_state &= ~SS_INCOMP;
  315                 so->so_head = NULL;
  316         }
  317         sbrelease(&so->so_snd, so);
  318         sorflush(so);
  319         sodealloc(so);
  320 }
  321 
  322 /*
  323  * Close a socket on last file table reference removal.
  324  * Initiate disconnect if connected.
  325  * Free socket when disconnect complete.
  326  *
  327  * This function will sorele() the socket.  Note that soclose() may be
  328  * called prior to the ref count reaching zero.  The actual socket
  329  * structure will not be freed until the ref count reaches zero.
  330  */
  331 int
  332 soclose(so)
  333         struct socket *so;
  334 {
  335         int s = splnet();               /* conservative */
  336         int error = 0;
  337 
  338         funsetown(&so->so_sigio);
  339         if (so->so_options & SO_ACCEPTCONN) {
  340                 struct socket *sp, *sonext;
  341 
  342                 sp = TAILQ_FIRST(&so->so_incomp);
  343                 for (; sp != NULL; sp = sonext) {
  344                         sonext = TAILQ_NEXT(sp, so_list);
  345                         (void) soabort(sp);
  346                 }
  347                 for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) {
  348                         sonext = TAILQ_NEXT(sp, so_list);
  349                         /* Dequeue from so_comp since sofree() won't do it */
  350                         TAILQ_REMOVE(&so->so_comp, sp, so_list);
  351                         so->so_qlen--;
  352                         sp->so_state &= ~SS_COMP;
  353                         sp->so_head = NULL;
  354                         (void) soabort(sp);
  355                 }
  356         }
  357         if (so->so_pcb == 0)
  358                 goto discard;
  359         if (so->so_state & SS_ISCONNECTED) {
  360                 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
  361                         error = sodisconnect(so);
  362                         if (error)
  363                                 goto drop;
  364                 }
  365                 if (so->so_options & SO_LINGER) {
  366                         if ((so->so_state & SS_ISDISCONNECTING) &&
  367                             (so->so_state & SS_NBIO))
  368                                 goto drop;
  369                         while (so->so_state & SS_ISCONNECTED) {
  370                                 error = tsleep(&so->so_timeo,
  371                                     PSOCK | PCATCH, "soclos", so->so_linger * hz);
  372                                 if (error)
  373                                         break;
  374                         }
  375                 }
  376         }
  377 drop:
  378         if (so->so_pcb) {
  379                 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
  380                 if (error == 0)
  381                         error = error2;
  382         }
  383 discard:
  384         if (so->so_state & SS_NOFDREF)
  385                 panic("soclose: NOFDREF");
  386         so->so_state |= SS_NOFDREF;
  387         sorele(so);
  388         splx(s);
  389         return (error);
  390 }
  391 
  392 /*
  393  * Must be called at splnet...
  394  */
  395 int
  396 soabort(so)
  397         struct socket *so;
  398 {
  399         int error;
  400 
  401         error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
  402         if (error) {
  403                 sotryfree(so);  /* note: does not decrement the ref count */
  404                 return error;
  405         }
  406         return (0);
  407 }
  408 
  409 int
  410 soaccept(so, nam)
  411         struct socket *so;
  412         struct sockaddr **nam;
  413 {
  414         int s = splnet();
  415         int error;
  416 
  417         if ((so->so_state & SS_NOFDREF) == 0)
  418                 panic("soaccept: !NOFDREF");
  419         so->so_state &= ~SS_NOFDREF;
  420         error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
  421         splx(s);
  422         return (error);
  423 }
  424 
  425 int
  426 soconnect(so, nam, td)
  427         struct socket *so;
  428         struct sockaddr *nam;
  429         struct thread *td;
  430 {
  431         int s;
  432         int error;
  433 
  434         if (so->so_options & SO_ACCEPTCONN)
  435                 return (EOPNOTSUPP);
  436         s = splnet();
  437         /*
  438          * If protocol is connection-based, can only connect once.
  439          * Otherwise, if connected, try to disconnect first.
  440          * This allows user to disconnect by connecting to, e.g.,
  441          * a null address.
  442          */
  443         if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
  444             ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
  445             (error = sodisconnect(so))))
  446                 error = EISCONN;
  447         else
  448                 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td);
  449         splx(s);
  450         return (error);
  451 }
  452 
  453 int
  454 soconnect2(so1, so2)
  455         struct socket *so1;
  456         struct socket *so2;
  457 {
  458         int s = splnet();
  459         int error;
  460 
  461         error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
  462         splx(s);
  463         return (error);
  464 }
  465 
  466 int
  467 sodisconnect(so)
  468         struct socket *so;
  469 {
  470         int s = splnet();
  471         int error;
  472 
  473         if ((so->so_state & SS_ISCONNECTED) == 0) {
  474                 error = ENOTCONN;
  475                 goto bad;
  476         }
  477         if (so->so_state & SS_ISDISCONNECTING) {
  478                 error = EALREADY;
  479                 goto bad;
  480         }
  481         error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
  482 bad:
  483         splx(s);
  484         return (error);
  485 }
  486 
/*
 * Translate a caller's message flags into the wait argument handed to
 * sblock(): M_NOWAIT when MSG_DONTWAIT is set, M_WAITOK otherwise.
 */
#define SBLOCKWAIT(f)   (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
  488 /*
  489  * Send on a socket.
  490  * If send must go all at once and message is larger than
  491  * send buffering, then hard error.
  492  * Lock against other senders.
  493  * If must go all at once and not enough room now, then
  494  * inform user that this would block and do nothing.
  495  * Otherwise, if nonblocking, send as much as possible.
  496  * The data to be sent is described by "uio" if nonzero,
  497  * otherwise by the mbuf chain "top" (which must be null
  498  * if uio is not).  Data provided in mbuf chain must be small
  499  * enough to send all at once.
  500  *
  501  * Returns nonzero on error, timeout or signal; callers
  502  * must check for short counts if EINTR/ERESTART are returned.
  503  * Data and control buffers are freed on return.
  504  */
  505 
#ifdef ZERO_COPY_SOCKETS
/*
 * Counters for the zero-copy send path in sosend().
 *   size_ok   - bumped when the residual, buffer space and first iovec
 *               are all at least PAGE_SIZE.
 *   align_ok  - bumped when, in addition, the user buffer address is
 *               page aligned (the point at which socow_setup() is tried).
 *   found_ifp - not updated in the portion of the file visible here.
 */
struct so_zerocopy_stats{
        int size_ok;
        int align_ok;
        int found_ifp;
};
struct so_zerocopy_stats so_zerocp_stats = {0,0,0};
/* Extra headers needed only by the zero-copy code. */
#include <netinet/in.h>
#include <net/route.h>
#include <netinet/in_pcb.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#endif /*ZERO_COPY_SOCKETS*/
  520 
int
sosend(so, addr, uio, top, control, flags, td)
        struct socket *so;
        struct sockaddr *addr;
        struct uio *uio;
        struct mbuf *top;
        struct mbuf *control;
        int flags;
        struct thread *td;
{
        struct mbuf **mp;
        struct mbuf *m;
        long space, len, resid;
        int clen = 0, error, s, dontroute, mlen;
        /*
         * The send is all-or-nothing when the protocol demands it or when
         * the caller supplied a prebuilt mbuf chain.
         */
        int atomic = sosendallatonce(so) || top;
#ifdef ZERO_COPY_SOCKETS
        int cow_send;
#endif /* ZERO_COPY_SOCKETS */

        /* Bytes to send come from the uio if given, else from the chain. */
        if (uio)
                resid = uio->uio_resid;
        else
                resid = top->m_pkthdr.len;
        /*
         * In theory resid should be unsigned.
         * However, space must be signed, as it might be less than 0
         * if we over-committed, and we must use a signed comparison
         * of space and resid.  On the other hand, a negative resid
         * causes us to loop sending 0-length segments to the protocol.
         *
         * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
         * type sockets since that's an error.
         */
        if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
                error = EINVAL;
                goto out;
        }

        /*
         * SO_DONTROUTE is toggled around pru_send below only when it was
         * requested for this call, is not already set on the socket, and
         * the protocol is PR_ATOMIC.
         */
        dontroute =
            (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
            (so->so_proto->pr_flags & PR_ATOMIC);
        if (td)
                td->td_proc->p_stats->p_ru.ru_msgsnd++;
        if (control)
                clen = control->m_len;
        /*
         * Error exit used while at splnet with the send buffer locked:
         * record the error, restore the spl, and unlock via "release".
         */
#define snderr(errno)   { error = (errno); splx(s); goto release; }

restart:
        /* Lock against other senders; MSG_DONTWAIT selects M_NOWAIT. */
        error = sblock(&so->so_snd, SBLOCKWAIT(flags));
        if (error)
                goto out;
        do {
                s = splnet();
                if (so->so_state & SS_CANTSENDMORE)
                        snderr(EPIPE);
                /* Report a pending socket error once, then clear it. */
                if (so->so_error) {
                        error = so->so_error;
                        so->so_error = 0;
                        splx(s);
                        goto release;
                }
                if ((so->so_state & SS_ISCONNECTED) == 0) {
                        /*
                         * `sendto' and `sendmsg' is allowed on a connection-
                         * based socket if it supports implied connect.
                         * Return ENOTCONN if not connected and no address is
                         * supplied.
                         */
                        if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
                            (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
                                if ((so->so_state & SS_ISCONFIRMING) == 0 &&
                                    !(resid == 0 && clen != 0))
                                        snderr(ENOTCONN);
                        } else if (addr == 0)
                            snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
                                   ENOTCONN : EDESTADDRREQ);
                }
                space = sbspace(&so->so_snd);
                /* Out-of-band sends get 1024 bytes of extra headroom. */
                if (flags & MSG_OOB)
                        space += 1024;
                /* An atomic message or control data that can never fit. */
                if ((atomic && resid > so->so_snd.sb_hiwat) ||
                    clen > so->so_snd.sb_hiwat)
                        snderr(EMSGSIZE);
                /*
                 * Not enough room right now: fail with EWOULDBLOCK on a
                 * non-blocking socket; otherwise drop the buffer lock,
                 * wait on the buffer and start over from "restart".
                 */
                if (space < resid + clen &&
                    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
                        if (so->so_state & SS_NBIO)
                                snderr(EWOULDBLOCK);
                        sbunlock(&so->so_snd);
                        error = sbwait(&so->so_snd);
                        splx(s);
                        if (error)
                                goto out;
                        goto restart;
                }
                splx(s);
                mp = &top;
                space -= clen;
                do {
                    if (uio == NULL) {
                        /*
                         * Data is prepackaged in "top".
                         */
                        resid = 0;
                        if (flags & MSG_EOR)
                                top->m_flags |= M_EOR;
                    } else do {
                        /*
                         * Build the chain one mbuf at a time: a packet
                         * header mbuf first, plain mbufs afterwards.
                         */
#ifdef ZERO_COPY_SOCKETS
                        cow_send = 0;
#endif /* ZERO_COPY_SOCKETS */
                        if (top == 0) {
                                MGETHDR(m, M_TRYWAIT, MT_DATA);
                                if (m == NULL) {
                                        error = ENOBUFS;
                                        goto release;
                                }
                                mlen = MHLEN;
                                m->m_pkthdr.len = 0;
                                m->m_pkthdr.rcvif = (struct ifnet *)0;
                        } else {
                                MGET(m, M_TRYWAIT, MT_DATA);
                                if (m == NULL) {
                                        error = ENOBUFS;
                                        goto release;
                                }
                                mlen = MLEN;
                        }
                        /*
                         * NOTE: the braces below interleave with the
                         * ZERO_COPY_SOCKETS conditionals.  With the option,
                         * the cluster path is wrapped in
                         * "if (!cow_send) { ... } else { len = PAGE_SIZE; }";
                         * without it only the plain cluster path remains.
                         * In both cases the final else arm holds "nopages".
                         */
                        if (resid >= MINCLSIZE) {
#ifdef ZERO_COPY_SOCKETS
                                if (so_zero_copy_send &&
                                    resid>=PAGE_SIZE &&
                                    space>=PAGE_SIZE &&
                                    uio->uio_iov->iov_len>=PAGE_SIZE) {
                                        so_zerocp_stats.size_ok++;
                                        if (!((vm_offset_t)
                                          uio->uio_iov->iov_base & PAGE_MASK)){
                                                so_zerocp_stats.align_ok++;
                                                cow_send = socow_setup(m, uio);
                                        }
                                }
                                if (!cow_send){
#endif /* ZERO_COPY_SOCKETS */
                                MCLGET(m, M_TRYWAIT);
                                /* No cluster attached: use the mbuf itself. */
                                if ((m->m_flags & M_EXT) == 0)
                                        goto nopages;
                                mlen = MCLBYTES;
                                len = min(min(mlen, resid), space);
                        } else {
#ifdef ZERO_COPY_SOCKETS
                                        len = PAGE_SIZE;
                                }

                        } else {
#endif /* ZERO_COPY_SOCKETS */
nopages:
                                len = min(min(mlen, resid), space);
                                /*
                                 * For datagram protocols, leave room
                                 * for protocol headers in first mbuf.
                                 */
                                if (atomic && top == 0 && len < mlen)
                                        MH_ALIGN(m, len);
                        }
                        space -= len;
                        /* Skip the copy when socow_setup() set cow_send. */
#ifdef ZERO_COPY_SOCKETS
                        if (cow_send)
                                error = 0;
                        else
#endif /* ZERO_COPY_SOCKETS */
                        error = uiomove(mtod(m, void *), (int)len, uio);
                        resid = uio->uio_resid;
                        m->m_len = len;
                        /*
                         * Link the mbuf into the chain before checking the
                         * error so the cleanup at "out" frees it with top.
                         */
                        *mp = m;
                        top->m_pkthdr.len += len;
                        if (error)
                                goto release;
                        mp = &m->m_next;
                        if (resid <= 0) {
                                if (flags & MSG_EOR)
                                        top->m_flags |= M_EOR;
                                break;
                        }
                    } while (space > 0 && atomic);
                    if (dontroute)
                            so->so_options |= SO_DONTROUTE;
                    s = splnet();                               /* XXX */
                    /*
                     * XXX all the SS_CANTSENDMORE checks previously
                     * done could be out of date.  We could have received
                     * a reset packet in an interrupt or maybe we slept
                     * while doing page faults in uiomove() etc. We could
                     * probably recheck again inside the splnet() protection
                     * here, but there are probably other places that this
                     * also happens.  We must rethink this.
                     */
                    error = (*so->so_proto->pr_usrreqs->pru_send)(so,
                        (flags & MSG_OOB) ? PRUS_OOB :
                        /*
                         * If the user set MSG_EOF, the protocol
                         * understands this flag and nothing left to
                         * send then use PRU_SEND_EOF instead of PRU_SEND.
                         */
                        ((flags & MSG_EOF) &&
                         (so->so_proto->pr_flags & PR_IMPLOPCL) &&
                         (resid <= 0)) ?
                                PRUS_EOF :
                        /* If there is more to send set PRUS_MORETOCOME */
                        (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
                        top, addr, control, td);
                    splx(s);
                    if (dontroute)
                            so->so_options &= ~SO_DONTROUTE;
                    /*
                     * Forget the chain and control mbufs so the cleanup at
                     * "out" does not free them (presumably pru_send has
                     * taken ownership -- confirm against the protocol code).
                     */
                    clen = 0;
                    control = 0;
                    top = 0;
                    mp = &top;
                    if (error)
                        goto release;
                } while (resid && space > 0);
        } while (resid);

release:
        sbunlock(&so->so_snd);
out:
        /* Free whatever was not handed to the protocol. */
        if (top)
                m_freem(top);
        if (control)
                m_freem(control);
        return (error);
}
  750 
  751 /*
  752  * Implement receive operations on a socket.
  753  * We depend on the way that records are added to the sockbuf
  754  * by sbappend*.  In particular, each record (mbufs linked through m_next)
  755  * must begin with an address if the protocol so specifies,
  756  * followed by an optional mbuf or mbufs containing ancillary data,
  757  * and then zero or more mbufs of data.
  758  * In order to avoid blocking network interrupts for the entire time here,
  759  * we splx() while doing the actual copy to user space.
  760  * Although the sockbuf is locked, new data may still be appended,
  761  * and thus we must maintain consistency of the sockbuf during that time.
  762  *
  763  * The caller may receive the data as a single mbuf chain by supplying
  764  * an mbuf **mp0 for use in returning the chain.  The uio is then used
  765  * only for the count in uio_resid.
       *
       * Arguments, as used by the code below:
       *   so        socket whose so_rcv buffer is drained.
       *   psa       if non-NULL, cleared on entry; for PR_ADDR protocols it
       *             receives a dup_sockaddr() copy of the record's address.
       *   uio       destination for the data; uio_resid is the byte count
       *             wanted.
       *   mp0       if non-NULL, data mbufs are handed back through it
       *             instead of being copied out through the uio.
       *   controlp  if non-NULL, cleared on entry and then receives the
       *             record's MT_CONTROL mbufs.
       *   flagsp    if non-NULL, supplies the MSG_* input flags (MSG_EOR is
       *             masked off) and has the resulting flags OR'ed into it
       *             on the normal exit path (not on "goto release" paths).
       *
       * Returns 0 or an errno value.  A failed sbwait() inside the
       * MSG_WAITALL loop returns 0, i.e. a short count without an error.
  766  */
  767 int
  768 soreceive(so, psa, uio, mp0, controlp, flagsp)
  769         struct socket *so;
  770         struct sockaddr **psa;
  771         struct uio *uio;
  772         struct mbuf **mp0;
  773         struct mbuf **controlp;
  774         int *flagsp;
  775 {
  776         struct mbuf *m, **mp;
  777         int flags, len, error, s, offset;
  778         struct protosw *pr = so->so_proto;
  779         struct mbuf *nextrecord;
  780         int moff, type = 0;
  781         int orig_resid = uio->uio_resid;
  782 
  783         mp = mp0;
  784         if (psa)
  785                 *psa = 0;
  786         if (controlp)
  787                 *controlp = 0;
  788         if (flagsp)
  789                 flags = *flagsp &~ MSG_EOR;
  790         else
  791                 flags = 0;
              /*
               * Out-of-band request: have the protocol's pru_rcvoob fill a
               * fresh mbuf, copy it out through the uio and return.  This
               * path never looks at so_rcv.
               */
  792         if (flags & MSG_OOB) {
  793                 m = m_get(M_TRYWAIT, MT_DATA);
  794                 if (m == NULL)
  795                         return (ENOBUFS);
  796                 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
  797                 if (error)
  798                         goto bad;
  799                 do {
  800 #ifdef ZERO_COPY_SOCKETS
  801                         if (so_zero_copy_receive) {
  802                                 vm_page_t pg;
  803                                 int disposable;
  804 
  805                                 if ((m->m_flags & M_EXT)
  806                                  && (m->m_ext.ext_type == EXT_DISPOSABLE))
  807                                         disposable = 1;
  808                                 else
  809                                         disposable = 0;
  810 
  811                                 pg = PHYS_TO_VM_PAGE(vtophys(mtod(m, caddr_t)));
  812                                 if (uio->uio_offset == -1)
  813                                         uio->uio_offset =IDX_TO_OFF(pg->pindex);
  814 
  815                                 error = uiomoveco(mtod(m, void *),
  816                                                   min(uio->uio_resid, m->m_len),
  817                                                   uio, pg->object,
  818                                                   disposable);
  819                         } else
  820 #endif /* ZERO_COPY_SOCKETS */
  821                         error = uiomove(mtod(m, void *),
  822                             (int) min(uio->uio_resid, m->m_len), uio);
  823                         m = m_free(m);
  824                 } while (uio->uio_resid && error == 0 && m);
  825 bad:
  826                 if (m)
  827                         m_freem(m);
  828                 return (error);
  829         }
  830         if (mp)
  831                 *mp = (struct mbuf *)0;
  832         if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
  833                 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
  834 
              /*
               * Lock the receive buffer and raise to splnet before looking
               * at it.  Both "goto restart" sites below have already done
               * sbunlock() and splx() when they jump back here.
               */
  835 restart:
  836         error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
  837         if (error)
  838                 return (error);
  839         s = splnet();
  840 
  841         m = so->so_rcv.sb_mb;
  842         /*
  843          * If we have less data than requested, block awaiting more
  844          * (subject to any timeout) if:
  845          *   1. the current count is less than the low water mark, or
  846          *   2. MSG_WAITALL is set, and it is possible to do the entire
  847          *      receive operation at once if we block (resid <= hiwat).
  848          *   3. MSG_DONTWAIT is not set
  849          * If MSG_WAITALL is set but resid is larger than the receive buffer,
  850          * we have to do the receive in sections, and thus risk returning
  851          * a short count if a timeout or signal occurs after we start.
  852          */
  853         if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
  854             so->so_rcv.sb_cc < uio->uio_resid) &&
  855             (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
  856             ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
  857             m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
  858                 KASSERT(m != 0 || !so->so_rcv.sb_cc,
  859                     ("receive: m == %p so->so_rcv.sb_cc == %u",
  860                     m, so->so_rcv.sb_cc));
                      /*
                       * A pending error or end-of-stream is only reported
                       * once the buffer is empty; queued data is delivered
                       * first via dontblock.
                       */
  861                 if (so->so_error) {
  862                         if (m)
  863                                 goto dontblock;
  864                         error = so->so_error;
  865                         if ((flags & MSG_PEEK) == 0)
  866                                 so->so_error = 0;
  867                         goto release;
  868                 }
  869                 if (so->so_state & SS_CANTRCVMORE) {
  870                         if (m)
  871                                 goto dontblock;
  872                         else
  873                                 goto release;
  874                 }
                      /*
                       * Do not wait if the queued chain already contains
                       * OOB data or an end-of-record marker.
                       */
  875                 for (; m; m = m->m_next)
  876                         if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
  877                                 m = so->so_rcv.sb_mb;
  878                                 goto dontblock;
  879                         }
  880                 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
  881                     (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
  882                         error = ENOTCONN;
  883                         goto release;
  884                 }
  885                 if (uio->uio_resid == 0)
  886                         goto release;
  887                 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
  888                         error = EWOULDBLOCK;
  889                         goto release;
  890                 }
                      /*
                       * Nothing to return yet: drop the sockbuf lock, sleep
                       * in sbwait() and start over unless the sleep failed.
                       */
  891                 SBLASTRECORDCHK(&so->so_rcv);
  892                 SBLASTMBUFCHK(&so->so_rcv);
  893                 sbunlock(&so->so_rcv);
  894                 error = sbwait(&so->so_rcv);
  895                 splx(s);
  896                 if (error)
  897                         return (error);
  898                 goto restart;
  899         }
              /*
               * From here on m is non-NULL and points at the first mbuf of
               * the first record in so_rcv.
               */
  900 dontblock:
  901         if (uio->uio_td)
  902                 uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
  903         SBLASTRECORDCHK(&so->so_rcv);
  904         SBLASTMBUFCHK(&so->so_rcv);
  905         nextrecord = m->m_nextpkt;
              /*
               * PR_ADDR protocols: the record starts with an MT_SONAME mbuf
               * holding the address.  Hand a copy to the caller if wanted
               * and, unless peeking, unlink and free that mbuf.
               */
  906         if (pr->pr_flags & PR_ADDR) {
  907                 KASSERT(m->m_type == MT_SONAME,
  908                     ("m->m_type == %d", m->m_type));
  909                 orig_resid = 0;
  910                 if (psa)
  911                         *psa = dup_sockaddr(mtod(m, struct sockaddr *),
  912                                             mp0 == 0);
  913                 if (flags & MSG_PEEK) {
  914                         m = m->m_next;
  915                 } else {
  916                         sbfree(&so->so_rcv, m);
  917                         so->so_rcv.sb_mb = m_free(m);
  918                         m = so->so_rcv.sb_mb;
  919                 }
  920         }
              /*
               * Consume the leading MT_CONTROL mbufs.  When peeking they are
               * copied for the caller; otherwise each one is unlinked and
               * then passed to dom_externalize, handed to the caller, or
               * freed.  controlp is advanced to the tail of what was stored.
               */
  921         while (m && m->m_type == MT_CONTROL && error == 0) {
  922                 if (flags & MSG_PEEK) {
  923                         if (controlp)
  924                                 *controlp = m_copy(m, 0, m->m_len);
  925                         m = m->m_next;
  926                 } else {
  927                         sbfree(&so->so_rcv, m);
  928                         so->so_rcv.sb_mb = m->m_next;
  929                         m->m_next = NULL;
  930                         if (pr->pr_domain->dom_externalize)
  931                                 error =
  932                                 (*pr->pr_domain->dom_externalize)(m, controlp);
  933                         else if (controlp)
  934                                 *controlp = m;
  935                         else
  936                                 m_freem(m);
  937                         m = so->so_rcv.sb_mb;
  938                 }
  939                 if (controlp) {
  940                         orig_resid = 0;
  941                         while (*controlp != NULL)
  942                                 controlp = &(*controlp)->m_next;
  943                 }
  944         }
  945         if (m) {
  946                 if ((flags & MSG_PEEK) == 0) {
  947                         m->m_nextpkt = nextrecord;
  948                         /*
  949                          * If nextrecord == NULL (this is a single chain),
  950                          * then sb_lastrecord may not be valid here if m
  951                          * was changed earlier.
  952                          */
  953                         if (nextrecord == NULL) {
  954                                 KASSERT(so->so_rcv.sb_mb == m,
  955                                         ("receive tailq 1"));
  956                                 so->so_rcv.sb_lastrecord = m;
  957                         }
  958                 }
  959                 type = m->m_type;
  960                 if (type == MT_OOBDATA)
  961                         flags |= MSG_OOB;
  962         } else {
  963                 if ((flags & MSG_PEEK) == 0) {
  964                         KASSERT(so->so_rcv.sb_mb == m,("receive tailq 2"));
  965                         so->so_rcv.sb_mb = nextrecord;
  966                         SB_EMPTY_FIXUP(&so->so_rcv);
  967                 }
  968         }
  969         SBLASTRECORDCHK(&so->so_rcv);
  970         SBLASTMBUFCHK(&so->so_rcv);
  971 
              /*
               * Data transfer loop.  moff is the read offset inside the
               * current mbuf (it only advances when peeking); offset counts
               * the bytes peeked so far toward so_oobmark.
               */
  972         moff = 0;
  973         offset = 0;
  974         while (m && uio->uio_resid > 0 && error == 0) {
                      /* Stop where OOB and normal data meet. */
  975                 if (m->m_type == MT_OOBDATA) {
  976                         if (type != MT_OOBDATA)
  977                                 break;
  978                 } else if (type == MT_OOBDATA)
  979                         break;
  980                 else
  981                     KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
  982                         ("m->m_type == %d", m->m_type));
  983                 so->so_state &= ~SS_RCVATMARK;
                      /*
                       * Clamp this step to the request size, the distance to
                       * the OOB mark, and what is left in this mbuf.
                       */
  984                 len = uio->uio_resid;
  985                 if (so->so_oobmark && len > so->so_oobmark - offset)
  986                         len = so->so_oobmark - offset;
  987                 if (len > m->m_len - moff)
  988                         len = m->m_len - moff;
  989                 /*
  990                  * If mp is set, just pass back the mbufs.
  991                  * Otherwise copy them out via the uio, then free.
  992                  * Sockbuf must be consistent here (points to current mbuf,
  993                  * it points to next record) when we drop priority;
  994                  * we must note any additions to the sockbuf when we
  995                  * block interrupts again.
  996                  */
  997                 if (mp == 0) {
  998                         SBLASTRECORDCHK(&so->so_rcv);
  999                         SBLASTMBUFCHK(&so->so_rcv);
 1000                         splx(s);
 1001 #ifdef ZERO_COPY_SOCKETS
 1002                         if (so_zero_copy_receive) {
 1003                                 vm_page_t pg;
 1004                                 int disposable;
 1005 
 1006                                 if ((m->m_flags & M_EXT)
 1007                                  && (m->m_ext.ext_type == EXT_DISPOSABLE))
 1008                                         disposable = 1;
 1009                                 else
 1010                                         disposable = 0;
 1011 
 1012                                 pg = PHYS_TO_VM_PAGE(vtophys(mtod(m, caddr_t) +
 1013                                         moff));
 1014 
 1015                                 if (uio->uio_offset == -1)
 1016                                         uio->uio_offset =IDX_TO_OFF(pg->pindex);
 1017 
 1018                                 error = uiomoveco(mtod(m, char *) + moff,
 1019                                                   (int)len, uio,pg->object,
 1020                                                   disposable);
 1021                         } else
 1022 #endif /* ZERO_COPY_SOCKETS */
 1023                         error = uiomove(mtod(m, char *) + moff, (int)len, uio);
 1024                         s = splnet();
 1025                         if (error)
 1026                                 goto release;
 1027                 } else
 1028                         uio->uio_resid -= len;
                      /*
                       * Either the rest of this mbuf was consumed (advance to
                       * the next one, unlinking it unless peeking) or only a
                       * prefix was (trim the mbuf in place, or just bump moff
                       * when peeking).
                       */
 1029                 if (len == m->m_len - moff) {
 1030                         if (m->m_flags & M_EOR)
 1031                                 flags |= MSG_EOR;
 1032                         if (flags & MSG_PEEK) {
 1033                                 m = m->m_next;
 1034                                 moff = 0;
 1035                         } else {
 1036                                 nextrecord = m->m_nextpkt;
 1037                                 sbfree(&so->so_rcv, m);
 1038                                 if (mp) {
 1039                                         *mp = m;
 1040                                         mp = &m->m_next;
 1041                                         so->so_rcv.sb_mb = m = m->m_next;
 1042                                         *mp = (struct mbuf *)0;
 1043                                 } else {
 1044                                         so->so_rcv.sb_mb = m_free(m);
 1045                                         m = so->so_rcv.sb_mb;
 1046                                 }
 1047                                 if (m) {
 1048                                         m->m_nextpkt = nextrecord;
 1049                                         if (nextrecord == NULL)
 1050                                                 so->so_rcv.sb_lastrecord = m;
 1051                                 } else {
 1052                                         so->so_rcv.sb_mb = nextrecord;
 1053                                         SB_EMPTY_FIXUP(&so->so_rcv);
 1054                                 }
 1055                                 SBLASTRECORDCHK(&so->so_rcv);
 1056                                 SBLASTMBUFCHK(&so->so_rcv);
 1057                         }
 1058                 } else {
 1059                         if (flags & MSG_PEEK)
 1060                                 moff += len;
 1061                         else {
 1062                                 if (mp)
 1063                                         *mp = m_copym(m, 0, len, M_TRYWAIT);
 1064                                 m->m_data += len;
 1065                                 m->m_len -= len;
 1066                                 so->so_rcv.sb_cc -= len;
 1067                         }
 1068                 }
                      /*
                       * Track progress toward the OOB mark and stop the
                       * transfer exactly at it.
                       */
 1069                 if (so->so_oobmark) {
 1070                         if ((flags & MSG_PEEK) == 0) {
 1071                                 so->so_oobmark -= len;
 1072                                 if (so->so_oobmark == 0) {
 1073                                         so->so_state |= SS_RCVATMARK;
 1074                                         break;
 1075                                 }
 1076                         } else {
 1077                                 offset += len;
 1078                                 if (offset == so->so_oobmark)
 1079                                         break;
 1080                         }
 1081                 }
 1082                 if (flags & MSG_EOR)
 1083                         break;
 1084                 /*
 1085                  * If the MSG_WAITALL flag is set (for non-atomic socket),
 1086                  * we must not quit until "uio->uio_resid == 0" or an error
 1087                  * termination.  If a signal/timeout occurs, return
 1088                  * with a short count but without error.
 1089                  * Keep sockbuf locked against other readers.
 1090                  */
 1091                 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
 1092                     !sosendallatonce(so) && !nextrecord) {
 1093                         if (so->so_error || so->so_state & SS_CANTRCVMORE)
 1094                                 break;
 1095                         /*
 1096                          * Notify the protocol that some data has been
 1097                          * drained before blocking.
 1098                          */
 1099                         if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
 1100                                 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
 1101                         SBLASTRECORDCHK(&so->so_rcv);
 1102                         SBLASTMBUFCHK(&so->so_rcv);
 1103                         error = sbwait(&so->so_rcv);
 1104                         if (error) {
 1105                                 sbunlock(&so->so_rcv);
 1106                                 splx(s);
 1107                                 return (0);
 1108                         }
 1109                         m = so->so_rcv.sb_mb;
 1110                         if (m)
 1111                                 nextrecord = m->m_nextpkt;
 1112                 }
 1113         }
 1114 
              /*
               * Atomic protocols: data still left in the record is reported
               * as MSG_TRUNC and, unless peeking, the rest of the record is
               * dropped.
               */
 1115         if (m && pr->pr_flags & PR_ATOMIC) {
 1116                 flags |= MSG_TRUNC;
 1117                 if ((flags & MSG_PEEK) == 0)
 1118                         (void) sbdroprecord(&so->so_rcv);
 1119         }
 1120         if ((flags & MSG_PEEK) == 0) {
 1121                 if (m == 0) {
 1122                         /*
 1123                          * First part is an inline SB_EMPTY_FIXUP().  Second
 1124                          * part makes sure sb_lastrecord is up-to-date if
 1125                          * there is still data in the socket buffer.
 1126                          */
 1127                         so->so_rcv.sb_mb = nextrecord;
 1128                         if (so->so_rcv.sb_mb == NULL) {
 1129                                 so->so_rcv.sb_mbtail = NULL;
 1130                                 so->so_rcv.sb_lastrecord = NULL;
 1131                         } else if (nextrecord->m_nextpkt == NULL)
 1132                                 so->so_rcv.sb_lastrecord = nextrecord;
 1133                 }
 1134                 SBLASTRECORDCHK(&so->so_rcv);
 1135                 SBLASTMBUFCHK(&so->so_rcv);
 1136                 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
 1137                         (*pr->pr_usrreqs->pru_rcvd)(so, flags);
 1138         }
              /*
               * Retry from the top if no data was moved, no address or
               * control data was delivered (either would have zeroed
               * orig_resid), no end-of-record was seen and the socket can
               * still receive.
               */
 1139         if (orig_resid == uio->uio_resid && orig_resid &&
 1140             (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
 1141                 sbunlock(&so->so_rcv);
 1142                 splx(s);
 1143                 goto restart;
 1144         }
 1145 
 1146         if (flagsp)
 1147                 *flagsp |= flags;
 1148 release:
 1149         sbunlock(&so->so_rcv);
 1150         splx(s);
 1151         return (error);
 1152 }
 1153 
 1154 int
 1155 soshutdown(so, how)
 1156         struct socket *so;
 1157         int how;
 1158 {
 1159         struct protosw *pr = so->so_proto;
 1160 
 1161         if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
 1162                 return (EINVAL);
 1163 
 1164         if (how != SHUT_WR)
 1165                 sorflush(so);
 1166         if (how != SHUT_RD)
 1167                 return ((*pr->pr_usrreqs->pru_shutdown)(so));
 1168         return (0);
 1169 }
 1170 
 1171 void
 1172 sorflush(so)
 1173         struct socket *so;
 1174 {
 1175         struct sockbuf *sb = &so->so_rcv;
 1176         struct protosw *pr = so->so_proto;
 1177         int s;
 1178         struct sockbuf asb;
 1179 
 1180         sb->sb_flags |= SB_NOINTR;
 1181         (void) sblock(sb, M_WAITOK);
 1182         s = splimp();
 1183         socantrcvmore(so);
 1184         sbunlock(sb);
 1185         asb = *sb;
 1186         /*
 1187          * Invalidate/clear most of the sockbuf structure, but keep
 1188          * its selinfo structure valid.
 1189          */
 1190         bzero(&sb->sb_startzero,
 1191             sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
 1192         splx(s);
 1193 
 1194         if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
 1195                 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
 1196         sbrelease(&asb, so);
 1197 }
 1198 
 1199 #ifdef INET
 1200 static int
 1201 do_setopt_accept_filter(so, sopt)
 1202         struct  socket *so;
 1203         struct  sockopt *sopt;
 1204 {
 1205         struct accept_filter_arg        *afap = NULL;
 1206         struct accept_filter    *afp;
 1207         struct so_accf  *af = so->so_accf;
 1208         int     error = 0;
 1209 
 1210         /* do not set/remove accept filters on non listen sockets */
 1211         if ((so->so_options & SO_ACCEPTCONN) == 0) {
 1212                 error = EINVAL;
 1213                 goto out;
 1214         }
 1215 
 1216         /* removing the filter */
 1217         if (sopt == NULL) {
 1218                 if (af != NULL) {
 1219                         if (af->so_accept_filter != NULL &&
 1220                                 af->so_accept_filter->accf_destroy != NULL) {
 1221                                 af->so_accept_filter->accf_destroy(so);
 1222                         }
 1223                         if (af->so_accept_filter_str != NULL) {
 1224                                 FREE(af->so_accept_filter_str, M_ACCF);
 1225                         }
 1226                         FREE(af, M_ACCF);
 1227                         so->so_accf = NULL;
 1228                 }
 1229                 so->so_options &= ~SO_ACCEPTFILTER;
 1230                 return (0);
 1231         }
 1232         /* adding a filter */
 1233         /* must remove previous filter first */
 1234         if (af != NULL) {
 1235                 error = EINVAL;
 1236                 goto out;
 1237         }
 1238         /* don't put large objects on the kernel stack */
 1239         MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP, M_WAITOK);
 1240         error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
 1241         afap->af_name[sizeof(afap->af_name)-1] = '\0';
 1242         afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
 1243         if (error)
 1244                 goto out;
 1245         afp = accept_filt_get(afap->af_name);
 1246         if (afp == NULL) {
 1247                 error = ENOENT;
 1248                 goto out;
 1249         }
 1250         MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK | M_ZERO);
 1251         if (afp->accf_create != NULL) {
 1252                 if (afap->af_name[0] != '\0') {
 1253                         int len = strlen(afap->af_name) + 1;
 1254 
 1255                         MALLOC(af->so_accept_filter_str, char *, len, M_ACCF, M_WAITOK);
 1256                         strcpy(af->so_accept_filter_str, afap->af_name);
 1257                 }
 1258                 af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg);
 1259                 if (af->so_accept_filter_arg == NULL) {
 1260                         FREE(af->so_accept_filter_str, M_ACCF);
 1261                         FREE(af, M_ACCF);
 1262                         so->so_accf = NULL;
 1263                         error = EINVAL;
 1264                         goto out;
 1265                 }
 1266         }
 1267         af->so_accept_filter = afp;
 1268         so->so_accf = af;
 1269         so->so_options |= SO_ACCEPTFILTER;
 1270 out:
 1271         if (afap != NULL)
 1272                 FREE(afap, M_TEMP);
 1273         return (error);
 1274 }
 1275 #endif /* INET */
 1276 
 1277 /*
 1278  * Perhaps this routine, and sooptcopyout(), below, ought to come in
 1279  * an additional variant to handle the case where the option value needs
 1280  * to be some kind of integer, but not a specific size.
 1281  * In addition to their use here, these functions are also called by the
 1282  * protocol-level pr_ctloutput() routines.
 1283  */
 1284 int
 1285 sooptcopyin(sopt, buf, len, minlen)
 1286         struct  sockopt *sopt;
 1287         void    *buf;
 1288         size_t  len;
 1289         size_t  minlen;
 1290 {
 1291         size_t  valsize;
 1292 
 1293         /*
 1294          * If the user gives us more than we wanted, we ignore it,
 1295          * but if we don't get the minimum length the caller
 1296          * wants, we return EINVAL.  On success, sopt->sopt_valsize
 1297          * is set to however much we actually retrieved.
 1298          */
 1299         if ((valsize = sopt->sopt_valsize) < minlen)
 1300                 return EINVAL;
 1301         if (valsize > len)
 1302                 sopt->sopt_valsize = valsize = len;
 1303 
 1304         if (sopt->sopt_td != 0)
 1305                 return (copyin(sopt->sopt_val, buf, valsize));
 1306 
 1307         bcopy(sopt->sopt_val, buf, valsize);
 1308         return 0;
 1309 }
 1310 
 1311 int
 1312 sosetopt(so, sopt)
 1313         struct socket *so;
 1314         struct sockopt *sopt;
 1315 {
 1316         int     error, optval;
 1317         struct  linger l;
 1318         struct  timeval tv;
 1319         u_long  val;
 1320 #ifdef MAC
 1321         struct mac extmac;
 1322 #endif
 1323 
 1324         error = 0;
 1325         if (sopt->sopt_level != SOL_SOCKET) {
 1326                 if (so->so_proto && so->so_proto->pr_ctloutput)
 1327                         return ((*so->so_proto->pr_ctloutput)
 1328                                   (so, sopt));
 1329                 error = ENOPROTOOPT;
 1330         } else {
 1331                 switch (sopt->sopt_name) {
 1332 #ifdef INET
 1333                 case SO_ACCEPTFILTER:
 1334                         error = do_setopt_accept_filter(so, sopt);
 1335                         if (error)
 1336                                 goto bad;
 1337                         break;
 1338 #endif
 1339                 case SO_LINGER:
 1340                         error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
 1341                         if (error)
 1342                                 goto bad;
 1343 
 1344                         so->so_linger = l.l_linger;
 1345                         if (l.l_onoff)
 1346                                 so->so_options |= SO_LINGER;
 1347                         else
 1348                                 so->so_options &= ~SO_LINGER;
 1349                         break;
 1350 
 1351                 case SO_DEBUG:
 1352                 case SO_KEEPALIVE:
 1353                 case SO_DONTROUTE:
 1354                 case SO_USELOOPBACK:
 1355                 case SO_BROADCAST:
 1356                 case SO_REUSEADDR:
 1357                 case SO_REUSEPORT:
 1358                 case SO_OOBINLINE:
 1359                 case SO_TIMESTAMP:
 1360                 case SO_NOSIGPIPE:
 1361                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1362                                             sizeof optval);
 1363                         if (error)
 1364                                 goto bad;
 1365                         if (optval)
 1366                                 so->so_options |= sopt->sopt_name;
 1367                         else
 1368                                 so->so_options &= ~sopt->sopt_name;
 1369                         break;
 1370 
 1371                 case SO_SNDBUF:
 1372                 case SO_RCVBUF:
 1373                 case SO_SNDLOWAT:
 1374                 case SO_RCVLOWAT:
 1375                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1376                                             sizeof optval);
 1377                         if (error)
 1378                                 goto bad;
 1379 
 1380                         /*
 1381                          * Values < 1 make no sense for any of these
 1382                          * options, so disallow them.
 1383                          */
 1384                         if (optval < 1) {
 1385                                 error = EINVAL;
 1386                                 goto bad;
 1387                         }
 1388 
 1389                         switch (sopt->sopt_name) {
 1390                         case SO_SNDBUF:
 1391                         case SO_RCVBUF:
 1392                                 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
 1393                                     &so->so_snd : &so->so_rcv, (u_long)optval,
 1394                                     so, curthread) == 0) {
 1395                                         error = ENOBUFS;
 1396                                         goto bad;
 1397                                 }
 1398                                 break;
 1399 
 1400                         /*
 1401                          * Make sure the low-water is never greater than
 1402                          * the high-water.
 1403                          */
 1404                         case SO_SNDLOWAT:
 1405                                 so->so_snd.sb_lowat =
 1406                                     (optval > so->so_snd.sb_hiwat) ?
 1407                                     so->so_snd.sb_hiwat : optval;
 1408                                 break;
 1409                         case SO_RCVLOWAT:
 1410                                 so->so_rcv.sb_lowat =
 1411                                     (optval > so->so_rcv.sb_hiwat) ?
 1412                                     so->so_rcv.sb_hiwat : optval;
 1413                                 break;
 1414                         }
 1415                         break;
 1416 
 1417                 case SO_SNDTIMEO:
 1418                 case SO_RCVTIMEO:
 1419                         error = sooptcopyin(sopt, &tv, sizeof tv,
 1420                                             sizeof tv);
 1421                         if (error)
 1422                                 goto bad;
 1423 
 1424                         /* assert(hz > 0); */
 1425                         if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
 1426                             tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
 1427                                 error = EDOM;
 1428                                 goto bad;
 1429                         }
 1430                         /* assert(tick > 0); */
 1431                         /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
 1432                         val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
 1433                         if (val > SHRT_MAX) {
 1434                                 error = EDOM;
 1435                                 goto bad;
 1436                         }
 1437                         if (val == 0 && tv.tv_usec != 0)
 1438                                 val = 1;
 1439 
 1440                         switch (sopt->sopt_name) {
 1441                         case SO_SNDTIMEO:
 1442                                 so->so_snd.sb_timeo = val;
 1443                                 break;
 1444                         case SO_RCVTIMEO:
 1445                                 so->so_rcv.sb_timeo = val;
 1446                                 break;
 1447                         }
 1448                         break;
 1449                 case SO_LABEL:
 1450 #ifdef MAC
 1451                         error = sooptcopyin(sopt, &extmac, sizeof extmac,
 1452                             sizeof extmac);
 1453                         if (error)
 1454                                 goto bad;
 1455                         error = mac_setsockopt_label(sopt->sopt_td->td_ucred,
 1456                             so, &extmac);
 1457 #else
 1458                         error = EOPNOTSUPP;
 1459 #endif
 1460                         break;
 1461                 default:
 1462                         error = ENOPROTOOPT;
 1463                         break;
 1464                 }
 1465                 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
 1466                         (void) ((*so->so_proto->pr_ctloutput)
 1467                                   (so, sopt));
 1468                 }
 1469         }
 1470 bad:
 1471         return (error);
 1472 }
 1473 
 1474 /* Helper routine for getsockopt */
 1475 int
 1476 sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
 1477 {
 1478         int     error;
 1479         size_t  valsize;
 1480 
 1481         error = 0;
 1482 
 1483         /*
 1484          * Documented get behavior is that we always return a value,
 1485          * possibly truncated to fit in the user's buffer.
 1486          * Traditional behavior is that we always tell the user
 1487          * precisely how much we copied, rather than something useful
 1488          * like the total amount we had available for her.
 1489          * Note that this interface is not idempotent; the entire answer must
 1490          * generated ahead of time.
 1491          */
 1492         valsize = min(len, sopt->sopt_valsize);
 1493         sopt->sopt_valsize = valsize;
 1494         if (sopt->sopt_val != 0) {
 1495                 if (sopt->sopt_td != 0)
 1496                         error = copyout(buf, sopt->sopt_val, valsize);
 1497                 else
 1498                         bcopy(buf, sopt->sopt_val, valsize);
 1499         }
 1500         return error;
 1501 }
 1502 
 1503 int
 1504 sogetopt(so, sopt)
 1505         struct socket *so;
 1506         struct sockopt *sopt;
 1507 {
 1508         int     error, optval;
 1509         struct  linger l;
 1510         struct  timeval tv;
 1511 #ifdef INET
 1512         struct accept_filter_arg *afap;
 1513 #endif
 1514 #ifdef MAC
 1515         struct mac extmac;
 1516 #endif
 1517 
 1518         error = 0;
 1519         if (sopt->sopt_level != SOL_SOCKET) {
 1520                 if (so->so_proto && so->so_proto->pr_ctloutput) {
 1521                         return ((*so->so_proto->pr_ctloutput)
 1522                                   (so, sopt));
 1523                 } else
 1524                         return (ENOPROTOOPT);
 1525         } else {
 1526                 switch (sopt->sopt_name) {
 1527 #ifdef INET
 1528                 case SO_ACCEPTFILTER:
 1529                         if ((so->so_options & SO_ACCEPTCONN) == 0)
 1530                                 return (EINVAL);
 1531                         MALLOC(afap, struct accept_filter_arg *, sizeof(*afap),
 1532                                 M_TEMP, M_WAITOK | M_ZERO);
 1533                         if ((so->so_options & SO_ACCEPTFILTER) != 0) {
 1534                                 strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
 1535                                 if (so->so_accf->so_accept_filter_str != NULL)
 1536                                         strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
 1537                         }
 1538                         error = sooptcopyout(sopt, afap, sizeof(*afap));
 1539                         FREE(afap, M_TEMP);
 1540                         break;
 1541 #endif
 1542 
 1543                 case SO_LINGER:
 1544                         l.l_onoff = so->so_options & SO_LINGER;
 1545                         l.l_linger = so->so_linger;
 1546                         error = sooptcopyout(sopt, &l, sizeof l);
 1547                         break;
 1548 
 1549                 case SO_USELOOPBACK:
 1550                 case SO_DONTROUTE:
 1551                 case SO_DEBUG:
 1552                 case SO_KEEPALIVE:
 1553                 case SO_REUSEADDR:
 1554                 case SO_REUSEPORT:
 1555                 case SO_BROADCAST:
 1556                 case SO_OOBINLINE:
 1557                 case SO_TIMESTAMP:
 1558                 case SO_NOSIGPIPE:
 1559                         optval = so->so_options & sopt->sopt_name;
 1560 integer:
 1561                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1562                         break;
 1563 
 1564                 case SO_TYPE:
 1565                         optval = so->so_type;
 1566                         goto integer;
 1567 
 1568                 case SO_ERROR:
 1569                         optval = so->so_error;
 1570                         so->so_error = 0;
 1571                         goto integer;
 1572 
 1573                 case SO_SNDBUF:
 1574                         optval = so->so_snd.sb_hiwat;
 1575                         goto integer;
 1576 
 1577                 case SO_RCVBUF:
 1578                         optval = so->so_rcv.sb_hiwat;
 1579                         goto integer;
 1580 
 1581                 case SO_SNDLOWAT:
 1582                         optval = so->so_snd.sb_lowat;
 1583                         goto integer;
 1584 
 1585                 case SO_RCVLOWAT:
 1586                         optval = so->so_rcv.sb_lowat;
 1587                         goto integer;
 1588 
 1589                 case SO_SNDTIMEO:
 1590                 case SO_RCVTIMEO:
 1591                         optval = (sopt->sopt_name == SO_SNDTIMEO ?
 1592                                   so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
 1593 
 1594                         tv.tv_sec = optval / hz;
 1595                         tv.tv_usec = (optval % hz) * tick;
 1596                         error = sooptcopyout(sopt, &tv, sizeof tv);
 1597                         break;
 1598                 case SO_LABEL:
 1599 #ifdef MAC
 1600                         error = sooptcopyin(sopt, &extmac, sizeof(extmac),
 1601                             sizeof(extmac));
 1602                         if (error)
 1603                                 return (error);
 1604                         error = mac_getsockopt_label(sopt->sopt_td->td_ucred,
 1605                             so, &extmac);
 1606                         if (error)
 1607                                 return (error);
 1608                         error = sooptcopyout(sopt, &extmac, sizeof extmac);
 1609 #else
 1610                         error = EOPNOTSUPP;
 1611 #endif
 1612                         break;
 1613                 case SO_PEERLABEL:
 1614 #ifdef MAC
 1615                         error = sooptcopyin(sopt, &extmac, sizeof(extmac),
 1616                             sizeof(extmac));
 1617                         if (error)
 1618                                 return (error);
 1619                         error = mac_getsockopt_peerlabel(
 1620                             sopt->sopt_td->td_ucred, so, &extmac);
 1621                         if (error)
 1622                                 return (error);
 1623                         error = sooptcopyout(sopt, &extmac, sizeof extmac);
 1624 #else
 1625                         error = EOPNOTSUPP;
 1626 #endif
 1627                         break;
 1628                 default:
 1629                         error = ENOPROTOOPT;
 1630                         break;
 1631                 }
 1632                 return (error);
 1633         }
 1634 }
 1635 
 1636 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
 1637 int
 1638 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 1639 {
 1640         struct mbuf *m, *m_prev;
 1641         int sopt_size = sopt->sopt_valsize;
 1642 
 1643         MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA);
 1644         if (m == 0)
 1645                 return ENOBUFS;
 1646         if (sopt_size > MLEN) {
 1647                 MCLGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT);
 1648                 if ((m->m_flags & M_EXT) == 0) {
 1649                         m_free(m);
 1650                         return ENOBUFS;
 1651                 }
 1652                 m->m_len = min(MCLBYTES, sopt_size);
 1653         } else {
 1654                 m->m_len = min(MLEN, sopt_size);
 1655         }
 1656         sopt_size -= m->m_len;
 1657         *mp = m;
 1658         m_prev = m;
 1659 
 1660         while (sopt_size) {
 1661                 MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_DATA);
 1662                 if (m == 0) {
 1663                         m_freem(*mp);
 1664                         return ENOBUFS;
 1665                 }
 1666                 if (sopt_size > MLEN) {
 1667                         MCLGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT);
 1668                         if ((m->m_flags & M_EXT) == 0) {
 1669                                 m_freem(*mp);
 1670                                 return ENOBUFS;
 1671                         }
 1672                         m->m_len = min(MCLBYTES, sopt_size);
 1673                 } else {
 1674                         m->m_len = min(MLEN, sopt_size);
 1675                 }
 1676                 sopt_size -= m->m_len;
 1677                 m_prev->m_next = m;
 1678                 m_prev = m;
 1679         }
 1680         return 0;
 1681 }
 1682 
 1683 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
 1684 int
 1685 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
 1686 {
 1687         struct mbuf *m0 = m;
 1688 
 1689         if (sopt->sopt_val == NULL)
 1690                 return 0;
 1691         while (m != NULL && sopt->sopt_valsize >= m->m_len) {
 1692                 if (sopt->sopt_td != NULL) {
 1693                         int error;
 1694 
 1695                         error = copyin(sopt->sopt_val, mtod(m, char *),
 1696                                        m->m_len);
 1697                         if (error != 0) {
 1698                                 m_freem(m0);
 1699                                 return(error);
 1700                         }
 1701                 } else
 1702                         bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
 1703                 sopt->sopt_valsize -= m->m_len;
 1704                 (caddr_t)sopt->sopt_val += m->m_len;
 1705                 m = m->m_next;
 1706         }
 1707         if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */
 1708                 panic("ip6_sooptmcopyin");
 1709         return 0;
 1710 }
 1711 
 1712 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
 1713 int
 1714 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
 1715 {
 1716         struct mbuf *m0 = m;
 1717         size_t valsize = 0;
 1718 
 1719         if (sopt->sopt_val == NULL)
 1720                 return 0;
 1721         while (m != NULL && sopt->sopt_valsize >= m->m_len) {
 1722                 if (sopt->sopt_td != NULL) {
 1723                         int error;
 1724 
 1725                         error = copyout(mtod(m, char *), sopt->sopt_val,
 1726                                        m->m_len);
 1727                         if (error != 0) {
 1728                                 m_freem(m0);
 1729                                 return(error);
 1730                         }
 1731                 } else
 1732                         bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
 1733                sopt->sopt_valsize -= m->m_len;
 1734                (caddr_t)sopt->sopt_val += m->m_len;
 1735                valsize += m->m_len;
 1736                m = m->m_next;
 1737         }
 1738         if (m != NULL) {
 1739                 /* enough soopt buffer should be given from user-land */
 1740                 m_freem(m0);
 1741                 return(EINVAL);
 1742         }
 1743         sopt->sopt_valsize = valsize;
 1744         return 0;
 1745 }
 1746 
 1747 void
 1748 sohasoutofband(so)
 1749         struct socket *so;
 1750 {
 1751         if (so->so_sigio != NULL)
 1752                 pgsigio(&so->so_sigio, SIGURG, 0);
 1753         selwakeuppri(&so->so_rcv.sb_sel, PSOCK);
 1754 }
 1755 
 1756 int
 1757 sopoll(struct socket *so, int events, struct ucred *active_cred,
 1758     struct thread *td)
 1759 {
 1760         int revents = 0;
 1761         int s = splnet();
 1762 
 1763         if (events & (POLLIN | POLLRDNORM))
 1764                 if (soreadable(so))
 1765                         revents |= events & (POLLIN | POLLRDNORM);
 1766 
 1767         if (events & POLLINIGNEOF)
 1768                 if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat ||
 1769                     !TAILQ_EMPTY(&so->so_comp) || so->so_error)
 1770                         revents |= POLLINIGNEOF;
 1771 
 1772         if (events & (POLLOUT | POLLWRNORM))
 1773                 if (sowriteable(so))
 1774                         revents |= events & (POLLOUT | POLLWRNORM);
 1775 
 1776         if (events & (POLLPRI | POLLRDBAND))
 1777                 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
 1778                         revents |= events & (POLLPRI | POLLRDBAND);
 1779 
 1780         if (revents == 0) {
 1781                 if (events &
 1782                     (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM |
 1783                      POLLRDBAND)) {
 1784                         selrecord(td, &so->so_rcv.sb_sel);
 1785                         so->so_rcv.sb_flags |= SB_SEL;
 1786                 }
 1787 
 1788                 if (events & (POLLOUT | POLLWRNORM)) {
 1789                         selrecord(td, &so->so_snd.sb_sel);
 1790                         so->so_snd.sb_flags |= SB_SEL;
 1791                 }
 1792         }
 1793 
 1794         splx(s);
 1795         return (revents);
 1796 }
 1797 
 1798 int
 1799 soo_kqfilter(struct file *fp, struct knote *kn)
 1800 {
 1801         struct socket *so = kn->kn_fp->f_data;
 1802         struct sockbuf *sb;
 1803         int s;
 1804 
 1805         switch (kn->kn_filter) {
 1806         case EVFILT_READ:
 1807                 if (so->so_options & SO_ACCEPTCONN)
 1808                         kn->kn_fop = &solisten_filtops;
 1809                 else
 1810                         kn->kn_fop = &soread_filtops;
 1811                 sb = &so->so_rcv;
 1812                 break;
 1813         case EVFILT_WRITE:
 1814                 kn->kn_fop = &sowrite_filtops;
 1815                 sb = &so->so_snd;
 1816                 break;
 1817         default:
 1818                 return (1);
 1819         }
 1820 
 1821         s = splnet();
 1822         SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
 1823         sb->sb_flags |= SB_KNOTE;
 1824         splx(s);
 1825         return (0);
 1826 }
 1827 
 1828 static void
 1829 filt_sordetach(struct knote *kn)
 1830 {
 1831         struct socket *so = kn->kn_fp->f_data;
 1832         int s = splnet();
 1833 
 1834         SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
 1835         if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
 1836                 so->so_rcv.sb_flags &= ~SB_KNOTE;
 1837         splx(s);
 1838 }
 1839 
 1840 /*ARGSUSED*/
 1841 static int
 1842 filt_soread(struct knote *kn, long hint)
 1843 {
 1844         struct socket *so = kn->kn_fp->f_data;
 1845 
 1846         kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
 1847         if (so->so_state & SS_CANTRCVMORE) {
 1848                 kn->kn_flags |= EV_EOF;
 1849                 kn->kn_fflags = so->so_error;
 1850                 return (1);
 1851         }
 1852         if (so->so_error)       /* temporary udp error */
 1853                 return (1);
 1854         if (kn->kn_sfflags & NOTE_LOWAT)
 1855                 return (kn->kn_data >= kn->kn_sdata);
 1856         return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat);
 1857 }
 1858 
 1859 static void
 1860 filt_sowdetach(struct knote *kn)
 1861 {
 1862         struct socket *so = kn->kn_fp->f_data;
 1863         int s = splnet();
 1864 
 1865         SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
 1866         if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
 1867                 so->so_snd.sb_flags &= ~SB_KNOTE;
 1868         splx(s);
 1869 }
 1870 
 1871 /*ARGSUSED*/
 1872 static int
 1873 filt_sowrite(struct knote *kn, long hint)
 1874 {
 1875         struct socket *so = kn->kn_fp->f_data;
 1876 
 1877         kn->kn_data = sbspace(&so->so_snd);
 1878         if (so->so_state & SS_CANTSENDMORE) {
 1879                 kn->kn_flags |= EV_EOF;
 1880                 kn->kn_fflags = so->so_error;
 1881                 return (1);
 1882         }
 1883         if (so->so_error)       /* temporary udp error */
 1884                 return (1);
 1885         if (((so->so_state & SS_ISCONNECTED) == 0) &&
 1886             (so->so_proto->pr_flags & PR_CONNREQUIRED))
 1887                 return (0);
 1888         if (kn->kn_sfflags & NOTE_LOWAT)
 1889                 return (kn->kn_data >= kn->kn_sdata);
 1890         return (kn->kn_data >= so->so_snd.sb_lowat);
 1891 }
 1892 
 1893 /*ARGSUSED*/
 1894 static int
 1895 filt_solisten(struct knote *kn, long hint)
 1896 {
 1897         struct socket *so = kn->kn_fp->f_data;
 1898 
 1899         kn->kn_data = so->so_qlen;
 1900         return (! TAILQ_EMPTY(&so->so_comp));
 1901 }
 1902 
 1903 int
 1904 socheckuid(struct socket *so, uid_t uid)
 1905 {
 1906 
 1907         if (so == NULL)
 1908                 return (EPERM);
 1909         if (so->so_cred->cr_uid == uid)
 1910                 return (0);
 1911         return (EPERM);
 1912 }

Cache object: 0f49789cee2d0adc7d077f1b954a6205


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.