/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2004 The FreeBSD Foundation
 * Copyright (c) 2004-2008 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

/*
 * Comments on the socket life cycle:
 *
 * soalloc() sets up socket layer state for a socket, called only by
 * socreate() and sonewconn(). Socket layer private.
 *
 * sodealloc() tears down socket layer state for a socket, called only by
 * sofree() and sonewconn(). Socket layer private.
 *
 * pru_attach() associates protocol layer state with an allocated socket;
 * called only once, may fail, aborting socket allocation. This is called
 * from socreate() and sonewconn(). Socket layer private.
 *
 * pru_detach() disassociates protocol layer state from an attached socket,
 * and will be called exactly once for sockets in which pru_attach() has
 * been successfully called. If pru_attach() returned an error,
 * pru_detach() will not be called. Socket layer private.
 *
 * pru_abort() and pru_close() notify the protocol layer that the last
 * consumer of a socket is starting to tear down the socket, and that the
 * protocol should terminate the connection. Historically, pru_abort() also
 * detached protocol state from the socket state, but this is no longer the
 * case.
 *
 * socreate() creates a socket and attaches protocol state. This is a public
 * interface that may be used by socket layer consumers to create new
 * sockets.
 *
 * sonewconn() creates a socket and attaches protocol state. This is a
 * public interface that may be used by protocols to create new sockets when
 * a new connection is received and will be available for accept() on a
 * listen socket.
 *
 * soclose() destroys a socket after possibly waiting for it to disconnect.
 * This is a public interface that socket consumers should use to close and
 * release a socket when done with it.
 *
 * soabort() destroys a socket without waiting for it to disconnect (used
 * only for incoming connections that are already partially or fully
 * connected). This is used internally by the socket layer when clearing
 * listen socket queues (due to overflow or close on the listen socket), but
 * is also a public interface protocols may use to abort connections in
 * their incomplete listen queues should they no longer be required. Sockets
 * placed in completed connection listen queues should not be aborted for
 * reasons described in the comment above the soclose() implementation. This
 * is not a general purpose close routine, and except in the specific
 * circumstances described here, should not be used.
 *
 * sofree() will free a socket and its protocol state if all references on
 * the socket have been released, and is the public interface to attempt to
 * free a socket when a reference is removed. This is a socket layer private
 * interface.
 *
 * NOTE: In addition to socreate() and soclose(), which provide a single
 * socket reference to the consumer to be managed as required, there are two
 * calls to explicitly manage socket references, soref() and sorele().
 * Currently, these are generally required only when transitioning a socket
 * from a listen queue to a file descriptor, in order to prevent garbage
 * collection of the socket at an untimely moment. For a number of reasons,
 * these interfaces are not preferred, and should be avoided.
 *
 * NOTE: With regard to VNETs the general rule is that callers do not set
 * curvnet. Exceptions to this rule include soabort(), sodisconnect(),
 * sofree() (and with that sorele(), sotryfree()), as well as sonewconn()
 * and sorflush(), which are usually called from a pre-set VNET context.
 * sopoll() currently does not need a VNET context to be set.
 */
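
/*
 * Illustrative sketch (hypothetical consumer code, not part of this file):
 * a kernel component using the public interfaces above would follow the
 * life cycle like this, with "td" a curthread pointer:
 *
 *	struct socket *so;
 *	int error;
 *
 *	error = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP,
 *	    td->td_ucred, td);
 *	if (error != 0)
 *		return (error);
 *	... sobind(), soconnect(), sosend(), soreceive() as needed ...
 *	error = soclose(so);
 *
 * socreate() hands back a single reference; soclose() releases it, and the
 * socket structure is freed once all other references are gone.
 */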

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/12.0/sys/kern/uipc_socket.c 340980 2018-11-26 16:36:38Z markj $");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_sctp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/domain.h>
#include <sys/file.h>			/* for struct knote */
#include <sys/hhook.h>
#include <sys/kernel.h>
#include <sys/khelp.h>
#include <sys/event.h>
#include <sys/eventhandler.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <net/route.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/uio.h>
#include <sys/jail.h>
#include <sys/syslog.h>
#include <netinet/in.h>

#include <net/vnet.h>

#include <security/mac/mac_framework.h>

#include <vm/uma.h>

#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
#include <sys/sysent.h>
#include <compat/freebsd32/freebsd32.h>
#endif

static int	soreceive_rcvoob(struct socket *so, struct uio *uio,
		    int flags);
static void	so_rdknl_lock(void *);
static void	so_rdknl_unlock(void *);
static void	so_rdknl_assert_locked(void *);
static void	so_rdknl_assert_unlocked(void *);
static void	so_wrknl_lock(void *);
static void	so_wrknl_unlock(void *);
static void	so_wrknl_assert_locked(void *);
static void	so_wrknl_assert_unlocked(void *);

static void	filt_sordetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static void	filt_sowdetach(struct knote *kn);
static int	filt_sowrite(struct knote *kn, long hint);
static int	filt_soempty(struct knote *kn, long hint);
static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
fo_kqfilter_t	soo_kqfilter;

static struct filterops soread_filtops = {
	.f_isfd = 1,
	.f_detach = filt_sordetach,
	.f_event = filt_soread,
};
static struct filterops sowrite_filtops = {
	.f_isfd = 1,
	.f_detach = filt_sowdetach,
	.f_event = filt_sowrite,
};
static struct filterops soempty_filtops = {
	.f_isfd = 1,
	.f_detach = filt_sowdetach,
	.f_event = filt_soempty,
};

so_gen_t so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

#define	VNET_SO_ASSERT(so)						\
	VNET_ASSERT(curvnet != NULL,					\
	    ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so)));

VNET_DEFINE(struct hhook_head *, socket_hhh[HHOOK_SOCKET_LAST + 1]);
#define	V_socket_hhh		VNET(socket_hhh)

/*
 * Limit on the number of connections in the listen queue waiting
 * for accept(2).
 * NB: The original sysctl somaxconn is still available but hidden
 * to prevent confusion about the actual purpose of this number.
 */
static u_int somaxconn = SOMAXCONN;

static int
sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
{
	int error;
	int val;

	val = somaxconn;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);

	/*
	 * The purpose of the UINT_MAX / 3 limit is so that the formula
	 *	3 * so_qlimit / 2
	 * below will not overflow.
	 */

	if (val < 1 || val > UINT_MAX / 3)
		return (EINVAL);

	somaxconn = val;
	return (0);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, soacceptqueue, CTLTYPE_UINT | CTLFLAG_RW,
    0, sizeof(int), sysctl_somaxconn, "I",
    "Maximum listen socket pending connection accept queue size");
SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP,
    0, sizeof(int), sysctl_somaxconn, "I",
    "Maximum listen socket pending connection accept queue size (compat)");

static int numopensockets;
SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
    &numopensockets, 0, "Number of open sockets");

/*
 * accept_mtx locks down per-socket fields relating to accept queues. See
 * socketvar.h for an annotation of the protected fields of struct socket.
 */
struct mtx accept_mtx;
MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF);

/*
 * so_global_mtx protects so_gencnt, numopensockets, and the per-socket
 * so_gencnt field.
 */
static struct mtx so_global_mtx;
MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF);

/*
 * General IPC sysctl name space, used by sockets and a variety of other IPC
 * types.
 */
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");

/*
 * Initialize the socket subsystem and set up the socket
 * memory allocator.
 */
static uma_zone_t socket_zone;
int	maxsockets;

static void
socket_zone_change(void *tag)
{

	maxsockets = uma_zone_set_max(socket_zone, maxsockets);
}

static void
socket_hhook_register(int subtype)
{

	if (hhook_head_register(HHOOK_TYPE_SOCKET, subtype,
	    &V_socket_hhh[subtype],
	    HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
		printf("%s: WARNING: unable to register hook\n", __func__);
}

static void
socket_hhook_deregister(int subtype)
{

	if (hhook_head_deregister(V_socket_hhh[subtype]) != 0)
		printf("%s: WARNING: unable to deregister hook\n", __func__);
}

static void
socket_init(void *tag)
{

	socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	maxsockets = uma_zone_set_max(socket_zone, maxsockets);
	uma_zone_set_warning(socket_zone, "kern.ipc.maxsockets limit reached");
	EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
	    EVENTHANDLER_PRI_FIRST);
}
SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL);

static void
socket_vnet_init(const void *unused __unused)
{
	int i;

	/* We expect a contiguous range */
	for (i = 0; i <= HHOOK_SOCKET_LAST; i++)
		socket_hhook_register(i);
}
VNET_SYSINIT(socket_vnet_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY,
    socket_vnet_init, NULL);

static void
socket_vnet_uninit(const void *unused __unused)
{
	int i;

	for (i = 0; i <= HHOOK_SOCKET_LAST; i++)
		socket_hhook_deregister(i);
}
VNET_SYSUNINIT(socket_vnet_uninit, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY,
    socket_vnet_uninit, NULL);

/*
 * Initialise maxsockets. This SYSINIT must be run after
 * tunable_mbinit().
 */
static void
init_maxsockets(void *ignored)
{

	TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
	maxsockets = imax(maxsockets, maxfiles);
}
SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);

/*
 * Sysctl to get and set the maximum global sockets limit. Notify protocols
 * of the change so that they can update their dependent limits as required.
 */
static int
sysctl_maxsockets(SYSCTL_HANDLER_ARGS)
{
	int error, newmaxsockets;

	newmaxsockets = maxsockets;
	error = sysctl_handle_int(oidp, &newmaxsockets, 0, req);
	if (error == 0 && req->newptr) {
		if (newmaxsockets > maxsockets &&
		    newmaxsockets <= maxfiles) {
			maxsockets = newmaxsockets;
			EVENTHANDLER_INVOKE(maxsockets_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW,
    &maxsockets, 0, sysctl_maxsockets, "IU",
    "Maximum number of sockets available");

/*
 * Socket operation routines. These routines are called by the routines in
 * sys_socket.c or from a system process, and implement the semantics of
 * socket operations by switching out to the protocol specific routines.
 */

/*
 * Get a socket structure from our zone, and initialize it. Note that it
 * would probably be better to allocate socket and PCB at the same time, but
 * I'm not convinced that all the protocols can be easily modified to do
 * this.
 *
 * soalloc() returns a socket with a ref count of 0.
 */
static struct socket *
soalloc(struct vnet *vnet)
{
	struct socket *so;

	so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO);
	if (so == NULL)
		return (NULL);
#ifdef MAC
	if (mac_socket_init(so, M_NOWAIT) != 0) {
		uma_zfree(socket_zone, so);
		return (NULL);
	}
#endif
	if (khelp_init_osd(HELPER_CLASS_SOCKET, &so->osd)) {
		uma_zfree(socket_zone, so);
		return (NULL);
	}

	/*
	 * The socket locking protocol allows locking two sockets at a time,
	 * but the first one must be a listening socket. WITNESS lacks a
	 * feature to change the class of an existing lock, so we use DUPOK.
	 */
	mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
	SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
	SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
	so->so_rcv.sb_sel = &so->so_rdsel;
	so->so_snd.sb_sel = &so->so_wrsel;
	sx_init(&so->so_snd.sb_sx, "so_snd_sx");
	sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
	TAILQ_INIT(&so->so_snd.sb_aiojobq);
	TAILQ_INIT(&so->so_rcv.sb_aiojobq);
	TASK_INIT(&so->so_snd.sb_aiotask, 0, soaio_snd, so);
	TASK_INIT(&so->so_rcv.sb_aiotask, 0, soaio_rcv, so);
#ifdef VIMAGE
	VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p",
	    __func__, __LINE__, so));
	so->so_vnet = vnet;
#endif
	/* We shouldn't need the so_global_mtx */
	if (hhook_run_socket(so, NULL, HHOOK_SOCKET_CREATE)) {
		/* Do we need more comprehensive error returns? */
		uma_zfree(socket_zone, so);
		return (NULL);
	}
	mtx_lock(&so_global_mtx);
	so->so_gencnt = ++so_gencnt;
	++numopensockets;
#ifdef VIMAGE
	vnet->vnet_sockcnt++;
#endif
	mtx_unlock(&so_global_mtx);

	return (so);
}

/*
 * Free the storage associated with a socket at the socket layer, tear down
 * locks, labels, etc. All protocol state is assumed already to have been
 * torn down (and possibly never set up) by the caller.
 */
static void
sodealloc(struct socket *so)
{

	KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count));
	KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL"));

	mtx_lock(&so_global_mtx);
	so->so_gencnt = ++so_gencnt;
	--numopensockets;	/* Could be below, but faster here. */
#ifdef VIMAGE
	VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p",
	    __func__, __LINE__, so));
	so->so_vnet->vnet_sockcnt--;
#endif
	mtx_unlock(&so_global_mtx);
#ifdef MAC
	mac_socket_destroy(so);
#endif
	hhook_run_socket(so, NULL, HHOOK_SOCKET_CLOSE);

	crfree(so->so_cred);
	khelp_destroy_osd(&so->osd);
	if (SOLISTENING(so)) {
		if (so->sol_accept_filter != NULL)
			accept_filt_setopt(so, NULL);
	} else {
		if (so->so_rcv.sb_hiwat)
			(void)chgsbsize(so->so_cred->cr_uidinfo,
			    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
		if (so->so_snd.sb_hiwat)
			(void)chgsbsize(so->so_cred->cr_uidinfo,
			    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
		sx_destroy(&so->so_snd.sb_sx);
		sx_destroy(&so->so_rcv.sb_sx);
		SOCKBUF_LOCK_DESTROY(&so->so_snd);
		SOCKBUF_LOCK_DESTROY(&so->so_rcv);
	}
	mtx_destroy(&so->so_lock);
	uma_zfree(socket_zone, so);
}

/*
 * socreate returns a socket with a ref count of 1. The socket should be
 * closed with soclose().
 */
int
socreate(int dom, struct socket **aso, int type, int proto,
    struct ucred *cred, struct thread *td)
{
	struct protosw *prp;
	struct socket *so;
	int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == NULL) {
		/* No support for domain. */
		if (pffinddomain(dom) == NULL)
			return (EAFNOSUPPORT);
		/* No support for socket type. */
		if (proto == 0 && type != 0)
			return (EPROTOTYPE);
		return (EPROTONOSUPPORT);
	}
	if (prp->pr_usrreqs->pru_attach == NULL ||
	    prp->pr_usrreqs->pru_attach == pru_attach_notsupp)
		return (EPROTONOSUPPORT);

	if (prison_check_af(cred, prp->pr_domain->dom_family) != 0)
		return (EPROTONOSUPPORT);

	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(CRED_TO_VNET(cred));
	if (so == NULL)
		return (ENOBUFS);

	so->so_type = type;
	so->so_cred = crhold(cred);
	if ((prp->pr_domain->dom_family == PF_INET) ||
	    (prp->pr_domain->dom_family == PF_INET6) ||
	    (prp->pr_domain->dom_family == PF_ROUTE))
		so->so_fibnum = td->td_proc->p_fibnum;
	else
		so->so_fibnum = 0;
	so->so_proto = prp;
#ifdef MAC
	mac_socket_create(cred, so);
#endif
	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
	    so_rdknl_assert_locked, so_rdknl_assert_unlocked);
	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
	    so_wrknl_assert_locked, so_wrknl_assert_unlocked);
	/*
	 * Auto-sizing of socket buffers is managed by the protocols and
	 * the appropriate flags must be set in the pru_attach function.
	 */
	CURVNET_SET(so->so_vnet);
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
	CURVNET_RESTORE();
	if (error) {
		sodealloc(so);
		return (error);
	}
	soref(so);
	*aso = so;
	return (0);
}

#ifdef REGRESSION
static int regression_sonewconn_earlytest = 1;
SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
    &regression_sonewconn_earlytest, 0, "Perform early sonewconn limit test");
#endif

/*
 * When an attempt at a new connection is noted on a socket which accepts
 * connections, sonewconn is called. If the connection is possible (subject
 * to space constraints, etc.) then we allocate a new structure, properly
 * linked into the data structure of the original socket, and return this.
 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
 *
 * Note: the ref count on the socket is 0 on return.
 */
struct socket *
sonewconn(struct socket *head, int connstatus)
{
	static struct timeval lastover;
	static struct timeval overinterval = { 60, 0 };
	static int overcount;

	struct socket *so;
	u_int over;

	SOLISTEN_LOCK(head);
	over = (head->sol_qlen > 3 * head->sol_qlimit / 2);
	SOLISTEN_UNLOCK(head);
#ifdef REGRESSION
	if (regression_sonewconn_earlytest && over) {
#else
	if (over) {
#endif
		overcount++;

		if (ratecheck(&lastover, &overinterval)) {
			log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: "
			    "%i already in queue awaiting acceptance "
			    "(%d occurrences)\n",
			    __func__, head->so_pcb, head->sol_qlen, overcount);

			overcount = 0;
		}

		return (NULL);
	}
	VNET_ASSERT(head->so_vnet != NULL, ("%s: so %p vnet is NULL",
	    __func__, head));
	so = soalloc(head->so_vnet);
	if (so == NULL) {
		log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
		    "limit reached or out of memory\n",
		    __func__, head->so_pcb);
		return (NULL);
	}
	so->so_listen = head;
	so->so_type = head->so_type;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_fibnum = head->so_fibnum;
	so->so_proto = head->so_proto;
	so->so_cred = crhold(head->so_cred);
#ifdef MAC
	mac_socket_newconn(head, so);
#endif
	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
	    so_rdknl_assert_locked, so_rdknl_assert_unlocked);
	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
	    so_wrknl_assert_locked, so_wrknl_assert_unlocked);
	VNET_SO_ASSERT(head);
	if (soreserve(so, head->sol_sbsnd_hiwat, head->sol_sbrcv_hiwat)) {
		sodealloc(so);
		log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
		    __func__, head->so_pcb);
		return (NULL);
	}
	if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
		sodealloc(so);
		log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
		    __func__, head->so_pcb);
		return (NULL);
	}
	so->so_rcv.sb_lowat = head->sol_sbrcv_lowat;
	so->so_snd.sb_lowat = head->sol_sbsnd_lowat;
	so->so_rcv.sb_timeo = head->sol_sbrcv_timeo;
	so->so_snd.sb_timeo = head->sol_sbsnd_timeo;
	so->so_rcv.sb_flags |= head->sol_sbrcv_flags & SB_AUTOSIZE;
	so->so_snd.sb_flags |= head->sol_sbsnd_flags & SB_AUTOSIZE;

	SOLISTEN_LOCK(head);
	if (head->sol_accept_filter != NULL)
		connstatus = 0;
	so->so_state |= connstatus;
	so->so_options = head->so_options & ~SO_ACCEPTCONN;
	soref(head);	/* A socket on (in)complete queue refs head. */
	if (connstatus) {
		TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
		so->so_qstate = SQ_COMP;
		head->sol_qlen++;
		solisten_wakeup(head);	/* unlocks */
	} else {
		/*
		 * Keep removing sockets from the head until there's room for
		 * us to insert on the tail. In pre-locking revisions, this
		 * was a simple if(), but as we could be racing with other
		 * threads and soabort() requires dropping locks, we must
		 * loop waiting for the condition to be true.
		 */
		while (head->sol_incqlen > head->sol_qlimit) {
			struct socket *sp;

			sp = TAILQ_FIRST(&head->sol_incomp);
			TAILQ_REMOVE(&head->sol_incomp, sp, so_list);
			head->sol_incqlen--;
			SOCK_LOCK(sp);
			sp->so_qstate = SQ_NONE;
			sp->so_listen = NULL;
			SOCK_UNLOCK(sp);
			sorele(head);	/* does SOLISTEN_UNLOCK, head stays */
			soabort(sp);
			SOLISTEN_LOCK(head);
		}
		TAILQ_INSERT_TAIL(&head->sol_incomp, so, so_list);
		so->so_qstate = SQ_INCOMP;
		head->sol_incqlen++;
		SOLISTEN_UNLOCK(head);
	}
	return (so);
}
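
/*
 * Illustrative sketch (not from this file): a connection-oriented protocol
 * typically calls sonewconn() from its input path when a connection request
 * arrives on a listening socket:
 *
 *	so = sonewconn(head, SS_ISCONNECTED);
 *	if (so == NULL)
 *		... drop the request; the queue is full or memory is short ...
 *
 * Passing a connstatus of 0 instead places the new socket on the incomplete
 * queue. As noted above, the new socket is returned with a ref count of 0;
 * it stays alive by virtue of its place on the listen queue.
 */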

#ifdef SCTP
/*
 * Socket part of sctp_peeloff(). Detach a new socket from an
 * association. The new socket is returned with a reference.
 */
struct socket *
sopeeloff(struct socket *head)
{
	struct socket *so;

	VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
	    __func__, __LINE__, head));
	so = soalloc(head->so_vnet);
	if (so == NULL) {
		log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
		    "limit reached or out of memory\n",
		    __func__, head->so_pcb);
		return (NULL);
	}
	so->so_type = head->so_type;
	so->so_options = head->so_options;
	so->so_linger = head->so_linger;
	so->so_state = (head->so_state & SS_NBIO) | SS_ISCONNECTED;
	so->so_fibnum = head->so_fibnum;
	so->so_proto = head->so_proto;
	so->so_cred = crhold(head->so_cred);
#ifdef MAC
	mac_socket_newconn(head, so);
#endif
	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
	    so_rdknl_assert_locked, so_rdknl_assert_unlocked);
	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
	    so_wrknl_assert_locked, so_wrknl_assert_unlocked);
	VNET_SO_ASSERT(head);
	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
		sodealloc(so);
		log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
		    __func__, head->so_pcb);
		return (NULL);
	}
	if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
		sodealloc(so);
		log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
		    __func__, head->so_pcb);
		return (NULL);
	}
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;

	soref(so);

	return (so);
}
#endif	/* SCTP */

int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error;

	CURVNET_SET(so->so_vnet);
	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td);
	CURVNET_RESTORE();
	return (error);
}

int
sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error;

	CURVNET_SET(so->so_vnet);
	error = (*so->so_proto->pr_usrreqs->pru_bindat)(fd, so, nam, td);
	CURVNET_RESTORE();
	return (error);
}

/*
 * solisten() transitions a socket from a non-listening state to a listening
 * state, but can also be used to update the listen queue depth on an
 * existing listen socket. The protocol will call back into the sockets
 * layer using solisten_proto_check() and solisten_proto() to check and set
 * socket-layer listen state. Call backs are used so that the protocol can
 * acquire both protocol and socket layer locks in whatever order is required
 * by the protocol.
 *
 * Protocol implementors are advised to hold the socket lock across the
 * socket-layer test and set to avoid races at the socket layer.
 */
int
solisten(struct socket *so, int backlog, struct thread *td)
{
	int error;

	CURVNET_SET(so->so_vnet);
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td);
	CURVNET_RESTORE();
	return (error);
}

int
solisten_proto_check(struct socket *so)
{

	SOCK_LOCK_ASSERT(so);

	if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
	    SS_ISDISCONNECTING))
		return (EINVAL);
	return (0);
}

void
solisten_proto(struct socket *so, int backlog)
{
	int sbrcv_lowat, sbsnd_lowat;
	u_int sbrcv_hiwat, sbsnd_hiwat;
	short sbrcv_flags, sbsnd_flags;
	sbintime_t sbrcv_timeo, sbsnd_timeo;

	SOCK_LOCK_ASSERT(so);

	if (SOLISTENING(so))
		goto listening;

	/*
	 * Change this socket to listening state.
	 */
	sbrcv_lowat = so->so_rcv.sb_lowat;
	sbsnd_lowat = so->so_snd.sb_lowat;
	sbrcv_hiwat = so->so_rcv.sb_hiwat;
	sbsnd_hiwat = so->so_snd.sb_hiwat;
	sbrcv_flags = so->so_rcv.sb_flags;
	sbsnd_flags = so->so_snd.sb_flags;
	sbrcv_timeo = so->so_rcv.sb_timeo;
	sbsnd_timeo = so->so_snd.sb_timeo;

	sbdestroy(&so->so_snd, so);
	sbdestroy(&so->so_rcv, so);
	sx_destroy(&so->so_snd.sb_sx);
	sx_destroy(&so->so_rcv.sb_sx);
	SOCKBUF_LOCK_DESTROY(&so->so_snd);
	SOCKBUF_LOCK_DESTROY(&so->so_rcv);

#ifdef INVARIANTS
	bzero(&so->so_rcv,
	    sizeof(struct socket) - offsetof(struct socket, so_rcv));
#endif

	so->sol_sbrcv_lowat = sbrcv_lowat;
	so->sol_sbsnd_lowat = sbsnd_lowat;
	so->sol_sbrcv_hiwat = sbrcv_hiwat;
	so->sol_sbsnd_hiwat = sbsnd_hiwat;
	so->sol_sbrcv_flags = sbrcv_flags;
	so->sol_sbsnd_flags = sbsnd_flags;
	so->sol_sbrcv_timeo = sbrcv_timeo;
	so->sol_sbsnd_timeo = sbsnd_timeo;

	so->sol_qlen = so->sol_incqlen = 0;
	TAILQ_INIT(&so->sol_incomp);
	TAILQ_INIT(&so->sol_comp);

	so->sol_accept_filter = NULL;
	so->sol_accept_filter_arg = NULL;
	so->sol_accept_filter_str = NULL;

	so->sol_upcall = NULL;
	so->sol_upcallarg = NULL;

	so->so_options |= SO_ACCEPTCONN;

listening:
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->sol_qlimit = backlog;
}
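
/*
 * Illustrative sketch (patterned on how a protocol's pru_listen method uses
 * the two callbacks above; everything other than the socket-layer calls is
 * hypothetical):
 *
 *	static int
 *	example_pru_listen(struct socket *so, int backlog, struct thread *td)
 *	{
 *		int error;
 *
 *		... acquire the protocol lock ...
 *		SOCK_LOCK(so);
 *		error = solisten_proto_check(so);
 *		if (error == 0) {
 *			... set up protocol listen state ...
 *			solisten_proto(so, backlog);
 *		}
 *		SOCK_UNLOCK(so);
 *		... release the protocol lock ...
 *		return (error);
 *	}
 */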

/*
 * Wakeup listeners/subsystems once we have a complete connection.
 * Enters with lock, returns unlocked.
 */
void
solisten_wakeup(struct socket *sol)
{

	if (sol->sol_upcall != NULL)
		(void)sol->sol_upcall(sol, sol->sol_upcallarg, M_NOWAIT);
	else {
		selwakeuppri(&sol->so_rdsel, PSOCK);
		KNOTE_LOCKED(&sol->so_rdsel.si_note, 0);
	}
	SOLISTEN_UNLOCK(sol);
	wakeup_one(&sol->sol_comp);
}

/*
 * Return a single connection off a listening socket queue. The main
 * consumer of the function is kern_accept4(). Some modules that do their
 * own accept management also use the function.
 *
 * Listening socket must be locked on entry and is returned unlocked on
 * return.
 * The flags argument is a set of accept4(2) flags and ACCEPT4_INHERIT.
 */
int
solisten_dequeue(struct socket *head, struct socket **ret, int flags)
{
	struct socket *so;
	int error;

	SOLISTEN_LOCK_ASSERT(head);

	while (!(head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp) &&
	    head->so_error == 0) {
		error = msleep(&head->sol_comp, &head->so_lock, PSOCK | PCATCH,
		    "accept", 0);
		if (error != 0) {
			SOLISTEN_UNLOCK(head);
			return (error);
		}
	}
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
	} else if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp))
		error = EWOULDBLOCK;
	else
		error = 0;
	if (error) {
		SOLISTEN_UNLOCK(head);
		return (error);
	}
	so = TAILQ_FIRST(&head->sol_comp);
	SOCK_LOCK(so);
	KASSERT(so->so_qstate == SQ_COMP,
	    ("%s: so %p not SQ_COMP", __func__, so));
	soref(so);
	head->sol_qlen--;
	so->so_qstate = SQ_NONE;
	so->so_listen = NULL;
	TAILQ_REMOVE(&head->sol_comp, so, so_list);
	if (flags & ACCEPT4_INHERIT)
		so->so_state |= (head->so_state & SS_NBIO);
	else
		so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
	SOCK_UNLOCK(so);
	sorele(head);

	*ret = so;
	return (0);
}
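
/*
 * Illustrative sketch of an accept path built on solisten_dequeue()
 * (condensed from the pattern used by kern_accept4(); error handling and
 * file descriptor setup omitted):
 *
 *	SOLISTEN_LOCK(head);
 *	error = solisten_dequeue(head, &so, flags);
 *	if (error != 0)
 *		return (error);
 *	... install so in a file descriptor ...
 *	error = soaccept(so, &sa);
 *
 * The new socket comes back referenced and unlocked, per the comment above.
 */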

/*
 * Evaluate the reference count and named references on a socket; if no
 * references remain, free it. This should be called whenever a reference is
 * released, such as in sorele(), but also when named reference flags are
 * cleared in socket or protocol code.
 *
 * sofree() will free the socket if:
 *
 * - There are no outstanding file descriptor references or related consumers
 *   (so_count == 0).
 *
 * - The socket has been closed by user space, if ever open (SS_NOFDREF).
 *
 * - The protocol does not have an outstanding strong reference on the socket
 *   (SS_PROTOREF).
 *
 * - The socket is not in a completed connection queue, where a process has
 *   been notified that it is present. If it were removed, the user process
 *   may block in accept() despite select() saying the socket was ready.
 */
void
sofree(struct socket *so)
{
	struct protosw *pr = so->so_proto;

	SOCK_LOCK_ASSERT(so);

	if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 ||
	    (so->so_state & SS_PROTOREF) || (so->so_qstate == SQ_COMP)) {
		SOCK_UNLOCK(so);
		return;
	}

	if (!SOLISTENING(so) && so->so_qstate == SQ_INCOMP) {
		struct socket *sol;

		sol = so->so_listen;
		KASSERT(sol, ("%s: so %p on incomp of NULL", __func__, so));

		/*
		 * To solve the race between the close of a listening socket
		 * and a socket on its incomplete queue, we need to lock
		 * both. The order is first listening socket, then regular.
		 * Since there is neither a file descriptor reference
		 * (SS_NOFDREF is set) nor SS_PROTOREF, this function and the
		 * listening socket are the only pointers to so. To preserve
		 * so and sol, we reference both and then relock.
		 * After relock the socket may not move to so_comp since it
		 * doesn't have PCB already, but it may be removed from
		 * so_incomp. If that happens, we share responsibility for
		 * freeing the socket, but soclose() has already removed
		 * it from the queue.
		 */
		soref(sol);
		soref(so);
		SOCK_UNLOCK(so);
		SOLISTEN_LOCK(sol);
		SOCK_LOCK(so);
		if (so->so_qstate == SQ_INCOMP) {
			KASSERT(so->so_listen == sol,
			    ("%s: so %p migrated out of sol %p",
			    __func__, so, sol));
			TAILQ_REMOVE(&sol->sol_incomp, so, so_list);
			sol->sol_incqlen--;
			/* This is guaranteed not to be the last. */
			refcount_release(&sol->so_count);
			so->so_qstate = SQ_NONE;
			so->so_listen = NULL;
		} else
			KASSERT(so->so_listen == NULL,
			    ("%s: so %p not on (in)comp with so_listen",
			    __func__, so));
		sorele(sol);
		KASSERT(so->so_count == 1,
		    ("%s: so %p count %u", __func__, so, so->so_count));
		so->so_count = 0;
	}
	if (SOLISTENING(so))
		so->so_error = ECONNABORTED;
	SOCK_UNLOCK(so);

	if (so->so_dtor != NULL)
		so->so_dtor(so);

	VNET_SO_ASSERT(so);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
		(*pr->pr_domain->dom_dispose)(so);
	if (pr->pr_usrreqs->pru_detach != NULL)
		(*pr->pr_usrreqs->pru_detach)(so);

	/*
	 * From this point on, we assume that no other references to this
	 * socket exist anywhere else in the stack. Therefore, no locks need
	 * to be acquired or held.
	 *
	 * We used to do a lot of socket buffer and socket locking here, as
	 * well as invoke sorflush() and perform wakeups. The direct call to
	 * dom_dispose() and sbrelease_internal() are an inlining of what was
	 * necessary from sorflush().
	 *
	 * Notice that the socket buffer and kqueue state are torn down
	 * before calling pru_detach. This means that protocols should not
	 * assume they can perform socket wakeups, etc, in their detach code.
	 */
	if (!SOLISTENING(so)) {
		sbdestroy(&so->so_snd, so);
		sbdestroy(&so->so_rcv, so);
	}
	seldrain(&so->so_rdsel);
	seldrain(&so->so_wrsel);
	knlist_destroy(&so->so_rdsel.si_note);
	knlist_destroy(&so->so_wrsel.si_note);
	sodealloc(so);
}

/*
 * Close a socket on last file table reference removal. Initiate disconnect
 * if connected. Free socket when disconnect complete.
 *
 * This function will sorele() the socket. Note that soclose() may be called
 * prior to the ref count reaching zero. The actual socket structure will
 * not be freed until the ref count reaches zero.
 */
int
soclose(struct socket *so)
{
	struct accept_queue lqueue;
	bool listening;
	int error = 0;

	KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));

	CURVNET_SET(so->so_vnet);
	funsetown(&so->so_sigio);
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error) {
				if (error == ENOTCONN)
					error = 0;
				goto drop;
			}
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep(&so->so_timeo,
				    PSOCK | PCATCH, "soclos",
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}

drop:
	if (so->so_proto->pr_usrreqs->pru_close != NULL)
		(*so->so_proto->pr_usrreqs->pru_close)(so);

	SOCK_LOCK(so);
	if ((listening = (so->so_options & SO_ACCEPTCONN))) {
		struct socket *sp;

		TAILQ_INIT(&lqueue);
		TAILQ_SWAP(&lqueue, &so->sol_incomp, socket, so_list);
		TAILQ_CONCAT(&lqueue, &so->sol_comp, so_list);

		so->sol_qlen = so->sol_incqlen = 0;

		TAILQ_FOREACH(sp, &lqueue, so_list) {
			SOCK_LOCK(sp);
			sp->so_qstate = SQ_NONE;
			sp->so_listen = NULL;
			SOCK_UNLOCK(sp);
			/* Guaranteed not to be the last. */
			refcount_release(&so->so_count);
		}
	}
	KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
	so->so_state |= SS_NOFDREF;
	sorele(so);
	if (listening) {
		struct socket *sp;

		TAILQ_FOREACH(sp, &lqueue, so_list) {
			SOCK_LOCK(sp);
			if (sp->so_count == 0) {
				SOCK_UNLOCK(sp);
				soabort(sp);
			} else
				/* sp is now in sofree() */
				SOCK_UNLOCK(sp);
		}
	}
	CURVNET_RESTORE();
	return (error);
}

/*
 * soabort() is used to abruptly tear down a connection, such as when a
 * resource limit is reached (listen queue depth exceeded), or if a listen
 * socket is closed while there are sockets waiting to be accepted.
 *
 * This interface is tricky, because it is called on an unreferenced socket,
 * and must be called only by a thread that has actually removed the socket
 * from the listen queue it was on, or races with other threads are risked.
 *
 * This interface will call into the protocol code, so must not be called
 * with any socket locks held. Protocols do call it while holding their own
 * recursible protocol mutexes, but this is something that should be subject
 * to review in the future.
 */
void
soabort(struct socket *so)
{

	/*
	 * In as much as is possible, assert that no references to this
	 * socket are held. This is not quite the same as asserting that the
	 * current thread is responsible for arranging for no references, but
	 * is as close as we can get for now.
	 */
	KASSERT(so->so_count == 0, ("soabort: so_count"));
	KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF"));
	KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF"));
	KASSERT(so->so_qstate == SQ_NONE, ("soabort: !SQ_NONE"));
	VNET_SO_ASSERT(so);

	if (so->so_proto->pr_usrreqs->pru_abort != NULL)
		(*so->so_proto->pr_usrreqs->pru_abort)(so);
	SOCK_LOCK(so);
	sofree(so);
}

int
soaccept(struct socket *so, struct sockaddr **nam)
{
	int error;

	SOCK_LOCK(so);
	KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF"));
	so->so_state &= ~SS_NOFDREF;
	SOCK_UNLOCK(so);

	CURVNET_SET(so->so_vnet);
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	CURVNET_RESTORE();
	return (error);
}

int
soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
{

	return (soconnectat(AT_FDCWD, so, nam, td));
}

int
soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);

	CURVNET_SET(so->so_vnet);
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first. This allows
	 * user to disconnect by connecting to, e.g., a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so)))) {
		error = EISCONN;
	} else {
		/*
		 * Prevent accumulated error from previous connection from
		 * biting us.
		 */
		so->so_error = 0;
		if (fd == AT_FDCWD) {
			error = (*so->so_proto->pr_usrreqs->pru_connect)(so,
			    nam, td);
		} else {
			error = (*so->so_proto->pr_usrreqs->pru_connectat)(fd,
			    so, nam, td);
		}
	}
	CURVNET_RESTORE();

	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int error;

	CURVNET_SET(so1->so_vnet);
	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	CURVNET_RESTORE();
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);
	if (so->so_state & SS_ISDISCONNECTING)
		return (EALREADY);
	VNET_SO_ASSERT(so);
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)

int
sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
	long space;
	ssize_t resid;
	int clen = 0, error, dontroute;

	KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM"));
	KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
	    ("sosend_dgram: !PR_ATOMIC"));

	if (uio != NULL)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned. However, space must be
	 * signed, as it might be less than 0 if we over-committed, and we
	 * must use a signed comparison of space and resid. On the other
	 * hand, a negative resid causes us to loop sending 0-length
	 * segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0;
	if (td != NULL)
		td->td_ru.ru_msgsnd++;
	if (control != NULL)
		clen = control->m_len;

	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
		SOCKBUF_UNLOCK(&so->so_snd);
		error = EPIPE;
		goto out;
	}
	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		SOCKBUF_UNLOCK(&so->so_snd);
		goto out;
	}
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		/*
		 * `sendto' and `sendmsg' are allowed on a connection-based
		 * socket if it supports implied connect. Return ENOTCONN if
		 * not connected and no address is supplied.
		 */
		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
		    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
			    !(resid == 0 && clen != 0)) {
				SOCKBUF_UNLOCK(&so->so_snd);
				error = ENOTCONN;
				goto out;
			}
		} else if (addr == NULL) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED)
				error = ENOTCONN;
			else
				error = EDESTADDRREQ;
			SOCKBUF_UNLOCK(&so->so_snd);
			goto out;
		}
	}

	/*
	 * Do we need MSG_OOB support in SOCK_DGRAM? Signs here may be a
	 * problem and need fixing.
	 */
	space = sbspace(&so->so_snd);
	if (flags & MSG_OOB)
		space += 1024;
	space -= clen;
	SOCKBUF_UNLOCK(&so->so_snd);
	if (resid > space) {
		error = EMSGSIZE;
		goto out;
	}
	if (uio == NULL) {
		resid = 0;
		if (flags & MSG_EOR)
			top->m_flags |= M_EOR;
	} else {
		/*
		 * Copy the data from userland into a mbuf chain.
		 * If no data is to be copied in, a single empty mbuf
		 * is returned.
		 */
		top = m_uiotombuf(uio, M_WAITOK, space, max_hdr,
		    (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0)));
		if (top == NULL) {
			error = EFAULT;	/* only possible error */
			goto out;
		}
		space -= resid - uio->uio_resid;
		resid = uio->uio_resid;
	}
	KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
	/*
	 * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock
	 * than with.
	 */
	if (dontroute) {
		SOCK_LOCK(so);
		so->so_options |= SO_DONTROUTE;
		SOCK_UNLOCK(so);
	}
	/*
	 * XXX all the SBS_CANTSENDMORE checks previously done could be out
	 * of date. We could have received a reset packet in an interrupt or
	 * maybe we slept while doing page faults in uiomove() etc. We could
	 * probably recheck again inside the locking protection here, but
	 * there are probably other places that this also happens. We must
	 * rethink this.
	 */
	VNET_SO_ASSERT(so);
	error = (*so->so_proto->pr_usrreqs->pru_send)(so,
	    (flags & MSG_OOB) ? PRUS_OOB :
	/*
	 * If the user set MSG_EOF, the protocol understands this flag, and
	 * there is nothing left to send, then use PRU_SEND_EOF instead of
	 * PRU_SEND.
	 */
	    ((flags & MSG_EOF) &&
	     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
	     (resid <= 0)) ?
		PRUS_EOF :
		/* If there is more to send set PRUS_MORETOCOME */
		(flags & MSG_MORETOCOME) ||
		(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
	    top, addr, control, td);
	if (dontroute) {
		SOCK_LOCK(so);
		so->so_options &= ~SO_DONTROUTE;
		SOCK_UNLOCK(so);
	}
	clen = 0;
	control = NULL;
	top = NULL;
out:
	if (top != NULL)
		m_freem(top);
	if (control != NULL)
		m_freem(control);
	return (error);
}

/*
 * Send on a socket. If send must go all at once and message is larger than
 * send buffering, then hard error. Lock against other senders. If must go
 * all at once and not enough room now, then inform user that this would
 * block and do nothing. Otherwise, if nonblocking, send as much as
 * possible. The data to be sent is described by "uio" if nonzero, otherwise
 * by the mbuf chain "top" (which must be null if uio is not). Data provided
 * in mbuf chain must be small enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers must check for short
 * counts if EINTR/ERESTART are returned. Data and control buffers are freed
 * on return.
 */
int
sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
	long space;
	ssize_t resid;
	int clen = 0, error, dontroute;
	int atomic = sosendallatonce(so) || top;

	if (uio != NULL)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned. However, space must be
	 * signed, as it might be less than 0 if we over-committed, and we
	 * must use a signed comparison of space and resid. On the other
	 * hand, a negative resid causes us to loop sending 0-length
	 * segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (td != NULL)
		td->td_ru.ru_msgsnd++;
	if (control != NULL)
		clen = control->m_len;

	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;

restart:
	do {
		SOCKBUF_LOCK(&so->so_snd);
		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EPIPE;
			goto release;
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			SOCKBUF_UNLOCK(&so->so_snd);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' are allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0)) {
					SOCKBUF_UNLOCK(&so->so_snd);
					error = ENOTCONN;
					goto release;
				}
			} else if (addr == NULL) {
				SOCKBUF_UNLOCK(&so->so_snd);
				if (so->so_proto->pr_flags & PR_CONNREQUIRED)
					error = ENOTCONN;
				else
					error = EDESTADDRREQ;
				goto release;
			}
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EMSGSIZE;
			goto release;
		}
		if (space < resid + clen &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) {
				SOCKBUF_UNLOCK(&so->so_snd);
				error = EWOULDBLOCK;
				goto release;
			}
			error = sbwait(&so->so_snd);
			SOCKBUF_UNLOCK(&so->so_snd);
			if (error)
				goto release;
			goto restart;
		}
		SOCKBUF_UNLOCK(&so->so_snd);
		space -= clen;
		do {
			if (uio == NULL) {
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				/*
				 * Copy the data from userland into a mbuf
				 * chain. If resid is 0, which can happen
				 * only if we have control to send, then
				 * a single empty mbuf is returned. This
				 * is a workaround to prevent protocol send
				 * methods from panicking.
				 */
				top = m_uiotombuf(uio, M_WAITOK, space,
				    (atomic ? max_hdr : 0),
				    (atomic ? M_PKTHDR : 0) |
				    ((flags & MSG_EOR) ? M_EOR : 0));
				if (top == NULL) {
					error = EFAULT;	/* only possible error */
					goto release;
				}
				space -= resid - uio->uio_resid;
				resid = uio->uio_resid;
			}
			if (dontroute) {
				SOCK_LOCK(so);
				so->so_options |= SO_DONTROUTE;
				SOCK_UNLOCK(so);
			}
			/*
			 * XXX all the SBS_CANTSENDMORE checks previously
			 * done could be out of date. We could have received
			 * a reset packet in an interrupt or maybe we slept
			 * while doing page faults in uiomove() etc. We
			 * could probably recheck again inside the locking
			 * protection here, but there are probably other
			 * places that this also happens. We must rethink
			 * this.
			 */
			VNET_SO_ASSERT(so);
			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			    (flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * If the user set MSG_EOF, the protocol understands
			 * this flag, and there is nothing left to send, then
			 * use PRU_SEND_EOF instead of PRU_SEND.
			 */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
				PRUS_EOF :
			/* If there is more to send set PRUS_MORETOCOME. */
			    (flags & MSG_MORETOCOME) ||
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
			    top, addr, control, td);
			if (dontroute) {
				SOCK_LOCK(so);
				so->so_options &= ~SO_DONTROUTE;
				SOCK_UNLOCK(so);
			}
			clen = 0;
			control = NULL;
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top != NULL)
		m_freem(top);
	if (control != NULL)
		m_freem(control);
	return (error);
}

int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
	int error;

	CURVNET_SET(so->so_vnet);
	if (!SOLISTENING(so))
		error = so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio,
		    top, control, flags, td);
	else {
		m_freem(top);
		m_freem(control);
		error = ENOTCONN;
	}
	CURVNET_RESTORE();
	return (error);
}
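
/*
 * Illustrative sketch (hypothetical caller, not part of this file): a
 * kernel consumer sends a buffer by wrapping it in a uio and calling
 * sosend():
 *
 *	struct uio auio;
 *	struct iovec aiov;
 *	int error;
 *
 *	aiov.iov_base = buf;
 *	aiov.iov_len = len;
 *	auio.uio_iov = &aiov;
 *	auio.uio_iovcnt = 1;
 *	auio.uio_offset = 0;
 *	auio.uio_resid = len;
 *	auio.uio_segflg = UIO_SYSSPACE;
 *	auio.uio_rw = UIO_WRITE;
 *	auio.uio_td = td;
 *	error = sosend(so, NULL, &auio, NULL, NULL, 0, td);
 *
 * As the comment above sosend_generic() notes, on EINTR/ERESTART the
 * caller must check auio.uio_resid for a short count.
 */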

/*
 * The part of soreceive() that implements reading non-inline out-of-band
 * data from a socket. For more complete comments, see soreceive(), from
 * which this code originated.
 *
 * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
 * unable to return an mbuf chain to the caller.
 */
static int
soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
{
	struct protosw *pr = so->so_proto;
	struct mbuf *m;
	int error;

	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
	VNET_SO_ASSERT(so);

	m = m_get(M_WAITOK, MT_DATA);
	error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
	if (error)
		goto bad;
	do {
		error = uiomove(mtod(m, void *),
		    (int) min(uio->uio_resid, m->m_len), uio);
		m = m_free(m);
	} while (uio->uio_resid && error == 0 && m);
bad:
	if (m != NULL)
		m_freem(m);
	return (error);
}

/*
 * Following replacement or removal of the first mbuf on the first mbuf chain
 * of a socket buffer, push necessary state changes back into the socket
 * buffer so that other consumers see the values consistently. 'nextrecord'
 * is the caller's locally stored value of the original value of
 * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes.
 * NOTE: 'nextrecord' may be NULL.
 */
static __inline void
sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
{

	SOCKBUF_LOCK_ASSERT(sb);
	/*
	 * First, update for the new value of nextrecord. If necessary, make
	 * it the first record.
	 */
	if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
	else
		sb->sb_mb = nextrecord;

	/*
	 * Now update any dependent socket buffer fields to reflect the new
	 * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the
	 * addition of a second clause that takes care of the case where
	 * sb_mb has been updated, but remains the last record.
	 */
	if (sb->sb_mb == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
}
1703
1704 /*
1705 * Implement receive operations on a socket. We depend on the way that
1706 * records are added to the sockbuf by sbappend. In particular, each record
1707 * (mbufs linked through m_next) must begin with an address if the protocol
1708 * so specifies, followed by an optional mbuf or mbufs containing ancillary
1709 * data, and then zero or more mbufs of data. In order to allow parallelism
1710 * between network receive and copying to user space, as well as avoid
1711 * sleeping with a mutex held, we release the socket buffer mutex during the
1712 * user space copy. Although the sockbuf is locked, new data may still be
1713 * appended, and thus we must maintain consistency of the sockbuf during that
1714 * time.
1715 *
1716 * The caller may receive the data as a single mbuf chain by supplying an
1717 * mbuf **mp0 for use in returning the chain. The uio is then used only for
1718 * the count in uio_resid.
1719 */
1720 int
1721 soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio,
1722 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
1723 {
1724 struct mbuf *m, **mp;
1725 int flags, error, offset;
1726 ssize_t len;
1727 struct protosw *pr = so->so_proto;
1728 struct mbuf *nextrecord;
1729 int moff, type = 0;
1730 ssize_t orig_resid = uio->uio_resid;
1731
1732 mp = mp0;
1733 if (psa != NULL)
1734 *psa = NULL;
1735 if (controlp != NULL)
1736 *controlp = NULL;
1737 if (flagsp != NULL)
1738 flags = *flagsp &~ MSG_EOR;
1739 else
1740 flags = 0;
1741 if (flags & MSG_OOB)
1742 return (soreceive_rcvoob(so, uio, flags));
1743 if (mp != NULL)
1744 *mp = NULL;
1745 if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING)
1746 && uio->uio_resid) {
1747 VNET_SO_ASSERT(so);
1748 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
1749 }
1750
1751 error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
1752 if (error)
1753 return (error);
1754
1755 restart:
1756 SOCKBUF_LOCK(&so->so_rcv);
1757 m = so->so_rcv.sb_mb;
1758 /*
1759 * If we have less data than requested, block awaiting more (subject
1760 * to any timeout) if:
1761 * 1. the current count is less than the low water mark, or
1762 * 2. MSG_DONTWAIT is not set
1763 */
1764 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
1765 sbavail(&so->so_rcv) < uio->uio_resid) &&
1766 sbavail(&so->so_rcv) < so->so_rcv.sb_lowat &&
1767 m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
1768 KASSERT(m != NULL || !sbavail(&so->so_rcv),
1769 ("receive: m == %p sbavail == %u",
1770 m, sbavail(&so->so_rcv)));
1771 if (so->so_error) {
1772 if (m != NULL)
1773 goto dontblock;
1774 error = so->so_error;
1775 if ((flags & MSG_PEEK) == 0)
1776 so->so_error = 0;
1777 SOCKBUF_UNLOCK(&so->so_rcv);
1778 goto release;
1779 }
1780 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1781 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
1782 if (m == NULL) {
1783 SOCKBUF_UNLOCK(&so->so_rcv);
1784 goto release;
1785 } else
1786 goto dontblock;
1787 }
1788 for (; m != NULL; m = m->m_next)
1789 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
1790 m = so->so_rcv.sb_mb;
1791 goto dontblock;
1792 }
1793 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1794 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1795 SOCKBUF_UNLOCK(&so->so_rcv);
1796 error = ENOTCONN;
1797 goto release;
1798 }
1799 if (uio->uio_resid == 0) {
1800 SOCKBUF_UNLOCK(&so->so_rcv);
1801 goto release;
1802 }
1803 if ((so->so_state & SS_NBIO) ||
1804 (flags & (MSG_DONTWAIT|MSG_NBIO))) {
1805 SOCKBUF_UNLOCK(&so->so_rcv);
1806 error = EWOULDBLOCK;
1807 goto release;
1808 }
1809 SBLASTRECORDCHK(&so->so_rcv);
1810 SBLASTMBUFCHK(&so->so_rcv);
1811 error = sbwait(&so->so_rcv);
1812 SOCKBUF_UNLOCK(&so->so_rcv);
1813 if (error)
1814 goto release;
1815 goto restart;
1816 }
1817 dontblock:
1818 /*
1819 * From this point onward, we maintain 'nextrecord' as a cache of the
1820 * pointer to the next record in the socket buffer. We must keep the
1821 * various socket buffer pointers and local stack versions of the
1822 * pointers in sync, pushing out modifications before dropping the
1823 * socket buffer mutex, and re-reading them when picking it up.
1824 *
1825 * Otherwise, we will race with the network stack appending new data
1826 * or records onto the socket buffer by using inconsistent/stale
	 * versions of the fields, possibly resulting in socket buffer
1828 * corruption.
1829 *
1830 * By holding the high-level sblock(), we prevent simultaneous
1831 * readers from pulling off the front of the socket buffer.
1832 */
1833 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1834 if (uio->uio_td)
1835 uio->uio_td->td_ru.ru_msgrcv++;
1836 KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb"));
1837 SBLASTRECORDCHK(&so->so_rcv);
1838 SBLASTMBUFCHK(&so->so_rcv);
1839 nextrecord = m->m_nextpkt;
1840 if (pr->pr_flags & PR_ADDR) {
1841 KASSERT(m->m_type == MT_SONAME,
1842 ("m->m_type == %d", m->m_type));
1843 orig_resid = 0;
1844 if (psa != NULL)
1845 *psa = sodupsockaddr(mtod(m, struct sockaddr *),
1846 M_NOWAIT);
1847 if (flags & MSG_PEEK) {
1848 m = m->m_next;
1849 } else {
1850 sbfree(&so->so_rcv, m);
1851 so->so_rcv.sb_mb = m_free(m);
1852 m = so->so_rcv.sb_mb;
1853 sockbuf_pushsync(&so->so_rcv, nextrecord);
1854 }
1855 }
1856
1857 /*
1858 * Process one or more MT_CONTROL mbufs present before any data mbufs
1859 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we
1860 * just copy the data; if !MSG_PEEK, we call into the protocol to
1861 * perform externalization (or freeing if controlp == NULL).
1862 */
1863 if (m != NULL && m->m_type == MT_CONTROL) {
1864 struct mbuf *cm = NULL, *cmn;
1865 struct mbuf **cme = &cm;
1866
1867 do {
1868 if (flags & MSG_PEEK) {
1869 if (controlp != NULL) {
1870 *controlp = m_copym(m, 0, m->m_len,
1871 M_NOWAIT);
1872 controlp = &(*controlp)->m_next;
1873 }
1874 m = m->m_next;
1875 } else {
1876 sbfree(&so->so_rcv, m);
1877 so->so_rcv.sb_mb = m->m_next;
1878 m->m_next = NULL;
1879 *cme = m;
1880 cme = &(*cme)->m_next;
1881 m = so->so_rcv.sb_mb;
1882 }
1883 } while (m != NULL && m->m_type == MT_CONTROL);
1884 if ((flags & MSG_PEEK) == 0)
1885 sockbuf_pushsync(&so->so_rcv, nextrecord);
1886 while (cm != NULL) {
1887 cmn = cm->m_next;
1888 cm->m_next = NULL;
1889 if (pr->pr_domain->dom_externalize != NULL) {
1890 SOCKBUF_UNLOCK(&so->so_rcv);
1891 VNET_SO_ASSERT(so);
1892 error = (*pr->pr_domain->dom_externalize)
1893 (cm, controlp, flags);
1894 SOCKBUF_LOCK(&so->so_rcv);
1895 } else if (controlp != NULL)
1896 *controlp = cm;
1897 else
1898 m_freem(cm);
1899 if (controlp != NULL) {
1900 orig_resid = 0;
1901 while (*controlp != NULL)
1902 controlp = &(*controlp)->m_next;
1903 }
1904 cm = cmn;
1905 }
1906 if (m != NULL)
1907 nextrecord = so->so_rcv.sb_mb->m_nextpkt;
1908 else
1909 nextrecord = so->so_rcv.sb_mb;
1910 orig_resid = 0;
1911 }
1912 if (m != NULL) {
1913 if ((flags & MSG_PEEK) == 0) {
1914 KASSERT(m->m_nextpkt == nextrecord,
1915 ("soreceive: post-control, nextrecord !sync"));
1916 if (nextrecord == NULL) {
1917 KASSERT(so->so_rcv.sb_mb == m,
1918 ("soreceive: post-control, sb_mb!=m"));
1919 KASSERT(so->so_rcv.sb_lastrecord == m,
1920 ("soreceive: post-control, lastrecord!=m"));
1921 }
1922 }
1923 type = m->m_type;
1924 if (type == MT_OOBDATA)
1925 flags |= MSG_OOB;
1926 } else {
1927 if ((flags & MSG_PEEK) == 0) {
1928 KASSERT(so->so_rcv.sb_mb == nextrecord,
1929 ("soreceive: sb_mb != nextrecord"));
1930 if (so->so_rcv.sb_mb == NULL) {
1931 KASSERT(so->so_rcv.sb_lastrecord == NULL,
1932 ("soreceive: sb_lastercord != NULL"));
1933 }
1934 }
1935 }
1936 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1937 SBLASTRECORDCHK(&so->so_rcv);
1938 SBLASTMBUFCHK(&so->so_rcv);
1939
1940 /*
1941 * Now continue to read any data mbufs off of the head of the socket
	 * buffer until the read request is satisfied.  Note that 'type'
	 * records the type of the mbufs read so far, so that soreceive()
	 * can stop reading if the type changes; a single receive operation
	 * therefore returns either regular data or inline out-of-band
	 * data, but never both.
1947 */
1948 moff = 0;
1949 offset = 0;
1950 while (m != NULL && !(m->m_flags & M_NOTAVAIL) && uio->uio_resid > 0
1951 && error == 0) {
1952 /*
1953 * If the type of mbuf has changed since the last mbuf
1954 * examined ('type'), end the receive operation.
1955 */
1956 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1957 if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) {
1958 if (type != m->m_type)
1959 break;
1960 } else if (type == MT_OOBDATA)
1961 break;
1962 else
1963 KASSERT(m->m_type == MT_DATA,
1964 ("m->m_type == %d", m->m_type));
1965 so->so_rcv.sb_state &= ~SBS_RCVATMARK;
1966 len = uio->uio_resid;
1967 if (so->so_oobmark && len > so->so_oobmark - offset)
1968 len = so->so_oobmark - offset;
1969 if (len > m->m_len - moff)
1970 len = m->m_len - moff;
1971 /*
		 * If mp is set, just pass back the mbufs.  Otherwise copy
		 * them out via the uio, then free.  The sockbuf must be
		 * consistent here (sb_mb points to the current mbuf, which
		 * points to the next record) when we drop the lock; we must
		 * note any data appended to the sockbuf when we reacquire
		 * it.
1977 */
1978 if (mp == NULL) {
1979 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1980 SBLASTRECORDCHK(&so->so_rcv);
1981 SBLASTMBUFCHK(&so->so_rcv);
1982 SOCKBUF_UNLOCK(&so->so_rcv);
1983 error = uiomove(mtod(m, char *) + moff, (int)len, uio);
1984 SOCKBUF_LOCK(&so->so_rcv);
1985 if (error) {
1986 /*
1987 * The MT_SONAME mbuf has already been removed
1988 * from the record, so it is necessary to
1989 * remove the data mbufs, if any, to preserve
1990 * the invariant in the case of PR_ADDR that
1991 * requires MT_SONAME mbufs at the head of
1992 * each record.
1993 */
1994 if (pr->pr_flags & PR_ATOMIC &&
1995 ((flags & MSG_PEEK) == 0))
1996 (void)sbdroprecord_locked(&so->so_rcv);
1997 SOCKBUF_UNLOCK(&so->so_rcv);
1998 goto release;
1999 }
2000 } else
2001 uio->uio_resid -= len;
2002 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
2003 if (len == m->m_len - moff) {
2004 if (m->m_flags & M_EOR)
2005 flags |= MSG_EOR;
2006 if (flags & MSG_PEEK) {
2007 m = m->m_next;
2008 moff = 0;
2009 } else {
2010 nextrecord = m->m_nextpkt;
2011 sbfree(&so->so_rcv, m);
2012 if (mp != NULL) {
2013 m->m_nextpkt = NULL;
2014 *mp = m;
2015 mp = &m->m_next;
2016 so->so_rcv.sb_mb = m = m->m_next;
2017 *mp = NULL;
2018 } else {
2019 so->so_rcv.sb_mb = m_free(m);
2020 m = so->so_rcv.sb_mb;
2021 }
2022 sockbuf_pushsync(&so->so_rcv, nextrecord);
2023 SBLASTRECORDCHK(&so->so_rcv);
2024 SBLASTMBUFCHK(&so->so_rcv);
2025 }
2026 } else {
2027 if (flags & MSG_PEEK)
2028 moff += len;
2029 else {
2030 if (mp != NULL) {
2031 if (flags & MSG_DONTWAIT) {
2032 *mp = m_copym(m, 0, len,
2033 M_NOWAIT);
2034 if (*mp == NULL) {
2035 /*
2036 * m_copym() couldn't
2037 * allocate an mbuf.
2038 * Adjust uio_resid back
2039 * (it was adjusted
2040 * down by len bytes,
2041 * which we didn't end
2042 * up "copying" over).
2043 */
2044 uio->uio_resid += len;
2045 break;
2046 }
2047 } else {
2048 SOCKBUF_UNLOCK(&so->so_rcv);
2049 *mp = m_copym(m, 0, len,
2050 M_WAITOK);
2051 SOCKBUF_LOCK(&so->so_rcv);
2052 }
2053 }
2054 sbcut_locked(&so->so_rcv, len);
2055 }
2056 }
2057 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
2058 if (so->so_oobmark) {
2059 if ((flags & MSG_PEEK) == 0) {
2060 so->so_oobmark -= len;
2061 if (so->so_oobmark == 0) {
2062 so->so_rcv.sb_state |= SBS_RCVATMARK;
2063 break;
2064 }
2065 } else {
2066 offset += len;
2067 if (offset == so->so_oobmark)
2068 break;
2069 }
2070 }
2071 if (flags & MSG_EOR)
2072 break;
2073 /*
		 * If the MSG_WAITALL flag is set (for a non-atomic socket),
		 * we
2075 * must not quit until "uio->uio_resid == 0" or an error
2076 * termination. If a signal/timeout occurs, return with a
2077 * short count but without error. Keep sockbuf locked
2078 * against other readers.
2079 */
2080 while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
2081 !sosendallatonce(so) && nextrecord == NULL) {
2082 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
2083 if (so->so_error ||
2084 so->so_rcv.sb_state & SBS_CANTRCVMORE)
2085 break;
2086 /*
2087 * Notify the protocol that some data has been
2088 * drained before blocking.
2089 */
2090 if (pr->pr_flags & PR_WANTRCVD) {
2091 SOCKBUF_UNLOCK(&so->so_rcv);
2092 VNET_SO_ASSERT(so);
2093 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
2094 SOCKBUF_LOCK(&so->so_rcv);
2095 }
2096 SBLASTRECORDCHK(&so->so_rcv);
2097 SBLASTMBUFCHK(&so->so_rcv);
2098 /*
			 * We could have received some data while we were
			 * notifying
2100 * the protocol. Skip blocking in this case.
2101 */
2102 if (so->so_rcv.sb_mb == NULL) {
2103 error = sbwait(&so->so_rcv);
2104 if (error) {
2105 SOCKBUF_UNLOCK(&so->so_rcv);
2106 goto release;
2107 }
2108 }
2109 m = so->so_rcv.sb_mb;
2110 if (m != NULL)
2111 nextrecord = m->m_nextpkt;
2112 }
2113 }
2114
2115 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
2116 if (m != NULL && pr->pr_flags & PR_ATOMIC) {
2117 flags |= MSG_TRUNC;
2118 if ((flags & MSG_PEEK) == 0)
2119 (void) sbdroprecord_locked(&so->so_rcv);
2120 }
2121 if ((flags & MSG_PEEK) == 0) {
2122 if (m == NULL) {
2123 /*
2124 * First part is an inline SB_EMPTY_FIXUP(). Second
2125 * part makes sure sb_lastrecord is up-to-date if
2126 * there is still data in the socket buffer.
2127 */
2128 so->so_rcv.sb_mb = nextrecord;
2129 if (so->so_rcv.sb_mb == NULL) {
2130 so->so_rcv.sb_mbtail = NULL;
2131 so->so_rcv.sb_lastrecord = NULL;
2132 } else if (nextrecord->m_nextpkt == NULL)
2133 so->so_rcv.sb_lastrecord = nextrecord;
2134 }
2135 SBLASTRECORDCHK(&so->so_rcv);
2136 SBLASTMBUFCHK(&so->so_rcv);
2137 /*
2138 * If soreceive() is being done from the socket callback,
		 * then we don't need to generate an ACK to the peer to
		 * update the window, since the ACK will be generated on
		 * return to TCP.
2141 */
2142 if (!(flags & MSG_SOCALLBCK) &&
2143 (pr->pr_flags & PR_WANTRCVD)) {
2144 SOCKBUF_UNLOCK(&so->so_rcv);
2145 VNET_SO_ASSERT(so);
2146 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
2147 SOCKBUF_LOCK(&so->so_rcv);
2148 }
2149 }
2150 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
2151 if (orig_resid == uio->uio_resid && orig_resid &&
2152 (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
2153 SOCKBUF_UNLOCK(&so->so_rcv);
2154 goto restart;
2155 }
2156 SOCKBUF_UNLOCK(&so->so_rcv);
2157
2158 if (flagsp != NULL)
2159 *flagsp |= flags;
2160 release:
2161 sbunlock(&so->so_rcv);
2162 return (error);
2163 }
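
/*
 * An illustrative sketch (not compiled) of the mp0 interface described
 * above: a kernel consumer receives up to 'len' bytes as an mbuf chain,
 * with the uio supplying only the byte count.  The function name is
 * hypothetical.
 */
#if 0
static int
example_receive_mbuf_chain(struct socket *so, int len, struct mbuf **mp)
{
	struct uio auio;
	int flags;

	bzero(&auio, sizeof(auio));
	auio.uio_resid = len;		/* Only the count is consulted. */
	flags = MSG_DONTWAIT;
	return (soreceive(so, NULL, &auio, mp, NULL, &flags));
}
#endif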
2164
2165 /*
2166 * Optimized version of soreceive() for stream (TCP) sockets.
2167 */
2168 int
2169 soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
2170 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
2171 {
2172 int len = 0, error = 0, flags, oresid;
2173 struct sockbuf *sb;
2174 struct mbuf *m, *n = NULL;
2175
2176 /* We only do stream sockets. */
2177 if (so->so_type != SOCK_STREAM)
2178 return (EINVAL);
2179 if (psa != NULL)
2180 *psa = NULL;
2181 if (flagsp != NULL)
2182 flags = *flagsp &~ MSG_EOR;
2183 else
2184 flags = 0;
2185 if (controlp != NULL)
2186 *controlp = NULL;
2187 if (flags & MSG_OOB)
2188 return (soreceive_rcvoob(so, uio, flags));
2189 if (mp0 != NULL)
2190 *mp0 = NULL;
2191
2192 sb = &so->so_rcv;
2193
2194 /* Prevent other readers from entering the socket. */
2195 error = sblock(sb, SBLOCKWAIT(flags));
2196 if (error)
2197 goto out;
2198 SOCKBUF_LOCK(sb);
2199
2200 /* Easy one, no space to copyout anything. */
2201 if (uio->uio_resid == 0) {
2202 error = EINVAL;
2203 goto out;
2204 }
2205 oresid = uio->uio_resid;
2206
2207 /* We will never ever get anything unless we are or were connected. */
2208 if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
2209 error = ENOTCONN;
2210 goto out;
2211 }
2212
2213 restart:
2214 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
2215
2216 /* Abort if socket has reported problems. */
2217 if (so->so_error) {
2218 if (sbavail(sb) > 0)
2219 goto deliver;
2220 if (oresid > uio->uio_resid)
2221 goto out;
2222 error = so->so_error;
2223 if (!(flags & MSG_PEEK))
2224 so->so_error = 0;
2225 goto out;
2226 }
2227
2228 /* Door is closed. Deliver what is left, if any. */
2229 if (sb->sb_state & SBS_CANTRCVMORE) {
2230 if (sbavail(sb) > 0)
2231 goto deliver;
2232 else
2233 goto out;
2234 }
2235
2236 /* Socket buffer is empty and we shall not block. */
2237 if (sbavail(sb) == 0 &&
2238 ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
2239 error = EAGAIN;
2240 goto out;
2241 }
2242
2243 /* Socket buffer got some data that we shall deliver now. */
2244 if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) &&
2245 ((so->so_state & SS_NBIO) ||
2246 (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
2247 sbavail(sb) >= sb->sb_lowat ||
2248 sbavail(sb) >= uio->uio_resid ||
2249 sbavail(sb) >= sb->sb_hiwat) ) {
2250 goto deliver;
2251 }
2252
2253 /* On MSG_WAITALL we must wait until all data or error arrives. */
2254 if ((flags & MSG_WAITALL) &&
2255 (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat))
2256 goto deliver;
2257
2258 /*
2259 * Wait and block until (more) data comes in.
2260 * NB: Drops the sockbuf lock during wait.
2261 */
2262 error = sbwait(sb);
2263 if (error)
2264 goto out;
2265 goto restart;
2266
2267 deliver:
2268 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
2269 KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__));
2270 KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
2271
2272 /* Statistics. */
2273 if (uio->uio_td)
2274 uio->uio_td->td_ru.ru_msgrcv++;
2275
2276 /* Fill uio until full or current end of socket buffer is reached. */
2277 len = min(uio->uio_resid, sbavail(sb));
2278 if (mp0 != NULL) {
2279 /* Dequeue as many mbufs as possible. */
2280 if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
2281 if (*mp0 == NULL)
2282 *mp0 = sb->sb_mb;
2283 else
2284 m_cat(*mp0, sb->sb_mb);
2285 for (m = sb->sb_mb;
2286 m != NULL && m->m_len <= len;
2287 m = m->m_next) {
2288 KASSERT(!(m->m_flags & M_NOTAVAIL),
2289 ("%s: m %p not available", __func__, m));
2290 len -= m->m_len;
2291 uio->uio_resid -= m->m_len;
2292 sbfree(sb, m);
2293 n = m;
2294 }
2295 n->m_next = NULL;
2296 sb->sb_mb = m;
2297 sb->sb_lastrecord = sb->sb_mb;
2298 if (sb->sb_mb == NULL)
2299 SB_EMPTY_FIXUP(sb);
2300 }
2301 /* Copy the remainder. */
2302 if (len > 0) {
2303 KASSERT(sb->sb_mb != NULL,
2304 ("%s: len > 0 && sb->sb_mb empty", __func__));
2305
2306 m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
2307 if (m == NULL)
2308 len = 0; /* Don't flush data from sockbuf. */
2309 else
2310 uio->uio_resid -= len;
2311 if (*mp0 != NULL)
2312 m_cat(*mp0, m);
2313 else
2314 *mp0 = m;
2315 if (*mp0 == NULL) {
2316 error = ENOBUFS;
2317 goto out;
2318 }
2319 }
2320 } else {
2321 /* NB: Must unlock socket buffer as uiomove may sleep. */
2322 SOCKBUF_UNLOCK(sb);
2323 error = m_mbuftouio(uio, sb->sb_mb, len);
2324 SOCKBUF_LOCK(sb);
2325 if (error)
2326 goto out;
2327 }
2328 SBLASTRECORDCHK(sb);
2329 SBLASTMBUFCHK(sb);
2330
2331 /*
2332 * Remove the delivered data from the socket buffer unless we
2333 * were only peeking.
2334 */
2335 if (!(flags & MSG_PEEK)) {
2336 if (len > 0)
2337 sbdrop_locked(sb, len);
2338
2339 /* Notify protocol that we drained some data. */
2340 if ((so->so_proto->pr_flags & PR_WANTRCVD) &&
2341 (((flags & MSG_WAITALL) && uio->uio_resid > 0) ||
2342 !(flags & MSG_SOCALLBCK))) {
2343 SOCKBUF_UNLOCK(sb);
2344 VNET_SO_ASSERT(so);
2345 (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
2346 SOCKBUF_LOCK(sb);
2347 }
2348 }
2349
2350 /*
2351 * For MSG_WAITALL we may have to loop again and wait for
2352 * more data to come in.
2353 */
2354 if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
2355 goto restart;
2356 out:
2357 SOCKBUF_LOCK_ASSERT(sb);
2358 SBLASTRECORDCHK(sb);
2359 SBLASTMBUFCHK(sb);
2360 SOCKBUF_UNLOCK(sb);
2361 sbunlock(sb);
2362 return (error);
2363 }
2364
2365 /*
2366 * Optimized version of soreceive() for simple datagram cases from userspace.
2367 * Unlike in the stream case, we're able to drop a datagram if copyout()
2368 * fails, and because we handle datagrams atomically, we don't need to use a
2369 * sleep lock to prevent I/O interlacing.
2370 */
2371 int
2372 soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
2373 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
2374 {
2375 struct mbuf *m, *m2;
2376 int flags, error;
2377 ssize_t len;
2378 struct protosw *pr = so->so_proto;
2379 struct mbuf *nextrecord;
2380
2381 if (psa != NULL)
2382 *psa = NULL;
2383 if (controlp != NULL)
2384 *controlp = NULL;
2385 if (flagsp != NULL)
2386 flags = *flagsp &~ MSG_EOR;
2387 else
2388 flags = 0;
2389
2390 /*
2391 * For any complicated cases, fall back to the full
2392 * soreceive_generic().
2393 */
2394 if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB))
2395 return (soreceive_generic(so, psa, uio, mp0, controlp,
2396 flagsp));
2397
2398 /*
2399 * Enforce restrictions on use.
2400 */
2401 KASSERT((pr->pr_flags & PR_WANTRCVD) == 0,
2402 ("soreceive_dgram: wantrcvd"));
2403 KASSERT(pr->pr_flags & PR_ATOMIC, ("soreceive_dgram: !atomic"));
2404 KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0,
2405 ("soreceive_dgram: SBS_RCVATMARK"));
2406 KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0,
2407 ("soreceive_dgram: P_CONNREQUIRED"));
2408
2409 /*
2410 * Loop blocking while waiting for a datagram.
2411 */
2412 SOCKBUF_LOCK(&so->so_rcv);
2413 while ((m = so->so_rcv.sb_mb) == NULL) {
2414 KASSERT(sbavail(&so->so_rcv) == 0,
2415 ("soreceive_dgram: sb_mb NULL but sbavail %u",
2416 sbavail(&so->so_rcv)));
2417 if (so->so_error) {
2418 error = so->so_error;
2419 so->so_error = 0;
2420 SOCKBUF_UNLOCK(&so->so_rcv);
2421 return (error);
2422 }
2423 if (so->so_rcv.sb_state & SBS_CANTRCVMORE ||
2424 uio->uio_resid == 0) {
2425 SOCKBUF_UNLOCK(&so->so_rcv);
2426 return (0);
2427 }
2428 if ((so->so_state & SS_NBIO) ||
2429 (flags & (MSG_DONTWAIT|MSG_NBIO))) {
2430 SOCKBUF_UNLOCK(&so->so_rcv);
2431 return (EWOULDBLOCK);
2432 }
2433 SBLASTRECORDCHK(&so->so_rcv);
2434 SBLASTMBUFCHK(&so->so_rcv);
2435 error = sbwait(&so->so_rcv);
2436 if (error) {
2437 SOCKBUF_UNLOCK(&so->so_rcv);
2438 return (error);
2439 }
2440 }
2441 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
2442
2443 if (uio->uio_td)
2444 uio->uio_td->td_ru.ru_msgrcv++;
2445 SBLASTRECORDCHK(&so->so_rcv);
2446 SBLASTMBUFCHK(&so->so_rcv);
2447 nextrecord = m->m_nextpkt;
2448 if (nextrecord == NULL) {
2449 KASSERT(so->so_rcv.sb_lastrecord == m,
2450 ("soreceive_dgram: lastrecord != m"));
2451 }
2452
2453 KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord,
2454 ("soreceive_dgram: m_nextpkt != nextrecord"));
2455
2456 /*
2457 * Pull 'm' and its chain off the front of the packet queue.
2458 */
2459 so->so_rcv.sb_mb = NULL;
2460 sockbuf_pushsync(&so->so_rcv, nextrecord);
2461
2462 /*
2463 * Walk 'm's chain and free that many bytes from the socket buffer.
2464 */
2465 for (m2 = m; m2 != NULL; m2 = m2->m_next)
2466 sbfree(&so->so_rcv, m2);
2467
2468 /*
2469 * Do a few last checks before we let go of the lock.
2470 */
2471 SBLASTRECORDCHK(&so->so_rcv);
2472 SBLASTMBUFCHK(&so->so_rcv);
2473 SOCKBUF_UNLOCK(&so->so_rcv);
2474
2475 if (pr->pr_flags & PR_ADDR) {
2476 KASSERT(m->m_type == MT_SONAME,
2477 ("m->m_type == %d", m->m_type));
2478 if (psa != NULL)
2479 *psa = sodupsockaddr(mtod(m, struct sockaddr *),
2480 M_NOWAIT);
2481 m = m_free(m);
2482 }
2483 if (m == NULL) {
2484 /* XXXRW: Can this happen? */
2485 return (0);
2486 }
2487
2488 /*
2489 * Packet to copyout() is now in 'm' and it is disconnected from the
2490 * queue.
2491 *
2492 * Process one or more MT_CONTROL mbufs present before any data mbufs
2493 * in the first mbuf chain on the socket buffer. We call into the
2494 * protocol to perform externalization (or freeing if controlp ==
2495 * NULL). In some cases there can be only MT_CONTROL mbufs without
2496 * MT_DATA mbufs.
2497 */
2498 if (m->m_type == MT_CONTROL) {
2499 struct mbuf *cm = NULL, *cmn;
2500 struct mbuf **cme = &cm;
2501
2502 do {
2503 m2 = m->m_next;
2504 m->m_next = NULL;
2505 *cme = m;
2506 cme = &(*cme)->m_next;
2507 m = m2;
2508 } while (m != NULL && m->m_type == MT_CONTROL);
2509 while (cm != NULL) {
2510 cmn = cm->m_next;
2511 cm->m_next = NULL;
2512 if (pr->pr_domain->dom_externalize != NULL) {
2513 error = (*pr->pr_domain->dom_externalize)
2514 (cm, controlp, flags);
2515 } else if (controlp != NULL)
2516 *controlp = cm;
2517 else
2518 m_freem(cm);
2519 if (controlp != NULL) {
2520 while (*controlp != NULL)
2521 controlp = &(*controlp)->m_next;
2522 }
2523 cm = cmn;
2524 }
2525 }
2526 KASSERT(m == NULL || m->m_type == MT_DATA,
2527 ("soreceive_dgram: !data"));
2528 while (m != NULL && uio->uio_resid > 0) {
2529 len = uio->uio_resid;
2530 if (len > m->m_len)
2531 len = m->m_len;
2532 error = uiomove(mtod(m, char *), (int)len, uio);
2533 if (error) {
2534 m_freem(m);
2535 return (error);
2536 }
2537 if (len == m->m_len)
2538 m = m_free(m);
2539 else {
2540 m->m_data += len;
2541 m->m_len -= len;
2542 }
2543 }
2544 if (m != NULL) {
2545 flags |= MSG_TRUNC;
2546 m_freem(m);
2547 }
2548 if (flagsp != NULL)
2549 *flagsp |= flags;
2550 return (0);
2551 }
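
/*
 * An illustrative sketch (not compiled) of how protocols opt into the
 * optimized receive paths above: pr_usrreqs->pru_soreceive is pointed at
 * soreceive_stream() (as TCP may do) or soreceive_dgram() (as datagram
 * protocols such as UDP do).  The structure name below is hypothetical
 * and the remaining entries are elided.
 */
#if 0
static struct pr_usrreqs example_dgram_usrreqs = {
	.pru_soreceive =	soreceive_dgram,
	/* ... remaining pru_* entries elided ... */
};
#endif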
2552
2553 int
2554 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
2555 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
2556 {
2557 int error;
2558
2559 CURVNET_SET(so->so_vnet);
2560 if (!SOLISTENING(so))
2561 error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio,
2562 mp0, controlp, flagsp));
2563 else
2564 error = ENOTCONN;
2565 CURVNET_RESTORE();
2566 return (error);
2567 }
2568
2569 int
2570 soshutdown(struct socket *so, int how)
2571 {
2572 struct protosw *pr = so->so_proto;
2573 int error, soerror_enotconn;
2574
2575 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
2576 return (EINVAL);
2577
2578 soerror_enotconn = 0;
2579 if ((so->so_state &
2580 (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
2581 /*
		 * POSIX mandates that we return ENOTCONN when shutdown(2) is
		 * invoked on a datagram socket; however, historically we
		 * would actually tear the socket down.  This is known to be
		 * leveraged by some applications to unblock a process
		 * waiting in recvXXX(2) from another process that shares the
		 * socket.  Try to meet both the backward-compatibility and
		 * POSIX requirements by forcing ENOTCONN but still asking
		 * the protocol to perform pru_shutdown().
2589 */
2590 if (so->so_type != SOCK_DGRAM && !SOLISTENING(so))
2591 return (ENOTCONN);
2592 soerror_enotconn = 1;
2593 }
2594
2595 if (SOLISTENING(so)) {
2596 if (how != SHUT_WR) {
2597 SOLISTEN_LOCK(so);
2598 so->so_error = ECONNABORTED;
2599 solisten_wakeup(so); /* unlocks so */
2600 }
2601 goto done;
2602 }
2603
2604 CURVNET_SET(so->so_vnet);
2605 if (pr->pr_usrreqs->pru_flush != NULL)
2606 (*pr->pr_usrreqs->pru_flush)(so, how);
2607 if (how != SHUT_WR)
2608 sorflush(so);
2609 if (how != SHUT_RD) {
2610 error = (*pr->pr_usrreqs->pru_shutdown)(so);
2611 wakeup(&so->so_timeo);
2612 CURVNET_RESTORE();
2613 return ((error == 0 && soerror_enotconn) ? ENOTCONN : error);
2614 }
2615 wakeup(&so->so_timeo);
2616 CURVNET_RESTORE();
2617
2618 done:
2619 return (soerror_enotconn ? ENOTCONN : 0);
2620 }
2621
2622 void
2623 sorflush(struct socket *so)
2624 {
2625 struct sockbuf *sb = &so->so_rcv;
2626 struct protosw *pr = so->so_proto;
2627 struct socket aso;
2628
2629 VNET_SO_ASSERT(so);
2630
2631 /*
2632 * In order to avoid calling dom_dispose with the socket buffer mutex
2633 * held, and in order to generally avoid holding the lock for a long
2634 * time, we make a copy of the socket buffer and clear the original
2635 * (except locks, state). The new socket buffer copy won't have
2636 * initialized locks so we can only call routines that won't use or
2637 * assert those locks.
2638 *
2639 * Dislodge threads currently blocked in receive and wait to acquire
2640 * a lock against other simultaneous readers before clearing the
2641 * socket buffer. Don't let our acquire be interrupted by a signal
	 * despite any existing socket disposition on interruptible waiting.
2643 */
2644 socantrcvmore(so);
2645 (void) sblock(sb, SBL_WAIT | SBL_NOINTR);
2646
2647 /*
2648 * Invalidate/clear most of the sockbuf structure, but leave selinfo
2649 * and mutex data unchanged.
2650 */
2651 SOCKBUF_LOCK(sb);
2652 bzero(&aso, sizeof(aso));
2653 aso.so_pcb = so->so_pcb;
2654 bcopy(&sb->sb_startzero, &aso.so_rcv.sb_startzero,
2655 sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
2656 bzero(&sb->sb_startzero,
2657 sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
2658 SOCKBUF_UNLOCK(sb);
2659 sbunlock(sb);
2660
2661 /*
2662 * Dispose of special rights and flush the copied socket. Don't call
2663 * any unsafe routines (that rely on locks being initialized) on aso.
2664 */
2665 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
2666 (*pr->pr_domain->dom_dispose)(&aso);
2667 sbrelease_internal(&aso.so_rcv, so);
2668 }
2669
2670 /*
2671 * Wrapper for Socket established helper hook.
2672 * Parameters: socket, context of the hook point, hook id.
2673 */
2674 static int inline
2675 hhook_run_socket(struct socket *so, void *hctx, int32_t h_id)
2676 {
2677 struct socket_hhook_data hhook_data = {
2678 .so = so,
2679 .hctx = hctx,
2680 .m = NULL,
2681 .status = 0
2682 };
2683
2684 CURVNET_SET(so->so_vnet);
2685 HHOOKS_RUN_IF(V_socket_hhh[h_id], &hhook_data, &so->osd);
2686 CURVNET_RESTORE();
2687
2688 /* Ugly but needed, since hhooks return void for now */
2689 return (hhook_data.status);
2690 }
2691
2692 /*
2693 * Perhaps this routine, and sooptcopyout(), below, ought to come in an
2694 * additional variant to handle the case where the option value needs to be
2695 * some kind of integer, but not a specific size. In addition to their use
2696 * here, these functions are also called by the protocol-level pr_ctloutput()
2697 * routines.
2698 */
2699 int
2700 sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
2701 {
2702 size_t valsize;
2703
2704 /*
2705 * If the user gives us more than we wanted, we ignore it, but if we
2706 * don't get the minimum length the caller wants, we return EINVAL.
2707 * On success, sopt->sopt_valsize is set to however much we actually
2708 * retrieved.
2709 */
2710 if ((valsize = sopt->sopt_valsize) < minlen)
2711 return EINVAL;
2712 if (valsize > len)
2713 sopt->sopt_valsize = valsize = len;
2714
2715 if (sopt->sopt_td != NULL)
2716 return (copyin(sopt->sopt_val, buf, valsize));
2717
2718 bcopy(sopt->sopt_val, buf, valsize);
2719 return (0);
2720 }
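
/*
 * An illustrative sketch (not compiled) of a protocol pr_ctloutput() SET
 * handler built on sooptcopyin(); the validation and the stored option
 * are hypothetical.
 */
#if 0
static int
example_ctloutput_set(struct socket *so, struct sockopt *sopt)
{
	int error, optval;

	error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval));
	if (error)
		return (error);
	if (optval < 0)
		return (EINVAL);
	/* ... store optval in protocol state ... */
	return (0);
}
#endif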
2721
2722 /*
2723 * Kernel version of setsockopt(2).
2724 *
2725 * XXX: optlen is size_t, not socklen_t
2726 */
2727 int
2728 so_setsockopt(struct socket *so, int level, int optname, void *optval,
2729 size_t optlen)
2730 {
2731 struct sockopt sopt;
2732
2733 sopt.sopt_level = level;
2734 sopt.sopt_name = optname;
2735 sopt.sopt_dir = SOPT_SET;
2736 sopt.sopt_val = optval;
2737 sopt.sopt_valsize = optlen;
2738 sopt.sopt_td = NULL;
2739 return (sosetopt(so, &sopt));
2740 }
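
/*
 * An illustrative sketch (not compiled) of kernel code using the wrapper
 * above to enable SO_REUSEADDR on a socket it owns.
 */
#if 0
static int
example_enable_reuseaddr(struct socket *so)
{
	int one = 1;

	return (so_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &one,
	    sizeof(one)));
}
#endif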
2741
2742 int
2743 sosetopt(struct socket *so, struct sockopt *sopt)
2744 {
2745 int error, optval;
2746 struct linger l;
2747 struct timeval tv;
2748 sbintime_t val;
2749 uint32_t val32;
2750 #ifdef MAC
2751 struct mac extmac;
2752 #endif
2753
2754 CURVNET_SET(so->so_vnet);
2755 error = 0;
2756 if (sopt->sopt_level != SOL_SOCKET) {
2757 if (so->so_proto->pr_ctloutput != NULL) {
2758 error = (*so->so_proto->pr_ctloutput)(so, sopt);
2759 CURVNET_RESTORE();
2760 return (error);
2761 }
2762 error = ENOPROTOOPT;
2763 } else {
2764 switch (sopt->sopt_name) {
2765 case SO_ACCEPTFILTER:
2766 error = accept_filt_setopt(so, sopt);
2767 if (error)
2768 goto bad;
2769 break;
2770
2771 case SO_LINGER:
2772 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
2773 if (error)
2774 goto bad;
2775
2776 SOCK_LOCK(so);
2777 so->so_linger = l.l_linger;
2778 if (l.l_onoff)
2779 so->so_options |= SO_LINGER;
2780 else
2781 so->so_options &= ~SO_LINGER;
2782 SOCK_UNLOCK(so);
2783 break;
2784
2785 case SO_DEBUG:
2786 case SO_KEEPALIVE:
2787 case SO_DONTROUTE:
2788 case SO_USELOOPBACK:
2789 case SO_BROADCAST:
2790 case SO_REUSEADDR:
2791 case SO_REUSEPORT:
2792 case SO_REUSEPORT_LB:
2793 case SO_OOBINLINE:
2794 case SO_TIMESTAMP:
2795 case SO_BINTIME:
2796 case SO_NOSIGPIPE:
2797 case SO_NO_DDP:
2798 case SO_NO_OFFLOAD:
2799 error = sooptcopyin(sopt, &optval, sizeof optval,
2800 sizeof optval);
2801 if (error)
2802 goto bad;
2803 SOCK_LOCK(so);
2804 if (optval)
2805 so->so_options |= sopt->sopt_name;
2806 else
2807 so->so_options &= ~sopt->sopt_name;
2808 SOCK_UNLOCK(so);
2809 break;
2810
2811 case SO_SETFIB:
2812 error = sooptcopyin(sopt, &optval, sizeof optval,
2813 sizeof optval);
2814 if (error)
2815 goto bad;
2816
2817 if (optval < 0 || optval >= rt_numfibs) {
2818 error = EINVAL;
2819 goto bad;
2820 }
2821 if (((so->so_proto->pr_domain->dom_family == PF_INET) ||
2822 (so->so_proto->pr_domain->dom_family == PF_INET6) ||
2823 (so->so_proto->pr_domain->dom_family == PF_ROUTE)))
2824 so->so_fibnum = optval;
2825 else
2826 so->so_fibnum = 0;
2827 break;
2828
2829 case SO_USER_COOKIE:
2830 error = sooptcopyin(sopt, &val32, sizeof val32,
2831 sizeof val32);
2832 if (error)
2833 goto bad;
2834 so->so_user_cookie = val32;
2835 break;
2836
2837 case SO_SNDBUF:
2838 case SO_RCVBUF:
2839 case SO_SNDLOWAT:
2840 case SO_RCVLOWAT:
2841 error = sooptcopyin(sopt, &optval, sizeof optval,
2842 sizeof optval);
2843 if (error)
2844 goto bad;
2845
2846 /*
2847 * Values < 1 make no sense for any of these options,
2848 * so disallow them.
2849 */
2850 if (optval < 1) {
2851 error = EINVAL;
2852 goto bad;
2853 }
2854
2855 error = sbsetopt(so, sopt->sopt_name, optval);
2856 break;
2857
2858 case SO_SNDTIMEO:
2859 case SO_RCVTIMEO:
2860 #ifdef COMPAT_FREEBSD32
2861 if (SV_CURPROC_FLAG(SV_ILP32)) {
2862 struct timeval32 tv32;
2863
2864 error = sooptcopyin(sopt, &tv32, sizeof tv32,
2865 sizeof tv32);
2866 CP(tv32, tv, tv_sec);
2867 CP(tv32, tv, tv_usec);
2868 } else
2869 #endif
2870 error = sooptcopyin(sopt, &tv, sizeof tv,
2871 sizeof tv);
2872 if (error)
2873 goto bad;
2874 if (tv.tv_sec < 0 || tv.tv_usec < 0 ||
2875 tv.tv_usec >= 1000000) {
2876 error = EDOM;
2877 goto bad;
2878 }
2879 if (tv.tv_sec > INT32_MAX)
2880 val = SBT_MAX;
2881 else
2882 val = tvtosbt(tv);
2883 switch (sopt->sopt_name) {
2884 case SO_SNDTIMEO:
2885 so->so_snd.sb_timeo = val;
2886 break;
2887 case SO_RCVTIMEO:
2888 so->so_rcv.sb_timeo = val;
2889 break;
2890 }
2891 break;
2892
2893 case SO_LABEL:
2894 #ifdef MAC
2895 error = sooptcopyin(sopt, &extmac, sizeof extmac,
2896 sizeof extmac);
2897 if (error)
2898 goto bad;
2899 error = mac_setsockopt_label(sopt->sopt_td->td_ucred,
2900 so, &extmac);
2901 #else
2902 error = EOPNOTSUPP;
2903 #endif
2904 break;
2905
2906 case SO_TS_CLOCK:
2907 error = sooptcopyin(sopt, &optval, sizeof optval,
2908 sizeof optval);
2909 if (error)
2910 goto bad;
2911 if (optval < 0 || optval > SO_TS_CLOCK_MAX) {
2912 error = EINVAL;
2913 goto bad;
2914 }
2915 so->so_ts_clock = optval;
2916 break;
2917
2918 case SO_MAX_PACING_RATE:
2919 error = sooptcopyin(sopt, &val32, sizeof(val32),
2920 sizeof(val32));
2921 if (error)
2922 goto bad;
2923 so->so_max_pacing_rate = val32;
2924 break;
2925
2926 default:
2927 if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
2928 error = hhook_run_socket(so, sopt,
2929 HHOOK_SOCKET_OPT);
2930 else
2931 error = ENOPROTOOPT;
2932 break;
2933 }
2934 if (error == 0 && so->so_proto->pr_ctloutput != NULL)
2935 (void)(*so->so_proto->pr_ctloutput)(so, sopt);
2936 }
2937 bad:
2938 CURVNET_RESTORE();
2939 return (error);
2940 }
2941
2942 /*
2943 * Helper routine for getsockopt.
2944 */
2945 int
2946 sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
2947 {
2948 int error;
2949 size_t valsize;
2950
2951 error = 0;
2952
2953 /*
2954 * Documented get behavior is that we always return a value, possibly
2955 * truncated to fit in the user's buffer. Traditional behavior is
2956 * that we always tell the user precisely how much we copied, rather
2957 * than something useful like the total amount we had available for
2958 * her. Note that this interface is not idempotent; the entire
2959 * answer must be generated ahead of time.
2960 */
2961 valsize = min(len, sopt->sopt_valsize);
2962 sopt->sopt_valsize = valsize;
2963 if (sopt->sopt_val != NULL) {
2964 if (sopt->sopt_td != NULL)
2965 error = copyout(buf, sopt->sopt_val, valsize);
2966 else
2967 bcopy(buf, sopt->sopt_val, valsize);
2968 }
2969 return (error);
2970 }
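
/*
 * An illustrative sketch (not compiled) of the matching pr_ctloutput()
 * GET side: the entire answer is generated first and then handed to
 * sooptcopyout(), which truncates it to the user's buffer as described
 * above.  The option state is hypothetical.
 */
#if 0
static int
example_ctloutput_get(struct socket *so, struct sockopt *sopt)
{
	int optval;

	optval = 0;	/* ... load from protocol state ... */
	return (sooptcopyout(sopt, &optval, sizeof(optval)));
}
#endif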
2971
2972 int
2973 sogetopt(struct socket *so, struct sockopt *sopt)
2974 {
2975 int error, optval;
2976 struct linger l;
2977 struct timeval tv;
2978 #ifdef MAC
2979 struct mac extmac;
2980 #endif
2981
2982 CURVNET_SET(so->so_vnet);
2983 error = 0;
2984 if (sopt->sopt_level != SOL_SOCKET) {
2985 if (so->so_proto->pr_ctloutput != NULL)
2986 error = (*so->so_proto->pr_ctloutput)(so, sopt);
2987 else
2988 error = ENOPROTOOPT;
2989 CURVNET_RESTORE();
2990 return (error);
2991 } else {
2992 switch (sopt->sopt_name) {
2993 case SO_ACCEPTFILTER:
2994 error = accept_filt_getopt(so, sopt);
2995 break;
2996
2997 case SO_LINGER:
2998 SOCK_LOCK(so);
2999 l.l_onoff = so->so_options & SO_LINGER;
3000 l.l_linger = so->so_linger;
3001 SOCK_UNLOCK(so);
3002 error = sooptcopyout(sopt, &l, sizeof l);
3003 break;
3004
3005 case SO_USELOOPBACK:
3006 case SO_DONTROUTE:
3007 case SO_DEBUG:
3008 case SO_KEEPALIVE:
3009 case SO_REUSEADDR:
3010 case SO_REUSEPORT:
3011 case SO_REUSEPORT_LB:
3012 case SO_BROADCAST:
3013 case SO_OOBINLINE:
3014 case SO_ACCEPTCONN:
3015 case SO_TIMESTAMP:
3016 case SO_BINTIME:
3017 case SO_NOSIGPIPE:
3018 optval = so->so_options & sopt->sopt_name;
3019 integer:
3020 error = sooptcopyout(sopt, &optval, sizeof optval);
3021 break;
3022
3023 case SO_DOMAIN:
3024 optval = so->so_proto->pr_domain->dom_family;
3025 goto integer;
3026
3027 case SO_TYPE:
3028 optval = so->so_type;
3029 goto integer;
3030
3031 case SO_PROTOCOL:
3032 optval = so->so_proto->pr_protocol;
3033 goto integer;
3034
3035 case SO_ERROR:
3036 SOCK_LOCK(so);
3037 optval = so->so_error;
3038 so->so_error = 0;
3039 SOCK_UNLOCK(so);
3040 goto integer;
3041
3042 case SO_SNDBUF:
3043 optval = SOLISTENING(so) ? so->sol_sbsnd_hiwat :
3044 so->so_snd.sb_hiwat;
3045 goto integer;
3046
3047 case SO_RCVBUF:
3048 optval = SOLISTENING(so) ? so->sol_sbrcv_hiwat :
3049 so->so_rcv.sb_hiwat;
3050 goto integer;
3051
3052 case SO_SNDLOWAT:
3053 optval = SOLISTENING(so) ? so->sol_sbsnd_lowat :
3054 so->so_snd.sb_lowat;
3055 goto integer;
3056
3057 case SO_RCVLOWAT:
3058 optval = SOLISTENING(so) ? so->sol_sbrcv_lowat :
3059 so->so_rcv.sb_lowat;
3060 goto integer;
3061
3062 case SO_SNDTIMEO:
3063 case SO_RCVTIMEO:
3064 tv = sbttotv(sopt->sopt_name == SO_SNDTIMEO ?
3065 so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
3066 #ifdef COMPAT_FREEBSD32
3067 if (SV_CURPROC_FLAG(SV_ILP32)) {
3068 struct timeval32 tv32;
3069
3070 CP(tv, tv32, tv_sec);
3071 CP(tv, tv32, tv_usec);
3072 error = sooptcopyout(sopt, &tv32, sizeof tv32);
3073 } else
3074 #endif
3075 error = sooptcopyout(sopt, &tv, sizeof tv);
3076 break;
3077
3078 case SO_LABEL:
3079 #ifdef MAC
3080 error = sooptcopyin(sopt, &extmac, sizeof(extmac),
3081 sizeof(extmac));
3082 if (error)
3083 goto bad;
3084 error = mac_getsockopt_label(sopt->sopt_td->td_ucred,
3085 so, &extmac);
3086 if (error)
3087 goto bad;
3088 error = sooptcopyout(sopt, &extmac, sizeof extmac);
3089 #else
3090 error = EOPNOTSUPP;
3091 #endif
3092 break;
3093
3094 case SO_PEERLABEL:
3095 #ifdef MAC
3096 error = sooptcopyin(sopt, &extmac, sizeof(extmac),
3097 sizeof(extmac));
3098 if (error)
3099 goto bad;
3100 error = mac_getsockopt_peerlabel(
3101 sopt->sopt_td->td_ucred, so, &extmac);
3102 if (error)
3103 goto bad;
3104 error = sooptcopyout(sopt, &extmac, sizeof extmac);
3105 #else
3106 error = EOPNOTSUPP;
3107 #endif
3108 break;
3109
3110 case SO_LISTENQLIMIT:
3111 optval = SOLISTENING(so) ? so->sol_qlimit : 0;
3112 goto integer;
3113
3114 case SO_LISTENQLEN:
3115 optval = SOLISTENING(so) ? so->sol_qlen : 0;
3116 goto integer;
3117
3118 case SO_LISTENINCQLEN:
3119 optval = SOLISTENING(so) ? so->sol_incqlen : 0;
3120 goto integer;
3121
3122 case SO_TS_CLOCK:
3123 optval = so->so_ts_clock;
3124 goto integer;
3125
3126 case SO_MAX_PACING_RATE:
3127 optval = so->so_max_pacing_rate;
3128 goto integer;
3129
3130 default:
3131 if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
3132 error = hhook_run_socket(so, sopt,
3133 HHOOK_SOCKET_OPT);
3134 else
3135 error = ENOPROTOOPT;
3136 break;
3137 }
3138 }
3139 #ifdef MAC
3140 bad:
3141 #endif
3142 CURVNET_RESTORE();
3143 return (error);
3144 }
3145
3146 int
3147 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
3148 {
3149 struct mbuf *m, *m_prev;
3150 int sopt_size = sopt->sopt_valsize;
3151
3152 MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
3153 if (m == NULL)
3154 return ENOBUFS;
3155 if (sopt_size > MLEN) {
3156 MCLGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT);
3157 if ((m->m_flags & M_EXT) == 0) {
3158 m_free(m);
3159 return ENOBUFS;
3160 }
3161 m->m_len = min(MCLBYTES, sopt_size);
3162 } else {
3163 m->m_len = min(MLEN, sopt_size);
3164 }
3165 sopt_size -= m->m_len;
3166 *mp = m;
3167 m_prev = m;
3168
3169 while (sopt_size) {
3170 MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
3171 if (m == NULL) {
3172 m_freem(*mp);
3173 return ENOBUFS;
3174 }
3175 if (sopt_size > MLEN) {
3176 MCLGET(m, sopt->sopt_td != NULL ? M_WAITOK :
3177 M_NOWAIT);
3178 if ((m->m_flags & M_EXT) == 0) {
3179 m_freem(m);
3180 m_freem(*mp);
3181 return ENOBUFS;
3182 }
3183 m->m_len = min(MCLBYTES, sopt_size);
3184 } else {
3185 m->m_len = min(MLEN, sopt_size);
3186 }
3187 sopt_size -= m->m_len;
3188 m_prev->m_next = m;
3189 m_prev = m;
3190 }
3191 return (0);
3192 }
3193
3194 int
3195 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
3196 {
3197 struct mbuf *m0 = m;
3198
3199 if (sopt->sopt_val == NULL)
3200 return (0);
3201 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
3202 if (sopt->sopt_td != NULL) {
3203 int error;
3204
3205 error = copyin(sopt->sopt_val, mtod(m, char *),
3206 m->m_len);
3207 if (error != 0) {
3208 m_freem(m0);
3209 return(error);
3210 }
3211 } else
3212 bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
3213 sopt->sopt_valsize -= m->m_len;
3214 sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
3215 m = m->m_next;
3216 }
	if (m != NULL)	/* enough space should have been allocated by soopt_getm() */
3218 panic("ip6_sooptmcopyin");
3219 return (0);
3220 }
3221
3222 int
3223 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
3224 {
3225 struct mbuf *m0 = m;
3226 size_t valsize = 0;
3227
3228 if (sopt->sopt_val == NULL)
3229 return (0);
3230 while (m != NULL && sopt->sopt_valsize >= m->m_len) {
3231 if (sopt->sopt_td != NULL) {
3232 int error;
3233
3234 error = copyout(mtod(m, char *), sopt->sopt_val,
3235 m->m_len);
3236 if (error != 0) {
3237 m_freem(m0);
3238 return(error);
3239 }
3240 } else
3241 bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
3242 sopt->sopt_valsize -= m->m_len;
3243 sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
3244 valsize += m->m_len;
3245 m = m->m_next;
3246 }
3247 if (m != NULL) {
		/* Enough option buffer should have been supplied by userland. */
3249 m_freem(m0);
3250 return(EINVAL);
3251 }
3252 sopt->sopt_valsize = valsize;
3253 return (0);
3254 }
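
/*
 * An illustrative sketch (not compiled) of how the mbuf-based option
 * helpers combine on the SET side: soopt_getm() sizes a chain from
 * sopt_valsize and soopt_mcopyin() fills it from the user buffer,
 * freeing the chain on error.  The GET direction instead has the
 * protocol fill the chain and returns it with soopt_mcopyout().  The
 * function name is hypothetical.
 */
#if 0
static int
example_mbuf_option_set(struct sockopt *sopt, struct mbuf **mp)
{
	int error;

	error = soopt_getm(sopt, mp);
	if (error != 0)
		return (error);
	return (soopt_mcopyin(sopt, *mp));	/* Frees chain on error. */
}
#endif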
3255
3256 /*
3257 * sohasoutofband(): protocol notifies socket layer of the arrival of new
3258 * out-of-band data, which will then notify socket consumers.
3259 */
3260 void
3261 sohasoutofband(struct socket *so)
3262 {
3263
3264 if (so->so_sigio != NULL)
3265 pgsigio(&so->so_sigio, SIGURG, 0);
3266 selwakeuppri(&so->so_rdsel, PSOCK);
3267 }
3268
3269 int
3270 sopoll(struct socket *so, int events, struct ucred *active_cred,
3271 struct thread *td)
3272 {
3273
3274 /*
3275 * We do not need to set or assert curvnet as long as everyone uses
3276 * sopoll_generic().
3277 */
3278 return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred,
3279 td));
3280 }
3281
3282 int
3283 sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
3284 struct thread *td)
3285 {
3286 int revents;
3287
3288 SOCK_LOCK(so);
3289 if (SOLISTENING(so)) {
3290 if (!(events & (POLLIN | POLLRDNORM)))
3291 revents = 0;
3292 else if (!TAILQ_EMPTY(&so->sol_comp))
3293 revents = events & (POLLIN | POLLRDNORM);
3294 else if ((events & POLLINIGNEOF) == 0 && so->so_error)
3295 revents = (events & (POLLIN | POLLRDNORM)) | POLLHUP;
3296 else {
3297 selrecord(td, &so->so_rdsel);
3298 revents = 0;
3299 }
3300 } else {
3301 revents = 0;
3302 SOCKBUF_LOCK(&so->so_snd);
3303 SOCKBUF_LOCK(&so->so_rcv);
3304 if (events & (POLLIN | POLLRDNORM))
3305 if (soreadabledata(so))
3306 revents |= events & (POLLIN | POLLRDNORM);
3307 if (events & (POLLOUT | POLLWRNORM))
3308 if (sowriteable(so))
3309 revents |= events & (POLLOUT | POLLWRNORM);
3310 if (events & (POLLPRI | POLLRDBAND))
3311 if (so->so_oobmark ||
3312 (so->so_rcv.sb_state & SBS_RCVATMARK))
3313 revents |= events & (POLLPRI | POLLRDBAND);
3314 if ((events & POLLINIGNEOF) == 0) {
3315 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
3316 revents |= events & (POLLIN | POLLRDNORM);
3317 if (so->so_snd.sb_state & SBS_CANTSENDMORE)
3318 revents |= POLLHUP;
3319 }
3320 }
3321 if (revents == 0) {
3322 if (events &
3323 (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
3324 selrecord(td, &so->so_rdsel);
3325 so->so_rcv.sb_flags |= SB_SEL;
3326 }
3327 if (events & (POLLOUT | POLLWRNORM)) {
3328 selrecord(td, &so->so_wrsel);
3329 so->so_snd.sb_flags |= SB_SEL;
3330 }
3331 }
3332 SOCKBUF_UNLOCK(&so->so_rcv);
3333 SOCKBUF_UNLOCK(&so->so_snd);
3334 }
3335 SOCK_UNLOCK(so);
3336 return (revents);
3337 }
3338
3339 int
3340 soo_kqfilter(struct file *fp, struct knote *kn)
3341 {
3342 struct socket *so = kn->kn_fp->f_data;
3343 struct sockbuf *sb;
3344 struct knlist *knl;
3345
3346 switch (kn->kn_filter) {
3347 case EVFILT_READ:
3348 kn->kn_fop = &soread_filtops;
3349 knl = &so->so_rdsel.si_note;
3350 sb = &so->so_rcv;
3351 break;
3352 case EVFILT_WRITE:
3353 kn->kn_fop = &sowrite_filtops;
3354 knl = &so->so_wrsel.si_note;
3355 sb = &so->so_snd;
3356 break;
3357 case EVFILT_EMPTY:
3358 kn->kn_fop = &soempty_filtops;
3359 knl = &so->so_wrsel.si_note;
3360 sb = &so->so_snd;
3361 break;
3362 default:
3363 return (EINVAL);
3364 }
3365
3366 SOCK_LOCK(so);
3367 if (SOLISTENING(so)) {
3368 knlist_add(knl, kn, 1);
3369 } else {
3370 SOCKBUF_LOCK(sb);
3371 knlist_add(knl, kn, 1);
3372 sb->sb_flags |= SB_KNOTE;
3373 SOCKBUF_UNLOCK(sb);
3374 }
3375 SOCK_UNLOCK(so);
3376 return (0);
3377 }
3378
3379 /*
3380 * Some routines that return EOPNOTSUPP for entry points that are not
3381 * supported by a protocol. Fill in as needed.
3382 */
3383 int
3384 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
3385 {
3386
3387 return EOPNOTSUPP;
3388 }
3389
3390 int
3391 pru_aio_queue_notsupp(struct socket *so, struct kaiocb *job)
3392 {
3393
3394 return EOPNOTSUPP;
3395 }
3396
3397 int
3398 pru_attach_notsupp(struct socket *so, int proto, struct thread *td)
3399 {
3400
3401 return EOPNOTSUPP;
3402 }
3403
3404 int
3405 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
3406 {
3407
3408 return EOPNOTSUPP;
3409 }
3410
3411 int
3412 pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
3413 struct thread *td)
3414 {
3415
3416 return EOPNOTSUPP;
3417 }
3418
3419 int
3420 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
3421 {
3422
3423 return EOPNOTSUPP;
3424 }
3425
3426 int
3427 pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
3428 struct thread *td)
3429 {
3430
3431 return EOPNOTSUPP;
3432 }
3433
3434 int
3435 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
3436 {
3437
3438 return EOPNOTSUPP;
3439 }
3440
3441 int
3442 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
3443 struct ifnet *ifp, struct thread *td)
3444 {
3445
3446 return EOPNOTSUPP;
3447 }
3448
3449 int
3450 pru_disconnect_notsupp(struct socket *so)
3451 {
3452
3453 return EOPNOTSUPP;
3454 }
3455
3456 int
3457 pru_listen_notsupp(struct socket *so, int backlog, struct thread *td)
3458 {
3459
3460 return EOPNOTSUPP;
3461 }
3462
3463 int
3464 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
3465 {
3466
3467 return EOPNOTSUPP;
3468 }
3469
3470 int
3471 pru_rcvd_notsupp(struct socket *so, int flags)
3472 {
3473
3474 return EOPNOTSUPP;
3475 }
3476
3477 int
3478 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
3479 {
3480
3481 return EOPNOTSUPP;
3482 }
3483
3484 int
3485 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
3486 struct sockaddr *addr, struct mbuf *control, struct thread *td)
3487 {
3488
3489 return EOPNOTSUPP;
3490 }
3491
3492 int
3493 pru_ready_notsupp(struct socket *so, struct mbuf *m, int count)
3494 {
3495
3496 return (EOPNOTSUPP);
3497 }
3498
3499 /*
3500 * This isn't really a ``null'' operation, but it's the default one and
3501 * doesn't do anything destructive.
3502 */
3503 int
3504 pru_sense_null(struct socket *so, struct stat *sb)
3505 {
3506
3507 sb->st_blksize = so->so_snd.sb_hiwat;
3508 return 0;
3509 }
3510
3511 int
3512 pru_shutdown_notsupp(struct socket *so)
3513 {
3514
3515 return EOPNOTSUPP;
3516 }
3517
3518 int
3519 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
3520 {
3521
3522 return EOPNOTSUPP;
3523 }
3524
3525 int
3526 pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
3527 struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
3528 {
3529
3530 return EOPNOTSUPP;
3531 }
3532
3533 int
3534 pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
3535 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
3536 {
3537
3538 return EOPNOTSUPP;
3539 }
3540
3541 int
3542 pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred,
3543 struct thread *td)
3544 {
3545
3546 return EOPNOTSUPP;
3547 }
3548
3549 static void
3550 filt_sordetach(struct knote *kn)
3551 {
3552 struct socket *so = kn->kn_fp->f_data;
3553
3554 so_rdknl_lock(so);
3555 knlist_remove(&so->so_rdsel.si_note, kn, 1);
3556 if (!SOLISTENING(so) && knlist_empty(&so->so_rdsel.si_note))
3557 so->so_rcv.sb_flags &= ~SB_KNOTE;
3558 so_rdknl_unlock(so);
3559 }
3560
3561 /*ARGSUSED*/
3562 static int
3563 filt_soread(struct knote *kn, long hint)
3564 {
3565 struct socket *so;
3566
3567 so = kn->kn_fp->f_data;
3568
3569 if (SOLISTENING(so)) {
3570 SOCK_LOCK_ASSERT(so);
3571 kn->kn_data = so->sol_qlen;
3572 if (so->so_error) {
3573 kn->kn_flags |= EV_EOF;
3574 kn->kn_fflags = so->so_error;
3575 return (1);
3576 }
3577 return (!TAILQ_EMPTY(&so->sol_comp));
3578 }
3579
3580 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
3581
3582 kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl;
3583 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
3584 kn->kn_flags |= EV_EOF;
3585 kn->kn_fflags = so->so_error;
3586 return (1);
3587 } else if (so->so_error) /* temporary udp error */
3588 return (1);
3589
3590 if (kn->kn_sfflags & NOTE_LOWAT) {
3591 if (kn->kn_data >= kn->kn_sdata)
3592 return (1);
3593 } else if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat)
3594 return (1);
3595
	/* This hook returning non-zero indicates an event, not an error. */
3597 return (hhook_run_socket(so, NULL, HHOOK_FILT_SOREAD));
3598 }
3599
3600 static void
3601 filt_sowdetach(struct knote *kn)
3602 {
3603 struct socket *so = kn->kn_fp->f_data;
3604
3605 so_wrknl_lock(so);
3606 knlist_remove(&so->so_wrsel.si_note, kn, 1);
3607 if (!SOLISTENING(so) && knlist_empty(&so->so_wrsel.si_note))
3608 so->so_snd.sb_flags &= ~SB_KNOTE;
3609 so_wrknl_unlock(so);
3610 }
3611
3612 /*ARGSUSED*/
3613 static int
3614 filt_sowrite(struct knote *kn, long hint)
3615 {
3616 struct socket *so;
3617
3618 so = kn->kn_fp->f_data;
3619
3620 if (SOLISTENING(so))
3621 return (0);
3622
3623 SOCKBUF_LOCK_ASSERT(&so->so_snd);
3624 kn->kn_data = sbspace(&so->so_snd);
3625
3626 hhook_run_socket(so, kn, HHOOK_FILT_SOWRITE);
3627
3628 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
3629 kn->kn_flags |= EV_EOF;
3630 kn->kn_fflags = so->so_error;
3631 return (1);
3632 } else if (so->so_error) /* temporary udp error */
3633 return (1);
3634 else if (((so->so_state & SS_ISCONNECTED) == 0) &&
3635 (so->so_proto->pr_flags & PR_CONNREQUIRED))
3636 return (0);
3637 else if (kn->kn_sfflags & NOTE_LOWAT)
3638 return (kn->kn_data >= kn->kn_sdata);
3639 else
3640 return (kn->kn_data >= so->so_snd.sb_lowat);
3641 }
3642
3643 static int
3644 filt_soempty(struct knote *kn, long hint)
3645 {
3646 struct socket *so;
3647
3648 so = kn->kn_fp->f_data;
3649
3650 if (SOLISTENING(so))
3651 return (1);
3652
3653 SOCKBUF_LOCK_ASSERT(&so->so_snd);
3654 kn->kn_data = sbused(&so->so_snd);
3655
3656 if (kn->kn_data == 0)
3657 return (1);
3658 else
3659 return (0);
3660 }
3661
3662 int
3663 socheckuid(struct socket *so, uid_t uid)
3664 {
3665
3666 if (so == NULL)
3667 return (EPERM);
3668 if (so->so_cred->cr_uid != uid)
3669 return (EPERM);
3670 return (0);
3671 }
3672
3673 /*
3674 * These functions are used by protocols to notify the socket layer (and its
3675 * consumers) of state changes in the sockets driven by protocol-side events.
3676 */
3677
3678 /*
3679 * Procedures to manipulate state flags of socket and do appropriate wakeups.
3680 *
3681 * Normal sequence from the active (originating) side is that
3682 * soisconnecting() is called during processing of connect() call, resulting
3683 * in an eventual call to soisconnected() if/when the connection is
3684 * established. When the connection is torn down soisdisconnecting() is
3685 * called during processing of disconnect() call, and soisdisconnected() is
3686 * called when the connection to the peer is totally severed. The semantics
3687 * of these routines are such that connectionless protocols can call
3688 * soisconnected() and soisdisconnected() only, bypassing the in-progress
3689 * calls when setting up a ``connection'' takes no time.
3690 *
3691 * From the passive side, a socket is created with two queues of sockets:
3692 * so_incomp for connections in progress and so_comp for connections already
3693 * made and awaiting user acceptance. As a protocol is preparing incoming
3694 * connections, it creates a socket structure queued on so_incomp by calling
3695 * sonewconn(). When the connection is established, soisconnected() is
3696 * called, and transfers the socket structure to so_comp, making it available
3697 * to accept().
3698 *
3699 * If a socket is closed with sockets on either so_incomp or so_comp, these
3700 * sockets are dropped.
3701 *
3702 * If higher-level protocols are implemented in the kernel, the wakeups done
3703 * here will sometimes cause software-interrupt process scheduling.
3704 */
3705 void
3706 soisconnecting(struct socket *so)
3707 {
3708
3709 SOCK_LOCK(so);
3710 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
3711 so->so_state |= SS_ISCONNECTING;
3712 SOCK_UNLOCK(so);
3713 }
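
/*
 * An illustrative sketch (not compiled) of the shortcut described in the
 * comment above: a connectionless protocol's pru_connect can record the
 * peer and call soisconnected() directly, skipping the in-progress
 * states.  The function name and state handling are hypothetical.
 */
#if 0
static int
example_connectionless_connect(struct socket *so, struct sockaddr *nam,
    struct thread *td)
{

	/* ... record 'nam' as the peer in protocol state ... */
	soisconnected(so);
	return (0);
}
#endif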
3714
3715 void
3716 soisconnected(struct socket *so)
3717 {
3718
3719 SOCK_LOCK(so);
3720 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
3721 so->so_state |= SS_ISCONNECTED;
3722
3723 if (so->so_qstate == SQ_INCOMP) {
3724 struct socket *head = so->so_listen;
3725 int ret;
3726
3727 KASSERT(head, ("%s: so %p on incomp of NULL", __func__, so));
3728 /*
		 * When promoting a socket from the incomplete queue to the
		 * complete one, we need to acquire the locks in the reverse
		 * order.  We first trylock, and if that doesn't succeed we
		 * go the hard way: leave a reference, relock in the proper
		 * order, and then recheck consistency.
3734 */
3735 if (__predict_false(SOLISTEN_TRYLOCK(head) == 0)) {
3736 soref(head);
3737 SOCK_UNLOCK(so);
3738 SOLISTEN_LOCK(head);
3739 SOCK_LOCK(so);
3740 if (__predict_false(head != so->so_listen)) {
3741 /*
				 * The socket went off the listen queue,
				 * presumably due to a lost race with
				 * close(2) on the listening socket.  The
				 * socket is about to be aborted via
				 * soabort().
3745 */
3746 SOCK_UNLOCK(so);
3747 sorele(head);
3748 return;
3749 }
3750 /* Not the last one, as so holds a ref. */
3751 refcount_release(&head->so_count);
3752 }
3753 again:
3754 if ((so->so_options & SO_ACCEPTFILTER) == 0) {
3755 TAILQ_REMOVE(&head->sol_incomp, so, so_list);
3756 head->sol_incqlen--;
3757 TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
3758 head->sol_qlen++;
3759 so->so_qstate = SQ_COMP;
3760 SOCK_UNLOCK(so);
3761 solisten_wakeup(head); /* unlocks */
3762 } else {
3763 SOCKBUF_LOCK(&so->so_rcv);
3764 soupcall_set(so, SO_RCV,
3765 head->sol_accept_filter->accf_callback,
3766 head->sol_accept_filter_arg);
3767 so->so_options &= ~SO_ACCEPTFILTER;
3768 ret = head->sol_accept_filter->accf_callback(so,
3769 head->sol_accept_filter_arg, M_NOWAIT);
3770 if (ret == SU_ISCONNECTED) {
3771 soupcall_clear(so, SO_RCV);
3772 SOCKBUF_UNLOCK(&so->so_rcv);
3773 goto again;
3774 }
3775 SOCKBUF_UNLOCK(&so->so_rcv);
3776 SOCK_UNLOCK(so);
3777 SOLISTEN_UNLOCK(head);
3778 }
3779 return;
3780 }
3781 SOCK_UNLOCK(so);
3782 wakeup(&so->so_timeo);
3783 sorwakeup(so);
3784 sowwakeup(so);
3785 }
3786
3787 void
3788 soisdisconnecting(struct socket *so)
3789 {
3790
3791 SOCK_LOCK(so);
3792 so->so_state &= ~SS_ISCONNECTING;
3793 so->so_state |= SS_ISDISCONNECTING;
3794
3795 if (!SOLISTENING(so)) {
3796 SOCKBUF_LOCK(&so->so_rcv);
3797 socantrcvmore_locked(so);
3798 SOCKBUF_LOCK(&so->so_snd);
3799 socantsendmore_locked(so);
3800 }
3801 SOCK_UNLOCK(so);
3802 wakeup(&so->so_timeo);
3803 }
3804
3805 void
3806 soisdisconnected(struct socket *so)
3807 {
3808
3809 SOCK_LOCK(so);
3810 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
3811 so->so_state |= SS_ISDISCONNECTED;
3812
3813 if (!SOLISTENING(so)) {
3814 SOCK_UNLOCK(so);
3815 SOCKBUF_LOCK(&so->so_rcv);
3816 socantrcvmore_locked(so);
3817 SOCKBUF_LOCK(&so->so_snd);
3818 sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
3819 socantsendmore_locked(so);
3820 } else
3821 SOCK_UNLOCK(so);
3822 wakeup(&so->so_timeo);
3823 }
3824
3825 /*
3826 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
3827 */
3828 struct sockaddr *
3829 sodupsockaddr(const struct sockaddr *sa, int mflags)
3830 {
3831 struct sockaddr *sa2;
3832
3833 sa2 = malloc(sa->sa_len, M_SONAME, mflags);
3834 if (sa2)
3835 bcopy(sa, sa2, sa->sa_len);
3836 return sa2;
3837 }
3838
3839 /*
3840 * Register per-socket destructor.
3841 */
3842 void
3843 sodtor_set(struct socket *so, so_dtor_t *func)
3844 {
3845
3846 SOCK_LOCK_ASSERT(so);
3847 so->so_dtor = func;
3848 }
3849
3850 /*
3851 * Register per-socket buffer upcalls.
3852 */
3853 void
3854 soupcall_set(struct socket *so, int which, so_upcall_t func, void *arg)
3855 {
3856 struct sockbuf *sb;
3857
3858 KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
3859
3860 switch (which) {
3861 case SO_RCV:
3862 sb = &so->so_rcv;
3863 break;
3864 case SO_SND:
3865 sb = &so->so_snd;
3866 break;
3867 default:
3868 panic("soupcall_set: bad which");
3869 }
3870 SOCKBUF_LOCK_ASSERT(sb);
3871 sb->sb_upcall = func;
3872 sb->sb_upcallarg = arg;
3873 sb->sb_flags |= SB_UPCALL;
3874 }
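
/*
 * An illustrative sketch (not compiled) of registering a receive upcall.
 * The upcall runs with the sockbuf lock held and must not sleep; the
 * names below are hypothetical.
 */
#if 0
static int
example_rcv_upcall(struct socket *so, void *arg, int waitflag)
{

	/* ... e.g. schedule a task to drain so->so_rcv ... */
	return (SU_OK);
}

static void
example_register_rcv_upcall(struct socket *so, void *arg)
{

	SOCKBUF_LOCK(&so->so_rcv);
	soupcall_set(so, SO_RCV, example_rcv_upcall, arg);
	SOCKBUF_UNLOCK(&so->so_rcv);
}
#endif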

void
soupcall_clear(struct socket *so, int which)
{
	struct sockbuf *sb;

	KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));

	switch (which) {
	case SO_RCV:
		sb = &so->so_rcv;
		break;
	case SO_SND:
		sb = &so->so_snd;
		break;
	default:
		panic("soupcall_clear: bad which");
	}
	SOCKBUF_LOCK_ASSERT(sb);
	KASSERT(sb->sb_upcall != NULL,
	    ("%s: so %p no upcall to clear", __func__, so));
	sb->sb_upcall = NULL;
	sb->sb_upcallarg = NULL;
	sb->sb_flags &= ~SB_UPCALL;
}
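
/*
 * Example (illustrative): an upcall fires when the state of the
 * corresponding buffer changes, and both registration and removal
 * must happen with that buffer's lock held.  A hypothetical consumer
 * watching for received data:
 *
 *	static int
 *	example_rcv_upcall(struct socket *so, void *arg, int waitflag)
 *	{
 *
 *		... note that data arrived; defer real work ...
 *		return (SU_OK);
 *	}
 *
 *	SOCKBUF_LOCK(&so->so_rcv);
 *	soupcall_set(so, SO_RCV, example_rcv_upcall, ctx);
 *	SOCKBUF_UNLOCK(&so->so_rcv);
 *	...
 *	SOCKBUF_LOCK(&so->so_rcv);
 *	soupcall_clear(so, SO_RCV);
 *	SOCKBUF_UNLOCK(&so->so_rcv);
 */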

void
solisten_upcall_set(struct socket *so, so_upcall_t func, void *arg)
{

	SOLISTEN_LOCK_ASSERT(so);
	so->sol_upcall = func;
	so->sol_upcallarg = arg;
}
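
/*
 * Example (illustrative): a listening socket's upcall is registered
 * under the listen lock and is invoked from solisten_wakeup() when a
 * connection becomes ready to accept:
 *
 *	SOLISTEN_LOCK(so);
 *	solisten_upcall_set(so, example_listen_upcall, ctx);
 *	SOLISTEN_UNLOCK(so);
 *
 * where example_listen_upcall is a hypothetical so_upcall_t.
 */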

static void
so_rdknl_lock(void *arg)
{
	struct socket *so = arg;

	if (SOLISTENING(so))
		SOCK_LOCK(so);
	else
		SOCKBUF_LOCK(&so->so_rcv);
}

static void
so_rdknl_unlock(void *arg)
{
	struct socket *so = arg;

	if (SOLISTENING(so))
		SOCK_UNLOCK(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
}

static void
so_rdknl_assert_locked(void *arg)
{
	struct socket *so = arg;

	if (SOLISTENING(so))
		SOCK_LOCK_ASSERT(so);
	else
		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
}

static void
so_rdknl_assert_unlocked(void *arg)
{
	struct socket *so = arg;

	if (SOLISTENING(so))
		SOCK_UNLOCK_ASSERT(so);
	else
		SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
}

static void
so_wrknl_lock(void *arg)
{
	struct socket *so = arg;

	if (SOLISTENING(so))
		SOCK_LOCK(so);
	else
		SOCKBUF_LOCK(&so->so_snd);
}

static void
so_wrknl_unlock(void *arg)
{
	struct socket *so = arg;

	if (SOLISTENING(so))
		SOCK_UNLOCK(so);
	else
		SOCKBUF_UNLOCK(&so->so_snd);
}

static void
so_wrknl_assert_locked(void *arg)
{
	struct socket *so = arg;

	if (SOLISTENING(so))
		SOCK_LOCK_ASSERT(so);
	else
		SOCKBUF_LOCK_ASSERT(&so->so_snd);
}

static void
so_wrknl_assert_unlocked(void *arg)
{
	struct socket *so = arg;

	if (SOLISTENING(so))
		SOCK_UNLOCK_ASSERT(so);
	else
		SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
}
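
/*
 * The lock helpers above exist to be handed to knlist_init() at
 * socket allocation time, so that kqueue takes the socket lock for a
 * listening socket and the relevant sockbuf lock otherwise.  The
 * wiring in soalloc() looks roughly like this (a sketch, assuming the
 * knlist_init() signature of this kernel version):
 *
 *	knlist_init(&so->so_rcv.sb_sel.si_note, so, so_rdknl_lock,
 *	    so_rdknl_unlock, so_rdknl_assert_locked,
 *	    so_rdknl_assert_unlocked);
 *	knlist_init(&so->so_snd.sb_sel.si_note, so, so_wrknl_lock,
 *	    so_wrknl_unlock, so_wrknl_assert_locked,
 *	    so_wrknl_assert_unlocked);
 */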

/*
 * Create an external-format (``xsocket'') structure using the information in
 * the kernel-format socket structure pointed to by so.  This is done to
 * reduce the spew of irrelevant information over this interface, to isolate
 * user code from changes in the kernel structure, and potentially to provide
 * information-hiding if we decide that some of this information should be
 * hidden from users.
 */
void
sotoxsocket(struct socket *so, struct xsocket *xso)
{

	bzero(xso, sizeof(*xso));
	xso->xso_len = sizeof(*xso);
	xso->xso_so = (uintptr_t)so;
	xso->so_type = so->so_type;
	xso->so_options = so->so_options;
	xso->so_linger = so->so_linger;
	xso->so_state = so->so_state;
	xso->so_pcb = (uintptr_t)so->so_pcb;
	xso->xso_protocol = so->so_proto->pr_protocol;
	xso->xso_family = so->so_proto->pr_domain->dom_family;
	xso->so_timeo = so->so_timeo;
	xso->so_error = so->so_error;
	xso->so_uid = so->so_cred->cr_uid;
	xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
	if (SOLISTENING(so)) {
		xso->so_qlen = so->sol_qlen;
		xso->so_incqlen = so->sol_incqlen;
		xso->so_qlimit = so->sol_qlimit;
		xso->so_oobmark = 0;
	} else {
		xso->so_state |= so->so_qstate;
		xso->so_qlen = xso->so_incqlen = xso->so_qlimit = 0;
		xso->so_oobmark = so->so_oobmark;
		sbtoxsockbuf(&so->so_snd, &xso->so_snd);
		sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
	}
}
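
/*
 * Example (illustrative): sotoxsocket() is the building block for
 * sysctls that export socket state to userland, e.g. a pcblist
 * handler does roughly:
 *
 *	struct xsocket xso;
 *
 *	sotoxsocket(so, &xso);
 *	error = SYSCTL_OUT(req, &xso, sizeof(xso));
 */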

struct sockbuf *
so_sockbuf_rcv(struct socket *so)
{

	return (&so->so_rcv);
}

struct sockbuf *
so_sockbuf_snd(struct socket *so)
{

	return (&so->so_snd);
}

int
so_state_get(const struct socket *so)
{

	return (so->so_state);
}

void
so_state_set(struct socket *so, int val)
{

	so->so_state = val;
}

int
so_options_get(const struct socket *so)
{

	return (so->so_options);
}

void
so_options_set(struct socket *so, int val)
{

	so->so_options = val;
}

int
so_error_get(const struct socket *so)
{

	return (so->so_error);
}

void
so_error_set(struct socket *so, int val)
{

	so->so_error = val;
}

int
so_linger_get(const struct socket *so)
{

	return (so->so_linger);
}

void
so_linger_set(struct socket *so, int val)
{

	so->so_linger = val;
}

struct protosw *
so_protosw_get(const struct socket *so)
{

	return (so->so_proto);
}

void
so_protosw_set(struct socket *so, struct protosw *val)
{

	so->so_proto = val;
}

void
so_sorwakeup(struct socket *so)
{

	sorwakeup(so);
}

void
so_sowwakeup(struct socket *so)
{

	sowwakeup(so);
}

void
so_sorwakeup_locked(struct socket *so)
{

	sorwakeup_locked(so);
}

void
so_sowwakeup_locked(struct socket *so)
{

	sowwakeup_locked(so);
}

void
so_lock(struct socket *so)
{

	SOCK_LOCK(so);
}

void
so_unlock(struct socket *so)
{

	SOCK_UNLOCK(so);
}
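
/*
 * The accessors and wrappers above give external consumers, such as
 * TCP offload modules, access to socket state without depending on
 * the layout of struct socket and struct sockbuf.  An illustrative
 * (hypothetical) consumer:
 *
 *	so_lock(so);
 *	if (so_state_get(so) & SS_ISCONNECTED)
 *		so_error_set(so, 0);
 *	so_unlock(so);
 *	so_sorwakeup(so);
 */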