The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    5  *      The Regents of the University of California. All Rights Reserved.
    6  * Copyright (c) 2004-2009 Robert N. M. Watson All Rights Reserved.
    7  * Copyright (c) 2018 Matthew Macy
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   34  */
   35 
   36 /*
   37  * UNIX Domain (Local) Sockets
   38  *
   39  * This is an implementation of UNIX (local) domain sockets.  Each socket has
   40  * an associated struct unpcb (UNIX protocol control block).  Stream sockets
   41  * may be connected to 0 or 1 other socket.  Datagram sockets may be
   42  * connected to 0, 1, or many other sockets.  Sockets may be created and
   43  * connected in pairs (socketpair(2)), or bound/connected to using the file
   44  * system name space.  For most purposes, only the receive socket buffer is
   45  * used, as sending on one socket delivers directly to the receive socket
   46  * buffer of a second socket.
   47  *
   48  * The implementation is substantially complicated by the fact that
   49  * "ancillary data", such as file descriptors or credentials, may be passed
   50  * across UNIX domain sockets.  The potential for passing UNIX domain sockets
   51  * over other UNIX domain sockets requires the implementation of a simple
   52  * garbage collector to find and tear down cycles of disconnected sockets.
   53  *
   54  * TODO:
   55  *      RDM
   56  *      rethink name space problems
   57  *      need a proper out-of-band
   58  */
   59 
   60 #include <sys/cdefs.h>
   61 __FBSDID("$FreeBSD$");
   62 
   63 #include "opt_ddb.h"
   64 
   65 #include <sys/param.h>
   66 #include <sys/capsicum.h>
   67 #include <sys/domain.h>
   68 #include <sys/eventhandler.h>
   69 #include <sys/fcntl.h>
   70 #include <sys/file.h>
   71 #include <sys/filedesc.h>
   72 #include <sys/kernel.h>
   73 #include <sys/lock.h>
   74 #include <sys/malloc.h>
   75 #include <sys/mbuf.h>
   76 #include <sys/mount.h>
   77 #include <sys/mutex.h>
   78 #include <sys/namei.h>
   79 #include <sys/proc.h>
   80 #include <sys/protosw.h>
   81 #include <sys/queue.h>
   82 #include <sys/resourcevar.h>
   83 #include <sys/rwlock.h>
   84 #include <sys/socket.h>
   85 #include <sys/socketvar.h>
   86 #include <sys/signalvar.h>
   87 #include <sys/stat.h>
   88 #include <sys/sx.h>
   89 #include <sys/sysctl.h>
   90 #include <sys/systm.h>
   91 #include <sys/taskqueue.h>
   92 #include <sys/un.h>
   93 #include <sys/unpcb.h>
   94 #include <sys/vnode.h>
   95 
   96 #include <net/vnet.h>
   97 
   98 #ifdef DDB
   99 #include <ddb/ddb.h>
  100 #endif
  101 
  102 #include <security/mac/mac_framework.h>
  103 
  104 #include <vm/uma.h>
  105 
  106 MALLOC_DECLARE(M_FILECAPS);
  107 
  108 static struct domain localdomain;
  109 
  110 static uma_zone_t       unp_zone;       /* Zone unpcbs are allocated from. */
  111 static unp_gen_t        unp_gencnt;     /* (l) Bumped on attach and detach. */
  112 static u_int            unp_count;      /* (l) Count of local sockets. */
  113 static ino_t            unp_ino;        /* Prototype for fake inode numbers. */
  114 static int              unp_rights;     /* (g) File descriptors in flight. */
  115 static struct unp_head  unp_shead;      /* (l) List of stream sockets. */
  116 static struct unp_head  unp_dhead;      /* (l) List of datagram sockets. */
  117 static struct unp_head  unp_sphead;     /* (l) List of seqpacket sockets. */
  118 
      /*
       * Queue entry holding one file pointer.  NOTE(review): presumably a file
       * whose close has been deferred to unp_defer_task; the code that fills and
       * drains unp_defers is outside this view -- confirm.
       */
  119 struct unp_defer {
  120         SLIST_ENTRY(unp_defer) ud_link;
  121         struct file *ud_fp;
  122 };
  123 static SLIST_HEAD(, unp_defer) unp_defers;
  124 static int unp_defers_count;    /* Exported read-only as net.local.deferred. */
  125 
      /*
       * Nameless AF_LOCAL address; uipc_accept() copies it out when there is no
       * peer or the peer is unbound.
       */
  126 static const struct sockaddr    sun_noname = { sizeof(sun_noname), AF_LOCAL };
  127 
  128 /*
  129  * Garbage collection of cyclic file descriptor/socket references occurs
  130  * asynchronously in a taskqueue context in order to avoid recursion and
  131  * reentrance in the UNIX domain socket, file descriptor, and socket layer
  132  * code.  See unp_gc() for a full description.
  133  */
  134 static struct timeout_task unp_gc_task;
  135 
  136 /*
  137  * Closing UNIX domain sockets that were attached as SCM_RIGHTS is
  138  * postponed to a taskqueue to avoid arbitrary recursion depth, since
  139  * the attached sockets might themselves have other sockets attached.
  140  */
  141 static struct task      unp_defer_task;
  142 
  143 /*
  144  * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
  145  * stream sockets, although the total for sender and receiver is actually
  146  * only PIPSIZ.
  147  *
  148  * Datagram sockets really use the sendspace as the maximum datagram size,
  149  * and don't really want to reserve the sendspace.  Their recvspace should be
  150  * large enough for at least one max-size datagram plus address.
  151  */
  152 #ifndef PIPSIZ
  153 #define PIPSIZ  8192
  154 #endif
      /* Defaults; each one is writable at run time through the sysctls below. */
  155 static u_long   unpst_sendspace = PIPSIZ;
  156 static u_long   unpst_recvspace = PIPSIZ;
  157 static u_long   unpdg_maxdgram = 2*1024;
  158 static u_long   unpdg_recvspace = 16*1024;      /* support 8KB syslog msgs */
  159 static u_long   unpsp_sendspace = PIPSIZ;       /* really max datagram size */
  160 static u_long   unpsp_recvspace = PIPSIZ;
  161 
      /* Sysctl nodes: net.local and net.local.{stream,dgram,seqpacket}. */
  162 static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  163     "Local domain");
  164 static SYSCTL_NODE(_net_local, SOCK_STREAM, stream,
  165     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  166     "SOCK_STREAM");
  167 static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram,
  168     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  169     "SOCK_DGRAM");
  170 static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket,
  171     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  172     "SOCK_SEQPACKET");
  173 
      /* Read/write knobs backed by the buffer-size variables above. */
  174 SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
  175            &unpst_sendspace, 0, "Default stream send space.");
  176 SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
  177            &unpst_recvspace, 0, "Default stream receive space.");
  178 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
  179            &unpdg_maxdgram, 0, "Maximum datagram size.");
  180 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
  181            &unpdg_recvspace, 0, "Default datagram receive space.");
      /* Note: the "maxseqpacket" knob is backed by unpsp_sendspace. */
  182 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW,
  183            &unpsp_sendspace, 0, "Default seqpacket send space.");
  184 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW,
  185            &unpsp_recvspace, 0, "Default seqpacket receive space.");
      /* Read-only counters. */
  186 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
  187     "File descriptors in flight.");
  188 SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
  189     &unp_defers_count, 0,
  190     "File descriptors deferred to taskqueue for close.");
  191 
  192 /*
  193  * Locking and synchronization:
  194  *
  195  * Several types of locks exist in the local domain socket implementation:
  196  * - a global linkage lock
  197  * - a global connection list lock
  198  * - the mtxpool lock
  199  * - per-unpcb mutexes
  200  *
  201  * The linkage lock protects the global socket lists, the generation number
  202  * counter and garbage collector state.
  203  *
  204  * The connection list lock protects the list of referring sockets in a datagram
  205  * socket PCB.  This lock is also overloaded to protect a global list of
  206  * sockets whose buffers contain socket references in the form of SCM_RIGHTS
  207  * messages.  To avoid recursion, such references are released by a dedicated
  208  * thread.
  209  *
  210  * The mtxpool lock protects the vnode from being modified while referenced.
  211  * Lock ordering rules require that it be acquired before any PCB locks.
  212  *
  213  * The unpcb lock (unp_mtx) protects the most commonly referenced fields in the
  214  * unpcb.  This includes the unp_conn field, which either links two connected
  215  * PCBs together (for connected socket types) or points at the destination
  216  * socket (for connectionless socket types).  The operations of creating or
  217  * destroying a connection therefore involve locking multiple PCBs.  To avoid
  218  * lock order reversals, in some cases this involves dropping a PCB lock and
  219  * using a reference counter to maintain liveness.
  220  *
  221  * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
  222  * allocated in pr_attach() and freed in pr_detach().  The validity of that
  223  * pointer is an invariant, so no lock is required to dereference the so_pcb
  224  * pointer if a valid socket reference is held by the caller.  In practice,
  225  * this is always true during operations performed on a socket.  Each unpcb
  226  * has a back-pointer to its socket, unp_socket, which will be stable under
  227  * the same circumstances.
  228  *
  229  * This pointer may only be safely dereferenced as long as a valid reference
  230  * to the unpcb is held.  Typically, this reference will be from the socket,
  231  * or from another unpcb when the referring unpcb's lock is held (in order
  232  * that the reference not be invalidated during use).  For example, to follow
  233  * unp->unp_conn->unp_socket, you need to hold a lock on unp_conn to guarantee
  234  * that detach is not run clearing unp_socket.
  235  *
  236  * Blocking with UNIX domain sockets is a tricky issue: unlike most network
  237  * protocols, bind() is a non-atomic operation, and connect() requires
  238  * potential sleeping in the protocol, due to potentially waiting on local or
  239  * distributed file systems.  We try to separate "lookup" operations, which
  240  * may sleep, and the IPC operations themselves, which typically can occur
  241  * with relative atomicity as locks can be held over the entire operation.
  242  *
  243  * Another tricky issue is simultaneous multi-threaded or multi-process
  244  * access to a single UNIX domain socket.  This is handled by the flags
  245  * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
  246  * binding, both of which involve dropping UNIX domain socket locks in order
  247  * to perform namei() and other file system operations.
  248  */
      /* Global linkage lock: socket lists, generation counter and GC state. */
  249 static struct rwlock    unp_link_rwlock;
      /* Mutex behind the UNP_DEFERRED_* and UNP_REF_LIST_* macros. */
  250 static struct mtx       unp_defers_lock;
  251 
  252 #define UNP_LINK_LOCK_INIT()            rw_init(&unp_link_rwlock,       \
  253                                             "unp_link_rwlock")
  254 
      /* Assertions: held in either mode / not held at all. */
  255 #define UNP_LINK_LOCK_ASSERT()          rw_assert(&unp_link_rwlock,     \
  256                                             RA_LOCKED)
  257 #define UNP_LINK_UNLOCK_ASSERT()        rw_assert(&unp_link_rwlock,     \
  258                                             RA_UNLOCKED)
  259 
      /* Shared (R) and exclusive (W) acquire/release of the linkage lock. */
  260 #define UNP_LINK_RLOCK()                rw_rlock(&unp_link_rwlock)
  261 #define UNP_LINK_RUNLOCK()              rw_runlock(&unp_link_rwlock)
  262 #define UNP_LINK_WLOCK()                rw_wlock(&unp_link_rwlock)
  263 #define UNP_LINK_WUNLOCK()              rw_wunlock(&unp_link_rwlock)
  264 #define UNP_LINK_WLOCK_ASSERT()         rw_assert(&unp_link_rwlock,     \
  265                                             RA_WLOCKED)
      /* Result of rw_wowned(); uipc_attach() uses it to skip re-locking. */
  266 #define UNP_LINK_WOWNED()               rw_wowned(&unp_link_rwlock)
  267 
  268 #define UNP_DEFERRED_LOCK_INIT()        mtx_init(&unp_defers_lock, \
  269                                             "unp_defer", NULL, MTX_DEF)
  270 #define UNP_DEFERRED_LOCK()             mtx_lock(&unp_defers_lock)
  271 #define UNP_DEFERRED_UNLOCK()           mtx_unlock(&unp_defers_lock)
  272 
  273 #define UNP_REF_LIST_LOCK()             UNP_DEFERRED_LOCK()
  274 #define UNP_REF_LIST_UNLOCK()           UNP_DEFERRED_UNLOCK()
      /*
       * The reference-list lock is an alias for the deferred-close mutex.  The
       * expansions deliberately carry no trailing ';': the call site supplies
       * it, so each macro is exactly one statement and remains safe inside an
       * unbraced if/else.
       */
  275 
      /*
       * Per-PCB mutex (unp_mtx) wrappers.  The mutex is initialized with
       * MTX_DUPOK; unp_pcb_lock_pair() acquires two of these at once.
       */
  276 #define UNP_PCB_LOCK_INIT(unp)          mtx_init(&(unp)->unp_mtx,       \
  277                                             "unp", "unp",       \
  278                                             MTX_DUPOK|MTX_DEF)
  279 #define UNP_PCB_LOCK_DESTROY(unp)       mtx_destroy(&(unp)->unp_mtx)
  280 #define UNP_PCB_LOCKPTR(unp)            (&(unp)->unp_mtx)
  281 #define UNP_PCB_LOCK(unp)               mtx_lock(&(unp)->unp_mtx)
  282 #define UNP_PCB_TRYLOCK(unp)            mtx_trylock(&(unp)->unp_mtx)
  283 #define UNP_PCB_UNLOCK(unp)             mtx_unlock(&(unp)->unp_mtx)
  284 #define UNP_PCB_OWNED(unp)              mtx_owned(&(unp)->unp_mtx)
      /* Assert that the current thread does / does not own the PCB mutex. */
  285 #define UNP_PCB_LOCK_ASSERT(unp)        mtx_assert(&(unp)->unp_mtx, MA_OWNED)
  286 #define UNP_PCB_UNLOCK_ASSERT(unp)      mtx_assert(&(unp)->unp_mtx, MA_NOTOWNED)
  287 
      /* Forward declarations of file-local (static) routines. */
  288 static int      uipc_connect2(struct socket *, struct socket *);
  289 static int      uipc_ctloutput(struct socket *, struct sockopt *);
  290 static int      unp_connect(struct socket *, struct sockaddr *,
  291                     struct thread *);
  292 static int      unp_connectat(int, struct socket *, struct sockaddr *,
  293                     struct thread *, bool);
      /* Selector handed to unp_connect2(); uipc_connect2() passes PRU_CONNECT2. */
  294 typedef enum { PRU_CONNECT, PRU_CONNECT2 } conn2_how;
  295 static void     unp_connect2(struct socket *so, struct socket *so2, conn2_how);
  296 static void     unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
  297 static void     unp_dispose(struct socket *so);
  298 static void     unp_shutdown(struct unpcb *);
  299 static void     unp_drop(struct unpcb *);
  300 static void     unp_gc(__unused void *, int);
  301 static void     unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
  302 static void     unp_discard(struct file *);
  303 static void     unp_freerights(struct filedescent **, int);
  304 static int      unp_internalize(struct mbuf **, struct thread *,
  305                     struct mbuf **, u_int *, u_int *);
  306 static void     unp_internalize_fp(struct file *);
  307 static int      unp_externalize(struct mbuf *, struct mbuf **, int);
  308 static int      unp_externalize_fp(struct file *);
  309 static struct mbuf      *unp_addsockcred(struct thread *, struct mbuf *,
  310                     int, struct mbuf **, u_int *, u_int *);
  311 static void     unp_process_defers(void * __unused, int);
  312 
  313 static void
  314 unp_pcb_hold(struct unpcb *unp)
  315 {
              /*
               * Take one more reference on the PCB.  The value returned by
               * refcount_acquire() is only consumed by the assertion, which
               * insists that the caller was not holding a zero count.
               */
  316         u_int prior __unused;
  317 
  318         prior = refcount_acquire(&unp->unp_refcount);
  319         KASSERT(prior != 0, ("%s: unpcb %p has no references", __func__, unp));
  320 }
  321 
  322 static __result_use_check bool
  323 unp_pcb_rele(struct unpcb *unp)
  324 {
              /*
               * Drop one reference on a PCB whose mutex the caller holds.
               * Returns true when that was the last reference; the PCB has then
               * been unlocked, its mutex destroyed and its memory returned to
               * unp_zone, so the caller must not touch it again.  Returns false
               * with the PCB still locked otherwise.
               */
  325         UNP_PCB_LOCK_ASSERT(unp);
  326 
  327         if (!refcount_release(&unp->unp_refcount))
  328                 return (false);
  329 
  330         UNP_PCB_UNLOCK(unp);
  331         UNP_PCB_LOCK_DESTROY(unp);
  332         uma_zfree(unp_zone, unp);
  333         return (true);
  334 }
  336 
  337 static void
  338 unp_pcb_rele_notlast(struct unpcb *unp)
  339 {
              /*
               * Drop a reference that the caller knows is not the final one;
               * the assertion fires if refcount_release() reports otherwise.
               * Nothing is freed here.
               */
  340         bool was_last __unused;
  341 
  342         was_last = refcount_release(&unp->unp_refcount);
  343         KASSERT(was_last == false, ("%s: unpcb %p has no references", __func__, unp));
  344 }
  345 
  346 static void
  347 unp_pcb_lock_pair(struct unpcb *unp, struct unpcb *unp2)
  348 {
  349         struct unpcb *lo, *hi;
  350 
              /*
               * Lock two PCBs, neither of which may already be owned by the
               * caller.  Distinct PCBs are always taken lower address first;
               * if both arguments name the same PCB it is locked only once.
               */
  351         UNP_PCB_UNLOCK_ASSERT(unp);
  352         UNP_PCB_UNLOCK_ASSERT(unp2);
  353         if (unp == unp2) {
  354                 UNP_PCB_LOCK(unp);
  355                 return;
  356         }
  357 
  358         lo = (uintptr_t)unp < (uintptr_t)unp2 ? unp : unp2;
  359         hi = (lo == unp) ? unp2 : unp;
  360         UNP_PCB_LOCK(lo);
  361         UNP_PCB_LOCK(hi);
  362 }
  362 
  363 static void
  364 unp_pcb_unlock_pair(struct unpcb *unp, struct unpcb *unp2)
  365 {
              /*
               * Counterpart of unp_pcb_lock_pair(): release unp, then unp2
               * unless both arguments name the same PCB.
               */
  366         UNP_PCB_UNLOCK(unp);
  367         if (unp2 != unp)
  368                 UNP_PCB_UNLOCK(unp2);
  369 }
  370 
  371 /*
  372  * Try to lock the connected peer of an already locked socket.  In some cases
  373  * this requires that we unlock the current socket.  The pairbusy counter is
  374  * used to block concurrent connection attempts while the lock is dropped.  The
  375  * caller must be careful to revalidate PCB state.
       *
       * Returns the locked peer, or NULL when there is no peer (unp_conn is NULL,
       * or the connection went away while unp's lock was dropped).  If the socket
       * is connected to itself, unp is returned and no second lock is taken.  In
       * every case unp itself is locked on return.
  376  */
  377 static struct unpcb *
  378 unp_pcb_lock_peer(struct unpcb *unp)
  379 {
  380         struct unpcb *unp2;
  381 
  382         UNP_PCB_LOCK_ASSERT(unp);
  383         unp2 = unp->unp_conn;
  384         if (unp2 == NULL)
  385                 return (NULL);
  386         if (__predict_false(unp == unp2))
  387                 return (unp);
  388 
  389         UNP_PCB_UNLOCK_ASSERT(unp2);
  390 
              /* Fast path: the peer's lock is free, take it without blocking. */
  391         if (__predict_true(UNP_PCB_TRYLOCK(unp2)))
  392                 return (unp2);
              /*
               * The peer sorts after us, so blocking on it while holding unp
               * follows the lower-address-first order of unp_pcb_lock_pair().
               */
  393         if ((uintptr_t)unp2 > (uintptr_t)unp) {
  394                 UNP_PCB_LOCK(unp2);
  395                 return (unp2);
  396         }
              /*
               * The peer sorts before us: drop our lock and retake both in
               * address order.  The reference taken here keeps unp2's memory
               * valid across the window in which no lock is held.
               */
  397         unp->unp_pairbusy++;
  398         unp_pcb_hold(unp2);
  399         UNP_PCB_UNLOCK(unp);
  400 
  401         UNP_PCB_LOCK(unp2);
  402         UNP_PCB_LOCK(unp);
  403         KASSERT(unp->unp_conn == unp2 || unp->unp_conn == NULL,
  404             ("%s: socket %p was reconnected", __func__, unp));
              /*
               * Last pairbusy holder: clear UNP_WAITING and wake sleepers on
               * unp.  NOTE(review): presumably threads waiting to connect --
               * the sleeping side is outside this view.
               */
  405         if (--unp->unp_pairbusy == 0 && (unp->unp_flags & UNP_WAITING) != 0) {
  406                 unp->unp_flags &= ~UNP_WAITING;
  407                 wakeup(unp);
  408         }
              /* Give back our temporary reference; it may have been the last. */
  409         if (unp_pcb_rele(unp2)) {
  410                 /* unp2 is unlocked. */
  411                 return (NULL);
  412         }
              /* Disconnected while unp was unlocked: there is no peer to return. */
  413         if (unp->unp_conn == NULL) {
  414                 UNP_PCB_UNLOCK(unp2);
  415                 return (NULL);
  416         }
  417         return (unp2);
  418 }
  419 
  420 static void
  421 uipc_abort(struct socket *so)
  422 {
  423         struct unpcb *unp, *peer;
  424 
  425         unp = sotounpcb(so);
  426         KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
  427         UNP_PCB_UNLOCK_ASSERT(unp);
  428 
              /*
               * Sample the peer under our own lock and pin it with a reference
               * so that it can be passed to unp_drop() after our lock has been
               * released.  With no peer there is nothing to do.
               */
  429         UNP_PCB_LOCK(unp);
  430         peer = unp->unp_conn;
  431         if (peer != NULL)
  432                 unp_pcb_hold(peer);
  433         UNP_PCB_UNLOCK(unp);
  434         if (peer != NULL)
  435                 unp_drop(peer);
  436 }
  438 
  439 static int
  440 uipc_accept(struct socket *so, struct sockaddr **nam)
  441 {
  442         const struct sockaddr *src;
  443         struct unpcb *unp, *peer;
  444 
              /*
               * Report the name of the connected socket in a freshly allocated
               * sockaddr.  If the peer has already gone away, or was never
               * bound, the nameless address sun_noname is reported instead.
               * Always returns 0.
               */
  445         unp = sotounpcb(so);
  446         KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));
  447 
  448         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  449         UNP_PCB_LOCK(unp);
  450         peer = unp_pcb_lock_peer(unp);
  451         src = &sun_noname;
  452         if (peer != NULL && peer->unp_addr != NULL)
  453                 src = (struct sockaddr *)peer->unp_addr;
  454         bcopy(src, *nam, src->sa_len);
  455 
  456         if (peer == NULL)
  457                 UNP_PCB_UNLOCK(unp);
  458         else
  459                 unp_pcb_unlock_pair(unp, peer);
  460         return (0);
  461 }
  466 
  467 static int
  468 uipc_attach(struct socket *so, int proto, struct thread *td)
  469 {
  470         struct unp_head *head;
  471         struct unpcb *unp;
  472         u_long sndsz, rcvsz;
  473         int error;
  474         bool link_held;
  475 
              /*
               * Create the PCB for a new local socket and enter it on the
               * per-type global list.  Returns 0, the error from soreserve(),
               * or ENOBUFS when no PCB can be allocated.
               */
  476         KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
  477         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  478                 switch (so->so_type) {
  479                 case SOCK_STREAM:
  480                         sndsz = unpst_sendspace;
  481                         rcvsz = unpst_recvspace;
  482                         break;
  483                 case SOCK_SEQPACKET:
  484                         sndsz = unpsp_sendspace;
  485                         rcvsz = unpsp_recvspace;
  486                         break;
  487                 case SOCK_DGRAM:
  488                         STAILQ_INIT(&so->so_rcv.uxdg_mb);
  489                         STAILQ_INIT(&so->so_snd.uxdg_mb);
  490                         TAILQ_INIT(&so->so_rcv.uxdg_conns);
                              /*
                               * The send buffer is either bypassed or is part
                               * of a one-to-many receive buffer, so both
                               * limits come from unpdg_recvspace.
                               */
  491                         sndsz = rcvsz = unpdg_recvspace;
  492                         break;
  493                 default:
  494                         panic("uipc_attach");
  495                 }
  496                 error = soreserve(so, sndsz, rcvsz);
  497                 if (error != 0)
  498                         return (error);
  499         }
  500 
  501         unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
  502         if (unp == NULL)
  503                 return (ENOBUFS);
  504         refcount_init(&unp->unp_refcount, 1);
  505         UNP_PCB_LOCK_INIT(unp);
  506         LIST_INIT(&unp->unp_refs);
  507         unp->unp_socket = so;
  508         so->so_pcb = unp;
  509 
              /* Take the linkage lock only if this thread does not own it yet. */
  510         link_held = UNP_LINK_WOWNED();
  511         if (!link_held)
  512                 UNP_LINK_WLOCK();
  513 
  514         unp->unp_gencnt = ++unp_gencnt;
  515         unp->unp_ino = ++unp_ino;
  516         unp_count++;
  517         switch (so->so_type) {
  518         case SOCK_STREAM:
  519                 head = &unp_shead;
  520                 break;
  521         case SOCK_DGRAM:
  522                 head = &unp_dhead;
  523                 break;
  524         case SOCK_SEQPACKET:
  525                 head = &unp_sphead;
  526                 break;
  527         default:
  528                 panic("uipc_attach");
  529         }
  530         LIST_INSERT_HEAD(head, unp, unp_link);
  531 
  532         if (!link_held)
  533                 UNP_LINK_WUNLOCK();
  534         return (0);
  535 }
  544 
  545 static int
  546 uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
  547 {
              /*
               * Bind so to the file system name carried in nam, with fd passed
               * to the lookup as the directory descriptor.  A VSOCK vnode is
               * created for the name, and the vnode plus a copy of the address
               * are recorded in the PCB.
               *
               * Returns 0 on success.  Errors produced here: EAFNOSUPPORT (not
               * AF_UNIX), EINVAL (bad sun_len, or socket already bound),
               * EALREADY (another bind in progress), EADDRINUSE (name exists).
               * Errors from namei(), vn_start_write(), the MAC check and
               * VOP_CREATE() are passed through.
               */
  548         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  549         struct vattr vattr;
  550         int error, namelen;
  551         struct nameidata nd;
  552         struct unpcb *unp;
  553         struct vnode *vp;
  554         struct mount *mp;
  555         cap_rights_t rights;
  556         char *buf;
  557 
  558         if (nam->sa_family != AF_UNIX)
  559                 return (EAFNOSUPPORT);
  560 
  561         unp = sotounpcb(so);
  562         KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
  563 
              /* sun_len must fit the structure and cover at least one path byte. */
  564         if (soun->sun_len > sizeof(struct sockaddr_un))
  565                 return (EINVAL);
  566         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  567         if (namelen <= 0)
  568                 return (EINVAL);
  569 
  570         /*
  571          * We don't allow simultaneous bind() calls on a single UNIX domain
  572          * socket, so flag in-progress operations, and return an error if an
  573          * operation is already in progress.
  574          *
  575          * Historically, we have not allowed a socket to be rebound, so this
  576          * also returns an error.  Not allowing re-binding simplifies the
  577          * implementation and avoids a great many possible failure modes.
  578          */
  579         UNP_PCB_LOCK(unp);
  580         if (unp->unp_vnode != NULL) {
  581                 UNP_PCB_UNLOCK(unp);
  582                 return (EINVAL);
  583         }
  584         if (unp->unp_flags & UNP_BINDING) {
  585                 UNP_PCB_UNLOCK(unp);
  586                 return (EALREADY);
  587         }
  588         unp->unp_flags |= UNP_BINDING;
  589         UNP_PCB_UNLOCK(unp);
  590 
              /* NUL-terminated private copy of the path, handed to the lookup. */
  591         buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
  592         bcopy(soun->sun_path, buf, namelen);
  593         buf[namelen] = 0;
  594 
  595 restart:
  596         NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | NOCACHE,
  597             UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_BINDAT));
  598 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  599         error = namei(&nd);
  600         if (error)
  601                 goto error;
  602         vp = nd.ni_vp;
              /*
               * Either the name already exists, or vn_start_write() could not
               * start the write with V_NOWAIT.  Release the lookup results;
               * then fail with EADDRINUSE for an existing name, or call
               * vn_start_write() again with V_XSLEEP and redo the lookup.
               */
  603         if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
  604                 NDFREE_PNBUF(&nd);
  605                 if (nd.ni_dvp == vp)
  606                         vrele(nd.ni_dvp);
  607                 else
  608                         vput(nd.ni_dvp);
  609                 if (vp != NULL) {
  610                         vrele(vp);
  611                         error = EADDRINUSE;
  612                         goto error;
  613                 }
  614                 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH);
  615                 if (error)
  616                         goto error;
  617                 goto restart;
  618         }
              /* Socket node; mode is ACCESSPERMS with the process cmask cleared. */
  619         VATTR_NULL(&vattr);
  620         vattr.va_type = VSOCK;
  621         vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_pd->pd_cmask);
  622 #ifdef MAC
  623         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  624             &vattr);
  625 #endif
              /* Without MAC, error is still 0 from the successful namei(). */
  626         if (error == 0)
  627                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  628         NDFREE_PNBUF(&nd);
  629         if (error) {
  630                 VOP_VPUT_PAIR(nd.ni_dvp, NULL, true);
  631                 vn_finished_write(mp);
                      /* ERELOOKUP: repeat the whole lookup and create sequence. */
  632                 if (error == ERELOOKUP)
  633                         goto restart;
  634                 goto error;
  635         }
  636         vp = nd.ni_vp;
  637         ASSERT_VOP_ELOCKED(vp, "uipc_bind");
  638         soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
  639 
              /* Publish the binding and clear the in-progress flag under the lock. */
  640         UNP_PCB_LOCK(unp);
  641         VOP_UNP_BIND(vp, unp);
  642         unp->unp_vnode = vp;
  643         unp->unp_addr = soun;
  644         unp->unp_flags &= ~UNP_BINDING;
  645         UNP_PCB_UNLOCK(unp);
              /*
               * NOTE(review): this extra reference is presumably the one kept
               * for unp->unp_vnode, since vp is handed to VOP_VPUT_PAIR() just
               * below -- confirm.
               */
  646         vref(vp);
  647         VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
  648         vn_finished_write(mp);
  649         free(buf, M_TEMP);
  650         return (0);
  651 
              /* Common failure exit: clear UNP_BINDING and free the path copy. */
  652 error:
  653         UNP_PCB_LOCK(unp);
  654         unp->unp_flags &= ~UNP_BINDING;
  655         UNP_PCB_UNLOCK(unp);
  656         free(buf, M_TEMP);
  657         return (error);
  658 }
  659 
  660 static int
  661 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  662 {
              /* Same as uipc_bindat() with AT_FDCWD as the directory descriptor. */
  663         const int error = uipc_bindat(AT_FDCWD, so, nam, td);
  664         return (error);
  665 }
  666 
  667 static int
  668 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  669 {
              /* Thin wrapper: hands the request to unp_connect() unchanged. */
  670         KASSERT(td == curthread, ("uipc_connect: td != curthread"));
  671         return (unp_connect(so, nam, td));
  672 }
  676 
  677 static int
  678 uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
  679     struct thread *td)
  680 {
              /*
               * Thin wrapper: hands the request to unp_connectat(), passing
               * false for its final argument.
               */
  681         KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
  682         return (unp_connectat(fd, so, nam, td, false));
  683 }
  687 
  688 static void
  689 uipc_close(struct socket *so)
  690 {
  691         struct unpcb *unp, *peer;
  692         struct mtx *vplock;
  693         struct vnode *vp;
  694 
  695         unp = sotounpcb(so);
  696         KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
  697 
              /*
               * The vnode's pool mutex has to be taken before the PCB lock, so
               * the vnode pointer is sampled without the PCB lock and checked
               * again once both locks are held.
               */
  698         vp = unp->unp_vnode;
  699         vplock = NULL;
  700         if (vp != NULL) {
  701                 vplock = mtx_pool_find(mtxpool_sleep, vp);
  702                 mtx_lock(vplock);
  703         }
  704         UNP_PCB_LOCK(unp);
  705         if (vp != NULL) {
  706                 if (unp->unp_vnode == NULL) {
                              /* The binding went away in the meantime. */
  707                         mtx_unlock(vplock);
  708                         vp = NULL;
  709                 } else {
  710                         VOP_UNP_DETACH(vp);
  711                         unp->unp_vnode = NULL;
  712                 }
  713         }
  714 
              /*
               * NOTE(review): unp_disconnect() is expected to release the PCB
               * locks itself, as nothing is unlocked after it here -- confirm.
               */
  715         peer = unp_pcb_lock_peer(unp);
  716         if (peer == NULL)
  717                 UNP_PCB_UNLOCK(unp);
  718         else
  719                 unp_disconnect(unp, peer);
  720 
              /* Drop the vnode pool mutex and the vnode reference, if detached. */
  721         if (vp != NULL) {
  722                 mtx_unlock(vplock);
  723                 vrele(vp);
  724         }
  725 }
  721 
  722 static int
  723 uipc_connect2(struct socket *so1, struct socket *so2)
  724 {
  725         struct unpcb *pcb1, *pcb2;
  726 
              /*
               * Connect two sockets of the same type to each other with both
               * PCBs locked.  Returns EPROTOTYPE when the socket types differ,
               * otherwise 0.
               */
  727         if (so1->so_type != so2->so_type)
  728                 return (EPROTOTYPE);
  729 
  730         pcb1 = so1->so_pcb;
  731         pcb2 = so2->so_pcb;
  732         KASSERT(pcb1 != NULL, ("uipc_connect2: unp == NULL"));
  733         KASSERT(pcb2 != NULL, ("uipc_connect2: unp2 == NULL"));
  734 
  735         unp_pcb_lock_pair(pcb1, pcb2);
  736         unp_connect2(so1, so2, PRU_CONNECT2);
  737         unp_pcb_unlock_pair(pcb1, pcb2);
  738         return (0);
  739 }
  740 
  741 static void
  742 uipc_detach(struct socket *so)
  743 {
  744         struct unpcb *unp, *unp2;
  745         struct mtx *vplock;
  746         struct vnode *vp;
  747         int local_unp_rights;
  748 
  749         unp = sotounpcb(so);
  750         KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
  751 
  752         vp = NULL;
  753         vplock = NULL;
  754 
  755         UNP_LINK_WLOCK();
  756         LIST_REMOVE(unp, unp_link);
  757         if (unp->unp_gcflag & UNPGC_DEAD)
  758                 LIST_REMOVE(unp, unp_dead);
  759         unp->unp_gencnt = ++unp_gencnt;
  760         --unp_count;
  761         UNP_LINK_WUNLOCK();
  762 
  763         UNP_PCB_UNLOCK_ASSERT(unp);
  764  restart:
  765         if ((vp = unp->unp_vnode) != NULL) {
  766                 vplock = mtx_pool_find(mtxpool_sleep, vp);
  767                 mtx_lock(vplock);
  768         }
  769         UNP_PCB_LOCK(unp);
  770         if (unp->unp_vnode != vp && unp->unp_vnode != NULL) {
  771                 if (vplock)
  772                         mtx_unlock(vplock);
  773                 UNP_PCB_UNLOCK(unp);
  774                 goto restart;
  775         }
  776         if ((vp = unp->unp_vnode) != NULL) {
  777                 VOP_UNP_DETACH(vp);
  778                 unp->unp_vnode = NULL;
  779         }
  780         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
  781                 unp_disconnect(unp, unp2);
  782         else
  783                 UNP_PCB_UNLOCK(unp);
  784 
  785         UNP_REF_LIST_LOCK();
  786         while (!LIST_EMPTY(&unp->unp_refs)) {
  787                 struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
  788 
  789                 unp_pcb_hold(ref);
  790                 UNP_REF_LIST_UNLOCK();
  791 
  792                 MPASS(ref != unp);
  793                 UNP_PCB_UNLOCK_ASSERT(ref);
  794                 unp_drop(ref);
  795                 UNP_REF_LIST_LOCK();
  796         }
  797         UNP_REF_LIST_UNLOCK();
  798 
  799         UNP_PCB_LOCK(unp);
  800         local_unp_rights = unp_rights;
  801         unp->unp_socket->so_pcb = NULL;
  802         unp->unp_socket = NULL;
  803         free(unp->unp_addr, M_SONAME);
  804         unp->unp_addr = NULL;
  805         if (!unp_pcb_rele(unp))
  806                 UNP_PCB_UNLOCK(unp);
  807         if (vp) {
  808                 mtx_unlock(vplock);
  809                 vrele(vp);
  810         }
  811         if (local_unp_rights)
  812                 taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
  813 
  814         switch (so->so_type) {
  815         case SOCK_DGRAM:
  816                 /*
  817                  * Everything should have been unlinked/freed by unp_dispose()
  818                  * and/or unp_disconnect().
  819                  */
  820                 MPASS(so->so_rcv.uxdg_peeked == NULL);
  821                 MPASS(STAILQ_EMPTY(&so->so_rcv.uxdg_mb));
  822                 MPASS(TAILQ_EMPTY(&so->so_rcv.uxdg_conns));
  823                 MPASS(STAILQ_EMPTY(&so->so_snd.uxdg_mb));
  824         }
  825 }
  826 
  827 static int
  828 uipc_disconnect(struct socket *so)
  829 {
  830         struct unpcb *unp, *unp2;
  831 
  832         unp = sotounpcb(so);
  833         KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
  834 
  835         UNP_PCB_LOCK(unp);
  836         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
  837                 unp_disconnect(unp, unp2);
  838         else
  839                 UNP_PCB_UNLOCK(unp);
  840         return (0);
  841 }
  842 
  843 static int
  844 uipc_listen(struct socket *so, int backlog, struct thread *td)
  845 {
  846         struct unpcb *unp;
  847         int error;
  848 
  849         MPASS(so->so_type != SOCK_DGRAM);
  850 
  851         /*
  852          * Synchronize with concurrent connection attempts.
  853          */
  854         error = 0;
  855         unp = sotounpcb(so);
  856         UNP_PCB_LOCK(unp);
  857         if (unp->unp_conn != NULL || (unp->unp_flags & UNP_CONNECTING) != 0)
  858                 error = EINVAL;
  859         else if (unp->unp_vnode == NULL)
  860                 error = EDESTADDRREQ;
  861         if (error != 0) {
  862                 UNP_PCB_UNLOCK(unp);
  863                 return (error);
  864         }
  865 
  866         SOCK_LOCK(so);
  867         error = solisten_proto_check(so);
  868         if (error == 0) {
  869                 cru2xt(td, &unp->unp_peercred);
  870                 solisten_proto(so, backlog);
  871         }
  872         SOCK_UNLOCK(so);
  873         UNP_PCB_UNLOCK(unp);
  874         return (error);
  875 }
  876 
  877 static int
  878 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  879 {
  880         struct unpcb *unp, *unp2;
  881         const struct sockaddr *sa;
  882 
  883         unp = sotounpcb(so);
  884         KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));
  885 
  886         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  887         UNP_LINK_RLOCK();
  888         /*
  889          * XXX: It seems that this test always fails even when connection is
  890          * established.  So, this else clause is added as workaround to
  891          * return PF_LOCAL sockaddr.
  892          */
  893         unp2 = unp->unp_conn;
  894         if (unp2 != NULL) {
  895                 UNP_PCB_LOCK(unp2);
  896                 if (unp2->unp_addr != NULL)
  897                         sa = (struct sockaddr *) unp2->unp_addr;
  898                 else
  899                         sa = &sun_noname;
  900                 bcopy(sa, *nam, sa->sa_len);
  901                 UNP_PCB_UNLOCK(unp2);
  902         } else {
  903                 sa = &sun_noname;
  904                 bcopy(sa, *nam, sa->sa_len);
  905         }
  906         UNP_LINK_RUNLOCK();
  907         return (0);
  908 }
  909 
  910 static int
  911 uipc_rcvd(struct socket *so, int flags)
  912 {
  913         struct unpcb *unp, *unp2;
  914         struct socket *so2;
  915         u_int mbcnt, sbcc;
  916 
  917         unp = sotounpcb(so);
  918         KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
  919         KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
  920             ("%s: socktype %d", __func__, so->so_type));
  921 
  922         /*
  923          * Adjust backpressure on sender and wakeup any waiting to write.
  924          *
  925          * The unp lock is acquired to maintain the validity of the unp_conn
  926          * pointer; no lock on unp2 is required as unp2->unp_socket will be
  927          * static as long as we don't permit unp2 to disconnect from unp,
  928          * which is prevented by the lock on unp.  We cache values from
  929          * so_rcv to avoid holding the so_rcv lock over the entire
  930          * transaction on the remote so_snd.
  931          */
  932         SOCKBUF_LOCK(&so->so_rcv);
  933         mbcnt = so->so_rcv.sb_mbcnt;
  934         sbcc = sbavail(&so->so_rcv);
  935         SOCKBUF_UNLOCK(&so->so_rcv);
  936         /*
  937          * There is a benign race condition at this point.  If we're planning to
  938          * clear SB_STOP, but uipc_send is called on the connected socket at
  939          * this instant, it might add data to the sockbuf and set SB_STOP.  Then
  940          * we would erroneously clear SB_STOP below, even though the sockbuf is
  941          * full.  The race is benign because the only ill effect is to allow the
  942          * sockbuf to exceed its size limit, and the size limits are not
  943          * strictly guaranteed anyway.
  944          */
  945         UNP_PCB_LOCK(unp);
  946         unp2 = unp->unp_conn;
  947         if (unp2 == NULL) {
  948                 UNP_PCB_UNLOCK(unp);
  949                 return (0);
  950         }
  951         so2 = unp2->unp_socket;
  952         SOCKBUF_LOCK(&so2->so_snd);
  953         if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
  954                 so2->so_snd.sb_flags &= ~SB_STOP;
  955         sowwakeup_locked(so2);
  956         UNP_PCB_UNLOCK(unp);
  957         return (0);
  958 }
  959 
  960 static int
  961 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  962     struct mbuf *control, struct thread *td)
  963 {
  964         struct unpcb *unp, *unp2;
  965         struct socket *so2;
  966         u_int mbcnt, sbcc;
  967         int error;
  968 
  969         unp = sotounpcb(so);
  970         KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
  971         KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
  972             ("%s: socktype %d", __func__, so->so_type));
  973 
  974         error = 0;
  975         if (flags & PRUS_OOB) {
  976                 error = EOPNOTSUPP;
  977                 goto release;
  978         }
  979         if (control != NULL &&
  980             (error = unp_internalize(&control, td, NULL, NULL, NULL)))
  981                 goto release;
  982 
  983         unp2 = NULL;
  984         if ((so->so_state & SS_ISCONNECTED) == 0) {
  985                 if (nam != NULL) {
  986                         if ((error = unp_connect(so, nam, td)) != 0)
  987                                 goto out;
  988                 } else {
  989                         error = ENOTCONN;
  990                         goto out;
  991                 }
  992         }
  993 
  994         UNP_PCB_LOCK(unp);
  995         if ((unp2 = unp_pcb_lock_peer(unp)) == NULL) {
  996                 UNP_PCB_UNLOCK(unp);
  997                 error = ENOTCONN;
  998                 goto out;
  999         } else if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 1000                 unp_pcb_unlock_pair(unp, unp2);
 1001                 error = EPIPE;
 1002                 goto out;
 1003         }
 1004         UNP_PCB_UNLOCK(unp);
 1005         if ((so2 = unp2->unp_socket) == NULL) {
 1006                 UNP_PCB_UNLOCK(unp2);
 1007                 error = ENOTCONN;
 1008                 goto out;
 1009         }
 1010         SOCKBUF_LOCK(&so2->so_rcv);
 1011         if (unp2->unp_flags & UNP_WANTCRED_MASK) {
 1012                 /*
 1013                  * Credentials are passed only once on SOCK_STREAM and
 1014                  * SOCK_SEQPACKET (LOCAL_CREDS => WANTCRED_ONESHOT), or
 1015                  * forever (LOCAL_CREDS_PERSISTENT => WANTCRED_ALWAYS).
 1016                  */
 1017                 control = unp_addsockcred(td, control, unp2->unp_flags, NULL,
 1018                     NULL, NULL);
 1019                 unp2->unp_flags &= ~UNP_WANTCRED_ONESHOT;
 1020         }
 1021 
 1022         /*
 1023          * Send to paired receive port and wake up readers.  Don't
 1024          * check for space available in the receive buffer if we're
 1025          * attaching ancillary data; Unix domain sockets only check
 1026          * for space in the sending sockbuf, and that check is
 1027          * performed one level up the stack.  At that level we cannot
 1028          * precisely account for the amount of buffer space used
 1029          * (e.g., because control messages are not yet internalized).
 1030          */
 1031         switch (so->so_type) {
 1032         case SOCK_STREAM:
 1033                 if (control != NULL) {
 1034                         sbappendcontrol_locked(&so2->so_rcv, m,
 1035                             control, flags);
 1036                         control = NULL;
 1037                 } else
 1038                         sbappend_locked(&so2->so_rcv, m, flags);
 1039                 break;
 1040 
 1041         case SOCK_SEQPACKET:
 1042                 if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
 1043                     &sun_noname, m, control))
 1044                         control = NULL;
 1045                 break;
 1046         }
 1047 
 1048         mbcnt = so2->so_rcv.sb_mbcnt;
 1049         sbcc = sbavail(&so2->so_rcv);
 1050         if (sbcc)
 1051                 sorwakeup_locked(so2);
 1052         else
 1053                 SOCKBUF_UNLOCK(&so2->so_rcv);
 1054 
 1055         /*
 1056          * The PCB lock on unp2 protects the SB_STOP flag.  Without it,
 1057          * it would be possible for uipc_rcvd to be called at this
 1058          * point, drain the receiving sockbuf, clear SB_STOP, and then
 1059          * we would set SB_STOP below.  That could lead to an empty
 1060          * sockbuf having SB_STOP set
 1061          */
 1062         SOCKBUF_LOCK(&so->so_snd);
 1063         if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
 1064                 so->so_snd.sb_flags |= SB_STOP;
 1065         SOCKBUF_UNLOCK(&so->so_snd);
 1066         UNP_PCB_UNLOCK(unp2);
 1067         m = NULL;
 1068 out:
 1069         /*
 1070          * PRUS_EOF is equivalent to pr_send followed by pr_shutdown.
 1071          */
 1072         if (flags & PRUS_EOF) {
 1073                 UNP_PCB_LOCK(unp);
 1074                 socantsendmore(so);
 1075                 unp_shutdown(unp);
 1076                 UNP_PCB_UNLOCK(unp);
 1077         }
 1078         if (control != NULL && error != 0)
 1079                 unp_scan(control, unp_freerights);
 1080 
 1081 release:
 1082         if (control != NULL)
 1083                 m_freem(control);
 1084         /*
 1085          * In case of PRUS_NOTREADY, uipc_ready() is responsible
 1086          * for freeing memory.
 1087          */   
 1088         if (m != NULL && (flags & PRUS_NOTREADY) == 0)
 1089                 m_freem(m);
 1090         return (error);
 1091 }
 1092 
 1093 /* PF_UNIX/SOCK_DGRAM version of sbspace() */
 1094 static inline bool
 1095 uipc_dgram_sbspace(struct sockbuf *sb, u_int cc, u_int mbcnt)
 1096 {
 1097         u_int bleft, mleft;
 1098 
 1099         /*
 1100          * Negative space may happen if send(2) is followed by
 1101          * setsockopt(SO_SNDBUF/SO_RCVBUF) that shrinks maximum.
 1102          */
 1103         if (__predict_false(sb->sb_hiwat < sb->uxdg_cc ||
 1104             sb->sb_mbmax < sb->uxdg_mbcnt))
 1105                 return (false);
 1106 
 1107         if (__predict_false(sb->sb_state & SBS_CANTRCVMORE))
 1108                 return (false);
 1109 
 1110         bleft = sb->sb_hiwat - sb->uxdg_cc;
 1111         mleft = sb->sb_mbmax - sb->uxdg_mbcnt;
 1112 
 1113         return (bleft >= cc && mleft >= mbcnt);
 1114 }
 1115 
 1116 /*
 1117  * PF_UNIX/SOCK_DGRAM send
 1118  *
 1119  * Allocate a record consisting of 3 mbufs in the sequence of
 1120  * from -> control -> data and append it to the socket buffer.
 1121  *
 1122  * The first mbuf carries sender's name and is a pkthdr that stores
 1123  * overall length of datagram, its memory consumption and control length.
 1124  */
 1125 #define ctllen  PH_loc.thirtytwo[1]
 1126 _Static_assert(offsetof(struct pkthdr, memlen) + sizeof(u_int) <=
 1127     offsetof(struct pkthdr, ctllen), "unix/dgram can not store ctllen");
 1128 static int
 1129 uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
 1130     struct mbuf *m, struct mbuf *c, int flags, struct thread *td)
 1131 {
 1132         struct unpcb *unp, *unp2;
 1133         const struct sockaddr *from;
 1134         struct socket *so2;
 1135         struct sockbuf *sb;
 1136         struct mbuf *f, *clast;
 1137         u_int cc, ctl, mbcnt;
 1138         u_int dcc __diagused, dctl __diagused, dmbcnt __diagused;
 1139         int error;
 1140 
 1141         MPASS((uio != NULL && m == NULL) || (m != NULL && uio == NULL));
 1142 
 1143         error = 0;
 1144         f = NULL;
 1145         ctl = 0;
 1146 
 1147         if (__predict_false(flags & MSG_OOB)) {
 1148                 error = EOPNOTSUPP;
 1149                 goto out;
 1150         }
 1151         if (m == NULL) {
 1152                 if (__predict_false(uio->uio_resid > unpdg_maxdgram)) {
 1153                         error = EMSGSIZE;
 1154                         goto out;
 1155                 }
 1156                 m = m_uiotombuf(uio, M_WAITOK, 0, max_hdr, M_PKTHDR);
 1157                 if (__predict_false(m == NULL)) {
 1158                         error = EFAULT;
 1159                         goto out;
 1160                 }
 1161                 f = m_gethdr(M_WAITOK, MT_SONAME);
 1162                 cc = m->m_pkthdr.len;
 1163                 mbcnt = MSIZE + m->m_pkthdr.memlen;
 1164                 if (c != NULL &&
 1165                     (error = unp_internalize(&c, td, &clast, &ctl, &mbcnt)))
 1166                         goto out;
 1167         } else {
 1168                 /* pr_sosend() with mbuf usually is a kernel thread. */
 1169 
 1170                 M_ASSERTPKTHDR(m);
 1171                 if (__predict_false(c != NULL))
 1172                         panic("%s: control from a kernel thread", __func__);
 1173 
 1174                 if (__predict_false(m->m_pkthdr.len > unpdg_maxdgram)) {
 1175                         error = EMSGSIZE;
 1176                         goto out;
 1177                 }
 1178                 if ((f = m_gethdr(M_NOWAIT, MT_SONAME)) == NULL) {
 1179                         error = ENOBUFS;
 1180                         goto out;
 1181                 }
 1182                 /* Condition the foreign mbuf to our standards. */
 1183                 m_clrprotoflags(m);
 1184                 m_tag_delete_chain(m, NULL);
 1185                 m->m_pkthdr.rcvif = NULL;
 1186                 m->m_pkthdr.flowid = 0;
 1187                 m->m_pkthdr.csum_flags = 0;
 1188                 m->m_pkthdr.fibnum = 0;
 1189                 m->m_pkthdr.rsstype = 0;
 1190 
 1191                 cc = m->m_pkthdr.len;
 1192                 mbcnt = MSIZE;
 1193                 for (struct mbuf *mb = m; mb != NULL; mb = mb->m_next) {
 1194                         mbcnt += MSIZE;
 1195                         if (mb->m_flags & M_EXT)
 1196                                 mbcnt += mb->m_ext.ext_size;
 1197                 }
 1198         }
 1199 
 1200         unp = sotounpcb(so);
 1201         MPASS(unp);
 1202 
 1203         /*
 1204          * XXXGL: would be cool to fully remove so_snd out of the equation
 1205          * and avoid this lock, which is not only extraneous, but also being
 1206          * released, thus still leaving possibility for a race.  We can easily
 1207          * handle SBS_CANTSENDMORE/SS_ISCONNECTED complement in unpcb, but it
 1208          * is more difficult to invent something to handle so_error.
 1209          */
 1210         error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
 1211         if (error)
 1212                 goto out2;
 1213         SOCK_SENDBUF_LOCK(so);
 1214         if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 1215                 SOCK_SENDBUF_UNLOCK(so);
 1216                 error = EPIPE;
 1217                 goto out3;
 1218         }
 1219         if (so->so_error != 0) {
 1220                 error = so->so_error;
 1221                 so->so_error = 0;
 1222                 SOCK_SENDBUF_UNLOCK(so);
 1223                 goto out3;
 1224         }
 1225         if (((so->so_state & SS_ISCONNECTED) == 0) && addr == NULL) {
 1226                 SOCK_SENDBUF_UNLOCK(so);
 1227                 error = EDESTADDRREQ;
 1228                 goto out3;
 1229         }
 1230         SOCK_SENDBUF_UNLOCK(so);
 1231 
 1232         if (addr != NULL) {
 1233                 if ((error = unp_connectat(AT_FDCWD, so, addr, td, true)))
 1234                         goto out3;
 1235                 UNP_PCB_LOCK_ASSERT(unp);
 1236                 unp2 = unp->unp_conn;
 1237                 UNP_PCB_LOCK_ASSERT(unp2);
 1238         } else {
 1239                 UNP_PCB_LOCK(unp);
 1240                 unp2 = unp_pcb_lock_peer(unp);
 1241                 if (unp2 == NULL) {
 1242                         UNP_PCB_UNLOCK(unp);
 1243                         error = ENOTCONN;
 1244                         goto out3;
 1245                 }
 1246         }
 1247 
 1248         if (unp2->unp_flags & UNP_WANTCRED_MASK)
 1249                 c = unp_addsockcred(td, c, unp2->unp_flags, &clast, &ctl,
 1250                     &mbcnt);
 1251         if (unp->unp_addr != NULL)
 1252                 from = (struct sockaddr *)unp->unp_addr;
 1253         else
 1254                 from = &sun_noname;
 1255         f->m_len = from->sa_len;
 1256         MPASS(from->sa_len <= MLEN);
 1257         bcopy(from, mtod(f, void *), from->sa_len);
 1258         ctl += f->m_len;
 1259 
 1260         /*
 1261          * Concatenate mbufs: from -> control -> data.
 1262          * Save overall cc and mbcnt in "from" mbuf.
 1263          */
 1264         if (c != NULL) {
 1265 #ifdef INVARIANTS
 1266                 struct mbuf *mc;
 1267 
 1268                 for (mc = c; mc->m_next != NULL; mc = mc->m_next);
 1269                 MPASS(mc == clast);
 1270 #endif
 1271                 f->m_next = c;
 1272                 clast->m_next = m;
 1273                 c = NULL;
 1274         } else
 1275                 f->m_next = m;
 1276         m = NULL;
 1277 #ifdef INVARIANTS
 1278         dcc = dctl = dmbcnt = 0;
 1279         for (struct mbuf *mb = f; mb != NULL; mb = mb->m_next) {
 1280                 if (mb->m_type == MT_DATA)
 1281                         dcc += mb->m_len;
 1282                 else
 1283                         dctl += mb->m_len;
 1284                 dmbcnt += MSIZE;
 1285                 if (mb->m_flags & M_EXT)
 1286                         dmbcnt += mb->m_ext.ext_size;
 1287         }
 1288         MPASS(dcc == cc);
 1289         MPASS(dctl == ctl);
 1290         MPASS(dmbcnt == mbcnt);
 1291 #endif
 1292         f->m_pkthdr.len = cc + ctl;
 1293         f->m_pkthdr.memlen = mbcnt;
 1294         f->m_pkthdr.ctllen = ctl;
 1295 
 1296         /*
 1297          * Destination socket buffer selection.
 1298          *
 1299          * Unconnected sends, when !(so->so_state & SS_ISCONNECTED) and the
 1300          * destination address is supplied, create a temporary connection for
 1301          * the run time of the function (see call to unp_connectat() above and
 1302          * to unp_disconnect() below).  We distinguish them by condition of
 1303          * (addr != NULL).  We intentionally avoid adding 'bool connected' for
 1304          * that condition, since, again, through the run time of this code we
 1305          * are always connected.  For such "unconnected" sends, the destination
 1306          * buffer would be the receive buffer of destination socket so2.
 1307          *
 1308          * For connected sends, data lands on the send buffer of the sender's
 1309          * socket "so".  Then, if we just added the very first datagram
 1310          * on this send buffer, we need to add the send buffer on to the
 1311          * receiving socket's buffer list.  We put ourselves on top of the
 1312          * list.  Such logic gives infrequent senders priority over frequent
 1313          * senders.
 1314          *
 1315          * Note on byte count management. As long as event methods kevent(2),
 1316          * select(2) are not protocol specific (yet), we need to maintain
 1317          * meaningful values on the receive buffer.  So, the receive buffer
 1318          * would accumulate counters from all connected buffers potentially
 1319          * having sb_ccc > sb_hiwat or sb_mbcnt > sb_mbmax.
 1320          */
 1321         so2 = unp2->unp_socket;
 1322         sb = (addr == NULL) ? &so->so_snd : &so2->so_rcv;
 1323         SOCK_RECVBUF_LOCK(so2);
 1324         if (uipc_dgram_sbspace(sb, cc + ctl, mbcnt)) {
 1325                 if (addr == NULL && STAILQ_EMPTY(&sb->uxdg_mb))
 1326                         TAILQ_INSERT_HEAD(&so2->so_rcv.uxdg_conns, &so->so_snd,
 1327                             uxdg_clist);
 1328                 STAILQ_INSERT_TAIL(&sb->uxdg_mb, f, m_stailqpkt);
 1329                 sb->uxdg_cc += cc + ctl;
 1330                 sb->uxdg_ctl += ctl;
 1331                 sb->uxdg_mbcnt += mbcnt;
 1332                 so2->so_rcv.sb_acc += cc + ctl;
 1333                 so2->so_rcv.sb_ccc += cc + ctl;
 1334                 so2->so_rcv.sb_ctl += ctl;
 1335                 so2->so_rcv.sb_mbcnt += mbcnt;
 1336                 sorwakeup_locked(so2);
 1337                 f = NULL;
 1338         } else {
 1339                 soroverflow_locked(so2);
 1340                 error = (so->so_state & SS_NBIO) ? EAGAIN : ENOBUFS;
 1341                 if (f->m_next->m_type == MT_CONTROL)
 1342                         unp_scan(f->m_next, unp_freerights);
 1343         }
 1344 
 1345         if (addr != NULL)
 1346                 unp_disconnect(unp, unp2);
 1347         else
 1348                 unp_pcb_unlock_pair(unp, unp2);
 1349 
 1350         td->td_ru.ru_msgsnd++;
 1351 
 1352 out3:
 1353         SOCK_IO_SEND_UNLOCK(so);
 1354 out2:
 1355         if (c)
 1356                 unp_scan(c, unp_freerights);
 1357 out:
 1358         if (f)
 1359                 m_freem(f);
 1360         if (c)
 1361                 m_freem(c);
 1362         if (m)
 1363                 m_freem(m);
 1364 
 1365         return (error);
 1366 }
 1367 
 1368 /*
 1369  * PF_UNIX/SOCK_DGRAM receive with MSG_PEEK.
 1370  * The mbuf has already been unlinked from the uxdg_mb of socket buffer
 1371  * and needs to be linked onto uxdg_peeked of receive socket buffer.
 1372  */
 1373 static int
 1374 uipc_peek_dgram(struct socket *so, struct mbuf *m, struct sockaddr **psa,
 1375     struct uio *uio, struct mbuf **controlp, int *flagsp)
 1376 {
 1377         ssize_t len = 0;
 1378         int error;
 1379 
 1380         so->so_rcv.uxdg_peeked = m;
 1381         so->so_rcv.uxdg_cc += m->m_pkthdr.len;
 1382         so->so_rcv.uxdg_ctl += m->m_pkthdr.ctllen;
 1383         so->so_rcv.uxdg_mbcnt += m->m_pkthdr.memlen;
 1384         SOCK_RECVBUF_UNLOCK(so);
 1385 
 1386         KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type));
 1387         if (psa != NULL)
 1388                 *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_WAITOK);
 1389 
 1390         m = m->m_next;
 1391         KASSERT(m, ("%s: no data or control after soname", __func__));
 1392 
 1393         /*
 1394          * With MSG_PEEK the control isn't executed, just copied.
 1395          */
 1396         while (m != NULL && m->m_type == MT_CONTROL) {
 1397                 if (controlp != NULL) {
 1398                         *controlp = m_copym(m, 0, m->m_len, M_WAITOK);
 1399                         controlp = &(*controlp)->m_next;
 1400                 }
 1401                 m = m->m_next;
 1402         }
 1403         KASSERT(m == NULL || m->m_type == MT_DATA,
 1404             ("%s: not MT_DATA mbuf %p", __func__, m));
 1405         while (m != NULL && uio->uio_resid > 0) {
 1406                 len = uio->uio_resid;
 1407                 if (len > m->m_len)
 1408                         len = m->m_len;
 1409                 error = uiomove(mtod(m, char *), (int)len, uio);
 1410                 if (error) {
 1411                         SOCK_IO_RECV_UNLOCK(so);
 1412                         return (error);
 1413                 }
 1414                 if (len == m->m_len)
 1415                         m = m->m_next;
 1416         }
 1417         SOCK_IO_RECV_UNLOCK(so);
 1418 
 1419         if (flagsp != NULL) {
 1420                 if (m != NULL) {
 1421                         if (*flagsp & MSG_TRUNC) {
 1422                                 /* Report real length of the packet */
 1423                                 uio->uio_resid -= m_length(m, NULL) - len;
 1424                         }
 1425                         *flagsp |= MSG_TRUNC;
 1426                 } else
 1427                         *flagsp &= ~MSG_TRUNC;
 1428         }
 1429 
 1430         return (0);
 1431 }
 1432 
 1433 /*
 1434  * PF_UNIX/SOCK_DGRAM receive
 1435  */
 1436 static int
 1437 uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
 1438     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 1439 {
 1440         struct sockbuf *sb = NULL;
 1441         struct mbuf *m;
 1442         int flags, error;
 1443         ssize_t len = 0;
 1444         bool nonblock;
 1445 
 1446         MPASS(mp0 == NULL);
 1447 
 1448         if (psa != NULL)
 1449                 *psa = NULL;
 1450         if (controlp != NULL)
 1451                 *controlp = NULL;
 1452 
 1453         flags = flagsp != NULL ? *flagsp : 0;
 1454         nonblock = (so->so_state & SS_NBIO) ||
 1455             (flags & (MSG_DONTWAIT | MSG_NBIO));
 1456 
 1457         error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
 1458         if (__predict_false(error))
 1459                 return (error);
 1460 
 1461         /*
 1462          * Loop blocking while waiting for a datagram.  Prioritize connected
 1463          * peers over unconnected sends.  Set sb to selected socket buffer
 1464          * containing an mbuf on exit from the wait loop.  A datagram that
 1465          * had already been peeked at has top priority.
 1466          */
 1467         SOCK_RECVBUF_LOCK(so);
 1468         while ((m = so->so_rcv.uxdg_peeked) == NULL &&
 1469             (sb = TAILQ_FIRST(&so->so_rcv.uxdg_conns)) == NULL &&
 1470             (m = STAILQ_FIRST(&so->so_rcv.uxdg_mb)) == NULL) {
 1471                 if (so->so_error) {
 1472                         error = so->so_error;
 1473                         so->so_error = 0;
 1474                         SOCK_RECVBUF_UNLOCK(so);
 1475                         SOCK_IO_RECV_UNLOCK(so);
 1476                         return (error);
 1477                 }
 1478                 if (so->so_rcv.sb_state & SBS_CANTRCVMORE ||
 1479                     uio->uio_resid == 0) {
 1480                         SOCK_RECVBUF_UNLOCK(so);
 1481                         SOCK_IO_RECV_UNLOCK(so);
 1482                         return (0);
 1483                 }
 1484                 if (nonblock) {
 1485                         SOCK_RECVBUF_UNLOCK(so);
 1486                         SOCK_IO_RECV_UNLOCK(so);
 1487                         return (EWOULDBLOCK);
 1488                 }
 1489                 error = sbwait(so, SO_RCV);
 1490                 if (error) {
 1491                         SOCK_RECVBUF_UNLOCK(so);
 1492                         SOCK_IO_RECV_UNLOCK(so);
 1493                         return (error);
 1494                 }
 1495         }
 1496 
 1497         if (sb == NULL)
 1498                 sb = &so->so_rcv;
 1499         else if (m == NULL)
 1500                 m = STAILQ_FIRST(&sb->uxdg_mb);
 1501         else
 1502                 MPASS(m == so->so_rcv.uxdg_peeked);
 1503 
 1504         MPASS(sb->uxdg_cc > 0);
 1505         M_ASSERTPKTHDR(m);
 1506         KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type));
 1507 
 1508         if (uio->uio_td)
 1509                 uio->uio_td->td_ru.ru_msgrcv++;
 1510 
 1511         if (__predict_true(m != so->so_rcv.uxdg_peeked)) {
 1512                 STAILQ_REMOVE_HEAD(&sb->uxdg_mb, m_stailqpkt);
 1513                 if (STAILQ_EMPTY(&sb->uxdg_mb) && sb != &so->so_rcv)
 1514                         TAILQ_REMOVE(&so->so_rcv.uxdg_conns, sb, uxdg_clist);
 1515         } else
 1516                 so->so_rcv.uxdg_peeked = NULL;
 1517 
 1518         sb->uxdg_cc -= m->m_pkthdr.len;
 1519         sb->uxdg_ctl -= m->m_pkthdr.ctllen;
 1520         sb->uxdg_mbcnt -= m->m_pkthdr.memlen;
 1521 
 1522         if (__predict_false(flags & MSG_PEEK))
 1523                 return (uipc_peek_dgram(so, m, psa, uio, controlp, flagsp));
 1524 
 1525         so->so_rcv.sb_acc -= m->m_pkthdr.len;
 1526         so->so_rcv.sb_ccc -= m->m_pkthdr.len;
 1527         so->so_rcv.sb_ctl -= m->m_pkthdr.ctllen;
 1528         so->so_rcv.sb_mbcnt -= m->m_pkthdr.memlen;
 1529         SOCK_RECVBUF_UNLOCK(so);
 1530 
 1531         if (psa != NULL)
 1532                 *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_WAITOK);
 1533         m = m_free(m);
 1534         KASSERT(m, ("%s: no data or control after soname", __func__));
 1535 
 1536         /*
 1537          * Packet to copyout() is now in 'm' and it is disconnected from the
 1538          * queue.
 1539          *
 1540          * Process one or more MT_CONTROL mbufs present before any data mbufs
 1541          * in the first mbuf chain on the socket buffer.  We call into the
 1542          * unp_externalize() to perform externalization (or freeing if
 1543          * controlp == NULL). In some cases there can be only MT_CONTROL mbufs
 1544          * without MT_DATA mbufs.
 1545          */
 1546         while (m != NULL && m->m_type == MT_CONTROL) {
 1547                 struct mbuf *cm;
 1548 
 1549                 /* XXXGL: unp_externalize() is also dom_externalize() KBI and
 1550                  * it frees whole chain, so we must disconnect the mbuf.
 1551                  */
 1552                 cm = m; m = m->m_next; cm->m_next = NULL;
 1553                 error = unp_externalize(cm, controlp, flags);
 1554                 if (error != 0) {
 1555                         SOCK_IO_RECV_UNLOCK(so);
 1556                         unp_scan(m, unp_freerights);
 1557                         m_freem(m);
 1558                         return (error);
 1559                 }
 1560                 if (controlp != NULL) {
 1561                         while (*controlp != NULL)
 1562                                 controlp = &(*controlp)->m_next;
 1563                 }
 1564         }
 1565         KASSERT(m == NULL || m->m_type == MT_DATA,
 1566             ("%s: not MT_DATA mbuf %p", __func__, m));
 1567         while (m != NULL && uio->uio_resid > 0) {
 1568                 len = uio->uio_resid;
 1569                 if (len > m->m_len)
 1570                         len = m->m_len;
 1571                 error = uiomove(mtod(m, char *), (int)len, uio);
 1572                 if (error) {
 1573                         SOCK_IO_RECV_UNLOCK(so);
 1574                         m_freem(m);
 1575                         return (error);
 1576                 }
 1577                 if (len == m->m_len)
 1578                         m = m_free(m);
 1579                 else {
 1580                         m->m_data += len;
 1581                         m->m_len -= len;
 1582                 }
 1583         }
 1584         SOCK_IO_RECV_UNLOCK(so);
 1585 
 1586         if (m != NULL) {
 1587                 if (flagsp != NULL) {
 1588                         if (flags & MSG_TRUNC) {
 1589                                 /* Report real length of the packet */
 1590                                 uio->uio_resid -= m_length(m, NULL);
 1591                         }
 1592                         *flagsp |= MSG_TRUNC;
 1593                 }
 1594                 m_freem(m);
 1595         } else if (flagsp != NULL)
 1596                 *flagsp &= ~MSG_TRUNC;
 1597 
 1598         return (0);
 1599 }
 1600 
 1601 static bool
 1602 uipc_ready_scan(struct socket *so, struct mbuf *m, int count, int *errorp)
 1603 {
 1604         struct mbuf *mb, *n;
 1605         struct sockbuf *sb;
 1606 
 1607         SOCK_LOCK(so);
 1608         if (SOLISTENING(so)) {
 1609                 SOCK_UNLOCK(so);
 1610                 return (false);
 1611         }
 1612         mb = NULL;
 1613         sb = &so->so_rcv;
 1614         SOCKBUF_LOCK(sb);
 1615         if (sb->sb_fnrdy != NULL) {
 1616                 for (mb = sb->sb_mb, n = mb->m_nextpkt; mb != NULL;) {
 1617                         if (mb == m) {
 1618                                 *errorp = sbready(sb, m, count);
 1619                                 break;
 1620                         }
 1621                         mb = mb->m_next;
 1622                         if (mb == NULL) {
 1623                                 mb = n;
 1624                                 if (mb != NULL)
 1625                                         n = mb->m_nextpkt;
 1626                         }
 1627                 }
 1628         }
 1629         SOCKBUF_UNLOCK(sb);
 1630         SOCK_UNLOCK(so);
 1631         return (mb != NULL);
 1632 }
 1633 
 1634 static int
 1635 uipc_ready(struct socket *so, struct mbuf *m, int count)
 1636 {
 1637         struct unpcb *unp, *unp2;
 1638         struct socket *so2;
 1639         int error, i;
 1640 
 1641         unp = sotounpcb(so);
 1642 
 1643         KASSERT(so->so_type == SOCK_STREAM,
 1644             ("%s: unexpected socket type for %p", __func__, so));
 1645 
 1646         UNP_PCB_LOCK(unp);
 1647         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
 1648                 UNP_PCB_UNLOCK(unp);
 1649                 so2 = unp2->unp_socket;
 1650                 SOCKBUF_LOCK(&so2->so_rcv);
 1651                 if ((error = sbready(&so2->so_rcv, m, count)) == 0)
 1652                         sorwakeup_locked(so2);
 1653                 else
 1654                         SOCKBUF_UNLOCK(&so2->so_rcv);
 1655                 UNP_PCB_UNLOCK(unp2);
 1656                 return (error);
 1657         }
 1658         UNP_PCB_UNLOCK(unp);
 1659 
 1660         /*
 1661          * The receiving socket has been disconnected, but may still be valid.
 1662          * In this case, the now-ready mbufs are still present in its socket
 1663          * buffer, so perform an exhaustive search before giving up and freeing
 1664          * the mbufs.
 1665          */
 1666         UNP_LINK_RLOCK();
 1667         LIST_FOREACH(unp, &unp_shead, unp_link) {
 1668                 if (uipc_ready_scan(unp->unp_socket, m, count, &error))
 1669                         break;
 1670         }
 1671         UNP_LINK_RUNLOCK();
 1672 
 1673         if (unp == NULL) {
 1674                 for (i = 0; i < count; i++)
 1675                         m = m_free(m);
 1676                 error = ECONNRESET;
 1677         }
 1678         return (error);
 1679 }
 1680 
 1681 static int
 1682 uipc_sense(struct socket *so, struct stat *sb)
 1683 {
 1684         struct unpcb *unp;
 1685 
 1686         unp = sotounpcb(so);
 1687         KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
 1688 
 1689         sb->st_blksize = so->so_snd.sb_hiwat;
 1690         sb->st_dev = NODEV;
 1691         sb->st_ino = unp->unp_ino;
 1692         return (0);
 1693 }
 1694 
 1695 static int
 1696 uipc_shutdown(struct socket *so)
 1697 {
 1698         struct unpcb *unp;
 1699 
 1700         unp = sotounpcb(so);
 1701         KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));
 1702 
 1703         UNP_PCB_LOCK(unp);
 1704         socantsendmore(so);
 1705         unp_shutdown(unp);
 1706         UNP_PCB_UNLOCK(unp);
 1707         return (0);
 1708 }
 1709 
 1710 static int
 1711 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 1712 {
 1713         struct unpcb *unp;
 1714         const struct sockaddr *sa;
 1715 
 1716         unp = sotounpcb(so);
 1717         KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL"));
 1718 
 1719         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 1720         UNP_PCB_LOCK(unp);
 1721         if (unp->unp_addr != NULL)
 1722                 sa = (struct sockaddr *) unp->unp_addr;
 1723         else
 1724                 sa = &sun_noname;
 1725         bcopy(sa, *nam, sa->sa_len);
 1726         UNP_PCB_UNLOCK(unp);
 1727         return (0);
 1728 }
 1729 
 1730 static int
 1731 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 1732 {
 1733         struct unpcb *unp;
 1734         struct xucred xu;
 1735         int error, optval;
 1736 
 1737         if (sopt->sopt_level != SOL_LOCAL)
 1738                 return (EINVAL);
 1739 
 1740         unp = sotounpcb(so);
 1741         KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
 1742         error = 0;
 1743         switch (sopt->sopt_dir) {
 1744         case SOPT_GET:
 1745                 switch (sopt->sopt_name) {
 1746                 case LOCAL_PEERCRED:
 1747                         UNP_PCB_LOCK(unp);
 1748                         if (unp->unp_flags & UNP_HAVEPC)
 1749                                 xu = unp->unp_peercred;
 1750                         else {
 1751                                 if (so->so_type == SOCK_STREAM)
 1752                                         error = ENOTCONN;
 1753                                 else
 1754                                         error = EINVAL;
 1755                         }
 1756                         UNP_PCB_UNLOCK(unp);
 1757                         if (error == 0)
 1758                                 error = sooptcopyout(sopt, &xu, sizeof(xu));
 1759                         break;
 1760 
 1761                 case LOCAL_CREDS:
 1762                         /* Unlocked read. */
 1763                         optval = unp->unp_flags & UNP_WANTCRED_ONESHOT ? 1 : 0;
 1764                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 1765                         break;
 1766 
 1767                 case LOCAL_CREDS_PERSISTENT:
 1768                         /* Unlocked read. */
 1769                         optval = unp->unp_flags & UNP_WANTCRED_ALWAYS ? 1 : 0;
 1770                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 1771                         break;
 1772 
 1773                 case LOCAL_CONNWAIT:
 1774                         /* Unlocked read. */
 1775                         optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
 1776                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 1777                         break;
 1778 
 1779                 default:
 1780                         error = EOPNOTSUPP;
 1781                         break;
 1782                 }
 1783                 break;
 1784 
 1785         case SOPT_SET:
 1786                 switch (sopt->sopt_name) {
 1787                 case LOCAL_CREDS:
 1788                 case LOCAL_CREDS_PERSISTENT:
 1789                 case LOCAL_CONNWAIT:
 1790                         error = sooptcopyin(sopt, &optval, sizeof(optval),
 1791                                             sizeof(optval));
 1792                         if (error)
 1793                                 break;
 1794 
 1795 #define OPTSET(bit, exclusive) do {                                     \
 1796         UNP_PCB_LOCK(unp);                                              \
 1797         if (optval) {                                                   \
 1798                 if ((unp->unp_flags & (exclusive)) != 0) {              \
 1799                         UNP_PCB_UNLOCK(unp);                            \
 1800                         error = EINVAL;                                 \
 1801                         break;                                          \
 1802                 }                                                       \
 1803                 unp->unp_flags |= (bit);                                \
 1804         } else                                                          \
 1805                 unp->unp_flags &= ~(bit);                               \
 1806         UNP_PCB_UNLOCK(unp);                                            \
 1807 } while (0)
 1808 
 1809                         switch (sopt->sopt_name) {
 1810                         case LOCAL_CREDS:
 1811                                 OPTSET(UNP_WANTCRED_ONESHOT, UNP_WANTCRED_ALWAYS);
 1812                                 break;
 1813 
 1814                         case LOCAL_CREDS_PERSISTENT:
 1815                                 OPTSET(UNP_WANTCRED_ALWAYS, UNP_WANTCRED_ONESHOT);
 1816                                 break;
 1817 
 1818                         case LOCAL_CONNWAIT:
 1819                                 OPTSET(UNP_CONNWAIT, 0);
 1820                                 break;
 1821 
 1822                         default:
 1823                                 break;
 1824                         }
 1825                         break;
 1826 #undef  OPTSET
 1827                 default:
 1828                         error = ENOPROTOOPT;
 1829                         break;
 1830                 }
 1831                 break;
 1832 
 1833         default:
 1834                 error = EOPNOTSUPP;
 1835                 break;
 1836         }
 1837         return (error);
 1838 }
 1839 
 1840 static int
 1841 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 1842 {
 1843 
 1844         return (unp_connectat(AT_FDCWD, so, nam, td, false));
 1845 }
 1846 
 1847 static int
 1848 unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
 1849     struct thread *td, bool return_locked)
 1850 {
 1851         struct mtx *vplock;
 1852         struct sockaddr_un *soun;
 1853         struct vnode *vp;
 1854         struct socket *so2;
 1855         struct unpcb *unp, *unp2, *unp3;
 1856         struct nameidata nd;
 1857         char buf[SOCK_MAXADDRLEN];
 1858         struct sockaddr *sa;
 1859         cap_rights_t rights;
 1860         int error, len;
 1861         bool connreq;
 1862 
 1863         if (nam->sa_family != AF_UNIX)
 1864                 return (EAFNOSUPPORT);
 1865         if (nam->sa_len > sizeof(struct sockaddr_un))
 1866                 return (EINVAL);
 1867         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 1868         if (len <= 0)
 1869                 return (EINVAL);
 1870         soun = (struct sockaddr_un *)nam;
 1871         bcopy(soun->sun_path, buf, len);
 1872         buf[len] = 0;
 1873 
 1874         error = 0;
 1875         unp = sotounpcb(so);
 1876         UNP_PCB_LOCK(unp);
 1877         for (;;) {
 1878                 /*
 1879                  * Wait for connection state to stabilize.  If a connection
 1880                  * already exists, give up.  For datagram sockets, which permit
 1881                  * multiple consecutive connect(2) calls, upper layers are
 1882                  * responsible for disconnecting in advance of a subsequent
 1883                  * connect(2), but this is not synchronized with PCB connection
 1884                  * state.
 1885                  *
 1886                  * Also make sure that no threads are currently attempting to
 1887                  * lock the peer socket, to ensure that unp_conn cannot
 1888                  * transition between two valid sockets while locks are dropped.
 1889                  */
 1890                 if (SOLISTENING(so))
 1891                         error = EOPNOTSUPP;
 1892                 else if (unp->unp_conn != NULL)
 1893                         error = EISCONN;
 1894                 else if ((unp->unp_flags & UNP_CONNECTING) != 0) {
 1895                         error = EALREADY;
 1896                 }
 1897                 if (error != 0) {
 1898                         UNP_PCB_UNLOCK(unp);
 1899                         return (error);
 1900                 }
 1901                 if (unp->unp_pairbusy > 0) {
 1902                         unp->unp_flags |= UNP_WAITING;
 1903                         mtx_sleep(unp, UNP_PCB_LOCKPTR(unp), 0, "unpeer", 0);
 1904                         continue;
 1905                 }
 1906                 break;
 1907         }
 1908         unp->unp_flags |= UNP_CONNECTING;
 1909         UNP_PCB_UNLOCK(unp);
 1910 
 1911         connreq = (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0;
 1912         if (connreq)
 1913                 sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 1914         else
 1915                 sa = NULL;
 1916         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 1917             UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_CONNECTAT));
 1918         error = namei(&nd);
 1919         if (error)
 1920                 vp = NULL;
 1921         else
 1922                 vp = nd.ni_vp;
 1923         ASSERT_VOP_LOCKED(vp, "unp_connect");
 1924         if (error)
 1925                 goto bad;
 1926         NDFREE_PNBUF(&nd);
 1927 
 1928         if (vp->v_type != VSOCK) {
 1929                 error = ENOTSOCK;
 1930                 goto bad;
 1931         }
 1932 #ifdef MAC
 1933         error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD);
 1934         if (error)
 1935                 goto bad;
 1936 #endif
 1937         error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
 1938         if (error)
 1939                 goto bad;
 1940 
 1941         unp = sotounpcb(so);
 1942         KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
 1943 
 1944         vplock = mtx_pool_find(mtxpool_sleep, vp);
 1945         mtx_lock(vplock);
 1946         VOP_UNP_CONNECT(vp, &unp2);
 1947         if (unp2 == NULL) {
 1948                 error = ECONNREFUSED;
 1949                 goto bad2;
 1950         }
 1951         so2 = unp2->unp_socket;
 1952         if (so->so_type != so2->so_type) {
 1953                 error = EPROTOTYPE;
 1954                 goto bad2;
 1955         }
 1956         if (connreq) {
 1957                 if (SOLISTENING(so2)) {
 1958                         CURVNET_SET(so2->so_vnet);
 1959                         so2 = sonewconn(so2, 0);
 1960                         CURVNET_RESTORE();
 1961                 } else
 1962                         so2 = NULL;
 1963                 if (so2 == NULL) {
 1964                         error = ECONNREFUSED;
 1965                         goto bad2;
 1966                 }
 1967                 unp3 = sotounpcb(so2);
 1968                 unp_pcb_lock_pair(unp2, unp3);
 1969                 if (unp2->unp_addr != NULL) {
 1970                         bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
 1971                         unp3->unp_addr = (struct sockaddr_un *) sa;
 1972                         sa = NULL;
 1973                 }
 1974 
 1975                 unp_copy_peercred(td, unp3, unp, unp2);
 1976 
 1977                 UNP_PCB_UNLOCK(unp2);
 1978                 unp2 = unp3;
 1979 
 1980                 /*
 1981                  * It is safe to block on the PCB lock here since unp2 is
 1982                  * nascent and cannot be connected to any other sockets.
 1983                  */
 1984                 UNP_PCB_LOCK(unp);
 1985 #ifdef MAC
 1986                 mac_socketpeer_set_from_socket(so, so2);
 1987                 mac_socketpeer_set_from_socket(so2, so);
 1988 #endif
 1989         } else {
 1990                 unp_pcb_lock_pair(unp, unp2);
 1991         }
 1992         KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
 1993             sotounpcb(so2) == unp2,
 1994             ("%s: unp2 %p so2 %p", __func__, unp2, so2));
 1995         unp_connect2(so, so2, PRU_CONNECT);
 1996         KASSERT((unp->unp_flags & UNP_CONNECTING) != 0,
 1997             ("%s: unp %p has UNP_CONNECTING clear", __func__, unp));
 1998         unp->unp_flags &= ~UNP_CONNECTING;
 1999         if (!return_locked)
 2000                 unp_pcb_unlock_pair(unp, unp2);
 2001 bad2:
 2002         mtx_unlock(vplock);
 2003 bad:
 2004         if (vp != NULL) {
 2005                 /*
 2006                  * If we are returning locked (called via uipc_sosend_dgram()),
 2007                  * we need to be sure that vput() won't sleep.  This is
 2008                  * guaranteed by VOP_UNP_CONNECT() call above and unp2 lock.
 2009                  * SOCK_STREAM/SEQPACKET can't request return_locked (yet).
 2010                  */
 2011                 MPASS(!(return_locked && connreq));
 2012                 vput(vp);
 2013         }
 2014         free(sa, M_SONAME);
 2015         if (__predict_false(error)) {
 2016                 UNP_PCB_LOCK(unp);
 2017                 KASSERT((unp->unp_flags & UNP_CONNECTING) != 0,
 2018                     ("%s: unp %p has UNP_CONNECTING clear", __func__, unp));
 2019                 unp->unp_flags &= ~UNP_CONNECTING;
 2020                 UNP_PCB_UNLOCK(unp);
 2021         }
 2022         return (error);
 2023 }
 2024 
 2025 /*
 2026  * Set socket peer credentials at connection time.
 2027  *
 2028  * The client's PCB credentials are copied from its process structure.  The
 2029  * server's PCB credentials are copied from the socket on which it called
 2030  * listen(2).  uipc_listen cached that process's credentials at the time.
 2031  */
 2032 void
 2033 unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
 2034     struct unpcb *server_unp, struct unpcb *listen_unp)
 2035 {
 2036         cru2xt(td, &client_unp->unp_peercred);
 2037         client_unp->unp_flags |= UNP_HAVEPC;
 2038 
 2039         memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred,
 2040             sizeof(server_unp->unp_peercred));
 2041         server_unp->unp_flags |= UNP_HAVEPC;
 2042         client_unp->unp_flags |= (listen_unp->unp_flags & UNP_WANTCRED_MASK);
 2043 }
 2044 
 2045 static void
 2046 unp_connect2(struct socket *so, struct socket *so2, conn2_how req)
 2047 {
 2048         struct unpcb *unp;
 2049         struct unpcb *unp2;
 2050 
 2051         MPASS(so2->so_type == so->so_type);
 2052         unp = sotounpcb(so);
 2053         KASSERT(unp != NULL, ("unp_connect2: unp == NULL"));
 2054         unp2 = sotounpcb(so2);
 2055         KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL"));
 2056 
 2057         UNP_PCB_LOCK_ASSERT(unp);
 2058         UNP_PCB_LOCK_ASSERT(unp2);
 2059         KASSERT(unp->unp_conn == NULL,
 2060             ("%s: socket %p is already connected", __func__, unp));
 2061 
 2062         unp->unp_conn = unp2;
 2063         unp_pcb_hold(unp2);
 2064         unp_pcb_hold(unp);
 2065         switch (so->so_type) {
 2066         case SOCK_DGRAM:
 2067                 UNP_REF_LIST_LOCK();
 2068                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
 2069                 UNP_REF_LIST_UNLOCK();
 2070                 soisconnected(so);
 2071                 break;
 2072 
 2073         case SOCK_STREAM:
 2074         case SOCK_SEQPACKET:
 2075                 KASSERT(unp2->unp_conn == NULL,
 2076                     ("%s: socket %p is already connected", __func__, unp2));
 2077                 unp2->unp_conn = unp;
 2078                 if (req == PRU_CONNECT &&
 2079                     ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
 2080                         soisconnecting(so);
 2081                 else
 2082                         soisconnected(so);
 2083                 soisconnected(so2);
 2084                 break;
 2085 
 2086         default:
 2087                 panic("unp_connect2");
 2088         }
 2089 }
 2090 
 2091 static void
 2092 unp_disconnect(struct unpcb *unp, struct unpcb *unp2)
 2093 {
 2094         struct socket *so, *so2;
 2095         struct mbuf *m = NULL;
 2096 #ifdef INVARIANTS
 2097         struct unpcb *unptmp;
 2098 #endif
 2099 
 2100         UNP_PCB_LOCK_ASSERT(unp);
 2101         UNP_PCB_LOCK_ASSERT(unp2);
 2102         KASSERT(unp->unp_conn == unp2,
 2103             ("%s: unpcb %p is not connected to %p", __func__, unp, unp2));
 2104 
 2105         unp->unp_conn = NULL;
 2106         so = unp->unp_socket;
 2107         so2 = unp2->unp_socket;
 2108         switch (unp->unp_socket->so_type) {
 2109         case SOCK_DGRAM:
 2110                 /*
 2111                  * Remove our send socket buffer from the peer's receive buffer.
 2112                  * Move the data to the receive buffer only if it is empty.
 2113                  * This is a protection against a scenario where a peer
 2114                  * connects, floods and disconnects, effectively blocking
 2115                  * sendto() from unconnected sockets.
 2116                  */
 2117                 SOCK_RECVBUF_LOCK(so2);
 2118                 if (!STAILQ_EMPTY(&so->so_snd.uxdg_mb)) {
 2119                         TAILQ_REMOVE(&so2->so_rcv.uxdg_conns, &so->so_snd,
 2120                             uxdg_clist);
 2121                         if (__predict_true((so2->so_rcv.sb_state &
 2122                             SBS_CANTRCVMORE) == 0) &&
 2123                             STAILQ_EMPTY(&so2->so_rcv.uxdg_mb)) {
 2124                                 STAILQ_CONCAT(&so2->so_rcv.uxdg_mb,
 2125                                     &so->so_snd.uxdg_mb);
 2126                                 so2->so_rcv.uxdg_cc += so->so_snd.uxdg_cc;
 2127                                 so2->so_rcv.uxdg_ctl += so->so_snd.uxdg_ctl;
 2128                                 so2->so_rcv.uxdg_mbcnt += so->so_snd.uxdg_mbcnt;
 2129                         } else {
 2130                                 m = STAILQ_FIRST(&so->so_snd.uxdg_mb);
 2131                                 STAILQ_INIT(&so->so_snd.uxdg_mb);
 2132                                 so2->so_rcv.sb_acc -= so->so_snd.uxdg_cc;
 2133                                 so2->so_rcv.sb_ccc -= so->so_snd.uxdg_cc;
 2134                                 so2->so_rcv.sb_ctl -= so->so_snd.uxdg_ctl;
 2135                                 so2->so_rcv.sb_mbcnt -= so->so_snd.uxdg_mbcnt;
 2136                         }
 2137                         /* Note: so may reconnect. */
 2138                         so->so_snd.uxdg_cc = 0;
 2139                         so->so_snd.uxdg_ctl = 0;
 2140                         so->so_snd.uxdg_mbcnt = 0;
 2141                 }
 2142                 SOCK_RECVBUF_UNLOCK(so2);
 2143                 UNP_REF_LIST_LOCK();
 2144 #ifdef INVARIANTS
 2145                 LIST_FOREACH(unptmp, &unp2->unp_refs, unp_reflink) {
 2146                         if (unptmp == unp)
 2147                                 break;
 2148                 }
 2149                 KASSERT(unptmp != NULL,
 2150                     ("%s: %p not found in reflist of %p", __func__, unp, unp2));
 2151 #endif
 2152                 LIST_REMOVE(unp, unp_reflink);
 2153                 UNP_REF_LIST_UNLOCK();
 2154                 if (so) {
 2155                         SOCK_LOCK(so);
 2156                         so->so_state &= ~SS_ISCONNECTED;
 2157                         SOCK_UNLOCK(so);
 2158                 }
 2159                 break;
 2160 
 2161         case SOCK_STREAM:
 2162         case SOCK_SEQPACKET:
 2163                 if (so)
 2164                         soisdisconnected(so);
 2165                 MPASS(unp2->unp_conn == unp);
 2166                 unp2->unp_conn = NULL;
 2167                 if (so2)
 2168                         soisdisconnected(so2);
 2169                 break;
 2170         }
 2171 
 2172         if (unp == unp2) {
 2173                 unp_pcb_rele_notlast(unp);
 2174                 if (!unp_pcb_rele(unp))
 2175                         UNP_PCB_UNLOCK(unp);
 2176         } else {
 2177                 if (!unp_pcb_rele(unp))
 2178                         UNP_PCB_UNLOCK(unp);
 2179                 if (!unp_pcb_rele(unp2))
 2180                         UNP_PCB_UNLOCK(unp2);
 2181         }
 2182 
 2183         if (m != NULL) {
 2184                 unp_scan(m, unp_freerights);
 2185                 m_freem(m);
 2186         }
 2187 }
 2188 
 2189 /*
 2190  * unp_pcblist() walks the global list of struct unpcb's to generate a
 2191  * pointer list, bumping the refcount on each unpcb.  It then copies them out
 2192  * sequentially, validating the generation number on each to see if it has
 2193  * been detached.  All of this is necessary because copyout() may sleep on
 2194  * disk I/O.
 2195  */
 2196 static int
 2197 unp_pcblist(SYSCTL_HANDLER_ARGS)
 2198 {
 2199         struct unpcb *unp, **unp_list;
 2200         unp_gen_t gencnt;
 2201         struct xunpgen *xug;
 2202         struct unp_head *head;
 2203         struct xunpcb *xu;
 2204         u_int i;
 2205         int error, n;
 2206 
 2207         switch ((intptr_t)arg1) {
 2208         case SOCK_STREAM:
 2209                 head = &unp_shead;
 2210                 break;
 2211 
 2212         case SOCK_DGRAM:
 2213                 head = &unp_dhead;
 2214                 break;
 2215 
 2216         case SOCK_SEQPACKET:
 2217                 head = &unp_sphead;
 2218                 break;
 2219 
 2220         default:
 2221                 panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1);
 2222         }
 2223 
 2224         /*
 2225          * The process of preparing the PCB list is too time-consuming and
 2226          * resource-intensive to repeat twice on every request.
 2227          */
 2228         if (req->oldptr == NULL) {
 2229                 n = unp_count;
 2230                 req->oldidx = 2 * (sizeof *xug)
 2231                         + (n + n/8) * sizeof(struct xunpcb);
 2232                 return (0);
 2233         }
 2234 
 2235         if (req->newptr != NULL)
 2236                 return (EPERM);
 2237 
 2238         /*
 2239          * OK, now we're committed to doing something.
 2240          */
 2241         xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK | M_ZERO);
 2242         UNP_LINK_RLOCK();
 2243         gencnt = unp_gencnt;
 2244         n = unp_count;
 2245         UNP_LINK_RUNLOCK();
 2246 
 2247         xug->xug_len = sizeof *xug;
 2248         xug->xug_count = n;
 2249         xug->xug_gen = gencnt;
 2250         xug->xug_sogen = so_gencnt;
 2251         error = SYSCTL_OUT(req, xug, sizeof *xug);
 2252         if (error) {
 2253                 free(xug, M_TEMP);
 2254                 return (error);
 2255         }
 2256 
 2257         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 2258 
 2259         UNP_LINK_RLOCK();
 2260         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 2261              unp = LIST_NEXT(unp, unp_link)) {
 2262                 UNP_PCB_LOCK(unp);
 2263                 if (unp->unp_gencnt <= gencnt) {
 2264                         if (cr_cansee(req->td->td_ucred,
 2265                             unp->unp_socket->so_cred)) {
 2266                                 UNP_PCB_UNLOCK(unp);
 2267                                 continue;
 2268                         }
 2269                         unp_list[i++] = unp;
 2270                         unp_pcb_hold(unp);
 2271                 }
 2272                 UNP_PCB_UNLOCK(unp);
 2273         }
 2274         UNP_LINK_RUNLOCK();
 2275         n = i;                  /* In case we lost some during malloc. */
 2276 
 2277         error = 0;
 2278         xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 2279         for (i = 0; i < n; i++) {
 2280                 unp = unp_list[i];
 2281                 UNP_PCB_LOCK(unp);
 2282                 if (unp_pcb_rele(unp))
 2283                         continue;
 2284 
 2285                 if (unp->unp_gencnt <= gencnt) {
 2286                         xu->xu_len = sizeof *xu;
 2287                         xu->xu_unpp = (uintptr_t)unp;
 2288                         /*
 2289                          * XXX - need more locking here to protect against
 2290                          * connect/disconnect races for SMP.
 2291                          */
 2292                         if (unp->unp_addr != NULL)
 2293                                 bcopy(unp->unp_addr, &xu->xu_addr,
 2294                                       unp->unp_addr->sun_len);
 2295                         else
 2296                                 bzero(&xu->xu_addr, sizeof(xu->xu_addr));
 2297                         if (unp->unp_conn != NULL &&
 2298                             unp->unp_conn->unp_addr != NULL)
 2299                                 bcopy(unp->unp_conn->unp_addr,
 2300                                       &xu->xu_caddr,
 2301                                       unp->unp_conn->unp_addr->sun_len);
 2302                         else
 2303                                 bzero(&xu->xu_caddr, sizeof(xu->xu_caddr));
 2304                         xu->unp_vnode = (uintptr_t)unp->unp_vnode;
 2305                         xu->unp_conn = (uintptr_t)unp->unp_conn;
 2306                         xu->xu_firstref = (uintptr_t)LIST_FIRST(&unp->unp_refs);
 2307                         xu->xu_nextref = (uintptr_t)LIST_NEXT(unp, unp_reflink);
 2308                         xu->unp_gencnt = unp->unp_gencnt;
 2309                         sotoxsocket(unp->unp_socket, &xu->xu_socket);
 2310                         UNP_PCB_UNLOCK(unp);
 2311                         error = SYSCTL_OUT(req, xu, sizeof *xu);
 2312                 } else {
 2313                         UNP_PCB_UNLOCK(unp);
 2314                 }
 2315         }
 2316         free(xu, M_TEMP);
 2317         if (!error) {
 2318                 /*
 2319                  * Give the user an updated idea of our state.  If the
 2320                  * generation differs from what we told her before, she knows
 2321                  * that something happened while we were processing this
 2322                  * request, and it might be necessary to retry.
 2323                  */
 2324                 xug->xug_gen = unp_gencnt;
 2325                 xug->xug_sogen = so_gencnt;
 2326                 xug->xug_count = unp_count;
 2327                 error = SYSCTL_OUT(req, xug, sizeof *xug);
 2328         }
 2329         free(unp_list, M_TEMP);
 2330         free(xug, M_TEMP);
 2331         return (error);
 2332 }
 2333 
      /*
       * Register unp_pcblist() as the handler of a read-only, MP-safe, opaque
       * "pcblist" sysctl under each of the net.local dgram, stream and seqpacket
       * nodes.  Each registration hands the handler the matching SOCK_* constant
       * (cast to a pointer) so that one handler can serve all three socket types.
       */
 2334 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
 2335     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
 2336     (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
 2337     "List of active local datagram sockets");
 2338 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
 2339     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
 2340     (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
 2341     "List of active local stream sockets");
 2342 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist,
 2343     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
 2344     (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
 2345     "List of active local seqpacket sockets");
 2346 
 2347 static void
 2348 unp_shutdown(struct unpcb *unp)
 2349 {
 2350         struct unpcb *unp2;
 2351         struct socket *so;
 2352 
 2353         UNP_PCB_LOCK_ASSERT(unp);
 2354 
 2355         unp2 = unp->unp_conn;
 2356         if ((unp->unp_socket->so_type == SOCK_STREAM ||
 2357             (unp->unp_socket->so_type == SOCK_SEQPACKET)) && unp2 != NULL) {
 2358                 so = unp2->unp_socket;
 2359                 if (so != NULL)
 2360                         socantrcvmore(so);
 2361         }
 2362 }
 2363 
 2364 static void
 2365 unp_drop(struct unpcb *unp)
 2366 {
 2367         struct socket *so;
 2368         struct unpcb *unp2;
 2369 
 2370         /*
 2371          * Regardless of whether the socket's peer dropped the connection
 2372          * with this socket by aborting or disconnecting, POSIX requires
 2373          * that ECONNRESET is returned.
 2374          */
 2375 
 2376         UNP_PCB_LOCK(unp);
 2377         so = unp->unp_socket;
 2378         if (so)
 2379                 so->so_error = ECONNRESET;
 2380         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
 2381                 /* Last reference dropped in unp_disconnect(). */
 2382                 unp_pcb_rele_notlast(unp);
 2383                 unp_disconnect(unp, unp2);
 2384         } else if (!unp_pcb_rele(unp)) {
 2385                 UNP_PCB_UNLOCK(unp);
 2386         }
 2387 }
 2388 
 2389 static void
 2390 unp_freerights(struct filedescent **fdep, int fdcount)
 2391 {
 2392         struct file *fp;
 2393         int i;
 2394 
 2395         KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
 2396 
 2397         for (i = 0; i < fdcount; i++) {
 2398                 fp = fdep[i]->fde_file;
 2399                 filecaps_free(&fdep[i]->fde_caps);
 2400                 unp_discard(fp);
 2401         }
 2402         free(fdep[0], M_FILECAPS);
 2403 }
 2404 
      /*
       * Convert the in-kernel control messages held in 'control' into the form
       * handed to the receiving thread (curthread).
       *
       * SCM_RIGHTS payloads, which are arrays of struct filedescent pointers,
       * are replaced by descriptor numbers newly allocated in the current
       * process's file table; every other message is copied unchanged.  The
       * converted messages are chained through *controlp.  If controlp is NULL,
       * or once an error has been recorded, remaining rights are released
       * instead of installed and other messages are skipped.
       *
       * 'control' is always freed.  Returns 0, or the error from fdallocn().
       * MSG_CMSG_CLOEXEC in 'flags' causes the new descriptors to be installed
       * with O_CLOEXEC.
       */
 2405 static int
 2406 unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
 2407 {
 2408         struct thread *td = curthread;          /* XXX */
 2409         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 2410         int i;
 2411         int *fdp;
 2412         struct filedesc *fdesc = td->td_proc->p_fd;
 2413         struct filedescent **fdep;
 2414         void *data;
 2415         socklen_t clen = control->m_len, datalen;
 2416         int error, newfds;
 2417         u_int newlen;
 2418 
 2419         UNP_LINK_UNLOCK_ASSERT();
 2420 
 2421         error = 0;
 2422         if (controlp != NULL) /* controlp == NULL => free control messages */
 2423                 *controlp = NULL;
 2424         while (cm != NULL) {
 2425                 MPASS(clen >= sizeof(*cm) && clen >= cm->cmsg_len);
 2426 
 2427                 data = CMSG_DATA(cm);
              /* Payload length: message length minus the header part. */
 2428                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 2429                 if (cm->cmsg_level == SOL_SOCKET
 2430                     && cm->cmsg_type == SCM_RIGHTS) {
 2431                         newfds = datalen / sizeof(*fdep);
 2432                         if (newfds == 0)
 2433                                 goto next;
 2434                         fdep = data;
 2435 
 2436                         /* If we're not outputting the descriptors free them. */
 2437                         if (error || controlp == NULL) {
 2438                                 unp_freerights(fdep, newfds);
 2439                                 goto next;
 2440                         }
 2441                         FILEDESC_XLOCK(fdesc);
 2442 
 2443                         /*
 2444                          * Now change each pointer to an fd in the global
 2445                          * table to an integer that is the index to the local
 2446                          * fd table entry that we set up to point to the
 2447                          * global one we are transferring.
 2448                          */
 2449                         newlen = newfds * sizeof(int);
 2450                         *controlp = sbcreatecontrol(NULL, newlen,
 2451                             SCM_RIGHTS, SOL_SOCKET, M_WAITOK);
 2452 
 2453                         fdp = (int *)
 2454                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
                      /*
                       * Allocation failure: release the rights, discard the
                       * half-built output message and keep 'error' set so
                       * the remaining messages are not delivered either.
                       */
 2455                         if ((error = fdallocn(td, 0, fdp, newfds))) {
 2456                                 FILEDESC_XUNLOCK(fdesc);
 2457                                 unp_freerights(fdep, newfds);
 2458                                 m_freem(*controlp);
 2459                                 *controlp = NULL;
 2460                                 goto next;
 2461                         }
 2462                         for (i = 0; i < newfds; i++, fdp++) {
 2463                                 _finstall(fdesc, fdep[i]->fde_file, *fdp,
 2464                                     (flags & MSG_CMSG_CLOEXEC) != 0 ? O_CLOEXEC : 0,
 2465                                     &fdep[i]->fde_caps);
 2466                                 unp_externalize_fp(fdep[i]->fde_file);
 2467                         }
 2468 
 2469                         /*
 2470                          * The new type indicates that the mbuf data refers to
 2471                          * kernel resources that may need to be released before
 2472                          * the mbuf is freed.
 2473                          */
 2474                         m_chtype(*controlp, MT_EXTCONTROL);
 2475                         FILEDESC_XUNLOCK(fdesc);
                      /* Free the filedescent storage addressed by fdep[0]. */
 2476                         free(fdep[0], M_FILECAPS);
 2477                 } else {
 2478                         /* We can just copy anything else across. */
 2479                         if (error || controlp == NULL)
 2480                                 goto next;
 2481                         *controlp = sbcreatecontrol(NULL, datalen,
 2482                             cm->cmsg_type, cm->cmsg_level, M_WAITOK);
 2483                         bcopy(data,
 2484                             CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 2485                             datalen);
 2486                 }
              /* Subsequent output messages are appended after this one. */
 2487                 controlp = &(*controlp)->m_next;
 2488 
 2489 next:
              /* Step to the next message, or stop when the buffer is used up. */
 2490                 if (CMSG_SPACE(datalen) < clen) {
 2491                         clen -= CMSG_SPACE(datalen);
 2492                         cm = (struct cmsghdr *)
 2493                             ((caddr_t)cm + CMSG_SPACE(datalen));
 2494                 } else {
 2495                         clen = 0;
 2496                         cm = NULL;
 2497                 }
 2498         }
 2499 
 2500         m_freem(control);
 2501         return (error);
 2502 }
 2503 
 2504 static void
 2505 unp_zone_change(void *tag)
 2506 {
 2507 
 2508         uma_zone_set_max(unp_zone, maxsockets);
 2509 }
 2510 
 2511 #ifdef INVARIANTS
 2512 static void
 2513 unp_zdtor(void *mem, int size __unused, void *arg __unused)
 2514 {
 2515         struct unpcb *unp;
 2516 
 2517         unp = mem;
 2518 
 2519         KASSERT(LIST_EMPTY(&unp->unp_refs),
 2520             ("%s: unpcb %p has lingering refs", __func__, unp));
 2521         KASSERT(unp->unp_socket == NULL,
 2522             ("%s: unpcb %p has socket backpointer", __func__, unp));
 2523         KASSERT(unp->unp_vnode == NULL,
 2524             ("%s: unpcb %p has vnode references", __func__, unp));
 2525         KASSERT(unp->unp_conn == NULL,
 2526             ("%s: unpcb %p is still connected", __func__, unp));
 2527         KASSERT(unp->unp_addr == NULL,
 2528             ("%s: unpcb %p has leaked addr", __func__, unp));
 2529 }
 2530 #endif
 2531 
 2532 static void
 2533 unp_init(void *arg __unused)
 2534 {
 2535         uma_dtor dtor;
 2536 
 2537 #ifdef INVARIANTS
 2538         dtor = unp_zdtor;
 2539 #else
 2540         dtor = NULL;
 2541 #endif
 2542         unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, dtor,
 2543             NULL, NULL, UMA_ALIGN_CACHE, 0);
 2544         uma_zone_set_max(unp_zone, maxsockets);
 2545         uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
 2546         EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
 2547             NULL, EVENTHANDLER_PRI_ANY);
 2548         LIST_INIT(&unp_dhead);
 2549         LIST_INIT(&unp_shead);
 2550         LIST_INIT(&unp_sphead);
 2551         SLIST_INIT(&unp_defers);
 2552         TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
 2553         TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
 2554         UNP_LINK_LOCK_INIT();
 2555         UNP_DEFERRED_LOCK_INIT();
 2556 }
 2557 SYSINIT(unp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, unp_init, NULL);
 2558 
 2559 static void
 2560 unp_internalize_cleanup_rights(struct mbuf *control)
 2561 {
 2562         struct cmsghdr *cp;
 2563         struct mbuf *m;
 2564         void *data;
 2565         socklen_t datalen;
 2566 
 2567         for (m = control; m != NULL; m = m->m_next) {
 2568                 cp = mtod(m, struct cmsghdr *);
 2569                 if (cp->cmsg_level != SOL_SOCKET ||
 2570                     cp->cmsg_type != SCM_RIGHTS)
 2571                         continue;
 2572                 data = CMSG_DATA(cp);
 2573                 datalen = (caddr_t)cp + cp->cmsg_len - (caddr_t)data;
 2574                 unp_freerights(data, datalen / sizeof(struct filedesc *));
 2575         }
 2576 }
 2577 
      /*
       * Convert the control messages supplied by the sending thread 'td' into
       * their in-kernel form.
       *
       * On entry *controlp is a single mbuf holding the sender's messages; only
       * SOL_SOCKET messages are accepted.  SCM_CREDS and the four timestamp
       * types are regenerated from kernel state rather than trusted from the
       * sender.  For SCM_RIGHTS the descriptor numbers are validated, the files
       * are held, and the payload becomes an array of struct filedescent
       * pointers carrying the file and a copy of its capability rights.
       *
       * Each converted message is a new mbuf appended to the chain that starts
       * at the original *controlp location.  When 'space' is non-NULL, *space,
       * *mbcnt and *clast are updated for every appended mbuf.  The original
       * mbuf is always freed.
       *
       * Returns 0 on success, or EINVAL (unknown type or malformed/trailing
       * data), EMSGSIZE (too many descriptors), EBADF or EOPNOTSUPP (bad or
       * non-passable descriptor).  On error the rights already internalized
       * are released via unp_internalize_cleanup_rights().
       */
 2578 static int
 2579 unp_internalize(struct mbuf **controlp, struct thread *td,
 2580     struct mbuf **clast, u_int *space, u_int *mbcnt)
 2581 {
 2582         struct mbuf *control, **initial_controlp;
 2583         struct proc *p;
 2584         struct filedesc *fdesc;
 2585         struct bintime *bt;
 2586         struct cmsghdr *cm;
 2587         struct cmsgcred *cmcred;
 2588         struct filedescent *fde, **fdep, *fdev;
 2589         struct file *fp;
 2590         struct timeval *tv;
 2591         struct timespec *ts;
 2592         void *data;
 2593         socklen_t clen, datalen;
 2594         int i, j, error, *fdp, oldfds;
 2595         u_int newlen;
 2596 
 2597         MPASS((*controlp)->m_next == NULL); /* COMPAT_OLDSOCK may violate */
 2598         UNP_LINK_UNLOCK_ASSERT();
 2599 
 2600         p = td->td_proc;
 2601         fdesc = p->p_fd;
 2602         error = 0;
 2603         control = *controlp;
 2604         *controlp = NULL;
 2605         initial_controlp = controlp;
      /*
       * Iterate over the sender's messages.  The loop condition validates
       * each header against the remaining length before the body touches
       * it; the step expression advances by the aligned size of the message
       * just processed (datalen is set at the top of the body).
       */
 2606         for (clen = control->m_len, cm = mtod(control, struct cmsghdr *),
 2607             data = CMSG_DATA(cm);
 2608 
 2609             clen >= sizeof(*cm) && cm->cmsg_level == SOL_SOCKET &&
 2610             clen >= cm->cmsg_len && cm->cmsg_len >= sizeof(*cm) &&
 2611             (char *)cm + cm->cmsg_len >= (char *)data;
 2612 
 2613             clen -= min(CMSG_SPACE(datalen), clen),
 2614             cm = (struct cmsghdr *) ((char *)cm + CMSG_SPACE(datalen)),
 2615             data = CMSG_DATA(cm)) {
 2616                 datalen = (char *)cm + cm->cmsg_len - (char *)data;
 2617                 switch (cm->cmsg_type) {
 2618                 case SCM_CREDS:
                      /* Build the credentials from the sending thread itself. */
 2619                         *controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 2620                             SCM_CREDS, SOL_SOCKET, M_WAITOK);
 2621                         cmcred = (struct cmsgcred *)
 2622                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2623                         cmcred->cmcred_pid = p->p_pid;
 2624                         cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 2625                         cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 2626                         cmcred->cmcred_euid = td->td_ucred->cr_uid;
 2627                         cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 2628                             CMGROUP_MAX);
 2629                         for (i = 0; i < cmcred->cmcred_ngroups; i++)
 2630                                 cmcred->cmcred_groups[i] =
 2631                                     td->td_ucred->cr_groups[i];
 2632                         break;
 2633 
 2634                 case SCM_RIGHTS:
 2635                         oldfds = datalen / sizeof (int);
                      /*
                       * An empty rights message produces no output mbuf;
                       * 'continue' skips the accounting below and goes
                       * straight to the loop's step expression.
                       */
 2636                         if (oldfds == 0)
 2637                                 continue;
 2638                         /* On some machines sizeof pointer is bigger than
 2639                          * sizeof int, so we need to check if data fits into
 2640                          * single mbuf.  We could allocate several mbufs, and
 2641                          * unp_externalize() should even properly handle that.
 2642                          * But it is not worth to complicate the code for an
 2643                          * insane scenario of passing over 200 file descriptors
 2644                          * at once.
 2645                          */
 2646                         newlen = oldfds * sizeof(fdep[0]);
 2647                         if (CMSG_SPACE(newlen) > MCLBYTES) {
 2648                                 error = EMSGSIZE;
 2649                                 goto out;
 2650                         }
 2651                         /*
 2652                          * Check that all the FDs passed in refer to legal
 2653                          * files.  If not, reject the entire operation.
 2654                          */
 2655                         fdp = data;
 2656                         FILEDESC_SLOCK(fdesc);
 2657                         for (i = 0; i < oldfds; i++, fdp++) {
 2658                                 fp = fget_noref(fdesc, *fdp);
 2659                                 if (fp == NULL) {
 2660                                         FILEDESC_SUNLOCK(fdesc);
 2661                                         error = EBADF;
 2662                                         goto out;
 2663                                 }
 2664                                 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 2665                                         FILEDESC_SUNLOCK(fdesc);
 2666                                         error = EOPNOTSUPP;
 2667                                         goto out;
 2668                                 }
 2669                         }
 2670 
 2671                         /*
 2672                          * Now replace the integer FDs with pointers to the
 2673                          * file structure and capability rights.
 2674                          */
 2675                         *controlp = sbcreatecontrol(NULL, newlen,
 2676                             SCM_RIGHTS, SOL_SOCKET, M_WAITOK);
 2677                         fdp = data;
                      /*
                       * Take a hold on every file first; if any hold fails,
                       * drop the ones already taken and fail with EBADF.
                       *
                       * NOTE(review): on this failure path the SCM_RIGHTS
                       * mbuf created just above is already linked into the
                       * output chain but its payload has not been filled
                       * in, and the cleanup at 'out' walks that chain --
                       * confirm unp_internalize_cleanup_rights() is safe
                       * for it.
                       */
 2678                         for (i = 0; i < oldfds; i++, fdp++) {
 2679                                 if (!fhold(fdesc->fd_ofiles[*fdp].fde_file)) {
 2680                                         fdp = data;
 2681                                         for (j = 0; j < i; j++, fdp++) {
 2682                                                 fdrop(fdesc->fd_ofiles[*fdp].
 2683                                                     fde_file, td);
 2684                                         }
 2685                                         FILEDESC_SUNLOCK(fdesc);
 2686                                         error = EBADF;
 2687                                         goto out;
 2688                                 }
 2689                         }
 2690                         fdp = data;
 2691                         fdep = (struct filedescent **)
 2692                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
                      /*
                       * One allocation backs all entries; fdep[0] ends up
                       * pointing at its start, which is what the release
                       * paths later pass to free().
                       */
 2693                         fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
 2694                             M_WAITOK);
 2695                         for (i = 0; i < oldfds; i++, fdev++, fdp++) {
 2696                                 fde = &fdesc->fd_ofiles[*fdp];
 2697                                 fdep[i] = fdev;
 2698                                 fdep[i]->fde_file = fde->fde_file;
 2699                                 filecaps_copy(&fde->fde_caps,
 2700                                     &fdep[i]->fde_caps, true);
 2701                                 unp_internalize_fp(fdep[i]->fde_file);
 2702                         }
 2703                         FILEDESC_SUNLOCK(fdesc);
 2704                         break;
 2705 
 2706                 case SCM_TIMESTAMP:
 2707                         *controlp = sbcreatecontrol(NULL, sizeof(*tv),
 2708                             SCM_TIMESTAMP, SOL_SOCKET, M_WAITOK);
 2709                         tv = (struct timeval *)
 2710                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2711                         microtime(tv);
 2712                         break;
 2713 
 2714                 case SCM_BINTIME:
 2715                         *controlp = sbcreatecontrol(NULL, sizeof(*bt),
 2716                             SCM_BINTIME, SOL_SOCKET, M_WAITOK);
 2717                         bt = (struct bintime *)
 2718                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2719                         bintime(bt);
 2720                         break;
 2721 
 2722                 case SCM_REALTIME:
 2723                         *controlp = sbcreatecontrol(NULL, sizeof(*ts),
 2724                             SCM_REALTIME, SOL_SOCKET, M_WAITOK);
 2725                         ts = (struct timespec *)
 2726                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2727                         nanotime(ts);
 2728                         break;
 2729 
 2730                 case SCM_MONOTONIC:
 2731                         *controlp = sbcreatecontrol(NULL, sizeof(*ts),
 2732                             SCM_MONOTONIC, SOL_SOCKET, M_WAITOK);
 2733                         ts = (struct timespec *)
 2734                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2735                         nanouptime(ts);
 2736                         break;
 2737 
 2738                 default:
 2739                         error = EINVAL;
 2740                         goto out;
 2741                 }
 2742 
              /* Account for the mbuf just appended, if the caller asked. */
 2743                 if (space != NULL) {
 2744                         *space += (*controlp)->m_len;
 2745                         *mbcnt += MSIZE;
 2746                         if ((*controlp)->m_flags & M_EXT)
 2747                                 *mbcnt += (*controlp)->m_ext.ext_size;
 2748                         *clast = *controlp;
 2749                 }
 2750                 controlp = &(*controlp)->m_next;
 2751         }
      /* Leftover bytes mean the loop stopped on a malformed message. */
 2752         if (clen > 0)
 2753                 error = EINVAL;
 2754 
 2755 out:
 2756         if (error != 0 && initial_controlp != NULL)
 2757                 unp_internalize_cleanup_rights(*initial_controlp);
 2758         m_freem(control);
 2759         return (error);
 2760 }
 2761 
 2762 static struct mbuf *
 2763 unp_addsockcred(struct thread *td, struct mbuf *control, int mode,
 2764     struct mbuf **clast, u_int *space, u_int *mbcnt)
 2765 {
 2766         struct mbuf *m, *n, *n_prev;
 2767         const struct cmsghdr *cm;
 2768         int ngroups, i, cmsgtype;
 2769         size_t ctrlsz;
 2770 
 2771         ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
 2772         if (mode & UNP_WANTCRED_ALWAYS) {
 2773                 ctrlsz = SOCKCRED2SIZE(ngroups);
 2774                 cmsgtype = SCM_CREDS2;
 2775         } else {
 2776                 ctrlsz = SOCKCREDSIZE(ngroups);
 2777                 cmsgtype = SCM_CREDS;
 2778         }
 2779 
 2780         m = sbcreatecontrol(NULL, ctrlsz, cmsgtype, SOL_SOCKET, M_NOWAIT);
 2781         if (m == NULL)
 2782                 return (control);
 2783         MPASS((m->m_flags & M_EXT) == 0 && m->m_next == NULL);
 2784 
 2785         if (mode & UNP_WANTCRED_ALWAYS) {
 2786                 struct sockcred2 *sc;
 2787 
 2788                 sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *));
 2789                 sc->sc_version = 0;
 2790                 sc->sc_pid = td->td_proc->p_pid;
 2791                 sc->sc_uid = td->td_ucred->cr_ruid;
 2792                 sc->sc_euid = td->td_ucred->cr_uid;
 2793                 sc->sc_gid = td->td_ucred->cr_rgid;
 2794                 sc->sc_egid = td->td_ucred->cr_gid;
 2795                 sc->sc_ngroups = ngroups;
 2796                 for (i = 0; i < sc->sc_ngroups; i++)
 2797                         sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 2798         } else {
 2799                 struct sockcred *sc;
 2800 
 2801                 sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *));
 2802                 sc->sc_uid = td->td_ucred->cr_ruid;
 2803                 sc->sc_euid = td->td_ucred->cr_uid;
 2804                 sc->sc_gid = td->td_ucred->cr_rgid;
 2805                 sc->sc_egid = td->td_ucred->cr_gid;
 2806                 sc->sc_ngroups = ngroups;
 2807                 for (i = 0; i < sc->sc_ngroups; i++)
 2808                         sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 2809         }
 2810 
 2811         /*
 2812          * Unlink SCM_CREDS control messages (struct cmsgcred), since just
 2813          * created SCM_CREDS control message (struct sockcred) has another
 2814          * format.
 2815          */
 2816         if (control != NULL && cmsgtype == SCM_CREDS)
 2817                 for (n = control, n_prev = NULL; n != NULL;) {
 2818                         cm = mtod(n, struct cmsghdr *);
 2819                         if (cm->cmsg_level == SOL_SOCKET &&
 2820                             cm->cmsg_type == SCM_CREDS) {
 2821                                 if (n_prev == NULL)
 2822                                         control = n->m_next;
 2823                                 else
 2824                                         n_prev->m_next = n->m_next;
 2825                                 if (space != NULL) {
 2826                                         MPASS(*space >= n->m_len);
 2827                                         *space -= n->m_len;
 2828                                         MPASS(*mbcnt >= MSIZE);
 2829                                         *mbcnt -= MSIZE;
 2830                                         if (n->m_flags & M_EXT) {
 2831                                                 MPASS(*mbcnt >=
 2832                                                     n->m_ext.ext_size);
 2833                                                 *mbcnt -= n->m_ext.ext_size;
 2834                                         }
 2835                                         MPASS(clast);
 2836                                         if (*clast == n) {
 2837                                                 MPASS(n->m_next == NULL);
 2838                                                 if (n_prev == NULL)
 2839                                                         *clast = m;
 2840                                                 else
 2841                                                         *clast = n_prev;
 2842                                         }
 2843                                 }
 2844                                 n = m_free(n);
 2845                         } else {
 2846                                 n_prev = n;
 2847                                 n = n->m_next;
 2848                         }
 2849                 }
 2850 
 2851         /* Prepend it to the head. */
 2852         m->m_next = control;
 2853         if (space != NULL) {
 2854                 *space += m->m_len;
 2855                 *mbcnt += MSIZE;
 2856                 if (control == NULL)
 2857                         *clast = m;
 2858         }
 2859         return (m);
 2860 }
 2861 
 2862 static struct unpcb *
 2863 fptounp(struct file *fp)
 2864 {
 2865         struct socket *so;
 2866 
 2867         if (fp->f_type != DTYPE_SOCKET)
 2868                 return (NULL);
 2869         if ((so = fp->f_data) == NULL)
 2870                 return (NULL);
 2871         if (so->so_proto->pr_domain != &localdomain)
 2872                 return (NULL);
 2873         return sotounpcb(so);
 2874 }
 2875 
 2876 static void
 2877 unp_discard(struct file *fp)
 2878 {
 2879         struct unp_defer *dr;
 2880 
 2881         if (unp_externalize_fp(fp)) {
 2882                 dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK);
 2883                 dr->ud_fp = fp;
 2884                 UNP_DEFERRED_LOCK();
 2885                 SLIST_INSERT_HEAD(&unp_defers, dr, ud_link);
 2886                 UNP_DEFERRED_UNLOCK();
 2887                 atomic_add_int(&unp_defers_count, 1);
 2888                 taskqueue_enqueue(taskqueue_thread, &unp_defer_task);
 2889         } else
 2890                 closef_nothread(fp);
 2891 }
 2892 
 2893 static void
 2894 unp_process_defers(void *arg __unused, int pending)
 2895 {
 2896         struct unp_defer *dr;
 2897         SLIST_HEAD(, unp_defer) drl;
 2898         int count;
 2899 
 2900         SLIST_INIT(&drl);
 2901         for (;;) {
 2902                 UNP_DEFERRED_LOCK();
 2903                 if (SLIST_FIRST(&unp_defers) == NULL) {
 2904                         UNP_DEFERRED_UNLOCK();
 2905                         break;
 2906                 }
 2907                 SLIST_SWAP(&unp_defers, &drl, unp_defer);
 2908                 UNP_DEFERRED_UNLOCK();
 2909                 count = 0;
 2910                 while ((dr = SLIST_FIRST(&drl)) != NULL) {
 2911                         SLIST_REMOVE_HEAD(&drl, ud_link);
 2912                         closef_nothread(dr->ud_fp);
 2913                         free(dr, M_TEMP);
 2914                         count++;
 2915                 }
 2916                 atomic_add_int(&unp_defers_count, -count);
 2917         }
 2918 }
 2919 
 2920 static void
 2921 unp_internalize_fp(struct file *fp)
 2922 {
 2923         struct unpcb *unp;
 2924 
 2925         UNP_LINK_WLOCK();
 2926         if ((unp = fptounp(fp)) != NULL) {
 2927                 unp->unp_file = fp;
 2928                 unp->unp_msgcount++;
 2929         }
 2930         unp_rights++;
 2931         UNP_LINK_WUNLOCK();
 2932 }
 2933 
 2934 static int
 2935 unp_externalize_fp(struct file *fp)
 2936 {
 2937         struct unpcb *unp;
 2938         int ret;
 2939 
 2940         UNP_LINK_WLOCK();
 2941         if ((unp = fptounp(fp)) != NULL) {
 2942                 unp->unp_msgcount--;
 2943                 ret = 1;
 2944         } else
 2945                 ret = 0;
 2946         unp_rights--;
 2947         UNP_LINK_WUNLOCK();
 2948         return (ret);
 2949 }
 2950 
 2951 /*
 2952  * unp_marked indicates whether additional work has been deferred for a future
 2953  * pass through unp_gc().  It is thread local and does not require explicit
 2954  * synchronization.
 2955  */
 2956 static int      unp_marked;
 2957 
 2958 static void
 2959 unp_remove_dead_ref(struct filedescent **fdep, int fdcount)
 2960 {
 2961         struct unpcb *unp;
 2962         struct file *fp;
 2963         int i;
 2964 
 2965         /*
 2966          * This function can only be called from the gc task.
 2967          */
 2968         KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
 2969             ("%s: not on gc callout", __func__));
 2970         UNP_LINK_LOCK_ASSERT();
 2971 
 2972         for (i = 0; i < fdcount; i++) {
 2973                 fp = fdep[i]->fde_file;
 2974                 if ((unp = fptounp(fp)) == NULL)
 2975                         continue;
 2976                 if ((unp->unp_gcflag & UNPGC_DEAD) == 0)
 2977                         continue;
 2978                 unp->unp_gcrefs--;
 2979         }
 2980 }
 2981 
 2982 static void
 2983 unp_restore_undead_ref(struct filedescent **fdep, int fdcount)
 2984 {
 2985         struct unpcb *unp;
 2986         struct file *fp;
 2987         int i;
 2988 
 2989         /*
 2990          * This function can only be called from the gc task.
 2991          */
 2992         KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
 2993             ("%s: not on gc callout", __func__));
 2994         UNP_LINK_LOCK_ASSERT();
 2995 
 2996         for (i = 0; i < fdcount; i++) {
 2997                 fp = fdep[i]->fde_file;
 2998                 if ((unp = fptounp(fp)) == NULL)
 2999                         continue;
 3000                 if ((unp->unp_gcflag & UNPGC_DEAD) == 0)
 3001                         continue;
 3002                 unp->unp_gcrefs++;
 3003                 unp_marked++;
 3004         }
 3005 }
 3006 
 3007 static void
 3008 unp_scan_socket(struct socket *so, void (*op)(struct filedescent **, int))
 3009 {
 3010         struct sockbuf *sb;
 3011 
 3012         SOCK_LOCK_ASSERT(so);
 3013 
 3014         if (sotounpcb(so)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
 3015                 return;
 3016 
 3017         SOCK_RECVBUF_LOCK(so);
 3018         switch (so->so_type) {
 3019         case SOCK_DGRAM:
 3020                 unp_scan(STAILQ_FIRST(&so->so_rcv.uxdg_mb), op);
 3021                 unp_scan(so->so_rcv.uxdg_peeked, op);
 3022                 TAILQ_FOREACH(sb, &so->so_rcv.uxdg_conns, uxdg_clist)
 3023                         unp_scan(STAILQ_FIRST(&sb->uxdg_mb), op);
 3024                 break;
 3025         case SOCK_STREAM:
 3026         case SOCK_SEQPACKET:
 3027                 unp_scan(so->so_rcv.sb_mb, op);
 3028                 break;
 3029         }
 3030         SOCK_RECVBUF_UNLOCK(so);
 3031 }
 3032 
 3033 static void
 3034 unp_gc_scan(struct unpcb *unp, void (*op)(struct filedescent **, int))
 3035 {
 3036         struct socket *so, *soa;
 3037 
 3038         so = unp->unp_socket;
 3039         SOCK_LOCK(so);
 3040         if (SOLISTENING(so)) {
 3041                 /*
 3042                  * Mark all sockets in our accept queue.
 3043                  */
 3044                 TAILQ_FOREACH(soa, &so->sol_comp, so_list)
 3045                         unp_scan_socket(soa, op);
 3046         } else {
 3047                 /*
 3048                  * Mark all sockets we reference with RIGHTS.
 3049                  */
 3050                 unp_scan_socket(so, op);
 3051         }
 3052         SOCK_UNLOCK(so);
 3053 }
 3054 
      /*
       * Read-only statistics exported under the net.local sysctl node.
       * unp_taskcount is incremented at the start of every unp_gc() run.
       */
 3055 static int unp_recycled;
 3056 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 
 3057     "Number of unreachable sockets claimed by the garbage collector.");
 3058 
 3059 static int unp_taskcount;
 3060 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 
 3061     "Number of times the garbage collector has run.");
 3062 
      /* unp_count itself is defined elsewhere in this file. */
 3063 SYSCTL_UINT(_net_local, OID_AUTO, sockcount, CTLFLAG_RD, &unp_count, 0, 
 3064     "Number of active local sockets.");
 3065 
 /*
  * One garbage-collection pass over the unpcbs linked on unp_dhead,
  * unp_shead and unp_sphead.  Neither arg nor pending is used.
  *
  * Phases, as implemented below:
  *  1. Under the link read lock, put on a local list every unpcb whose file
  *     is non-NULL and whose non-zero unp_msgcount equals the file's
  *     reference count; mark it UNPGC_DEAD and seed unp_gcrefs.
  *  2. Run unp_gc_scan() with unp_remove_dead_ref over each candidate.
  *  3. Repeatedly take candidates with unp_gcrefs > 0 off the list and
  *     rescan them with unp_restore_undead_ref until a pass leaves
  *     unp_marked at zero.
  *  4. With the link lock dropped, allocate an array, then retake the lock,
  *     re-check each remaining candidate and fhold() its file.
  *  5. sorflush() every collected socket, then fdrop() every held file.
  */
 3066 static void
 3067 unp_gc(__unused void *arg, int pending)
 3068 {
 3069         struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead,
 3070                                     NULL };
 3071         struct unp_head **head;
 3072         struct unp_head unp_deadhead;   /* List of potentially-dead sockets. */
 3073         struct file *f, **unref;
 3074         struct unpcb *unp, *unptmp;
 3075         int i, total, unp_unreachable;
 3076 
 3077         LIST_INIT(&unp_deadhead);
 3078         unp_taskcount++;
 3079         UNP_LINK_RLOCK();
 3080         /*
 3081          * First determine which sockets may be in cycles.
 3082          */
 3083         unp_unreachable = 0;
 3084 
 3085         for (head = heads; *head != NULL; head++)
 3086                 LIST_FOREACH(unp, *head, unp_link) {
 3087                         KASSERT((unp->unp_gcflag & ~UNPGC_IGNORE_RIGHTS) == 0,
 3088                             ("%s: unp %p has unexpected gc flags 0x%x",
 3089                             __func__, unp, (unsigned int)unp->unp_gcflag));
 3090 
 3091                         f = unp->unp_file;
 3092 
 3093                         /*
 3094                          * Check for an unreachable socket potentially in a
 3095                          * cycle.  It must be in a queue as indicated by
 3096                          * msgcount, and this must equal the file reference
 3097                          * count.  Note that when msgcount is 0 the file is
 3098                          * NULL.
 3099                          */
 3100                         if (f != NULL && unp->unp_msgcount != 0 &&
 3101                             refcount_load(&f->f_count) == unp->unp_msgcount) {
 3102                                 LIST_INSERT_HEAD(&unp_deadhead, unp, unp_dead);
 3103                                 unp->unp_gcflag |= UNPGC_DEAD;
 3104                                 unp->unp_gcrefs = unp->unp_msgcount;
 3105                                 unp_unreachable++;
 3106                         }
 3107                 }
 3108 
 3109         /*
 3110          * Scan all sockets previously marked as potentially being in a cycle
 3111          * and remove the references each socket holds on any UNPGC_DEAD
 3112          * sockets in its queue.  After this step, all remaining references on
 3113          * sockets marked UNPGC_DEAD should not be part of any cycle.
 3114          */
 3115         LIST_FOREACH(unp, &unp_deadhead, unp_dead)
 3116                 unp_gc_scan(unp, unp_remove_dead_ref);
 3117 
 3118         /*
 3119          * If a socket still has a positive refcount (unp_gcrefs > 0), it
 3120          * cannot be in a cycle.  In this case increment refcount of all
 3121          * children iteratively.  Stop the scan once we do a complete loop
 3122          * without discovering a new reachable socket.
              *
              * NOTE(review): unp_marked is only reset here; it is presumably
              * set by unp_restore_undead_ref(), which is outside this view.
 3123          */
 3124         do {
 3125                 unp_marked = 0;
 3126                 LIST_FOREACH_SAFE(unp, &unp_deadhead, unp_dead, unptmp)
 3127                         if (unp->unp_gcrefs > 0) {
 3128                                 unp->unp_gcflag &= ~UNPGC_DEAD;
 3129                                 LIST_REMOVE(unp, unp_dead);
 3130                                 KASSERT(unp_unreachable > 0,
 3131                                     ("%s: unp_unreachable underflow.",
 3132                                     __func__));
 3133                                 unp_unreachable--;
 3134                                 unp_gc_scan(unp, unp_restore_undead_ref);
 3135                         }
 3136         } while (unp_marked);
 3137 
 3138         UNP_LINK_RUNLOCK();
 3139 
 3140         if (unp_unreachable == 0)
 3141                 return;
 3142 
 3143         /*
 3144          * Allocate space for a local array of dead unpcbs.
 3145          * TODO: can this path be simplified by instead using the local
 3146          * dead list at unp_deadhead, after taking out references
 3147          * on the file object and/or unpcb and dropping the link lock?
 3148          */
 3149         unref = malloc(unp_unreachable * sizeof(struct file *),
 3150             M_TEMP, M_WAITOK);
 3151 
 3152         /*
 3153          * Iterate looking for sockets which have been specifically marked
 3154          * as unreachable and store them locally.
              *
              * The link lock was released above, so every candidate is checked
              * again with the same msgcount/f_count test before a reference is
              * taken with fhold(); entries that no longer qualify are skipped,
              * which is why total may end up smaller than unp_unreachable.
 3155          */
 3156         UNP_LINK_RLOCK();
 3157         total = 0;
 3158         LIST_FOREACH(unp, &unp_deadhead, unp_dead) {
 3159                 KASSERT((unp->unp_gcflag & UNPGC_DEAD) != 0,
 3160                     ("%s: unp %p not marked UNPGC_DEAD", __func__, unp));
 3161                 unp->unp_gcflag &= ~UNPGC_DEAD;
 3162                 f = unp->unp_file;
 3163                 if (unp->unp_msgcount == 0 || f == NULL ||
 3164                     refcount_load(&f->f_count) != unp->unp_msgcount ||
 3165                     !fhold(f))
 3166                         continue;
 3167                 unref[total++] = f;
 3168                 KASSERT(total <= unp_unreachable,
 3169                     ("%s: incorrect unreachable count.", __func__));
 3170         }
 3171         UNP_LINK_RUNLOCK();
 3172 
 3173         /*
 3174          * Now flush all sockets, free'ing rights.  This will free the
 3175          * struct files associated with these sockets but leave each socket
 3176          * with one remaining ref.
 3177          */
 3178         for (i = 0; i < total; i++) {
 3179                 struct socket *so;
 3180 
 3181                 so = unref[i]->f_data;
 3182                 CURVNET_SET(so->so_vnet);
 3183                 sorflush(so);
 3184                 CURVNET_RESTORE();
 3185         }
 3186 
 3187         /*
 3188          * And finally release the sockets so they can be reclaimed.
 3189          */
 3190         for (i = 0; i < total; i++)
 3191                 fdrop(unref[i], NULL);
 3192         unp_recycled += total;
 3193         free(unref, M_TEMP);
 3194 }
 3195 
 3196 /*
 3197  * Synchronize against unp_gc, which can trip over data as we are freeing it.
       *
       * Sets UNPGC_IGNORE_RIGHTS on the unpcb under the link write lock, then
       * detaches every mbuf queued on the receive buffer while holding the
       * receive buffer lock, and finally, with that lock dropped, releases the
       * rights found in the detached chain (unp_scan() with unp_freerights)
       * and frees the mbufs.  Must not be called on a listening socket.
       *
       * NOTE(review): m is assigned only inside the three cases of the switch
       * below and there is no default case; this relies on so_type always
       * being one of them for sockets of this domain -- confirm.
 3198  */
 3199 static void
 3200 unp_dispose(struct socket *so)
 3201 {
 3202         struct sockbuf *sb;
 3203         struct unpcb *unp;
 3204         struct mbuf *m;
 3205 
 3206         MPASS(!SOLISTENING(so));
 3207 
 3208         unp = sotounpcb(so);
 3209         UNP_LINK_WLOCK();
 3210         unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
 3211         UNP_LINK_WUNLOCK();
 3212 
 3213         /*
 3214          * Grab our special mbufs before calling sbrelease().
 3215          */
 3216         SOCK_RECVBUF_LOCK(so);
 3217         switch (so->so_type) {
 3218         case SOCK_DGRAM:
                      /*
                       * Fold every per-connection queue into our own queue,
                       * unlink that sockbuf and zero its counters.
                       */
 3219                 while ((sb = TAILQ_FIRST(&so->so_rcv.uxdg_conns)) != NULL) {
 3220                         STAILQ_CONCAT(&so->so_rcv.uxdg_mb, &sb->uxdg_mb);
 3221                         TAILQ_REMOVE(&so->so_rcv.uxdg_conns, sb, uxdg_clist);
 3222                         /* Note: socket of sb may reconnect. */
 3223                         sb->uxdg_cc = sb->uxdg_ctl = sb->uxdg_mbcnt = 0;
 3224                 }
 3225                 sb = &so->so_rcv;
                      /* Put a peeked mbuf back at the head so it is freed too. */
 3226                 if (sb->uxdg_peeked != NULL) {
 3227                         STAILQ_INSERT_HEAD(&sb->uxdg_mb, sb->uxdg_peeked,
 3228                             m_stailqpkt);
 3229                         sb->uxdg_peeked = NULL;
 3230                 }
                      /* Take the whole chain and leave the queue empty. */
 3231                 m = STAILQ_FIRST(&sb->uxdg_mb);
 3232                 STAILQ_INIT(&sb->uxdg_mb);
 3233                 /* XXX: our shortened sbrelease() */
 3234                 (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
 3235                     RLIM_INFINITY);
 3236                 /*
 3237                  * XXXGL Mark sb with SBS_CANTRCVMORE.  This is needed to
 3238                  * prevent uipc_sosend_dgram() or unp_disconnect() adding more
 3239                  * data to the socket.
 3240                  * We are now in dom_dispose and it could be a call from
 3241                  * soshutdown() or from the final sofree().  The sofree() case
 3242                  * is simple as it guarantees that no more sends will happen,
 3243                  * however we can race with unp_disconnect() from our peer.
 3244                  * The shutdown(2) case is more exotic.  It would call into
 3245                  * dom_dispose() only if socket is SS_ISCONNECTED.  This is
 3246                  * possible if we did connect(2) on this socket and we also
 3247                  * had it bound with bind(2) and receive connections from other
 3248                  * sockets.  Because soshutdown() violates POSIX (see comment
 3249                  * there) we will end up here shutting down our receive side.
 3250                  * Of course this will have an effect not only on the peer we
 3251                  * connect(2)ed to, but also on all of the peers who had
 3252                  * connect(2)ed to us.  Their sends would end up with ENOBUFS.
 3253                  */
 3254                 sb->sb_state |= SBS_CANTRCVMORE;
 3255                 break;
 3256         case SOCK_STREAM:
 3257         case SOCK_SEQPACKET:
                      /*
                       * Cut out everything accounted in sb_ccc; the KASSERT
                       * checks that the buffer is empty afterwards.
                       */
 3258                 sb = &so->so_rcv;
 3259                 m = sbcut_locked(sb, sb->sb_ccc);
 3260                 KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
 3261                     ("%s: ccc %u mb %p mbcnt %u", __func__,
 3262                     sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
 3263                 sbrelease_locked(so, SO_RCV);
 3264                 break;
 3265         }
 3266         SOCK_RECVBUF_UNLOCK(so);
              /* Drop the receive I/O lock only when SOCK_IO_RECV_OWNED() says so. */
 3267         if (SOCK_IO_RECV_OWNED(so))
 3268                 SOCK_IO_RECV_UNLOCK(so);
 3269 
              /* Rights are released and mbufs freed outside the buffer lock. */
 3270         if (m != NULL) {
 3271                 unp_scan(m, unp_freerights);
 3272                 m_freem(m);
 3273         }
 3274 }
 3275 
 /*
  * Walk the packets linked by m_nextpkt starting at m0 and, within each
  * packet, every mbuf linked by m_next.  Mbufs whose type is not MT_CONTROL
  * are skipped; the others are parsed as a sequence of control messages.
  * For each SOL_SOCKET/SCM_RIGHTS message, op is called with a pointer to
  * the message data and the number of struct filedescent pointers that fit
  * in that data.
  */
 3276 static void
 3277 unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
 3278 {
 3279         struct mbuf *m;
 3280         struct cmsghdr *cm;
 3281         void *data;
 3282         socklen_t clen, datalen;
 3283 
 3284         while (m0 != NULL) {
 3285                 for (m = m0; m; m = m->m_next) {
 3286                         if (m->m_type != MT_CONTROL)
 3287                                 continue;
 3288 
 3289                         cm = mtod(m, struct cmsghdr *);
 3290                         clen = m->m_len;
 3291 
 3292                         while (cm != NULL) {
                                      /*
                                       * Stop when the remaining bytes cannot
                                       * hold a header or the claimed length.
                                       */
 3293                                 if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 3294                                         break;
 3295 
                                      /* Payload size: cmsg_len minus the header part. */
 3296                                 data = CMSG_DATA(cm);
 3297                                 datalen = (caddr_t)cm + cm->cmsg_len
 3298                                     - (caddr_t)data;
 3299 
 3300                                 if (cm->cmsg_level == SOL_SOCKET &&
 3301                                     cm->cmsg_type == SCM_RIGHTS) {
 3302                                         (*op)(data, datalen /
 3303                                             sizeof(struct filedescent *));
 3304                                 }
 3305 
                                      /*
                                       * Step to the next message only if bytes
                                       * remain beyond this one's CMSG_SPACE();
                                       * otherwise this mbuf is done.
                                       */
 3306                                 if (CMSG_SPACE(datalen) < clen) {
 3307                                         clen -= CMSG_SPACE(datalen);
 3308                                         cm = (struct cmsghdr *)
 3309                                             ((caddr_t)cm + CMSG_SPACE(datalen));
 3310                                 } else {
 3311                                         clen = 0;
 3312                                         cm = NULL;
 3313                                 }
 3314                         }
 3315                 }
 3316                 m0 = m0->m_nextpkt;
 3317         }
 3318 }
 3319 
 3320 /*
 3321  * Definitions of protocols supported in the LOCAL domain.
       *
       * streamproto is the protocol switch for SOCK_STREAM sockets.  Sends are
       * routed through uipc_send and receives through soreceive_generic; it is
       * the only one of the three tables that sets pr_ready.
 3322  */
 3323 static struct protosw streamproto = {
 3324         .pr_type =              SOCK_STREAM,
 3325         .pr_flags =             PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS|
 3326                                     PR_CAPATTACH,
 3327         .pr_ctloutput =         &uipc_ctloutput,
 3328         .pr_abort =             uipc_abort,
 3329         .pr_accept =            uipc_accept,
 3330         .pr_attach =            uipc_attach,
 3331         .pr_bind =              uipc_bind,
 3332         .pr_bindat =            uipc_bindat,
 3333         .pr_connect =           uipc_connect,
 3334         .pr_connectat =         uipc_connectat,
 3335         .pr_connect2 =          uipc_connect2,
 3336         .pr_detach =            uipc_detach,
 3337         .pr_disconnect =        uipc_disconnect,
 3338         .pr_listen =            uipc_listen,
 3339         .pr_peeraddr =          uipc_peeraddr,
 3340         .pr_rcvd =              uipc_rcvd,
 3341         .pr_send =              uipc_send,
 3342         .pr_ready =             uipc_ready,
 3343         .pr_sense =             uipc_sense,
 3344         .pr_shutdown =          uipc_shutdown,
 3345         .pr_sockaddr =          uipc_sockaddr,
 3346         .pr_soreceive =         soreceive_generic,
 3347         .pr_close =             uipc_close,
 3348 };
 3349 
 /*
  * Protocol switch for SOCK_DGRAM sockets.  Unlike the stream and seqpacket
  * tables it installs dedicated whole-socket handlers (pr_sosend =
  * uipc_sosend_dgram, pr_soreceive = uipc_soreceive_dgram), adds PR_SOCKBUF
  * to its flags, and sets no pr_listen, pr_rcvd or pr_send entry.
  */
 3350 static struct protosw dgramproto = {
 3351         .pr_type =              SOCK_DGRAM,
 3352         .pr_flags =             PR_ATOMIC | PR_ADDR |PR_RIGHTS | PR_CAPATTACH |
 3353                                     PR_SOCKBUF,
 3354         .pr_ctloutput =         &uipc_ctloutput,
 3355         .pr_abort =             uipc_abort,
 3356         .pr_accept =            uipc_accept,
 3357         .pr_attach =            uipc_attach,
 3358         .pr_bind =              uipc_bind,
 3359         .pr_bindat =            uipc_bindat,
 3360         .pr_connect =           uipc_connect,
 3361         .pr_connectat =         uipc_connectat,
 3362         .pr_connect2 =          uipc_connect2,
 3363         .pr_detach =            uipc_detach,
 3364         .pr_disconnect =        uipc_disconnect,
 3365         .pr_peeraddr =          uipc_peeraddr,
 3366         .pr_sosend =            uipc_sosend_dgram,
 3367         .pr_sense =             uipc_sense,
 3368         .pr_shutdown =          uipc_shutdown,
 3369         .pr_sockaddr =          uipc_sockaddr,
 3370         .pr_soreceive =         uipc_soreceive_dgram,
 3371         .pr_close =             uipc_close,
 3372 };
 3373 
 /*
  * Protocol switch for SOCK_SEQPACKET sockets.  It wires the same handlers
  * as the stream table except that pr_ready is not set, and its flags add
  * PR_ADDR and PR_ATOMIC.
  */
 3374 static struct protosw seqpacketproto = {
 3375         .pr_type =              SOCK_SEQPACKET,
 3376         /*
 3377          * XXXRW: For now, PR_ADDR because soreceive will bump into them
 3378          * due to our use of sbappendaddr.  A new sbappend variant is needed
 3379          * that supports both atomic record writes and control data.
 3380          */
 3381         .pr_flags =             PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|
 3382                                     PR_WANTRCVD|PR_RIGHTS|PR_CAPATTACH,
 3383         .pr_ctloutput =         &uipc_ctloutput,
 3384         .pr_abort =             uipc_abort,
 3385         .pr_accept =            uipc_accept,
 3386         .pr_attach =            uipc_attach,
 3387         .pr_bind =              uipc_bind,
 3388         .pr_bindat =            uipc_bindat,
 3389         .pr_connect =           uipc_connect,
 3390         .pr_connectat =         uipc_connectat,
 3391         .pr_connect2 =          uipc_connect2,
 3392         .pr_detach =            uipc_detach,
 3393         .pr_disconnect =        uipc_disconnect,
 3394         .pr_listen =            uipc_listen,
 3395         .pr_peeraddr =          uipc_peeraddr,
 3396         .pr_rcvd =              uipc_rcvd,
 3397         .pr_send =              uipc_send,
 3398         .pr_sense =             uipc_sense,
 3399         .pr_shutdown =          uipc_shutdown,
 3400         .pr_sockaddr =          uipc_sockaddr,
 3401         .pr_soreceive =         soreceive_generic,      /* XXX: or...? */
 3402         .pr_close =             uipc_close,
 3403 };
 3404 
 /*
  * Domain descriptor for AF_LOCAL.  It lists the stream, datagram and
  * seqpacket protocol switches and installs unp_externalize and unp_dispose
  * as the domain's externalize and dispose hooks.  dom_nprotosw must match
  * the number of entries in dom_protosw (three here).
  * NOTE(review): DOMAIN_SET(local) presumably registers this descriptor
  * with the kernel's domain list; the macro is defined outside this file.
  */
 3405 static struct domain localdomain = {
 3406         .dom_family =           AF_LOCAL,
 3407         .dom_name =             "local",
 3408         .dom_externalize =      unp_externalize,
 3409         .dom_dispose =          unp_dispose,
 3410         .dom_nprotosw =         3,
 3411         .dom_protosw =          {
 3412                 &streamproto,
 3413                 &dgramproto,
 3414                 &seqpacketproto,
 3415         }
 3416 };
 3417 DOMAIN_SET(local);
 3418 
 3419 /*
 3420  * A helper function called by VFS before socket-type vnode reclamation.
 3421  * For an active vnode it clears unp_vnode pointer and decrements unp_vnode
 3422  * use count.
       *
       * The vnode must be exclusively locked and of type VSOCK (both are
       * asserted).  The unpcb is looked up with VOP_UNP_CONNECT() while the
       * pool mutex selected for vp is held; the vunref() happens only after
       * that mutex and the pcb lock have been released.
 3423  */
 3424 void
 3425 vfs_unp_reclaim(struct vnode *vp)
 3426 {
 3427         struct unpcb *unp;
 3428         int active;
 3429         struct mtx *vplock;
 3430 
 3431         ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim");
 3432         KASSERT(vp->v_type == VSOCK,
 3433             ("vfs_unp_reclaim: vp->v_type != VSOCK"));
 3434 
 3435         active = 0;
 3436         vplock = mtx_pool_find(mtxpool_sleep, vp);
 3437         mtx_lock(vplock);
 3438         VOP_UNP_CONNECT(vp, &unp);
              /* No unpcb attached to this vnode: nothing to detach. */
 3439         if (unp == NULL)
 3440                 goto done;
 3441         UNP_PCB_LOCK(unp);
              /* Detach only if the pcb still points back at this vnode. */
 3442         if (unp->unp_vnode == vp) {
 3443                 VOP_UNP_DETACH(vp);
 3444                 unp->unp_vnode = NULL;
 3445                 active = 1;
 3446         }
 3447         UNP_PCB_UNLOCK(unp);
 3448  done:
 3449         mtx_unlock(vplock);
 3450         if (active)
 3451                 vunref(vp);
 3452 }
 3453 
 3454 #ifdef DDB
 /*
  * Print one space through db_printf() for each unit of indent.  A zero or
  * negative indent prints nothing.
  */
 static void
 db_print_indent(int indent)
 {
         int remaining;

         remaining = indent;
         while (remaining > 0) {
                 db_printf(" ");
                 remaining--;
         }
 }
 3463 
 3464 static void
 3465 db_print_unpflags(int unp_flags)
 3466 {
 3467         int comma;
 3468 
 3469         comma = 0;
 3470         if (unp_flags & UNP_HAVEPC) {
 3471                 db_printf("%sUNP_HAVEPC", comma ? ", " : "");
 3472                 comma = 1;
 3473         }
 3474         if (unp_flags & UNP_WANTCRED_ALWAYS) {
 3475                 db_printf("%sUNP_WANTCRED_ALWAYS", comma ? ", " : "");
 3476                 comma = 1;
 3477         }
 3478         if (unp_flags & UNP_WANTCRED_ONESHOT) {
 3479                 db_printf("%sUNP_WANTCRED_ONESHOT", comma ? ", " : "");
 3480                 comma = 1;
 3481         }
 3482         if (unp_flags & UNP_CONNWAIT) {
 3483                 db_printf("%sUNP_CONNWAIT", comma ? ", " : "");
 3484                 comma = 1;
 3485         }
 3486         if (unp_flags & UNP_CONNECTING) {
 3487                 db_printf("%sUNP_CONNECTING", comma ? ", " : "");
 3488                 comma = 1;
 3489         }
 3490         if (unp_flags & UNP_BINDING) {
 3491                 db_printf("%sUNP_BINDING", comma ? ", " : "");
 3492                 comma = 1;
 3493         }
 3494 }
 3495 
 3496 static void
 3497 db_print_xucred(int indent, struct xucred *xu)
 3498 {
 3499         int comma, i;
 3500 
 3501         db_print_indent(indent);
 3502         db_printf("cr_version: %u   cr_uid: %u   cr_pid: %d   cr_ngroups: %d\n",
 3503             xu->cr_version, xu->cr_uid, xu->cr_pid, xu->cr_ngroups);
 3504         db_print_indent(indent);
 3505         db_printf("cr_groups: ");
 3506         comma = 0;
 3507         for (i = 0; i < xu->cr_ngroups; i++) {
 3508                 db_printf("%s%u", comma ? ", " : "", xu->cr_groups[i]);
 3509                 comma = 1;
 3510         }
 3511         db_printf("\n");
 3512 }
 3513 
 3514 static void
 3515 db_print_unprefs(int indent, struct unp_head *uh)
 3516 {
 3517         struct unpcb *unp;
 3518         int counter;
 3519 
 3520         counter = 0;
 3521         LIST_FOREACH(unp, uh, unp_reflink) {
 3522                 if (counter % 4 == 0)
 3523                         db_print_indent(indent);
 3524                 db_printf("%p  ", unp);
 3525                 if (counter % 4 == 3)
 3526                         db_printf("\n");
 3527                 counter++;
 3528         }
 3529         if (counter != 0 && counter % 4 != 0)
 3530                 db_printf("\n");
 3531 }
 3532 
 /*
  * DDB command "show unpcb <addr>": treats addr as a pointer to a struct
  * unpcb and prints selected fields of it.  Prints a usage line and returns
  * when no address was supplied.
  * NOTE(review): have_addr and addr are not declared here; presumably they
  * are parameters supplied by the DB_SHOW_COMMAND macro.
  */
 3533 DB_SHOW_COMMAND(unpcb, db_show_unpcb)
 3534 {
 3535         struct unpcb *unp;
 3536 
 3537         if (!have_addr) {
 3538                 db_printf("usage: show unpcb <addr>\n");
 3539                 return;
 3540         }
 3541         unp = (struct unpcb *)addr;
 3542 
 3543         db_printf("unp_socket: %p   unp_vnode: %p\n", unp->unp_socket,
 3544             unp->unp_vnode);
 3545 
 3546         db_printf("unp_ino: %ju   unp_conn: %p\n", (uintmax_t)unp->unp_ino,
 3547             unp->unp_conn);
 3548 
              /* The list of referencing unpcbs is printed four per row. */
 3549         db_printf("unp_refs:\n");
 3550         db_print_unprefs(2, &unp->unp_refs);
 3551 
 3552         /* XXXRW: Would be nice to print the full address, if any. */
 3553         db_printf("unp_addr: %p\n", unp->unp_addr);
 3554 
 3555         db_printf("unp_gencnt: %llu\n",
 3556             (unsigned long long)unp->unp_gencnt);
 3557 
              /* Raw hex value first, then the decoded flag names in parentheses. */
 3558         db_printf("unp_flags: %x (", unp->unp_flags);
 3559         db_print_unpflags(unp->unp_flags);
 3560         db_printf(")\n");
 3561 
 3562         db_printf("unp_peercred:\n");
 3563         db_print_xucred(2, &unp->unp_peercred);
 3564 
 3565         db_printf("unp_refcount: %u\n", unp->unp_refcount);
 3566 }
 3567 #endif

Cache object: dce44d1b832ee32546360516c8ddf1ba


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.