The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    5  *      The Regents of the University of California. All Rights Reserved.
    6  * Copyright (c) 2004-2009 Robert N. M. Watson All Rights Reserved.
    7  * Copyright (c) 2018 Matthew Macy
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   34  */
   35 
   36 /*
   37  * UNIX Domain (Local) Sockets
   38  *
   39  * This is an implementation of UNIX (local) domain sockets.  Each socket has
   40  * an associated struct unpcb (UNIX protocol control block).  Stream sockets
   41  * may be connected to 0 or 1 other socket.  Datagram sockets may be
   42  * connected to 0, 1, or many other sockets.  Sockets may be created and
   43  * connected in pairs (socketpair(2)), or bound/connected to using the file
   44  * system name space.  For most purposes, only the receive socket buffer is
   45  * used, as sending on one socket delivers directly to the receive socket
   46  * buffer of a second socket.
   47  *
   48  * The implementation is substantially complicated by the fact that
   49  * "ancillary data", such as file descriptors or credentials, may be passed
   50  * across UNIX domain sockets.  The potential for passing UNIX domain sockets
   51  * over other UNIX domain sockets requires the implementation of a simple
   52  * garbage collector to find and tear down cycles of disconnected sockets.
   53  *
   54  * TODO:
   55  *      RDM
   56  *      rethink name space problems
   57  *      need a proper out-of-band
   58  */
   59 
   60 #include <sys/cdefs.h>
   61 __FBSDID("$FreeBSD$");
   62 
   63 #include "opt_ddb.h"
   64 
   65 #include <sys/param.h>
   66 #include <sys/capsicum.h>
   67 #include <sys/domain.h>
   68 #include <sys/eventhandler.h>
   69 #include <sys/fcntl.h>
   70 #include <sys/file.h>
   71 #include <sys/filedesc.h>
   72 #include <sys/kernel.h>
   73 #include <sys/lock.h>
   74 #include <sys/malloc.h>
   75 #include <sys/mbuf.h>
   76 #include <sys/mount.h>
   77 #include <sys/mutex.h>
   78 #include <sys/namei.h>
   79 #include <sys/proc.h>
   80 #include <sys/protosw.h>
   81 #include <sys/queue.h>
   82 #include <sys/resourcevar.h>
   83 #include <sys/rwlock.h>
   84 #include <sys/socket.h>
   85 #include <sys/socketvar.h>
   86 #include <sys/signalvar.h>
   87 #include <sys/stat.h>
   88 #include <sys/sx.h>
   89 #include <sys/sysctl.h>
   90 #include <sys/systm.h>
   91 #include <sys/taskqueue.h>
   92 #include <sys/un.h>
   93 #include <sys/unpcb.h>
   94 #include <sys/vnode.h>
   95 
   96 #include <net/vnet.h>
   97 
   98 #ifdef DDB
   99 #include <ddb/ddb.h>
  100 #endif
  101 
  102 #include <security/mac/mac_framework.h>
  103 
  104 #include <vm/uma.h>
  105 
  106 MALLOC_DECLARE(M_FILECAPS);
  107 
  108 /*
  109  * See unpcb.h for the locking key.
  110  */
  111 
      /*
       * unp_zone is the UMA zone that uipc_attach() allocates unpcbs from and
       * that unp_pcb_rele() frees them back to.  unp_gencnt, unp_ino and
       * unp_count are bumped in uipc_attach() while the link write lock is held.
       * unp_rights is exported read-only as the net.local.inflight sysctl.
       */
  112 static uma_zone_t       unp_zone;
  113 static unp_gen_t        unp_gencnt;     /* (l) */
  114 static u_int            unp_count;      /* (l) Count of local sockets. */
  115 static ino_t            unp_ino;        /* Prototype for fake inode numbers. */
  116 static int              unp_rights;     /* (g) File descriptors in flight. */
  117 static struct unp_head  unp_shead;      /* (l) List of stream sockets. */
  118 static struct unp_head  unp_dhead;      /* (l) List of datagram sockets. */
  119 static struct unp_head  unp_sphead;     /* (l) List of seqpacket sockets. */
  120 
      /*
       * One entry on the unp_defers list: a single file pointer (ud_fp) linked
       * through ud_link.  unp_defers_count is exported read-only as the
       * net.local.deferred sysctl ("File descriptors deferred to taskqueue for
       * close.").
       */
  121 struct unp_defer {
  122         SLIST_ENTRY(unp_defer) ud_link;
  123         struct file *ud_fp;
  124 };
  125 static SLIST_HEAD(, unp_defer) unp_defers;
  126 static int unp_defers_count;
  127 
      /*
       * Placeholder address with only the first two members (length and
       * AF_LOCAL family) initialized.  uipc_accept() hands it back when the
       * socket has no peer or the peer has no bound address.
       */
  128 static const struct sockaddr    sun_noname = { sizeof(sun_noname), AF_LOCAL };
  129 
  130 /*
  131  * Garbage collection of cyclic file descriptor/socket references occurs
  132  * asynchronously in a taskqueue context in order to avoid recursion and
  133  * reentrance in the UNIX domain socket, file descriptor, and socket layer
  134  * code.  See unp_gc() for a full description.
  135  */
  136 static struct timeout_task unp_gc_task;
  137 
  138 /*
  139  * The close of unix domain sockets attached as SCM_RIGHTS is
  140  * postponed to the taskqueue, to avoid arbitrary recursion depth.
  141  * The attached sockets might themselves have other sockets attached.
  142  */
  143 static struct task      unp_defer_task;
  144 
  145 /*
  146  * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
  147  * stream sockets, although the total for sender and receiver is actually
  148  * only PIPSIZ.
  149  *
  150  * Datagram sockets really use the sendspace as the maximum datagram size,
  151  * and don't really want to reserve the sendspace.  Their recvspace should be
  152  * large enough for at least one max-size datagram plus address.
      *
      * These defaults are what uipc_attach() passes to soreserve() when the
      * socket does not already have both high-water marks set.
  153  */
  154 #ifndef PIPSIZ
  155 #define PIPSIZ  8192
  156 #endif
  157 static u_long   unpst_sendspace = PIPSIZ;
  158 static u_long   unpst_recvspace = PIPSIZ;
  159 static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
  160 static u_long   unpdg_recvspace = 4*1024;
  161 static u_long   unpsp_sendspace = PIPSIZ;       /* really max datagram size */
  162 static u_long   unpsp_recvspace = PIPSIZ;
  163 
      /*
       * sysctl tree: a "local" node under _net, with one child node per socket
       * type (stream, dgram, seqpacket).
       */
  164 static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  165     "Local domain");
  166 static SYSCTL_NODE(_net_local, SOCK_STREAM, stream,
  167     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  168     "SOCK_STREAM");
  169 static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram,
  170     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  171     "SOCK_DGRAM");
  172 static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket,
  173     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  174     "SOCK_SEQPACKET");
  175 
      /*
       * Read-write knobs for the default buffer sizes above.  Note that the
       * dgram and seqpacket send-space variables are published under the names
       * "maxdgram" and "maxseqpacket" rather than "sendspace".
       */
  176 SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
  177            &unpst_sendspace, 0, "Default stream send space.");
  178 SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
  179            &unpst_recvspace, 0, "Default stream receive space.");
  180 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
  181            &unpdg_sendspace, 0, "Default datagram send space.");
  182 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
  183            &unpdg_recvspace, 0, "Default datagram receive space.");
  184 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW,
  185            &unpsp_sendspace, 0, "Default seqpacket send space.");
  186 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW,
  187            &unpsp_recvspace, 0, "Default seqpacket receive space.");
      /* Read-only counters. */
  188 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
  189     "File descriptors in flight.");
  190 SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
  191     &unp_defers_count, 0,
  192     "File descriptors deferred to taskqueue for close.");
  193 
  194 /*
  195  * Locking and synchronization:
  196  *
  197  * Several types of locks exist in the local domain socket implementation:
  198  * - a global linkage lock
  199  * - a global connection list lock
  200  * - the mtxpool lock
  201  * - per-unpcb mutexes
  202  *
  203  * The linkage lock protects the global socket lists, the generation number
  204  * counter and garbage collector state.
  205  *
  206  * The connection list lock protects the list of referring sockets in a datagram
  207  * socket PCB.  This lock is also overloaded to protect a global list of
  208  * sockets whose buffers contain socket references in the form of SCM_RIGHTS
  209  * messages.  To avoid recursion, such references are released by a dedicated
  210  * thread.
  211  *
  212  * The mtxpool lock protects the vnode from being modified while referenced.
  213  * Lock ordering rules require that it be acquired before any PCB locks.
  214  *
  215  * The unpcb lock (unp_mtx) protects the most commonly referenced fields in the
  216  * unpcb.  This includes the unp_conn field, which either links two connected
  217  * PCBs together (for connected socket types) or points at the destination
  218  * socket (for connectionless socket types).  The operations of creating or
  219  * destroying a connection therefore involve locking multiple PCBs.  To avoid
  220  * lock order reversals, in some cases this involves dropping a PCB lock and
  221  * using a reference counter to maintain liveness.
  222  *
  223  * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
  224  * allocated in pru_attach() and freed in pru_detach().  The validity of that
  225  * pointer is an invariant, so no lock is required to dereference the so_pcb
  226  * pointer if a valid socket reference is held by the caller.  In practice,
  227  * this is always true during operations performed on a socket.  Each unpcb
  228  * has a back-pointer to its socket, unp_socket, which will be stable under
  229  * the same circumstances.
  230  *
  231  * This pointer may only be safely dereferenced as long as a valid reference
  232  * to the unpcb is held.  Typically, this reference will be from the socket,
  233  * or from another unpcb when the referring unpcb's lock is held (in order
  234  * that the reference not be invalidated during use).  For example, to follow
  235  * unp->unp_conn->unp_socket, you need to hold a lock on unp_conn to guarantee
  236  * that detach is not run clearing unp_socket.
  237  *
  238  * Blocking with UNIX domain sockets is a tricky issue: unlike most network
  239  * protocols, bind() is a non-atomic operation, and connect() requires
  240  * potential sleeping in the protocol, due to potentially waiting on local or
  241  * distributed file systems.  We try to separate "lookup" operations, which
  242  * may sleep, and the IPC operations themselves, which typically can occur
  243  * with relative atomicity as locks can be held over the entire operation.
  244  *
  245  * Another tricky issue is simultaneous multi-threaded or multi-process
  246  * access to a single UNIX domain socket.  These are handled by the flags
  247  * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
  248  * binding, both of which involve dropping UNIX domain socket locks in order
  249  * to perform namei() and other file system operations.
  250  */
  251 static struct rwlock    unp_link_rwlock;        /* Global linkage lock. */
  252 static struct mtx       unp_defers_lock;        /* Backs UNP_DEFERRED_* and UNP_REF_LIST_*. */
  253 
      /* Linkage (rwlock) wrappers: init, assertions, read/write lock/unlock. */
  254 #define UNP_LINK_LOCK_INIT()            rw_init(&unp_link_rwlock,       \
  255                                             "unp_link_rwlock")
  256 
  257 #define UNP_LINK_LOCK_ASSERT()          rw_assert(&unp_link_rwlock,     \
  258                                             RA_LOCKED)
  259 #define UNP_LINK_UNLOCK_ASSERT()        rw_assert(&unp_link_rwlock,     \
  260                                             RA_UNLOCKED)
  261 
  262 #define UNP_LINK_RLOCK()                rw_rlock(&unp_link_rwlock)
  263 #define UNP_LINK_RUNLOCK()              rw_runlock(&unp_link_rwlock)
  264 #define UNP_LINK_WLOCK()                rw_wlock(&unp_link_rwlock)
  265 #define UNP_LINK_WUNLOCK()              rw_wunlock(&unp_link_rwlock)
  266 #define UNP_LINK_WLOCK_ASSERT()         rw_assert(&unp_link_rwlock,     \
  267                                             RA_WLOCKED)
      /* Evaluates to the result of rw_wowned(); uipc_attach() uses it to skip re-locking. */
  268 #define UNP_LINK_WOWNED()               rw_wowned(&unp_link_rwlock)
  269 
      /* Wrappers around the unp_defers_lock mutex. */
  270 #define UNP_DEFERRED_LOCK_INIT()        mtx_init(&unp_defers_lock, \
  271                                             "unp_defer", NULL, MTX_DEF)
  272 #define UNP_DEFERRED_LOCK()             mtx_lock(&unp_defers_lock)
  273 #define UNP_DEFERRED_UNLOCK()           mtx_unlock(&unp_defers_lock)
  274 
  /*
   * The reference-list lock is an alias for the deferred lock (both use
   * unp_defers_lock).  The expansions deliberately carry no trailing
   * semicolon: the caller supplies it, so "UNP_REF_LIST_LOCK();" expands to
   * exactly one statement and remains safe in an unbraced if/else.
   */
  275 #define UNP_REF_LIST_LOCK()             UNP_DEFERRED_LOCK()
  276 #define UNP_REF_LIST_UNLOCK()           UNP_DEFERRED_UNLOCK()
  277 
  /*
   * Per-unpcb mutex (unp_mtx) wrappers.  The mutex is initialized with
   * MTX_DUPOK; presumably this is because two unpcb mutexes of the same type
   * are routinely held at once (see unp_pcb_lock_pair()) -- confirm against
   * mutex(9).
   */
  278 #define UNP_PCB_LOCK_INIT(unp)          mtx_init(&(unp)->unp_mtx,       \
  279                                             "unp", "unp",       \
  280                                             MTX_DUPOK|MTX_DEF)
  281 #define UNP_PCB_LOCK_DESTROY(unp)       mtx_destroy(&(unp)->unp_mtx)
  282 #define UNP_PCB_LOCKPTR(unp)            (&(unp)->unp_mtx)
  283 #define UNP_PCB_LOCK(unp)               mtx_lock(&(unp)->unp_mtx)
  284 #define UNP_PCB_TRYLOCK(unp)            mtx_trylock(&(unp)->unp_mtx)
  285 #define UNP_PCB_UNLOCK(unp)             mtx_unlock(&(unp)->unp_mtx)
  286 #define UNP_PCB_OWNED(unp)              mtx_owned(&(unp)->unp_mtx)
      /* Assertions: lock held / not held by the current thread. */
  287 #define UNP_PCB_LOCK_ASSERT(unp)        mtx_assert(&(unp)->unp_mtx, MA_OWNED)
  288 #define UNP_PCB_UNLOCK_ASSERT(unp)      mtx_assert(&(unp)->unp_mtx, MA_NOTOWNED)
  289 
  /*
   * Forward declarations of file-local (static) functions, so that they can
   * be referenced (e.g. from the protosw/domain tables) before their
   * definitions.
   */
  290 static int      uipc_connect2(struct socket *, struct socket *);
  291 static int      uipc_ctloutput(struct socket *, struct sockopt *);
  292 static int      unp_connect(struct socket *, struct sockaddr *,
  293                     struct thread *);
  294 static int      unp_connectat(int, struct socket *, struct sockaddr *,
  295                     struct thread *);
  296 static int      unp_connect2(struct socket *so, struct socket *so2, int);
  297 static void     unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
  298 static void     unp_dispose(struct socket *so);
  299 static void     unp_dispose_mbuf(struct mbuf *);
  300 static void     unp_shutdown(struct unpcb *);
  301 static void     unp_drop(struct unpcb *);
  302 static void     unp_gc(__unused void *, int);
  303 static void     unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
  304 static void     unp_discard(struct file *);
  305 static void     unp_freerights(struct filedescent **, int);
  306 static void     unp_init(void);
  307 static int      unp_internalize(struct mbuf **, struct thread *);
  308 static void     unp_internalize_fp(struct file *);
  309 static int      unp_externalize(struct mbuf *, struct mbuf **, int);
  310 static int      unp_externalize_fp(struct file *);
  311 static struct mbuf      *unp_addsockcred(struct thread *, struct mbuf *, int);
  312 static void     unp_process_defers(void * __unused, int);
  313 
  /*
   * Take an additional reference on a unpcb.
   *
   * The value returned by refcount_acquire() is checked to be non-zero, i.e.
   * the caller is expected to already hold (or be protected by) a reference;
   * this function must not be used to resurrect an unreferenced unpcb.
   * 'old' is only consumed by the KASSERT, hence __unused.
   */
  314 static void
  315 unp_pcb_hold(struct unpcb *unp)
  316 {
  317         u_int old __unused;
  318 
  319         old = refcount_acquire(&unp->unp_refcount);
  320         KASSERT(old > 0, ("%s: unpcb %p has no references", __func__, unp));
  321 }
  322 
  /*
   * Drop one reference on a unpcb whose lock is held by the caller.
   *
   * Returns true if this was the last reference: the unpcb has then been
   * unlocked, its mutex destroyed and its memory returned to unp_zone, so the
   * caller must neither touch nor unlock it again.  Returns false otherwise,
   * in which case the unpcb is still valid and still locked.
   *
   * The result is marked __result_use_check because the caller's subsequent
   * locking obligations depend on it.
   */
  323 static __result_use_check bool
  324 unp_pcb_rele(struct unpcb *unp)
  325 {
  326         bool ret;
  327 
  328         UNP_PCB_LOCK_ASSERT(unp);
  329 
  330         if ((ret = refcount_release(&unp->unp_refcount))) {
                      /* Last reference: unlock before destroying the mutex. */
  331                 UNP_PCB_UNLOCK(unp);
  332                 UNP_PCB_LOCK_DESTROY(unp);
  333                 uma_zfree(unp_zone, unp);
  334         }
  335         return (ret);
  336 }
  337 
  /*
   * Drop one reference on a unpcb in a context where it is known not to be
   * the last one.  Nothing is freed here; the KASSERT fires if
   * refcount_release() reports that the count reached zero.  Unlike
   * unp_pcb_rele(), no lock assertion is made.
   */
  338 static void
  339 unp_pcb_rele_notlast(struct unpcb *unp)
  340 {
  341         bool ret __unused;
  342 
  343         ret = refcount_release(&unp->unp_refcount);
  344         KASSERT(!ret, ("%s: unpcb %p has no references", __func__, unp));
  345 }
  346 
  /*
   * Lock two unpcbs, neither of which may already be locked by the caller.
   *
   * The mutexes are always taken in ascending address order so that two
   * threads locking the same pair cannot deadlock.  If both arguments name the
   * same unpcb it is locked exactly once.
   */
  347 static void
  348 unp_pcb_lock_pair(struct unpcb *unp, struct unpcb *unp2)
  349 {
  350         UNP_PCB_UNLOCK_ASSERT(unp);
  351         UNP_PCB_UNLOCK_ASSERT(unp2);
  352 
  353         if (unp == unp2) {
  354                 UNP_PCB_LOCK(unp);
  355         } else if ((uintptr_t)unp2 > (uintptr_t)unp) {
  356                 UNP_PCB_LOCK(unp);
  357                 UNP_PCB_LOCK(unp2);
  358         } else {
  359                 UNP_PCB_LOCK(unp2);
  360                 UNP_PCB_LOCK(unp);
  361         }
  362 }
  363 
  /*
   * Unlock a pair of unpcbs.  When both arguments refer to the same unpcb it
   * is unlocked only once, mirroring unp_pcb_lock_pair().
   */
  364 static void
  365 unp_pcb_unlock_pair(struct unpcb *unp, struct unpcb *unp2)
  366 {
  367         UNP_PCB_UNLOCK(unp);
  368         if (unp != unp2)
  369                 UNP_PCB_UNLOCK(unp2);
  370 }
  371 
  372 /*
  373  * Try to lock the connected peer of an already locked socket.  In some cases
  374  * this requires that we unlock the current socket.  The pairbusy counter is
  375  * used to block concurrent connection attempts while the lock is dropped.  The
  376  * caller must be careful to revalidate PCB state.
      *
      * Returns the locked peer, or NULL if there is no peer (either initially,
      * or because it went away while unp's lock was dropped).  If the socket is
      * connected to itself, unp is returned and no additional lock is taken.
      * In every case unp is locked on return.
  377  */
  378 static struct unpcb *
  379 unp_pcb_lock_peer(struct unpcb *unp)
  380 {
  381         struct unpcb *unp2;
  382 
  383         UNP_PCB_LOCK_ASSERT(unp);
  384         unp2 = unp->unp_conn;
  385         if (unp2 == NULL)
  386                 return (NULL);
  387         if (__predict_false(unp == unp2))
  388                 return (unp);
  389 
  390         UNP_PCB_UNLOCK_ASSERT(unp2);
  391 
              /* Fast path: the peer's lock is uncontended. */
  392         if (__predict_true(UNP_PCB_TRYLOCK(unp2)))
  393                 return (unp2);
              /*
               * The peer sorts after us, so blocking on its lock while holding
               * ours follows the address order used by unp_pcb_lock_pair().
               */
  394         if ((uintptr_t)unp2 > (uintptr_t)unp) {
  395                 UNP_PCB_LOCK(unp2);
  396                 return (unp2);
  397         }
              /*
               * Slow path: our lock must be dropped to respect lock order.  Mark
               * the pair busy and take a reference on the peer so that it cannot
               * be freed while we hold no lock on it.
               */
  398         unp->unp_pairbusy++;
  399         unp_pcb_hold(unp2);
  400         UNP_PCB_UNLOCK(unp);
  401 
  402         UNP_PCB_LOCK(unp2);
  403         UNP_PCB_LOCK(unp);
  404         KASSERT(unp->unp_conn == unp2 || unp->unp_conn == NULL,
  405             ("%s: socket %p was reconnected", __func__, unp));
              /* Last busy holder wakes anyone sleeping on unp with UNP_WAITING set. */
  406         if (--unp->unp_pairbusy == 0 && (unp->unp_flags & UNP_WAITING) != 0) {
  407                 unp->unp_flags &= ~UNP_WAITING;
  408                 wakeup(unp);
  409         }
              /* Drop our temporary reference; it may have been the last one. */
  410         if (unp_pcb_rele(unp2)) {
  411                 /* unp2 is unlocked. */
  412                 return (NULL);
  413         }
              /* The connection was torn down while our lock was dropped. */
  414         if (unp->unp_conn == NULL) {
  415                 UNP_PCB_UNLOCK(unp2);
  416                 return (NULL);
  417         }
  418         return (unp2);
  419 }
  420 
  421 /*
  422  * Definitions of protocols supported in the LOCAL domain.
      *
      * localsw has one entry per socket type (stream, datagram, seqpacket); all
      * three share uipc_ctloutput and belong to localdomain, which is forward
      * declared here because the table and the domain refer to each other.
  423  */
  424 static struct domain localdomain;
  425 static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream;
  426 static struct pr_usrreqs uipc_usrreqs_seqpacket;
  427 static struct protosw localsw[] = {
  428 {
  429         .pr_type =              SOCK_STREAM,
  430         .pr_domain =            &localdomain,
  431         .pr_flags =             PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
  432         .pr_ctloutput =         &uipc_ctloutput,
  433         .pr_usrreqs =           &uipc_usrreqs_stream
  434 },
  435 {
  436         .pr_type =              SOCK_DGRAM,
  437         .pr_domain =            &localdomain,
  438         .pr_flags =             PR_ATOMIC|PR_ADDR|PR_RIGHTS,
  439         .pr_ctloutput =         &uipc_ctloutput,
  440         .pr_usrreqs =           &uipc_usrreqs_dgram
  441 },
  442 {
  443         .pr_type =              SOCK_SEQPACKET,
  444         .pr_domain =            &localdomain,
  445 
  446         /*
  447          * XXXRW: For now, PR_ADDR because soreceive will bump into them
  448          * due to our use of sbappendaddr.  A new sbappend variant is needed
  449          * that supports both atomic record writes and control data.
  450          */
  451         .pr_flags =             PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD|
  452                                     PR_RIGHTS,
  453         .pr_ctloutput =         &uipc_ctloutput,
  454         .pr_usrreqs =           &uipc_usrreqs_seqpacket,
  455 },
  456 };
  457 
      /*
       * The local (AF_LOCAL) domain.  dom_protoswNPROTOSW points one element
       * past the end of localsw, i.e. it is the end marker of the table.
       */
  458 static struct domain localdomain = {
  459         .dom_family =           AF_LOCAL,
  460         .dom_name =             "local",
  461         .dom_init =             unp_init,
  462         .dom_externalize =      unp_externalize,
  463         .dom_dispose =          unp_dispose,
  464         .dom_protosw =          localsw,
  465         .dom_protoswNPROTOSW =  &localsw[nitems(localsw)]
  466 };
  467 DOMAIN_SET(local);
  468 
  /*
   * Abort handler: if the socket has a connected peer, drop that peer.
   *
   * The socket's unpcb must not be locked on entry.  The peer pointer is read
   * under the unpcb lock and a reference is taken on the peer before the lock
   * is released, so the peer stays allocated across the call to unp_drop().
   * NOTE(review): unp_drop() is not visible here; presumably it consumes the
   * reference taken by unp_pcb_hold() -- confirm.
   */
  469 static void
  470 uipc_abort(struct socket *so)
  471 {
  472         struct unpcb *unp, *unp2;
  473 
  474         unp = sotounpcb(so);
  475         KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
  476         UNP_PCB_UNLOCK_ASSERT(unp);
  477 
  478         UNP_PCB_LOCK(unp);
  479         unp2 = unp->unp_conn;
  480         if (unp2 != NULL) {
  481                 unp_pcb_hold(unp2);
  482                 UNP_PCB_UNLOCK(unp);
  483                 unp_drop(unp2);
  484         } else
  485                 UNP_PCB_UNLOCK(unp);
  486 }
  487 
  /*
   * Accept handler: report the address of the connected peer.
   *
   * *nam is set to a freshly allocated (M_SONAME, M_WAITOK) buffer of
   * sizeof(struct sockaddr_un) bytes.  It receives a copy of the peer's bound
   * address, or of sun_noname when there is no peer or the peer is unbound.
   * Only sa_len bytes are copied.  Always returns 0.
   * NOTE(review): ownership of *nam presumably passes to the caller -- confirm.
   */
  488 static int
  489 uipc_accept(struct socket *so, struct sockaddr **nam)
  490 {
  491         struct unpcb *unp, *unp2;
  492         const struct sockaddr *sa;
  493 
  494         /*
  495          * Pass back name of connected socket, if it was bound and we are
  496          * still connected (our peer may have closed already!).
  497          */
  498         unp = sotounpcb(so);
  499         KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));
  500 
              /* Allocate before taking any PCB lock, since M_WAITOK may sleep. */
  501         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  502         UNP_PCB_LOCK(unp);
  503         unp2 = unp_pcb_lock_peer(unp);
  504         if (unp2 != NULL && unp2->unp_addr != NULL)
  505                 sa = (struct sockaddr *)unp2->unp_addr;
  506         else
  507                 sa = &sun_noname;
  508         bcopy(sa, *nam, sa->sa_len);
              /* unp_pcb_unlock_pair() also copes with unp2 == unp. */
  509         if (unp2 != NULL)
  510                 unp_pcb_unlock_pair(unp, unp2);
  511         else
  512                 UNP_PCB_UNLOCK(unp);
  513         return (0);
  514 }
  515 
  /*
   * Attach handler: create the unpcb for a new local-domain socket.
   *
   * If either socket buffer has no high-water mark yet, buffer space is
   * reserved using the per-type defaults (stream, dgram, seqpacket).  A zeroed
   * unpcb is then allocated without sleeping, cross-linked with the socket,
   * given an initial reference count of 1, and inserted at the head of the
   * global list for its socket type under the link write lock.
   *
   * Returns 0 on success, the soreserve() error if the reservation fails, or
   * ENOBUFS if the unpcb cannot be allocated.  Panics on an unknown so_type.
   * The 'proto' and 'td' arguments are not used.
   */
  516 static int
  517 uipc_attach(struct socket *so, int proto, struct thread *td)
  518 {
  519         u_long sendspace, recvspace;
  520         struct unpcb *unp;
  521         int error;
  522         bool locked;
  523 
  524         KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
  525         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  526                 switch (so->so_type) {
  527                 case SOCK_STREAM:
  528                         sendspace = unpst_sendspace;
  529                         recvspace = unpst_recvspace;
  530                         break;
  531 
  532                 case SOCK_DGRAM:
  533                         sendspace = unpdg_sendspace;
  534                         recvspace = unpdg_recvspace;
  535                         break;
  536 
  537                 case SOCK_SEQPACKET:
  538                         sendspace = unpsp_sendspace;
  539                         recvspace = unpsp_recvspace;
  540                         break;
  541 
  542                 default:
  543                         panic("uipc_attach");
  544                 }
  545                 error = soreserve(so, sendspace, recvspace);
  546                 if (error)
  547                         return (error);
  548         }
  549         unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
  550         if (unp == NULL)
  551                 return (ENOBUFS);
  552         LIST_INIT(&unp->unp_refs);
  553         UNP_PCB_LOCK_INIT(unp);
  554         unp->unp_socket = so;
  555         so->so_pcb = unp;
  556         refcount_init(&unp->unp_refcount, 1);
  557 
              /*
               * 'locked' records whether the link write lock was already owned on
               * entry; it is only acquired (and later released) here if not.
               */
  558         if ((locked = UNP_LINK_WOWNED()) == false)
  559                 UNP_LINK_WLOCK();
  560 
  561         unp->unp_gencnt = ++unp_gencnt;
  562         unp->unp_ino = ++unp_ino;
  563         unp_count++;
  564         switch (so->so_type) {
  565         case SOCK_STREAM:
  566                 LIST_INSERT_HEAD(&unp_shead, unp, unp_link);
  567                 break;
  568 
  569         case SOCK_DGRAM:
  570                 LIST_INSERT_HEAD(&unp_dhead, unp, unp_link);
  571                 break;
  572 
  573         case SOCK_SEQPACKET:
  574                 LIST_INSERT_HEAD(&unp_sphead, unp, unp_link);
  575                 break;
  576 
  577         default:
  578                 panic("uipc_attach");
  579         }
  580 
  581         if (locked == false)
  582                 UNP_LINK_WUNLOCK();
  583 
  584         return (0);
  585 }
  586 
  /*
   * Bind a local-domain socket to a file system name, looked up relative to
   * the directory descriptor 'fd'.
   *
   * A VSOCK vnode is created at the path carried in the sockaddr_un and is
   * associated with the socket's unpcb together with a private copy of the
   * address.  UNP_BINDING is set for the duration of the operation (the PCB
   * lock is dropped around the file system work) and is cleared on every exit
   * path taken after it was set.
   *
   * Errors returned directly by this function:
   *   EAFNOSUPPORT  address family is not AF_UNIX
   *   EINVAL        sun_len too large, empty path, or socket already bound
   *   EALREADY      another bind on this socket is in progress
   *   EADDRINUSE    the path already exists
   * Errors from namei(), vn_start_write(), the MAC check and VOP_CREATE() are
   * passed through, except ERELOOKUP from VOP_CREATE(), which restarts the
   * lookup.
   */
  587 static int
  588 uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
  589 {
  590         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  591         struct vattr vattr;
  592         int error, namelen;
  593         struct nameidata nd;
  594         struct unpcb *unp;
  595         struct vnode *vp;
  596         struct mount *mp;
  597         cap_rights_t rights;
  598         char *buf;
  599 
  600         if (nam->sa_family != AF_UNIX)
  601                 return (EAFNOSUPPORT);
  602 
  603         unp = sotounpcb(so);
  604         KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
  605 
  606         if (soun->sun_len > sizeof(struct sockaddr_un))
  607                 return (EINVAL);
              /* Length of the path portion only; it need not be NUL-terminated. */
  608         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  609         if (namelen <= 0)
  610                 return (EINVAL);
  611 
  612         /*
  613          * We don't allow simultaneous bind() calls on a single UNIX domain
  614          * socket, so flag in-progress operations, and return an error if an
  615          * operation is already in progress.
  616          *
  617          * Historically, we have not allowed a socket to be rebound, so this
  618          * also returns an error.  Not allowing re-binding simplifies the
  619          * implementation and avoids a great many possible failure modes.
  620          */
  621         UNP_PCB_LOCK(unp);
  622         if (unp->unp_vnode != NULL) {
  623                 UNP_PCB_UNLOCK(unp);
  624                 return (EINVAL);
  625         }
  626         if (unp->unp_flags & UNP_BINDING) {
  627                 UNP_PCB_UNLOCK(unp);
  628                 return (EALREADY);
  629         }
  630         unp->unp_flags |= UNP_BINDING;
  631         UNP_PCB_UNLOCK(unp);
  632 
              /* Make a NUL-terminated kernel copy of the path for namei(). */
  633         buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
  634         bcopy(soun->sun_path, buf, namelen);
  635         buf[namelen] = 0;
  636 
  637 restart:
  638         NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
  639             UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_BINDAT),
  640             td);
  641 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  642         error = namei(&nd);
  643         if (error)
  644                 goto error;
  645         vp = nd.ni_vp;
              /*
               * Either the name already exists, or a write could not be started
               * without waiting.  Release what the lookup returned; then fail with
               * EADDRINUSE in the former case, or wait and redo the lookup in the
               * latter.
               */
  646         if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
  647                 NDFREE(&nd, NDF_ONLY_PNBUF);
  648                 if (nd.ni_dvp == vp)
  649                         vrele(nd.ni_dvp);
  650                 else
  651                         vput(nd.ni_dvp);
  652                 if (vp != NULL) {
  653                         vrele(vp);
  654                         error = EADDRINUSE;
  655                         goto error;
  656                 }
  657                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
  658                 if (error)
  659                         goto error;
  660                 goto restart;
  661         }
              /* Create the socket node; mode is ACCESSPERMS masked by the process cmask. */
  662         VATTR_NULL(&vattr);
  663         vattr.va_type = VSOCK;
  664         vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_pd->pd_cmask);
  665 #ifdef MAC
  666         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  667             &vattr);
  668 #endif
              /* Without MAC, error is still 0 here from the successful namei(). */
  669         if (error == 0)
  670                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  671         NDFREE(&nd, NDF_ONLY_PNBUF);
  672         if (error) {
  673                 VOP_VPUT_PAIR(nd.ni_dvp, NULL, true);
  674                 vn_finished_write(mp);
  675                 if (error == ERELOOKUP)
  676                         goto restart;
  677                 goto error;
  678         }
  679         vp = nd.ni_vp;
  680         ASSERT_VOP_ELOCKED(vp, "uipc_bind");
              /* Duplicate the address before taking the PCB lock; M_WAITOK may sleep. */
  681         soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
  682 
              /* Publish the binding and clear the in-progress flag under the PCB lock. */
  683         UNP_PCB_LOCK(unp);
  684         VOP_UNP_BIND(vp, unp);
  685         unp->unp_vnode = vp;
  686         unp->unp_addr = soun;
  687         unp->unp_flags &= ~UNP_BINDING;
  688         UNP_PCB_UNLOCK(unp);
              /*
               * Take an extra vnode reference before VOP_VPUT_PAIR();
               * NOTE(review): presumably this is the reference kept on behalf of
               * unp->unp_vnode (uipc_close() does a matching vrele()) -- confirm.
               */
  689         vref(vp);
  690         VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
  691         vn_finished_write(mp);
  692         free(buf, M_TEMP);
  693         return (0);
  694 
  695 error:
              /* Common failure exit: allow a later bind attempt and free the path copy. */
  696         UNP_PCB_LOCK(unp);
  697         unp->unp_flags &= ~UNP_BINDING;
  698         UNP_PCB_UNLOCK(unp);
  699         free(buf, M_TEMP);
  700         return (error);
  701 }
  702 
  /*
   * Bind handler: same as uipc_bindat() with AT_FDCWD as the directory
   * descriptor.  Returns whatever uipc_bindat() returns.
   */
  703 static int
  704 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  705 {
  706 
  707         return (uipc_bindat(AT_FDCWD, so, nam, td));
  708 }
  709 
  /*
   * Connect handler: delegates to unp_connect() and returns its result.
   * The calling thread must be the one passed in 'td' (asserted).
   */
  710 static int
  711 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  712 {
  713         int error;
  714 
  715         KASSERT(td == curthread, ("uipc_connect: td != curthread"));
  716         error = unp_connect(so, nam, td);
  717         return (error);
  718 }
  719 
  /*
   * Connect-at handler: delegates to unp_connectat(), passing 'fd' through,
   * and returns its result.  The calling thread must be the one passed in
   * 'td' (asserted).
   */
  720 static int
  721 uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
  722     struct thread *td)
  723 {
  724         int error;
  725 
  726         KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
  727         error = unp_connectat(fd, so, nam, td);
  728         return (error);
  729 }
  730 
  /*
   * Close handler: detach the socket from its bound vnode (if any) and
   * disconnect it from its peer (if any).
   *
   * Lock order: the pool mutex associated with the vnode is taken before the
   * PCB lock.  Because unp_vnode is first read without the PCB lock, it is
   * re-checked once the PCB lock is held.
   */
  731 static void
  732 uipc_close(struct socket *so)
  733 {
  734         struct unpcb *unp, *unp2;
  735         struct vnode *vp = NULL;
  736         struct mtx *vplock;
  737 
  738         unp = sotounpcb(so);
  739         KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
  740 
  741         vplock = NULL;
  742         if ((vp = unp->unp_vnode) != NULL) {
  743                 vplock = mtx_pool_find(mtxpool_sleep, vp);
  744                 mtx_lock(vplock);
  745         }
  746         UNP_PCB_LOCK(unp);
              /* The vnode was detached by someone else before we got the PCB lock. */
  747         if (vp && unp->unp_vnode == NULL) {
  748                 mtx_unlock(vplock);
  749                 vp = NULL;
  750         }
  751         if (vp != NULL) {
  752                 VOP_UNP_DETACH(vp);
  753                 unp->unp_vnode = NULL;
  754         }
              /*
               * NOTE(review): no unlock follows unp_disconnect(), so it presumably
               * releases both PCB locks itself -- confirm against its definition.
               */
  755         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
  756                 unp_disconnect(unp, unp2);
  757         else
  758                 UNP_PCB_UNLOCK(unp);
              /* Drop the vnode pool lock and the vnode reference last. */
  759         if (vp) {
  760                 mtx_unlock(vplock);
  761                 vrele(vp);
  762         }
  763 }
  764 
  765 static int
  766 uipc_connect2(struct socket *so1, struct socket *so2)
  767 {
  768         struct unpcb *unp, *unp2;
  769         int error;
  770 
  771         unp = so1->so_pcb;
  772         KASSERT(unp != NULL, ("uipc_connect2: unp == NULL"));
  773         unp2 = so2->so_pcb;
  774         KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL"));
  775         unp_pcb_lock_pair(unp, unp2);
  776         error = unp_connect2(so1, so2, PRU_CONNECT2);
  777         unp_pcb_unlock_pair(unp, unp2);
  778         return (error);
  779 }
  780 
  781 static void
  782 uipc_detach(struct socket *so)
  783 {
  784         struct unpcb *unp, *unp2;
  785         struct mtx *vplock;
  786         struct vnode *vp;
  787         int local_unp_rights;
  788 
  789         unp = sotounpcb(so);
  790         KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
  791 
  792         vp = NULL;
  793         vplock = NULL;
  794 
  795         SOCK_LOCK(so);
  796         if (!SOLISTENING(so)) {
  797                 /*
  798                  * Once the socket is removed from the global lists,
  799                  * uipc_ready() will not be able to locate its socket buffer, so
  800                  * clear the buffer now.  At this point internalized rights have
  801                  * already been disposed of.
  802                  */
  803                 sbrelease(&so->so_rcv, so);
  804         }
  805         SOCK_UNLOCK(so);
  806 
  807         UNP_LINK_WLOCK();
  808         LIST_REMOVE(unp, unp_link);
  809         if (unp->unp_gcflag & UNPGC_DEAD)
  810                 LIST_REMOVE(unp, unp_dead);
  811         unp->unp_gencnt = ++unp_gencnt;
  812         --unp_count;
  813         UNP_LINK_WUNLOCK();
  814 
  815         UNP_PCB_UNLOCK_ASSERT(unp);
  816  restart:
  817         if ((vp = unp->unp_vnode) != NULL) {
  818                 vplock = mtx_pool_find(mtxpool_sleep, vp);
  819                 mtx_lock(vplock);
  820         }
  821         UNP_PCB_LOCK(unp);
  822         if (unp->unp_vnode != vp && unp->unp_vnode != NULL) {
  823                 if (vplock)
  824                         mtx_unlock(vplock);
  825                 UNP_PCB_UNLOCK(unp);
  826                 goto restart;
  827         }
  828         if ((vp = unp->unp_vnode) != NULL) {
  829                 VOP_UNP_DETACH(vp);
  830                 unp->unp_vnode = NULL;
  831         }
  832         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
  833                 unp_disconnect(unp, unp2);
  834         else
  835                 UNP_PCB_UNLOCK(unp);
  836 
  837         UNP_REF_LIST_LOCK();
  838         while (!LIST_EMPTY(&unp->unp_refs)) {
  839                 struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
  840 
  841                 unp_pcb_hold(ref);
  842                 UNP_REF_LIST_UNLOCK();
  843 
  844                 MPASS(ref != unp);
  845                 UNP_PCB_UNLOCK_ASSERT(ref);
  846                 unp_drop(ref);
  847                 UNP_REF_LIST_LOCK();
  848         }
  849         UNP_REF_LIST_UNLOCK();
  850 
  851         UNP_PCB_LOCK(unp);
  852         local_unp_rights = unp_rights;
  853         unp->unp_socket->so_pcb = NULL;
  854         unp->unp_socket = NULL;
  855         free(unp->unp_addr, M_SONAME);
  856         unp->unp_addr = NULL;
  857         if (!unp_pcb_rele(unp))
  858                 UNP_PCB_UNLOCK(unp);
  859         if (vp) {
  860                 mtx_unlock(vplock);
  861                 vrele(vp);
  862         }
  863         if (local_unp_rights)
  864                 taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
  865 }
  866 
  867 static int
  868 uipc_disconnect(struct socket *so)
  869 {
  870         struct unpcb *unp, *unp2;
  871 
  872         unp = sotounpcb(so);
  873         KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
  874 
  875         UNP_PCB_LOCK(unp);
  876         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
  877                 unp_disconnect(unp, unp2);
  878         else
  879                 UNP_PCB_UNLOCK(unp);
  880         return (0);
  881 }
  882 
  883 static int
  884 uipc_listen(struct socket *so, int backlog, struct thread *td)
  885 {
  886         struct unpcb *unp;
  887         int error;
  888 
  889         if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET)
  890                 return (EOPNOTSUPP);
  891 
  892         unp = sotounpcb(so);
  893         KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));
  894 
  895         UNP_PCB_LOCK(unp);
  896         if (unp->unp_vnode == NULL) {
  897                 /* Already connected or not bound to an address. */
  898                 error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
  899                 UNP_PCB_UNLOCK(unp);
  900                 return (error);
  901         }
  902 
  903         SOCK_LOCK(so);
  904         error = solisten_proto_check(so);
  905         if (error == 0) {
  906                 cru2xt(td, &unp->unp_peercred);
  907                 solisten_proto(so, backlog);
  908         }
  909         SOCK_UNLOCK(so);
  910         UNP_PCB_UNLOCK(unp);
  911         return (error);
  912 }
  913 
  914 static int
  915 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  916 {
  917         struct unpcb *unp, *unp2;
  918         const struct sockaddr *sa;
  919 
  920         unp = sotounpcb(so);
  921         KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));
  922 
  923         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  924         UNP_LINK_RLOCK();
  925         /*
  926          * XXX: It seems that this test always fails even when connection is
  927          * established.  So, this else clause is added as workaround to
  928          * return PF_LOCAL sockaddr.
  929          */
  930         unp2 = unp->unp_conn;
  931         if (unp2 != NULL) {
  932                 UNP_PCB_LOCK(unp2);
  933                 if (unp2->unp_addr != NULL)
  934                         sa = (struct sockaddr *) unp2->unp_addr;
  935                 else
  936                         sa = &sun_noname;
  937                 bcopy(sa, *nam, sa->sa_len);
  938                 UNP_PCB_UNLOCK(unp2);
  939         } else {
  940                 sa = &sun_noname;
  941                 bcopy(sa, *nam, sa->sa_len);
  942         }
  943         UNP_LINK_RUNLOCK();
  944         return (0);
  945 }
  946 
  /*
   * pru_rcvd handler for stream and seqpacket sockets.
   *
   * Samples the usage of this socket's receive buffer and, if it is below
   * the peer's send-buffer limits (sb_hiwat / sb_mbmax), clears SB_STOP on
   * the peer's send buffer; the peer's writers are woken up either way.
   * Does nothing if there is no peer.  Always returns 0; the flags argument
   * is not used.
   */
  947 static int
  948 uipc_rcvd(struct socket *so, int flags)
  949 {
  950         struct unpcb *unp, *unp2;
  951         struct socket *so2;
  952         u_int mbcnt, sbcc;
  953 
  954         unp = sotounpcb(so);
  955         KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
  956         KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
  957             ("%s: socktype %d", __func__, so->so_type));
  958 
  959         /*
  960          * Adjust backpressure on sender and wakeup any waiting to write.
  961          *
  962          * The unp lock is acquired to maintain the validity of the unp_conn
  963          * pointer; no lock on unp2 is required as unp2->unp_socket will be
  964          * static as long as we don't permit unp2 to disconnect from unp,
  965          * which is prevented by the lock on unp.  We cache values from
  966          * so_rcv to avoid holding the so_rcv lock over the entire
  967          * transaction on the remote so_snd.
  968          */
  969         SOCKBUF_LOCK(&so->so_rcv);
  970         mbcnt = so->so_rcv.sb_mbcnt;
  971         sbcc = sbavail(&so->so_rcv);
  972         SOCKBUF_UNLOCK(&so->so_rcv);
  973         /*
  974          * There is a benign race condition at this point.  If we're planning to
  975          * clear SB_STOP, but uipc_send is called on the connected socket at
  976          * this instant, it might add data to the sockbuf and set SB_STOP.  Then
  977          * we would erroneously clear SB_STOP below, even though the sockbuf is
  978          * full.  The race is benign because the only ill effect is to allow the
  979          * sockbuf to exceed its size limit, and the size limits are not
  980          * strictly guaranteed anyway.
  981          */
  982         UNP_PCB_LOCK(unp);
  983         unp2 = unp->unp_conn;
  984         if (unp2 == NULL) {
  985                 UNP_PCB_UNLOCK(unp);
  986                 return (0);
  987         }
  988         so2 = unp2->unp_socket;
  989         SOCKBUF_LOCK(&so2->so_snd);
  990         if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
  991                 so2->so_snd.sb_flags &= ~SB_STOP;
              /*
               * No SOCKBUF_UNLOCK follows: sowwakeup_locked() is expected to
               * release the so_snd lock taken above (TODO confirm).
               */
  992         sowwakeup_locked(so2);
  993         UNP_PCB_UNLOCK(unp);
  994         return (0);
  995 }
  996 
  /*
   * pru_send handler for datagram, stream and seqpacket sockets.
   *
   * so      - sending socket.
   * flags   - PRUS_* flags: PRUS_OOB is rejected with EOPNOTSUPP, PRUS_EOF
   *           additionally shuts down the sending side after the send,
   *           PRUS_NOTREADY leaves a leftover m for uipc_ready() to free.
   * m       - data mbuf chain; set to NULL here once it has been handed to
   *           the peer's receive buffer.
   * nam     - optional destination; when given, the socket is connected to
   *           it first via unp_connect() (for stream/seqpacket only if the
   *           socket is not already connected).  A datagram socket is
   *           disconnected again after the send.
   * control - optional control message; internalized with
   *           unp_internalize() before use.
   * td      - calling thread.
   *
   * Returns 0 on success or an errno value: EOPNOTSUPP, ENOTCONN, EPIPE,
   * ENOBUFS, or whatever unp_internalize()/unp_connect() reported.
   *
   * Whatever is still referenced by control or m on exit is freed at the
   * release label (m only when PRUS_NOTREADY is not set).
   */
  997 static int
  998 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  999     struct mbuf *control, struct thread *td)
 1000 {
 1001         struct unpcb *unp, *unp2;
 1002         struct socket *so2;
 1003         u_int mbcnt, sbcc;
 1004         int freed, error;
 1005 
 1006         unp = sotounpcb(so);
 1007         KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
 1008         KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM ||
 1009             so->so_type == SOCK_SEQPACKET,
 1010             ("%s: socktype %d", __func__, so->so_type));
 1011 
 1012         freed = error = 0;
 1013         if (flags & PRUS_OOB) {
 1014                 error = EOPNOTSUPP;
 1015                 goto release;
 1016         }
 1017         if (control != NULL && (error = unp_internalize(&control, td)))
 1018                 goto release;
 1019 
 1020         unp2 = NULL;
 1021         switch (so->so_type) {
 1022         case SOCK_DGRAM:
 1023         {
 1024                 const struct sockaddr *from;
 1025 
 1026                 if (nam != NULL) {
 1027                         error = unp_connect(so, nam, td);
 1028                         if (error != 0)
 1029                                 break;
 1030                 }
 1031                 UNP_PCB_LOCK(unp);
 1032 
 1033                 /*
 1034                  * Because connect() and send() are non-atomic in a sendto()
 1035                  * with a target address, it's possible that the socket will
 1036                  * have disconnected before the send() can run.  In that case
 1037                  * return the slightly counter-intuitive but otherwise
 1038                  * correct error that the socket is not connected.
 1039                  */
 1040                 unp2 = unp_pcb_lock_peer(unp);
 1041                 if (unp2 == NULL) {
 1042                         UNP_PCB_UNLOCK(unp);
 1043                         error = ENOTCONN;
 1044                         break;
 1045                 }
 1046 
 1047                 if (unp2->unp_flags & UNP_WANTCRED_MASK)
 1048                         control = unp_addsockcred(td, control,
 1049                             unp2->unp_flags);
                      /* Source address reported to the receiver. */
 1050                 if (unp->unp_addr != NULL)
 1051                         from = (struct sockaddr *)unp->unp_addr;
 1052                 else
 1053                         from = &sun_noname;
 1054                 so2 = unp2->unp_socket;
 1055                 SOCKBUF_LOCK(&so2->so_rcv);
                      /*
                       * On success the receive buffer owns m and control, so
                       * forget them here; sorwakeup_locked() is expected to
                       * drop the so_rcv lock (TODO confirm).  On failure the
                       * lock is dropped explicitly and both chains are freed
                       * on the way out.
                       */
 1056                 if (sbappendaddr_locked(&so2->so_rcv, from, m,
 1057                     control)) {
 1058                         sorwakeup_locked(so2);
 1059                         m = NULL;
 1060                         control = NULL;
 1061                 } else {
 1062                         SOCKBUF_UNLOCK(&so2->so_rcv);
 1063                         error = ENOBUFS;
 1064                 }
                      /*
                       * A connection set up only for this send (nam given) is
                       * torn down again; otherwise just release both PCB locks.
                       */
 1065                 if (nam != NULL)
 1066                         unp_disconnect(unp, unp2);
 1067                 else
 1068                         unp_pcb_unlock_pair(unp, unp2);
 1069                 break;
 1070         }
 1071 
 1072         case SOCK_SEQPACKET:
 1073         case SOCK_STREAM:
 1074                 if ((so->so_state & SS_ISCONNECTED) == 0) {
 1075                         if (nam != NULL) {
 1076                                 error = unp_connect(so, nam, td);
 1077                                 if (error != 0)
 1078                                         break;
 1079                         } else {
 1080                                 error = ENOTCONN;
 1081                                 break;
 1082                         }
 1083                 }
 1084 
 1085                 UNP_PCB_LOCK(unp);
 1086                 if ((unp2 = unp_pcb_lock_peer(unp)) == NULL) {
 1087                         UNP_PCB_UNLOCK(unp);
 1088                         error = ENOTCONN;
 1089                         break;
 1090                 } else if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 1091                         unp_pcb_unlock_pair(unp, unp2);
 1092                         error = EPIPE;
 1093                         break;
 1094                 }
                      /* From here on only the peer's PCB lock is held. */
 1095                 UNP_PCB_UNLOCK(unp);
 1096                 if ((so2 = unp2->unp_socket) == NULL) {
 1097                         UNP_PCB_UNLOCK(unp2);
 1098                         error = ENOTCONN;
 1099                         break;
 1100                 }
 1101                 SOCKBUF_LOCK(&so2->so_rcv);
 1102                 if (unp2->unp_flags & UNP_WANTCRED_MASK) {
 1103                         /*
 1104                          * Credentials are passed only once on SOCK_STREAM and
 1105                          * SOCK_SEQPACKET (LOCAL_CREDS => WANTCRED_ONESHOT), or
 1106                          * forever (LOCAL_CREDS_PERSISTENT => WANTCRED_ALWAYS).
 1107                          */
 1108                         control = unp_addsockcred(td, control, unp2->unp_flags);
 1109                         unp2->unp_flags &= ~UNP_WANTCRED_ONESHOT;
 1110                 }
 1111 
 1112                 /*
 1113                  * Send to paired receive port and wake up readers.  Don't
 1114                  * check for space available in the receive buffer if we're
 1115                  * attaching ancillary data; Unix domain sockets only check
 1116                  * for space in the sending sockbuf, and that check is
 1117                  * performed one level up the stack.  At that level we cannot
 1118                  * precisely account for the amount of buffer space used
 1119                  * (e.g., because control messages are not yet internalized).
 1120                  */
 1121                 switch (so->so_type) {
 1122                 case SOCK_STREAM:
 1123                         if (control != NULL) {
 1124                                 sbappendcontrol_locked(&so2->so_rcv, m,
 1125                                     control, flags);
 1126                                 control = NULL;
 1127                         } else
 1128                                 sbappend_locked(&so2->so_rcv, m, flags);
 1129                         break;
 1130 
 1131                 case SOCK_SEQPACKET:
 1132                         if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
 1133                             &sun_noname, m, control))
 1134                                 control = NULL;
 1135                         break;
 1136                 }
 1137 
                      /*
                       * Cache the receiver's buffer usage while so_rcv is still
                       * locked; it is compared against our send limits below.
                       */
 1138                 mbcnt = so2->so_rcv.sb_mbcnt;
 1139                 sbcc = sbavail(&so2->so_rcv);
 1140                 if (sbcc)
 1141                         sorwakeup_locked(so2);
 1142                 else
 1143                         SOCKBUF_UNLOCK(&so2->so_rcv);
 1144 
 1145                 /*
 1146                  * The PCB lock on unp2 protects the SB_STOP flag.  Without it,
 1147                  * it would be possible for uipc_rcvd to be called at this
 1148                  * point, drain the receiving sockbuf, clear SB_STOP, and then
 1149                  * we would set SB_STOP below.  That could lead to an empty
 1150                  * sockbuf having SB_STOP set.
 1151                  */
 1152                 SOCKBUF_LOCK(&so->so_snd);
 1153                 if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
 1154                         so->so_snd.sb_flags |= SB_STOP;
 1155                 SOCKBUF_UNLOCK(&so->so_snd);
 1156                 UNP_PCB_UNLOCK(unp2);
                      /* m is no longer ours on this path; never free it below. */
 1157                 m = NULL;
 1158                 break;
 1159         }
 1160 
 1161         /*
 1162          * PRUS_EOF is equivalent to pru_send followed by pru_shutdown.
 1163          */
 1164         if (flags & PRUS_EOF) {
 1165                 UNP_PCB_LOCK(unp);
 1166                 socantsendmore(so);
 1167                 unp_shutdown(unp);
 1168                 UNP_PCB_UNLOCK(unp);
 1169         }
              /*
               * A control chain that survives a failed send is handed to
               * unp_dispose_mbuf() before being freed; presumably this releases
               * whatever the internalized message carries (TODO confirm).  The
               * early "goto release" paths above skip this step.
               */
 1170         if (control != NULL && error != 0)
 1171                 unp_dispose_mbuf(control);
 1172 
 1173 release:
 1174         if (control != NULL)
 1175                 m_freem(control);
 1176         /*
 1177          * In case of PRUS_NOTREADY, uipc_ready() is responsible
 1178          * for freeing memory.
 1179          */   
 1180         if (m != NULL && (flags & PRUS_NOTREADY) == 0)
 1181                 m_freem(m);
 1182         return (error);
 1183 }
 1184 
 /*
  * Helper for uipc_ready(): look for mbuf m in the receive buffer of so.
  *
  * If m is found, sbready(sb, m, count) is called and its result is stored
  * in *errorp.  Returns true when m was found, false when so is a listening
  * socket, the buffer has no not-ready data (sb_fnrdy == NULL), or m is not
  * in the buffer.  *errorp is left untouched when false is returned.
  */
 1185 static bool
 1186 uipc_ready_scan(struct socket *so, struct mbuf *m, int count, int *errorp)
 1187 {
 1188         struct mbuf *mb, *n;
 1189         struct sockbuf *sb;
 1190 
 1191         SOCK_LOCK(so);
 1192         if (SOLISTENING(so)) {
 1193                 SOCK_UNLOCK(so);
 1194                 return (false);
 1195         }
 1196         mb = NULL;
 1197         sb = &so->so_rcv;
 1198         SOCKBUF_LOCK(sb);
 1199         if (sb->sb_fnrdy != NULL) {
                      /*
                       * Walk every mbuf of every packet: follow m_next within a
                       * packet and, at its end, continue with the next packet,
                       * whose head was remembered in n via m_nextpkt.  mb ends up
                       * NULL only if the whole buffer was searched without a hit.
                       */
 1200                 for (mb = sb->sb_mb, n = mb->m_nextpkt; mb != NULL;) {
 1201                         if (mb == m) {
 1202                                 *errorp = sbready(sb, m, count);
 1203                                 break;
 1204                         }
 1205                         mb = mb->m_next;
 1206                         if (mb == NULL) {
 1207                                 mb = n;
 1208                                 if (mb != NULL)
 1209                                         n = mb->m_nextpkt;
 1210                         }
 1211                 }
 1212         }
 1213         SOCKBUF_UNLOCK(sb);
 1214         SOCK_UNLOCK(so);
 1215         return (mb != NULL);
 1216 }
 1217 
 /*
  * pru_ready handler (stream sockets only): mark count mbufs starting at m
  * as ready.
  *
  * With a connected peer, sbready() is applied to the peer's receive buffer
  * and readers are woken on success.  Without a peer, every PCB on
  * unp_shead is searched with uipc_ready_scan(); if no socket buffer holds
  * m, the mbufs are freed and ECONNRESET is returned.
  *
  * Returns the result of sbready(), or ECONNRESET as described above.
  */
 1218 static int
 1219 uipc_ready(struct socket *so, struct mbuf *m, int count)
 1220 {
 1221         struct unpcb *unp, *unp2;
 1222         struct socket *so2;
 1223         int error, i;
 1224 
 1225         unp = sotounpcb(so);
 1226 
 1227         KASSERT(so->so_type == SOCK_STREAM,
 1228             ("%s: unexpected socket type for %p", __func__, so));
 1229 
 1230         UNP_PCB_LOCK(unp);
 1231         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
                      /* Only the peer's PCB lock is needed from here on. */
 1232                 UNP_PCB_UNLOCK(unp);
 1233                 so2 = unp2->unp_socket;
 1234                 SOCKBUF_LOCK(&so2->so_rcv);
                      /*
                       * sorwakeup_locked() is expected to drop the so_rcv lock
                       * (TODO confirm); the failure path drops it explicitly.
                       */
 1235                 if ((error = sbready(&so2->so_rcv, m, count)) == 0)
 1236                         sorwakeup_locked(so2);
 1237                 else
 1238                         SOCKBUF_UNLOCK(&so2->so_rcv);
 1239                 UNP_PCB_UNLOCK(unp2);
 1240                 return (error);
 1241         }
 1242         UNP_PCB_UNLOCK(unp);
 1243 
 1244         /*
 1245          * The receiving socket has been disconnected, but may still be valid.
 1246          * In this case, the now-ready mbufs are still present in its socket
 1247          * buffer, so perform an exhaustive search before giving up and freeing
 1248          * the mbufs.
 1249          */
 1250         UNP_LINK_RLOCK();
              /* Note: unp is reused as the list cursor from here on. */
 1251         LIST_FOREACH(unp, &unp_shead, unp_link) {
 1252                 if (uipc_ready_scan(unp->unp_socket, m, count, &error))
 1253                         break;
 1254         }
 1255         UNP_LINK_RUNLOCK();
 1256 
              /*
               * unp is NULL only when the loop ran off the end of the list,
               * i.e. no socket buffer contained m.  In that case error has not
               * been set by uipc_ready_scan(), so it is assigned here.
               */
 1257         if (unp == NULL) {
 1258                 for (i = 0; i < count; i++)
 1259                         m = m_free(m);
 1260                 error = ECONNRESET;
 1261         }
 1262         return (error);
 1263 }
 1264 
 1265 static int
 1266 uipc_sense(struct socket *so, struct stat *sb)
 1267 {
 1268         struct unpcb *unp;
 1269 
 1270         unp = sotounpcb(so);
 1271         KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
 1272 
 1273         sb->st_blksize = so->so_snd.sb_hiwat;
 1274         sb->st_dev = NODEV;
 1275         sb->st_ino = unp->unp_ino;
 1276         return (0);
 1277 }
 1278 
 1279 static int
 1280 uipc_shutdown(struct socket *so)
 1281 {
 1282         struct unpcb *unp;
 1283 
 1284         unp = sotounpcb(so);
 1285         KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));
 1286 
 1287         UNP_PCB_LOCK(unp);
 1288         socantsendmore(so);
 1289         unp_shutdown(unp);
 1290         UNP_PCB_UNLOCK(unp);
 1291         return (0);
 1292 }
 1293 
 1294 static int
 1295 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 1296 {
 1297         struct unpcb *unp;
 1298         const struct sockaddr *sa;
 1299 
 1300         unp = sotounpcb(so);
 1301         KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL"));
 1302 
 1303         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 1304         UNP_PCB_LOCK(unp);
 1305         if (unp->unp_addr != NULL)
 1306                 sa = (struct sockaddr *) unp->unp_addr;
 1307         else
 1308                 sa = &sun_noname;
 1309         bcopy(sa, *nam, sa->sa_len);
 1310         UNP_PCB_UNLOCK(unp);
 1311         return (0);
 1312 }
 1313 
 1314 static struct pr_usrreqs uipc_usrreqs_dgram = {
 1315         .pru_abort =            uipc_abort,
 1316         .pru_accept =           uipc_accept,
 1317         .pru_attach =           uipc_attach,
 1318         .pru_bind =             uipc_bind,
 1319         .pru_bindat =           uipc_bindat,
 1320         .pru_connect =          uipc_connect,
 1321         .pru_connectat =        uipc_connectat,
 1322         .pru_connect2 =         uipc_connect2,
 1323         .pru_detach =           uipc_detach,
 1324         .pru_disconnect =       uipc_disconnect,
 1325         .pru_listen =           uipc_listen,
 1326         .pru_peeraddr =         uipc_peeraddr,
 1327         .pru_rcvd =             uipc_rcvd,
 1328         .pru_send =             uipc_send,
 1329         .pru_sense =            uipc_sense,
 1330         .pru_shutdown =         uipc_shutdown,
 1331         .pru_sockaddr =         uipc_sockaddr,
 1332         .pru_soreceive =        soreceive_dgram,
 1333         .pru_close =            uipc_close,
 1334 };
 1335 
 1336 static struct pr_usrreqs uipc_usrreqs_seqpacket = {
 1337         .pru_abort =            uipc_abort,
 1338         .pru_accept =           uipc_accept,
 1339         .pru_attach =           uipc_attach,
 1340         .pru_bind =             uipc_bind,
 1341         .pru_bindat =           uipc_bindat,
 1342         .pru_connect =          uipc_connect,
 1343         .pru_connectat =        uipc_connectat,
 1344         .pru_connect2 =         uipc_connect2,
 1345         .pru_detach =           uipc_detach,
 1346         .pru_disconnect =       uipc_disconnect,
 1347         .pru_listen =           uipc_listen,
 1348         .pru_peeraddr =         uipc_peeraddr,
 1349         .pru_rcvd =             uipc_rcvd,
 1350         .pru_send =             uipc_send,
 1351         .pru_sense =            uipc_sense,
 1352         .pru_shutdown =         uipc_shutdown,
 1353         .pru_sockaddr =         uipc_sockaddr,
 1354         .pru_soreceive =        soreceive_generic,      /* XXX: or...? */
 1355         .pru_close =            uipc_close,
 1356 };
 1357 
 1358 static struct pr_usrreqs uipc_usrreqs_stream = {
 1359         .pru_abort =            uipc_abort,
 1360         .pru_accept =           uipc_accept,
 1361         .pru_attach =           uipc_attach,
 1362         .pru_bind =             uipc_bind,
 1363         .pru_bindat =           uipc_bindat,
 1364         .pru_connect =          uipc_connect,
 1365         .pru_connectat =        uipc_connectat,
 1366         .pru_connect2 =         uipc_connect2,
 1367         .pru_detach =           uipc_detach,
 1368         .pru_disconnect =       uipc_disconnect,
 1369         .pru_listen =           uipc_listen,
 1370         .pru_peeraddr =         uipc_peeraddr,
 1371         .pru_rcvd =             uipc_rcvd,
 1372         .pru_send =             uipc_send,
 1373         .pru_ready =            uipc_ready,
 1374         .pru_sense =            uipc_sense,
 1375         .pru_shutdown =         uipc_shutdown,
 1376         .pru_sockaddr =         uipc_sockaddr,
 1377         .pru_soreceive =        soreceive_generic,
 1378         .pru_close =            uipc_close,
 1379 };
 1380 
 /*
  * Socket-option handler for UNIX-domain sockets (level SOL_LOCAL only).
  *
  * SOPT_GET supports LOCAL_PEERCRED, LOCAL_CREDS, LOCAL_CREDS_PERSISTENT
  * and LOCAL_CONNWAIT.  SOPT_SET supports the latter three; LOCAL_CREDS and
  * LOCAL_CREDS_PERSISTENT are mutually exclusive.
  *
  * Returns 0 on success, or:
  *   EINVAL       - level is not SOL_LOCAL; LOCAL_PEERCRED on a non-stream
  *                  socket without peer credentials; enabling one of the
  *                  two credential options while the other is set.
  *   ENOTCONN     - LOCAL_PEERCRED on a stream socket without peer
  *                  credentials.
  *   EOPNOTSUPP   - unknown option name on SOPT_GET, or unknown direction.
  *   ENOPROTOOPT  - unknown option name on SOPT_SET.
  *   any error from sooptcopyin()/sooptcopyout().
  */
 1381 static int
 1382 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 1383 {
 1384         struct unpcb *unp;
 1385         struct xucred xu;
 1386         int error, optval;
 1387 
 1388         if (sopt->sopt_level != SOL_LOCAL)
 1389                 return (EINVAL);
 1390 
 1391         unp = sotounpcb(so);
 1392         KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
 1393         error = 0;
 1394         switch (sopt->sopt_dir) {
 1395         case SOPT_GET:
 1396                 switch (sopt->sopt_name) {
 1397                 case LOCAL_PEERCRED:
                              /*
                               * Snapshot the credentials under the PCB lock and
                               * copy them out after dropping it.  xu is only
                               * read when error is still 0, i.e. when it was set.
                               */
 1398                         UNP_PCB_LOCK(unp);
 1399                         if (unp->unp_flags & UNP_HAVEPC)
 1400                                 xu = unp->unp_peercred;
 1401                         else {
 1402                                 if (so->so_type == SOCK_STREAM)
 1403                                         error = ENOTCONN;
 1404                                 else
 1405                                         error = EINVAL;
 1406                         }
 1407                         UNP_PCB_UNLOCK(unp);
 1408                         if (error == 0)
 1409                                 error = sooptcopyout(sopt, &xu, sizeof(xu));
 1410                         break;
 1411 
 1412                 case LOCAL_CREDS:
 1413                         /* Unlocked read. */
 1414                         optval = unp->unp_flags & UNP_WANTCRED_ONESHOT ? 1 : 0;
 1415                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 1416                         break;
 1417 
 1418                 case LOCAL_CREDS_PERSISTENT:
 1419                         /* Unlocked read. */
 1420                         optval = unp->unp_flags & UNP_WANTCRED_ALWAYS ? 1 : 0;
 1421                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 1422                         break;
 1423 
 1424                 case LOCAL_CONNWAIT:
 1425                         /* Unlocked read. */
 1426                         optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
 1427                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 1428                         break;
 1429 
 1430                 default:
 1431                         error = EOPNOTSUPP;
 1432                         break;
 1433                 }
 1434                 break;
 1435 
 1436         case SOPT_SET:
 1437                 switch (sopt->sopt_name) {
 1438                 case LOCAL_CREDS:
 1439                 case LOCAL_CREDS_PERSISTENT:
 1440                 case LOCAL_CONNWAIT:
 1441                         error = sooptcopyin(sopt, &optval, sizeof(optval),
 1442                                             sizeof(optval));
 1443                         if (error)
 1444                                 break;
 1445 
 /*
  * OPTSET(bit, exclusive): under the PCB lock, set bit in unp_flags when
  * optval is non-zero and clear it otherwise.  Setting fails with EINVAL if
  * any flag in exclusive is currently set.  The "break" on that path leaves
  * only the macro's own do/while (0); it skips the trailing unlock, which is
  * correct because the PCB lock has already been released just above it.
  */
 1446 #define OPTSET(bit, exclusive) do {                                     \
 1447         UNP_PCB_LOCK(unp);                                              \
 1448         if (optval) {                                                   \
 1449                 if ((unp->unp_flags & (exclusive)) != 0) {              \
 1450                         UNP_PCB_UNLOCK(unp);                            \
 1451                         error = EINVAL;                                 \
 1452                         break;                                          \
 1453                 }                                                       \
 1454                 unp->unp_flags |= (bit);                                \
 1455         } else                                                          \
 1456                 unp->unp_flags &= ~(bit);                               \
 1457         UNP_PCB_UNLOCK(unp);                                            \
 1458 } while (0)
 1459 
 1460                         switch (sopt->sopt_name) {
 1461                         case LOCAL_CREDS:
 1462                                 OPTSET(UNP_WANTCRED_ONESHOT, UNP_WANTCRED_ALWAYS);
 1463                                 break;
 1464 
 1465                         case LOCAL_CREDS_PERSISTENT:
 1466                                 OPTSET(UNP_WANTCRED_ALWAYS, UNP_WANTCRED_ONESHOT);
 1467                                 break;
 1468 
 1469                         case LOCAL_CONNWAIT:
 1470                                 OPTSET(UNP_CONNWAIT, 0);
 1471                                 break;
 1472 
 1473                         default:
 1474                                 break;
 1475                         }
 1476                         break;
 1477 #undef  OPTSET
 1478                 default:
 1479                         error = ENOPROTOOPT;
 1480                         break;
 1481                 }
 1482                 break;
 1483 
 1484         default:
 1485                 error = EOPNOTSUPP;
 1486                 break;
 1487         }
 1488         return (error);
 1489 }
 1490 
 1491 static int
 1492 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 1493 {
 1494 
 1495         return (unp_connectat(AT_FDCWD, so, nam, td));
 1496 }
 1497 
/*
 * Connect socket `so' to the local-domain socket bound at the path carried
 * in `nam'.  The path is looked up with namei(), using `fd' as the
 * directory descriptor handed to NDINIT_ATRIGHTS().
 *
 * Argument checks: the address family must be AF_UNIX (else EAFNOSUPPORT),
 * sa_len must not exceed sizeof(struct sockaddr_un) and the path must be
 * non-empty (else EINVAL).
 *
 * State checks: EISCONN if the PCB already has a peer, EALREADY if
 * UNP_CONNECTING is already set.  Both of these return before
 * UNP_CONNECTING is set by this call; every later exit goes through the
 * `bad' label, which clears the flag again.
 *
 * Lookup checks: the vnode must be of type VSOCK (else ENOTSOCK) and must
 * pass VOP_ACCESS() for VWRITE.  ECONNREFUSED is returned when no PCB is
 * attached to the vnode, or, for PR_CONNREQUIRED protocols, when the
 * target is not accepting connections or sonewconn() fails.  EPROTOTYPE is
 * returned when the two sockets differ in type.
 *
 * Returns 0 on success or one of the errno values above (or whatever
 * namei(), the MAC check, VOP_ACCESS() or unp_connect2() reports).
 */
static int
unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
    struct thread *td)
{
        struct mtx *vplock;
        struct sockaddr_un *soun;
        struct vnode *vp;
        struct socket *so2;
        struct unpcb *unp, *unp2, *unp3;
        struct nameidata nd;
        char buf[SOCK_MAXADDRLEN];
        struct sockaddr *sa;
        cap_rights_t rights;
        int error, len;
        bool connreq;

        if (nam->sa_family != AF_UNIX)
                return (EAFNOSUPPORT);
        if (nam->sa_len > sizeof(struct sockaddr_un))
                return (EINVAL);
        len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
        if (len <= 0)
                return (EINVAL);
        /* Work on a private, NUL-terminated copy of the path. */
        soun = (struct sockaddr_un *)nam;
        bcopy(soun->sun_path, buf, len);
        buf[len] = 0;

        unp = sotounpcb(so);
        UNP_PCB_LOCK(unp);
        for (;;) {
                /*
                 * Wait for connection state to stabilize.  If a connection
                 * already exists, give up.  For datagram sockets, which permit
                 * multiple consecutive connect(2) calls, upper layers are
                 * responsible for disconnecting in advance of a subsequent
                 * connect(2), but this is not synchronized with PCB connection
                 * state.
                 *
                 * Also make sure that no threads are currently attempting to
                 * lock the peer socket, to ensure that unp_conn cannot
                 * transition between two valid sockets while locks are dropped.
                 */
                if (unp->unp_conn != NULL) {
                        UNP_PCB_UNLOCK(unp);
                        return (EISCONN);
                }
                if ((unp->unp_flags & UNP_CONNECTING) != 0) {
                        UNP_PCB_UNLOCK(unp);
                        return (EALREADY);
                }
                if (unp->unp_pairbusy > 0) {
                        unp->unp_flags |= UNP_WAITING;
                        mtx_sleep(unp, UNP_PCB_LOCKPTR(unp), 0, "unpeer", 0);
                        continue;
                }
                break;
        }
        /* Mark the attempt in progress; the PCB lock is dropped from here. */
        unp->unp_flags |= UNP_CONNECTING;
        UNP_PCB_UNLOCK(unp);

        /*
         * For connection-oriented protocols, allocate the address buffer
         * for the new socket now (M_WAITOK), before any mutex is taken.
         * It is either handed to the new PCB below or freed at `bad'.
         */
        connreq = (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0;
        if (connreq)
                sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
        else
                sa = NULL;
        NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
            UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_CONNECTAT),
            td);
        error = namei(&nd);
        /* Leave vp NULL on failure so the cleanup path skips vput(). */
        if (error)
                vp = NULL;
        else
                vp = nd.ni_vp;
        ASSERT_VOP_LOCKED(vp, "unp_connect");
        NDFREE_NOTHING(&nd);
        if (error)
                goto bad;

        if (vp->v_type != VSOCK) {
                error = ENOTSOCK;
                goto bad;
        }
#ifdef MAC
        error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD);
        if (error)
                goto bad;
#endif
        error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
        if (error)
                goto bad;

        unp = sotounpcb(so);
        KASSERT(unp != NULL, ("unp_connect: unp == NULL"));

        /* Fetch the PCB attached to the vnode under the pooled vnode mutex. */
        vplock = mtx_pool_find(mtxpool_sleep, vp);
        mtx_lock(vplock);
        VOP_UNP_CONNECT(vp, &unp2);
        if (unp2 == NULL) {
                error = ECONNREFUSED;
                goto bad2;
        }
        so2 = unp2->unp_socket;
        if (so->so_type != so2->so_type) {
                error = EPROTOTYPE;
                goto bad2;
        }
        if (connreq) {
                /*
                 * The target must be listening; the connection is made to
                 * a fresh socket spawned from it rather than to the
                 * listener itself.
                 */
                if (so2->so_options & SO_ACCEPTCONN) {
                        CURVNET_SET(so2->so_vnet);
                        so2 = sonewconn(so2, 0);
                        CURVNET_RESTORE();
                } else
                        so2 = NULL;
                if (so2 == NULL) {
                        error = ECONNREFUSED;
                        goto bad2;
                }
                unp3 = sotounpcb(so2);
                unp_pcb_lock_pair(unp2, unp3);
                if (unp2->unp_addr != NULL) {
                        /*
                         * Give the new socket a copy of the listener's
                         * address.  Ownership of sa moves to unp3, so it
                         * is cleared to keep the exit path from freeing it.
                         */
                        bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
                        unp3->unp_addr = (struct sockaddr_un *) sa;
                        sa = NULL;
                }

                unp_copy_peercred(td, unp3, unp, unp2);

                /* Done with the listener; continue with the new PCB. */
                UNP_PCB_UNLOCK(unp2);
                unp2 = unp3;

                /*
                 * It is safe to block on the PCB lock here since unp2 is
                 * nascent and cannot be connected to any other sockets.
                 */
                UNP_PCB_LOCK(unp);
#ifdef MAC
                mac_socketpeer_set_from_socket(so, so2);
                mac_socketpeer_set_from_socket(so2, so);
#endif
        } else {
                unp_pcb_lock_pair(unp, unp2);
        }
        KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
            sotounpcb(so2) == unp2,
            ("%s: unp2 %p so2 %p", __func__, unp2, so2));
        error = unp_connect2(so, so2, PRU_CONNECT);
        unp_pcb_unlock_pair(unp, unp2);
bad2:
        mtx_unlock(vplock);
bad:
        /* Common exit: release the vnode and any unused address buffer. */
        if (vp != NULL) {
                vput(vp);
        }
        free(sa, M_SONAME);
        /* Clear the in-progress marker set before the locks were dropped. */
        UNP_PCB_LOCK(unp);
        KASSERT((unp->unp_flags & UNP_CONNECTING) != 0,
            ("%s: unp %p has UNP_CONNECTING clear", __func__, unp));
        unp->unp_flags &= ~UNP_CONNECTING;
        UNP_PCB_UNLOCK(unp);
        return (error);
}
 1659 
 1660 /*
 1661  * Set socket peer credentials at connection time.
 1662  *
 1663  * The client's PCB credentials are copied from its process structure.  The
 1664  * server's PCB credentials are copied from the socket on which it called
 1665  * listen(2).  uipc_listen cached that process's credentials at the time.
 1666  */
 1667 void
 1668 unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
 1669     struct unpcb *server_unp, struct unpcb *listen_unp)
 1670 {
 1671         cru2xt(td, &client_unp->unp_peercred);
 1672         client_unp->unp_flags |= UNP_HAVEPC;
 1673 
 1674         memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred,
 1675             sizeof(server_unp->unp_peercred));
 1676         server_unp->unp_flags |= UNP_HAVEPC;
 1677         client_unp->unp_flags |= (listen_unp->unp_flags & UNP_WANTCRED_MASK);
 1678 }
 1679 
 1680 static int
 1681 unp_connect2(struct socket *so, struct socket *so2, int req)
 1682 {
 1683         struct unpcb *unp;
 1684         struct unpcb *unp2;
 1685 
 1686         unp = sotounpcb(so);
 1687         KASSERT(unp != NULL, ("unp_connect2: unp == NULL"));
 1688         unp2 = sotounpcb(so2);
 1689         KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL"));
 1690 
 1691         UNP_PCB_LOCK_ASSERT(unp);
 1692         UNP_PCB_LOCK_ASSERT(unp2);
 1693         KASSERT(unp->unp_conn == NULL,
 1694             ("%s: socket %p is already connected", __func__, unp));
 1695 
 1696         if (so2->so_type != so->so_type)
 1697                 return (EPROTOTYPE);
 1698         unp->unp_conn = unp2;
 1699         unp_pcb_hold(unp2);
 1700         unp_pcb_hold(unp);
 1701         switch (so->so_type) {
 1702         case SOCK_DGRAM:
 1703                 UNP_REF_LIST_LOCK();
 1704                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
 1705                 UNP_REF_LIST_UNLOCK();
 1706                 soisconnected(so);
 1707                 break;
 1708 
 1709         case SOCK_STREAM:
 1710         case SOCK_SEQPACKET:
 1711                 KASSERT(unp2->unp_conn == NULL,
 1712                     ("%s: socket %p is already connected", __func__, unp2));
 1713                 unp2->unp_conn = unp;
 1714                 if (req == PRU_CONNECT &&
 1715                     ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
 1716                         soisconnecting(so);
 1717                 else
 1718                         soisconnected(so);
 1719                 soisconnected(so2);
 1720                 break;
 1721 
 1722         default:
 1723                 panic("unp_connect2");
 1724         }
 1725         return (0);
 1726 }
 1727 
 1728 static void
 1729 unp_disconnect(struct unpcb *unp, struct unpcb *unp2)
 1730 {
 1731         struct socket *so, *so2;
 1732 #ifdef INVARIANTS
 1733         struct unpcb *unptmp;
 1734 #endif
 1735 
 1736         UNP_PCB_LOCK_ASSERT(unp);
 1737         UNP_PCB_LOCK_ASSERT(unp2);
 1738         KASSERT(unp->unp_conn == unp2,
 1739             ("%s: unpcb %p is not connected to %p", __func__, unp, unp2));
 1740 
 1741         unp->unp_conn = NULL;
 1742         so = unp->unp_socket;
 1743         so2 = unp2->unp_socket;
 1744         switch (unp->unp_socket->so_type) {
 1745         case SOCK_DGRAM:
 1746                 UNP_REF_LIST_LOCK();
 1747 #ifdef INVARIANTS
 1748                 LIST_FOREACH(unptmp, &unp2->unp_refs, unp_reflink) {
 1749                         if (unptmp == unp)
 1750                                 break;
 1751                 }
 1752                 KASSERT(unptmp != NULL,
 1753                     ("%s: %p not found in reflist of %p", __func__, unp, unp2));
 1754 #endif
 1755                 LIST_REMOVE(unp, unp_reflink);
 1756                 UNP_REF_LIST_UNLOCK();
 1757                 if (so) {
 1758                         SOCK_LOCK(so);
 1759                         so->so_state &= ~SS_ISCONNECTED;
 1760                         SOCK_UNLOCK(so);
 1761                 }
 1762                 break;
 1763 
 1764         case SOCK_STREAM:
 1765         case SOCK_SEQPACKET:
 1766                 if (so)
 1767                         soisdisconnected(so);
 1768                 MPASS(unp2->unp_conn == unp);
 1769                 unp2->unp_conn = NULL;
 1770                 if (so2)
 1771                         soisdisconnected(so2);
 1772                 break;
 1773         }
 1774 
 1775         if (unp == unp2) {
 1776                 unp_pcb_rele_notlast(unp);
 1777                 if (!unp_pcb_rele(unp))
 1778                         UNP_PCB_UNLOCK(unp);
 1779         } else {
 1780                 if (!unp_pcb_rele(unp))
 1781                         UNP_PCB_UNLOCK(unp);
 1782                 if (!unp_pcb_rele(unp2))
 1783                         UNP_PCB_UNLOCK(unp2);
 1784         }
 1785 }
 1786 
 1787 /*
 1788  * unp_pcblist() walks the global list of struct unpcb's to generate a
 1789  * pointer list, bumping the refcount on each unpcb.  It then copies them out
 1790  * sequentially, validating the generation number on each to see if it has
 1791  * been detached.  All of this is necessary because copyout() may sleep on
 1792  * disk I/O.
 1793  */
 1794 static int
 1795 unp_pcblist(SYSCTL_HANDLER_ARGS)
 1796 {
 1797         struct unpcb *unp, **unp_list;
 1798         unp_gen_t gencnt;
 1799         struct xunpgen *xug;
 1800         struct unp_head *head;
 1801         struct xunpcb *xu;
 1802         u_int i;
 1803         int error, n;
 1804 
 1805         switch ((intptr_t)arg1) {
 1806         case SOCK_STREAM:
 1807                 head = &unp_shead;
 1808                 break;
 1809 
 1810         case SOCK_DGRAM:
 1811                 head = &unp_dhead;
 1812                 break;
 1813 
 1814         case SOCK_SEQPACKET:
 1815                 head = &unp_sphead;
 1816                 break;
 1817 
 1818         default:
 1819                 panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1);
 1820         }
 1821 
 1822         /*
 1823          * The process of preparing the PCB list is too time-consuming and
 1824          * resource-intensive to repeat twice on every request.
 1825          */
 1826         if (req->oldptr == NULL) {
 1827                 n = unp_count;
 1828                 req->oldidx = 2 * (sizeof *xug)
 1829                         + (n + n/8) * sizeof(struct xunpcb);
 1830                 return (0);
 1831         }
 1832 
 1833         if (req->newptr != NULL)
 1834                 return (EPERM);
 1835 
 1836         /*
 1837          * OK, now we're committed to doing something.
 1838          */
 1839         xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK | M_ZERO);
 1840         UNP_LINK_RLOCK();
 1841         gencnt = unp_gencnt;
 1842         n = unp_count;
 1843         UNP_LINK_RUNLOCK();
 1844 
 1845         xug->xug_len = sizeof *xug;
 1846         xug->xug_count = n;
 1847         xug->xug_gen = gencnt;
 1848         xug->xug_sogen = so_gencnt;
 1849         error = SYSCTL_OUT(req, xug, sizeof *xug);
 1850         if (error) {
 1851                 free(xug, M_TEMP);
 1852                 return (error);
 1853         }
 1854 
 1855         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 1856 
 1857         UNP_LINK_RLOCK();
 1858         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 1859              unp = LIST_NEXT(unp, unp_link)) {
 1860                 UNP_PCB_LOCK(unp);
 1861                 if (unp->unp_gencnt <= gencnt) {
 1862                         if (cr_cansee(req->td->td_ucred,
 1863                             unp->unp_socket->so_cred)) {
 1864                                 UNP_PCB_UNLOCK(unp);
 1865                                 continue;
 1866                         }
 1867                         unp_list[i++] = unp;
 1868                         unp_pcb_hold(unp);
 1869                 }
 1870                 UNP_PCB_UNLOCK(unp);
 1871         }
 1872         UNP_LINK_RUNLOCK();
 1873         n = i;                  /* In case we lost some during malloc. */
 1874 
 1875         error = 0;
 1876         xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 1877         for (i = 0; i < n; i++) {
 1878                 unp = unp_list[i];
 1879                 UNP_PCB_LOCK(unp);
 1880                 if (unp_pcb_rele(unp))
 1881                         continue;
 1882 
 1883                 if (unp->unp_gencnt <= gencnt) {
 1884                         xu->xu_len = sizeof *xu;
 1885                         xu->xu_unpp = (uintptr_t)unp;
 1886                         /*
 1887                          * XXX - need more locking here to protect against
 1888                          * connect/disconnect races for SMP.
 1889                          */
 1890                         if (unp->unp_addr != NULL)
 1891                                 bcopy(unp->unp_addr, &xu->xu_addr,
 1892                                       unp->unp_addr->sun_len);
 1893                         else
 1894                                 bzero(&xu->xu_addr, sizeof(xu->xu_addr));
 1895                         if (unp->unp_conn != NULL &&
 1896                             unp->unp_conn->unp_addr != NULL)
 1897                                 bcopy(unp->unp_conn->unp_addr,
 1898                                       &xu->xu_caddr,
 1899                                       unp->unp_conn->unp_addr->sun_len);
 1900                         else
 1901                                 bzero(&xu->xu_caddr, sizeof(xu->xu_caddr));
 1902                         xu->unp_vnode = (uintptr_t)unp->unp_vnode;
 1903                         xu->unp_conn = (uintptr_t)unp->unp_conn;
 1904                         xu->xu_firstref = (uintptr_t)LIST_FIRST(&unp->unp_refs);
 1905                         xu->xu_nextref = (uintptr_t)LIST_NEXT(unp, unp_reflink);
 1906                         xu->unp_gencnt = unp->unp_gencnt;
 1907                         sotoxsocket(unp->unp_socket, &xu->xu_socket);
 1908                         UNP_PCB_UNLOCK(unp);
 1909                         error = SYSCTL_OUT(req, xu, sizeof *xu);
 1910                 } else {
 1911                         UNP_PCB_UNLOCK(unp);
 1912                 }
 1913         }
 1914         free(xu, M_TEMP);
 1915         if (!error) {
 1916                 /*
 1917                  * Give the user an updated idea of our state.  If the
 1918                  * generation differs from what we told her before, she knows
 1919                  * that something happened while we were processing this
 1920                  * request, and it might be necessary to retry.
 1921                  */
 1922                 xug->xug_gen = unp_gencnt;
 1923                 xug->xug_sogen = so_gencnt;
 1924                 xug->xug_count = unp_count;
 1925                 error = SYSCTL_OUT(req, xug, sizeof *xug);
 1926         }
 1927         free(unp_list, M_TEMP);
 1928         free(xug, M_TEMP);
 1929         return (error);
 1930 }
 1931 
/*
 * Register one read-only, MP-safe "pcblist" sysctl node per local socket
 * type.  All three share unp_pcblist() as handler; the socket type is
 * passed through arg1 and selects which PCB list the handler walks.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
    (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
    (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
    (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
    "List of active local seqpacket sockets");
 1944 
 1945 static void
 1946 unp_shutdown(struct unpcb *unp)
 1947 {
 1948         struct unpcb *unp2;
 1949         struct socket *so;
 1950 
 1951         UNP_PCB_LOCK_ASSERT(unp);
 1952 
 1953         unp2 = unp->unp_conn;
 1954         if ((unp->unp_socket->so_type == SOCK_STREAM ||
 1955             (unp->unp_socket->so_type == SOCK_SEQPACKET)) && unp2 != NULL) {
 1956                 so = unp2->unp_socket;
 1957                 if (so != NULL)
 1958                         socantrcvmore(so);
 1959         }
 1960 }
 1961 
 1962 static void
 1963 unp_drop(struct unpcb *unp)
 1964 {
 1965         struct socket *so = unp->unp_socket;
 1966         struct unpcb *unp2;
 1967 
 1968         /*
 1969          * Regardless of whether the socket's peer dropped the connection
 1970          * with this socket by aborting or disconnecting, POSIX requires
 1971          * that ECONNRESET is returned.
 1972          */
 1973 
 1974         UNP_PCB_LOCK(unp);
 1975         if (so)
 1976                 so->so_error = ECONNRESET;
 1977         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
 1978                 /* Last reference dropped in unp_disconnect(). */
 1979                 unp_pcb_rele_notlast(unp);
 1980                 unp_disconnect(unp, unp2);
 1981         } else if (!unp_pcb_rele(unp)) {
 1982                 UNP_PCB_UNLOCK(unp);
 1983         }
 1984 }
 1985 
 1986 static void
 1987 unp_freerights(struct filedescent **fdep, int fdcount)
 1988 {
 1989         struct file *fp;
 1990         int i;
 1991 
 1992         KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
 1993 
 1994         for (i = 0; i < fdcount; i++) {
 1995                 fp = fdep[i]->fde_file;
 1996                 filecaps_free(&fdep[i]->fde_caps);
 1997                 unp_discard(fp);
 1998         }
 1999         free(fdep[0], M_FILECAPS);
 2000 }
 2001 
/*
 * Convert the in-kernel control messages held in `control' into the form
 * handed to the receiving process, chaining the resulting mbufs onto
 * *controlp.
 *
 * An SCM_RIGHTS message carries an array of struct filedescent pointers.
 * Each referenced file is installed in the current thread's descriptor
 * table and the message is rewritten as an array of ints holding the new
 * descriptor numbers; MSG_CMSG_CLOEXEC in `flags' makes the new
 * descriptors close-on-exec.  Every other message is copied across
 * unchanged.
 *
 * If controlp is NULL, or once an error has been recorded, the remaining
 * messages are not output: SCM_RIGHTS payloads are released through
 * unp_freerights() and other messages are skipped.
 *
 * `control' is always freed before returning.  Returns 0, EINVAL for a
 * control message that does not fit the remaining length, E2BIG or ENOBUFS
 * when a control mbuf cannot be created, or EMSGSIZE when fdallocn() fails.
 */
static int
unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
{
        struct thread *td = curthread;          /* XXX */
        struct cmsghdr *cm = mtod(control, struct cmsghdr *);
        int i;
        int *fdp;
        struct filedesc *fdesc = td->td_proc->p_fd;
        struct filedescent **fdep;
        void *data;
        socklen_t clen = control->m_len, datalen;
        int error, newfds;
        u_int newlen;

        UNP_LINK_UNLOCK_ASSERT();

        error = 0;
        if (controlp != NULL) /* controlp == NULL => free control messages */
                *controlp = NULL;
        while (cm != NULL) {
                if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
                        error = EINVAL;
                        break;
                }
                data = CMSG_DATA(cm);
                datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
                if (cm->cmsg_level == SOL_SOCKET
                    && cm->cmsg_type == SCM_RIGHTS) {
                        newfds = datalen / sizeof(*fdep);
                        if (newfds == 0)
                                goto next;
                        fdep = data;

                        /* If we're not outputting the descriptors free them. */
                        if (error || controlp == NULL) {
                                unp_freerights(fdep, newfds);
                                goto next;
                        }
                        FILEDESC_XLOCK(fdesc);

                        /*
                         * Now change each pointer to an fd in the global
                         * table to an integer that is the index to the local
                         * fd table entry that we set up to point to the
                         * global one we are transferring.
                         */
                        newlen = newfds * sizeof(int);
                        *controlp = sbcreatecontrol(NULL, newlen,
                            SCM_RIGHTS, SOL_SOCKET);
                        if (*controlp == NULL) {
                                FILEDESC_XUNLOCK(fdesc);
                                error = E2BIG;
                                unp_freerights(fdep, newfds);
                                goto next;
                        }

                        /* Reserve newfds descriptor slots in one go. */
                        fdp = (int *)
                            CMSG_DATA(mtod(*controlp, struct cmsghdr *));
                        if (fdallocn(td, 0, fdp, newfds) != 0) {
                                FILEDESC_XUNLOCK(fdesc);
                                error = EMSGSIZE;
                                unp_freerights(fdep, newfds);
                                m_freem(*controlp);
                                *controlp = NULL;
                                goto next;
                        }
                        for (i = 0; i < newfds; i++, fdp++) {
                                _finstall(fdesc, fdep[i]->fde_file, *fdp,
                                    (flags & MSG_CMSG_CLOEXEC) != 0 ? UF_EXCLOSE : 0,
                                    &fdep[i]->fde_caps);
                                unp_externalize_fp(fdep[i]->fde_file);
                        }

                        /*
                         * The new type indicates that the mbuf data refers to
                         * kernel resources that may need to be released before
                         * the mbuf is freed.
                         */
                        m_chtype(*controlp, MT_EXTCONTROL);
                        FILEDESC_XUNLOCK(fdesc);
                        /*
                         * The filedescent entries were allocated as one
                         * array, so freeing the first releases them all.
                         */
                        free(fdep[0], M_FILECAPS);
                } else {
                        /* We can just copy anything else across. */
                        if (error || controlp == NULL)
                                goto next;
                        *controlp = sbcreatecontrol(NULL, datalen,
                            cm->cmsg_type, cm->cmsg_level);
                        if (*controlp == NULL) {
                                error = ENOBUFS;
                                goto next;
                        }
                        bcopy(data,
                            CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
                            datalen);
                }
                /* Later messages are appended after the mbuf just produced. */
                controlp = &(*controlp)->m_next;

next:
                /* Step to the next message, or stop once the data is used up. */
                if (CMSG_SPACE(datalen) < clen) {
                        clen -= CMSG_SPACE(datalen);
                        cm = (struct cmsghdr *)
                            ((caddr_t)cm + CMSG_SPACE(datalen));
                } else {
                        clen = 0;
                        cm = NULL;
                }
        }

        m_freem(control);
        return (error);
}
 2113 
 2114 static void
 2115 unp_zone_change(void *tag)
 2116 {
 2117 
 2118         uma_zone_set_max(unp_zone, maxsockets);
 2119 }
 2120 
 2121 #ifdef INVARIANTS
 2122 static void
 2123 unp_zdtor(void *mem, int size __unused, void *arg __unused)
 2124 {
 2125         struct unpcb *unp;
 2126 
 2127         unp = mem;
 2128 
 2129         KASSERT(LIST_EMPTY(&unp->unp_refs),
 2130             ("%s: unpcb %p has lingering refs", __func__, unp));
 2131         KASSERT(unp->unp_socket == NULL,
 2132             ("%s: unpcb %p has socket backpointer", __func__, unp));
 2133         KASSERT(unp->unp_vnode == NULL,
 2134             ("%s: unpcb %p has vnode references", __func__, unp));
 2135         KASSERT(unp->unp_conn == NULL,
 2136             ("%s: unpcb %p is still connected", __func__, unp));
 2137         KASSERT(unp->unp_addr == NULL,
 2138             ("%s: unpcb %p has leaked addr", __func__, unp));
 2139 }
 2140 #endif
 2141 
 2142 static void
 2143 unp_init(void)
 2144 {
 2145         uma_dtor dtor;
 2146 
 2147 #ifdef VIMAGE
 2148         if (!IS_DEFAULT_VNET(curvnet))
 2149                 return;
 2150 #endif
 2151 
 2152 #ifdef INVARIANTS
 2153         dtor = unp_zdtor;
 2154 #else
 2155         dtor = NULL;
 2156 #endif
 2157         unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, dtor,
 2158             NULL, NULL, UMA_ALIGN_CACHE, 0);
 2159         uma_zone_set_max(unp_zone, maxsockets);
 2160         uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
 2161         EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
 2162             NULL, EVENTHANDLER_PRI_ANY);
 2163         LIST_INIT(&unp_dhead);
 2164         LIST_INIT(&unp_shead);
 2165         LIST_INIT(&unp_sphead);
 2166         SLIST_INIT(&unp_defers);
 2167         TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
 2168         TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
 2169         UNP_LINK_LOCK_INIT();
 2170         UNP_DEFERRED_LOCK_INIT();
 2171 }
 2172 
 2173 static void
 2174 unp_internalize_cleanup_rights(struct mbuf *control)
 2175 {
 2176         struct cmsghdr *cp;
 2177         struct mbuf *m;
 2178         void *data;
 2179         socklen_t datalen;
 2180 
 2181         for (m = control; m != NULL; m = m->m_next) {
 2182                 cp = mtod(m, struct cmsghdr *);
 2183                 if (cp->cmsg_level != SOL_SOCKET ||
 2184                     cp->cmsg_type != SCM_RIGHTS)
 2185                         continue;
 2186                 data = CMSG_DATA(cp);
 2187                 datalen = (caddr_t)cp + cp->cmsg_len - (caddr_t)data;
 2188                 unp_freerights(data, datalen / sizeof(struct filedesc *));
 2189         }
 2190 }
 2191 
 2192 static int
 2193 unp_internalize(struct mbuf **controlp, struct thread *td)
 2194 {
 2195         struct mbuf *control, **initial_controlp;
 2196         struct proc *p;
 2197         struct filedesc *fdesc;
 2198         struct bintime *bt;
 2199         struct cmsghdr *cm;
 2200         struct cmsgcred *cmcred;
 2201         struct filedescent *fde, **fdep, *fdev;
 2202         struct file *fp;
 2203         struct timeval *tv;
 2204         struct timespec *ts;
 2205         void *data;
 2206         socklen_t clen, datalen;
 2207         int i, j, error, *fdp, oldfds;
 2208         u_int newlen;
 2209 
 2210         UNP_LINK_UNLOCK_ASSERT();
 2211 
 2212         p = td->td_proc;
 2213         fdesc = p->p_fd;
 2214         error = 0;
 2215         control = *controlp;
 2216         clen = control->m_len;
 2217         *controlp = NULL;
 2218         initial_controlp = controlp;
 2219         for (cm = mtod(control, struct cmsghdr *); cm != NULL;) {
 2220                 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 2221                     || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
 2222                         error = EINVAL;
 2223                         goto out;
 2224                 }
 2225                 data = CMSG_DATA(cm);
 2226                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 2227 
 2228                 switch (cm->cmsg_type) {
 2229                 /*
 2230                  * Fill in credential information.
 2231                  */
 2232                 case SCM_CREDS:
 2233                         *controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 2234                             SCM_CREDS, SOL_SOCKET);
 2235                         if (*controlp == NULL) {
 2236                                 error = ENOBUFS;
 2237                                 goto out;
 2238                         }
 2239                         cmcred = (struct cmsgcred *)
 2240                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2241                         cmcred->cmcred_pid = p->p_pid;
 2242                         cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 2243                         cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 2244                         cmcred->cmcred_euid = td->td_ucred->cr_uid;
 2245                         cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 2246                             CMGROUP_MAX);
 2247                         for (i = 0; i < cmcred->cmcred_ngroups; i++)
 2248                                 cmcred->cmcred_groups[i] =
 2249                                     td->td_ucred->cr_groups[i];
 2250                         break;
 2251 
 2252                 case SCM_RIGHTS:
 2253                         oldfds = datalen / sizeof (int);
 2254                         if (oldfds == 0)
 2255                                 break;
 2256                         /*
 2257                          * Check that all the FDs passed in refer to legal
 2258                          * files.  If not, reject the entire operation.
 2259                          */
 2260                         fdp = data;
 2261                         FILEDESC_SLOCK(fdesc);
 2262                         for (i = 0; i < oldfds; i++, fdp++) {
 2263                                 fp = fget_locked(fdesc, *fdp);
 2264                                 if (fp == NULL) {
 2265                                         FILEDESC_SUNLOCK(fdesc);
 2266                                         error = EBADF;
 2267                                         goto out;
 2268                                 }
 2269                                 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 2270                                         FILEDESC_SUNLOCK(fdesc);
 2271                                         error = EOPNOTSUPP;
 2272                                         goto out;
 2273                                 }
 2274                         }
 2275 
 2276                         /*
 2277                          * Now replace the integer FDs with pointers to the
 2278                          * file structure and capability rights.
 2279                          */
 2280                         newlen = oldfds * sizeof(fdep[0]);
 2281                         *controlp = sbcreatecontrol(NULL, newlen,
 2282                             SCM_RIGHTS, SOL_SOCKET);
 2283                         if (*controlp == NULL) {
 2284                                 FILEDESC_SUNLOCK(fdesc);
 2285                                 error = E2BIG;
 2286                                 goto out;
 2287                         }
 2288                         fdp = data;
 2289                         for (i = 0; i < oldfds; i++, fdp++) {
 2290                                 if (!fhold(fdesc->fd_ofiles[*fdp].fde_file)) {
 2291                                         fdp = data;
 2292                                         for (j = 0; j < i; j++, fdp++) {
 2293                                                 fdrop(fdesc->fd_ofiles[*fdp].
 2294                                                     fde_file, td);
 2295                                         }
 2296                                         FILEDESC_SUNLOCK(fdesc);
 2297                                         error = EBADF;
 2298                                         goto out;
 2299                                 }
 2300                         }
 2301                         fdp = data;
 2302                         fdep = (struct filedescent **)
 2303                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2304                         fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
 2305                             M_WAITOK);
 2306                         for (i = 0; i < oldfds; i++, fdev++, fdp++) {
 2307                                 fde = &fdesc->fd_ofiles[*fdp];
 2308                                 fdep[i] = fdev;
 2309                                 fdep[i]->fde_file = fde->fde_file;
 2310                                 filecaps_copy(&fde->fde_caps,
 2311                                     &fdep[i]->fde_caps, true);
 2312                                 unp_internalize_fp(fdep[i]->fde_file);
 2313                         }
 2314                         FILEDESC_SUNLOCK(fdesc);
 2315                         break;
 2316 
 2317                 case SCM_TIMESTAMP:
 2318                         *controlp = sbcreatecontrol(NULL, sizeof(*tv),
 2319                             SCM_TIMESTAMP, SOL_SOCKET);
 2320                         if (*controlp == NULL) {
 2321                                 error = ENOBUFS;
 2322                                 goto out;
 2323                         }
 2324                         tv = (struct timeval *)
 2325                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2326                         microtime(tv);
 2327                         break;
 2328 
 2329                 case SCM_BINTIME:
 2330                         *controlp = sbcreatecontrol(NULL, sizeof(*bt),
 2331                             SCM_BINTIME, SOL_SOCKET);
 2332                         if (*controlp == NULL) {
 2333                                 error = ENOBUFS;
 2334                                 goto out;
 2335                         }
 2336                         bt = (struct bintime *)
 2337                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2338                         bintime(bt);
 2339                         break;
 2340 
 2341                 case SCM_REALTIME:
 2342                         *controlp = sbcreatecontrol(NULL, sizeof(*ts),
 2343                             SCM_REALTIME, SOL_SOCKET);
 2344                         if (*controlp == NULL) {
 2345                                 error = ENOBUFS;
 2346                                 goto out;
 2347                         }
 2348                         ts = (struct timespec *)
 2349                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2350                         nanotime(ts);
 2351                         break;
 2352 
 2353                 case SCM_MONOTONIC:
 2354                         *controlp = sbcreatecontrol(NULL, sizeof(*ts),
 2355                             SCM_MONOTONIC, SOL_SOCKET);
 2356                         if (*controlp == NULL) {
 2357                                 error = ENOBUFS;
 2358                                 goto out;
 2359                         }
 2360                         ts = (struct timespec *)
 2361                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2362                         nanouptime(ts);
 2363                         break;
 2364 
 2365                 default:
 2366                         error = EINVAL;
 2367                         goto out;
 2368                 }
 2369 
 2370                 if (*controlp != NULL)
 2371                         controlp = &(*controlp)->m_next;
 2372                 if (CMSG_SPACE(datalen) < clen) {
 2373                         clen -= CMSG_SPACE(datalen);
 2374                         cm = (struct cmsghdr *)
 2375                             ((caddr_t)cm + CMSG_SPACE(datalen));
 2376                 } else {
 2377                         clen = 0;
 2378                         cm = NULL;
 2379                 }
 2380         }
 2381 
 2382 out:
 2383         if (error != 0 && initial_controlp != NULL)
 2384                 unp_internalize_cleanup_rights(*initial_controlp);
 2385         m_freem(control);
 2386         return (error);
 2387 }
 2388 
 2389 static struct mbuf *
 2390 unp_addsockcred(struct thread *td, struct mbuf *control, int mode)
 2391 {
 2392         struct mbuf *m, *n, *n_prev;
 2393         const struct cmsghdr *cm;
 2394         int ngroups, i, cmsgtype;
 2395         size_t ctrlsz;
 2396 
 2397         ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
 2398         if (mode & UNP_WANTCRED_ALWAYS) {
 2399                 ctrlsz = SOCKCRED2SIZE(ngroups);
 2400                 cmsgtype = SCM_CREDS2;
 2401         } else {
 2402                 ctrlsz = SOCKCREDSIZE(ngroups);
 2403                 cmsgtype = SCM_CREDS;
 2404         }
 2405 
 2406         m = sbcreatecontrol(NULL, ctrlsz, cmsgtype, SOL_SOCKET);
 2407         if (m == NULL)
 2408                 return (control);
 2409 
 2410         if (mode & UNP_WANTCRED_ALWAYS) {
 2411                 struct sockcred2 *sc;
 2412 
 2413                 sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *));
 2414                 sc->sc_version = 0;
 2415                 sc->sc_pid = td->td_proc->p_pid;
 2416                 sc->sc_uid = td->td_ucred->cr_ruid;
 2417                 sc->sc_euid = td->td_ucred->cr_uid;
 2418                 sc->sc_gid = td->td_ucred->cr_rgid;
 2419                 sc->sc_egid = td->td_ucred->cr_gid;
 2420                 sc->sc_ngroups = ngroups;
 2421                 for (i = 0; i < sc->sc_ngroups; i++)
 2422                         sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 2423         } else {
 2424                 struct sockcred *sc;
 2425 
 2426                 sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *));
 2427                 sc->sc_uid = td->td_ucred->cr_ruid;
 2428                 sc->sc_euid = td->td_ucred->cr_uid;
 2429                 sc->sc_gid = td->td_ucred->cr_rgid;
 2430                 sc->sc_egid = td->td_ucred->cr_gid;
 2431                 sc->sc_ngroups = ngroups;
 2432                 for (i = 0; i < sc->sc_ngroups; i++)
 2433                         sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 2434         }
 2435 
 2436         /*
 2437          * Unlink SCM_CREDS control messages (struct cmsgcred), since just
 2438          * created SCM_CREDS control message (struct sockcred) has another
 2439          * format.
 2440          */
 2441         if (control != NULL && cmsgtype == SCM_CREDS)
 2442                 for (n = control, n_prev = NULL; n != NULL;) {
 2443                         cm = mtod(n, struct cmsghdr *);
 2444                         if (cm->cmsg_level == SOL_SOCKET &&
 2445                             cm->cmsg_type == SCM_CREDS) {
 2446                                 if (n_prev == NULL)
 2447                                         control = n->m_next;
 2448                                 else
 2449                                         n_prev->m_next = n->m_next;
 2450                                 n = m_free(n);
 2451                         } else {
 2452                                 n_prev = n;
 2453                                 n = n->m_next;
 2454                         }
 2455                 }
 2456 
 2457         /* Prepend it to the head. */
 2458         m->m_next = control;
 2459         return (m);
 2460 }
 2461 
 2462 static struct unpcb *
 2463 fptounp(struct file *fp)
 2464 {
 2465         struct socket *so;
 2466 
 2467         if (fp->f_type != DTYPE_SOCKET)
 2468                 return (NULL);
 2469         if ((so = fp->f_data) == NULL)
 2470                 return (NULL);
 2471         if (so->so_proto->pr_domain != &localdomain)
 2472                 return (NULL);
 2473         return sotounpcb(so);
 2474 }
 2475 
 2476 static void
 2477 unp_discard(struct file *fp)
 2478 {
 2479         struct unp_defer *dr;
 2480 
 2481         if (unp_externalize_fp(fp)) {
 2482                 dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK);
 2483                 dr->ud_fp = fp;
 2484                 UNP_DEFERRED_LOCK();
 2485                 SLIST_INSERT_HEAD(&unp_defers, dr, ud_link);
 2486                 UNP_DEFERRED_UNLOCK();
 2487                 atomic_add_int(&unp_defers_count, 1);
 2488                 taskqueue_enqueue(taskqueue_thread, &unp_defer_task);
 2489         } else
 2490                 closef_nothread(fp);
 2491 }
 2492 
 2493 static void
 2494 unp_process_defers(void *arg __unused, int pending)
 2495 {
 2496         struct unp_defer *dr;
 2497         SLIST_HEAD(, unp_defer) drl;
 2498         int count;
 2499 
 2500         SLIST_INIT(&drl);
 2501         for (;;) {
 2502                 UNP_DEFERRED_LOCK();
 2503                 if (SLIST_FIRST(&unp_defers) == NULL) {
 2504                         UNP_DEFERRED_UNLOCK();
 2505                         break;
 2506                 }
 2507                 SLIST_SWAP(&unp_defers, &drl, unp_defer);
 2508                 UNP_DEFERRED_UNLOCK();
 2509                 count = 0;
 2510                 while ((dr = SLIST_FIRST(&drl)) != NULL) {
 2511                         SLIST_REMOVE_HEAD(&drl, ud_link);
 2512                         closef_nothread(dr->ud_fp);
 2513                         free(dr, M_TEMP);
 2514                         count++;
 2515                 }
 2516                 atomic_add_int(&unp_defers_count, -count);
 2517         }
 2518 }
 2519 
 2520 static void
 2521 unp_internalize_fp(struct file *fp)
 2522 {
 2523         struct unpcb *unp;
 2524 
 2525         UNP_LINK_WLOCK();
 2526         if ((unp = fptounp(fp)) != NULL) {
 2527                 unp->unp_file = fp;
 2528                 unp->unp_msgcount++;
 2529         }
 2530         unp_rights++;
 2531         UNP_LINK_WUNLOCK();
 2532 }
 2533 
 2534 static int
 2535 unp_externalize_fp(struct file *fp)
 2536 {
 2537         struct unpcb *unp;
 2538         int ret;
 2539 
 2540         UNP_LINK_WLOCK();
 2541         if ((unp = fptounp(fp)) != NULL) {
 2542                 unp->unp_msgcount--;
 2543                 ret = 1;
 2544         } else
 2545                 ret = 0;
 2546         unp_rights--;
 2547         UNP_LINK_WUNLOCK();
 2548         return (ret);
 2549 }
 2550 
 2551 /*
 2552  * unp_marked indicates whether additional work has been deferred for a
 2553  * future pass through unp_gc().  It is thread local and does not require
 2554  * explicit synchronization.
 2555  */
 2556 static int      unp_marked;
 2557 
 2558 static void
 2559 unp_remove_dead_ref(struct filedescent **fdep, int fdcount)
 2560 {
 2561         struct unpcb *unp;
 2562         struct file *fp;
 2563         int i;
 2564 
 2565         /*
 2566          * This function can only be called from the gc task.
 2567          */
 2568         KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
 2569             ("%s: not on gc callout", __func__));
 2570         UNP_LINK_LOCK_ASSERT();
 2571 
 2572         for (i = 0; i < fdcount; i++) {
 2573                 fp = fdep[i]->fde_file;
 2574                 if ((unp = fptounp(fp)) == NULL)
 2575                         continue;
 2576                 if ((unp->unp_gcflag & UNPGC_DEAD) == 0)
 2577                         continue;
 2578                 unp->unp_gcrefs--;
 2579         }
 2580 }
 2581 
 2582 static void
 2583 unp_restore_undead_ref(struct filedescent **fdep, int fdcount)
 2584 {
 2585         struct unpcb *unp;
 2586         struct file *fp;
 2587         int i;
 2588 
 2589         /*
 2590          * This function can only be called from the gc task.
 2591          */
 2592         KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
 2593             ("%s: not on gc callout", __func__));
 2594         UNP_LINK_LOCK_ASSERT();
 2595 
 2596         for (i = 0; i < fdcount; i++) {
 2597                 fp = fdep[i]->fde_file;
 2598                 if ((unp = fptounp(fp)) == NULL)
 2599                         continue;
 2600                 if ((unp->unp_gcflag & UNPGC_DEAD) == 0)
 2601                         continue;
 2602                 unp->unp_gcrefs++;
 2603                 unp_marked++;
 2604         }
 2605 }
 2606 
 2607 static void
 2608 unp_gc_scan(struct unpcb *unp, void (*op)(struct filedescent **, int))
 2609 {
 2610         struct socket *so, *soa;
 2611 
 2612         so = unp->unp_socket;
 2613         SOCK_LOCK(so);
 2614         if (SOLISTENING(so)) {
 2615                 /*
 2616                  * Mark all sockets in our accept queue.
 2617                  */
 2618                 TAILQ_FOREACH(soa, &so->sol_comp, so_list) {
 2619                         if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
 2620                                 continue;
 2621                         SOCKBUF_LOCK(&soa->so_rcv);
 2622                         unp_scan(soa->so_rcv.sb_mb, op);
 2623                         SOCKBUF_UNLOCK(&soa->so_rcv);
 2624                 }
 2625         } else {
 2626                 /*
 2627                  * Mark all sockets we reference with RIGHTS.
 2628                  */
 2629                 if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
 2630                         SOCKBUF_LOCK(&so->so_rcv);
 2631                         unp_scan(so->so_rcv.sb_mb, op);
 2632                         SOCKBUF_UNLOCK(&so->so_rcv);
 2633                 }
 2634         }
 2635         SOCK_UNLOCK(so);
 2636 }
 2637 
 2638 static int unp_recycled;
 2639 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 
 2640     "Number of unreachable sockets claimed by the garbage collector.");
 2641 
 2642 static int unp_taskcount;
 2643 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 
 2644     "Number of times the garbage collector has run.");
 2645 
 2646 SYSCTL_UINT(_net_local, OID_AUTO, sockcount, CTLFLAG_RD, &unp_count, 0, 
 2647     "Number of active local sockets.");
 2648 
 2649 static void
 2650 unp_gc(__unused void *arg, int pending)
 2651 {
 2652         struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead,
 2653                                     NULL };
 2654         struct unp_head **head;
 2655         struct unp_head unp_deadhead;   /* List of potentially-dead sockets. */
 2656         struct file *f, **unref;
 2657         struct unpcb *unp, *unptmp;
 2658         int i, total, unp_unreachable;
 2659 
 2660         LIST_INIT(&unp_deadhead);
 2661         unp_taskcount++;
 2662         UNP_LINK_RLOCK();
 2663         /*
 2664          * First determine which sockets may be in cycles.
 2665          */
 2666         unp_unreachable = 0;
 2667 
 2668         for (head = heads; *head != NULL; head++)
 2669                 LIST_FOREACH(unp, *head, unp_link) {
 2670                         KASSERT((unp->unp_gcflag & ~UNPGC_IGNORE_RIGHTS) == 0,
 2671                             ("%s: unp %p has unexpected gc flags 0x%x",
 2672                             __func__, unp, (unsigned int)unp->unp_gcflag));
 2673 
 2674                         f = unp->unp_file;
 2675 
 2676                         /*
 2677                          * Check for an unreachable socket potentially in a
 2678                          * cycle.  It must be in a queue as indicated by
 2679                          * msgcount, and this must equal the file reference
 2680                          * count.  Note that when msgcount is 0 the file is
 2681                          * NULL.
 2682                          */
 2683                         if (f != NULL && unp->unp_msgcount != 0 &&
 2684                             refcount_load(&f->f_count) == unp->unp_msgcount) {
 2685                                 LIST_INSERT_HEAD(&unp_deadhead, unp, unp_dead);
 2686                                 unp->unp_gcflag |= UNPGC_DEAD;
 2687                                 unp->unp_gcrefs = unp->unp_msgcount;
 2688                                 unp_unreachable++;
 2689                         }
 2690                 }
 2691 
 2692         /*
 2693          * Scan all sockets previously marked as potentially being in a cycle
 2694          * and remove the references each socket holds on any UNPGC_DEAD
 2695          * sockets in its queue.  After this step, all remaining references on
 2696          * sockets marked UNPGC_DEAD should not be part of any cycle.
 2697          */
 2698         LIST_FOREACH(unp, &unp_deadhead, unp_dead)
 2699                 unp_gc_scan(unp, unp_remove_dead_ref);
 2700 
 2701         /*
 2702          * If a socket still has a non-negative refcount, it cannot be in a
 2703          * cycle.  In this case increment refcount of all children iteratively.
 2704          * Stop the scan once we do a complete loop without discovering
 2705          * a new reachable socket.
 2706          */
 2707         do {
 2708                 unp_marked = 0;
 2709                 LIST_FOREACH_SAFE(unp, &unp_deadhead, unp_dead, unptmp)
 2710                         if (unp->unp_gcrefs > 0) {
 2711                                 unp->unp_gcflag &= ~UNPGC_DEAD;
 2712                                 LIST_REMOVE(unp, unp_dead);
 2713                                 KASSERT(unp_unreachable > 0,
 2714                                     ("%s: unp_unreachable underflow.",
 2715                                     __func__));
 2716                                 unp_unreachable--;
 2717                                 unp_gc_scan(unp, unp_restore_undead_ref);
 2718                         }
 2719         } while (unp_marked);
 2720 
 2721         UNP_LINK_RUNLOCK();
 2722 
 2723         if (unp_unreachable == 0)
 2724                 return;
 2725 
 2726         /*
 2727          * Allocate space for a local array of dead unpcbs.
 2728          * TODO: can this path be simplified by instead using the local
 2729          * dead list at unp_deadhead, after taking out references
 2730          * on the file object and/or unpcb and dropping the link lock?
 2731          */
 2732         unref = malloc(unp_unreachable * sizeof(struct file *),
 2733             M_TEMP, M_WAITOK);
 2734 
 2735         /*
 2736          * Iterate looking for sockets which have been specifically marked
 2737          * as unreachable and store them locally.
 2738          */
 2739         UNP_LINK_RLOCK();
 2740         total = 0;
 2741         LIST_FOREACH(unp, &unp_deadhead, unp_dead) {
 2742                 KASSERT((unp->unp_gcflag & UNPGC_DEAD) != 0,
 2743                     ("%s: unp %p not marked UNPGC_DEAD", __func__, unp));
 2744                 unp->unp_gcflag &= ~UNPGC_DEAD;
 2745                 f = unp->unp_file;
 2746                 if (unp->unp_msgcount == 0 || f == NULL ||
 2747                     refcount_load(&f->f_count) != unp->unp_msgcount ||
 2748                     !fhold(f))
 2749                         continue;
 2750                 unref[total++] = f;
 2751                 KASSERT(total <= unp_unreachable,
 2752                     ("%s: incorrect unreachable count.", __func__));
 2753         }
 2754         UNP_LINK_RUNLOCK();
 2755 
 2756         /*
 2757          * Now flush all sockets, free'ing rights.  This will free the
 2758          * struct files associated with these sockets but leave each socket
 2759          * with one remaining ref.
 2760          */
 2761         for (i = 0; i < total; i++) {
 2762                 struct socket *so;
 2763 
 2764                 so = unref[i]->f_data;
 2765                 CURVNET_SET(so->so_vnet);
 2766                 sorflush(so);
 2767                 CURVNET_RESTORE();
 2768         }
 2769 
 2770         /*
 2771          * And finally release the sockets so they can be reclaimed.
 2772          */
 2773         for (i = 0; i < total; i++)
 2774                 fdrop(unref[i], NULL);
 2775         unp_recycled += total;
 2776         free(unref, M_TEMP);
 2777 }
 2778 
 2779 static void
 2780 unp_dispose_mbuf(struct mbuf *m)
 2781 {
 2782 
 2783         if (m)
 2784                 unp_scan(m, unp_freerights);
 2785 }
 2786 
 2787 /*
 2788  * Synchronize against unp_gc, which can trip over data as we are freeing it.
 2789  */
 2790 static void
 2791 unp_dispose(struct socket *so)
 2792 {
 2793         struct unpcb *unp;
 2794 
 2795         unp = sotounpcb(so);
 2796         UNP_LINK_WLOCK();
 2797         unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
 2798         UNP_LINK_WUNLOCK();
 2799         if (!SOLISTENING(so))
 2800                 unp_dispose_mbuf(so->so_rcv.sb_mb);
 2801 }
 2802 
 2803 static void
 2804 unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
 2805 {
 2806         struct mbuf *m;
 2807         struct cmsghdr *cm;
 2808         void *data;
 2809         socklen_t clen, datalen;
 2810 
 2811         while (m0 != NULL) {
 2812                 for (m = m0; m; m = m->m_next) {
 2813                         if (m->m_type != MT_CONTROL)
 2814                                 continue;
 2815 
 2816                         cm = mtod(m, struct cmsghdr *);
 2817                         clen = m->m_len;
 2818 
 2819                         while (cm != NULL) {
 2820                                 if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 2821                                         break;
 2822 
 2823                                 data = CMSG_DATA(cm);
 2824                                 datalen = (caddr_t)cm + cm->cmsg_len
 2825                                     - (caddr_t)data;
 2826 
 2827                                 if (cm->cmsg_level == SOL_SOCKET &&
 2828                                     cm->cmsg_type == SCM_RIGHTS) {
 2829                                         (*op)(data, datalen /
 2830                                             sizeof(struct filedescent *));
 2831                                 }
 2832 
 2833                                 if (CMSG_SPACE(datalen) < clen) {
 2834                                         clen -= CMSG_SPACE(datalen);
 2835                                         cm = (struct cmsghdr *)
 2836                                             ((caddr_t)cm + CMSG_SPACE(datalen));
 2837                                 } else {
 2838                                         clen = 0;
 2839                                         cm = NULL;
 2840                                 }
 2841                         }
 2842                 }
 2843                 m0 = m0->m_nextpkt;
 2844         }
 2845 }
 2846 
 2847 /*
 2848  * A helper function called by VFS before socket-type vnode reclamation.
 2849  * For an active vnode it clears unp_vnode pointer and decrements unp_vnode
 2850  * use count.
 2851  */
 2852 void
 2853 vfs_unp_reclaim(struct vnode *vp)
 2854 {
 2855         struct unpcb *unp;
 2856         int active;
 2857         struct mtx *vplock;
 2858 
 2859         ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim");
 2860         KASSERT(vp->v_type == VSOCK,
 2861             ("vfs_unp_reclaim: vp->v_type != VSOCK"));
 2862 
 2863         active = 0;
 2864         vplock = mtx_pool_find(mtxpool_sleep, vp);
 2865         mtx_lock(vplock);
 2866         VOP_UNP_CONNECT(vp, &unp);
 2867         if (unp == NULL)
 2868                 goto done;
 2869         UNP_PCB_LOCK(unp);
 2870         if (unp->unp_vnode == vp) {
 2871                 VOP_UNP_DETACH(vp);
 2872                 unp->unp_vnode = NULL;
 2873                 active = 1;
 2874         }
 2875         UNP_PCB_UNLOCK(unp);
 2876  done:
 2877         mtx_unlock(vplock);
 2878         if (active)
 2879                 vunref(vp);
 2880 }
 2881 
 2882 #ifdef DDB
 2883 static void
 2884 db_print_indent(int indent)
 2885 {
 2886         int i;
 2887 
 2888         for (i = 0; i < indent; i++)
 2889                 db_printf(" ");
 2890 }
 2891 
 2892 static void
 2893 db_print_unpflags(int unp_flags)
 2894 {
 2895         int comma;
 2896 
 2897         comma = 0;
 2898         if (unp_flags & UNP_HAVEPC) {
 2899                 db_printf("%sUNP_HAVEPC", comma ? ", " : "");
 2900                 comma = 1;
 2901         }
 2902         if (unp_flags & UNP_WANTCRED_ALWAYS) {
 2903                 db_printf("%sUNP_WANTCRED_ALWAYS", comma ? ", " : "");
 2904                 comma = 1;
 2905         }
 2906         if (unp_flags & UNP_WANTCRED_ONESHOT) {
 2907                 db_printf("%sUNP_WANTCRED_ONESHOT", comma ? ", " : "");
 2908                 comma = 1;
 2909         }
 2910         if (unp_flags & UNP_CONNWAIT) {
 2911                 db_printf("%sUNP_CONNWAIT", comma ? ", " : "");
 2912                 comma = 1;
 2913         }
 2914         if (unp_flags & UNP_CONNECTING) {
 2915                 db_printf("%sUNP_CONNECTING", comma ? ", " : "");
 2916                 comma = 1;
 2917         }
 2918         if (unp_flags & UNP_BINDING) {
 2919                 db_printf("%sUNP_BINDING", comma ? ", " : "");
 2920                 comma = 1;
 2921         }
 2922 }
 2923 
 2924 static void
 2925 db_print_xucred(int indent, struct xucred *xu)
 2926 {
 2927         int comma, i;
 2928 
 2929         db_print_indent(indent);
 2930         db_printf("cr_version: %u   cr_uid: %u   cr_pid: %d   cr_ngroups: %d\n",
 2931             xu->cr_version, xu->cr_uid, xu->cr_pid, xu->cr_ngroups);
 2932         db_print_indent(indent);
 2933         db_printf("cr_groups: ");
 2934         comma = 0;
 2935         for (i = 0; i < xu->cr_ngroups; i++) {
 2936                 db_printf("%s%u", comma ? ", " : "", xu->cr_groups[i]);
 2937                 comma = 1;
 2938         }
 2939         db_printf("\n");
 2940 }
 2941 
 2942 static void
 2943 db_print_unprefs(int indent, struct unp_head *uh)
 2944 {
 2945         struct unpcb *unp;
 2946         int counter;
 2947 
 2948         counter = 0;
 2949         LIST_FOREACH(unp, uh, unp_reflink) {
 2950                 if (counter % 4 == 0)
 2951                         db_print_indent(indent);
 2952                 db_printf("%p  ", unp);
 2953                 if (counter % 4 == 3)
 2954                         db_printf("\n");
 2955                 counter++;
 2956         }
 2957         if (counter != 0 && counter % 4 != 0)
 2958                 db_printf("\n");
 2959 }
 2960 
 2961 DB_SHOW_COMMAND(unpcb, db_show_unpcb)
 2962 {
 2963         struct unpcb *unp;
 2964 
 2965         if (!have_addr) {
 2966                 db_printf("usage: show unpcb <addr>\n");
 2967                 return;
 2968         }
 2969         unp = (struct unpcb *)addr;
 2970 
 2971         db_printf("unp_socket: %p   unp_vnode: %p\n", unp->unp_socket,
 2972             unp->unp_vnode);
 2973 
 2974         db_printf("unp_ino: %ju   unp_conn: %p\n", (uintmax_t)unp->unp_ino,
 2975             unp->unp_conn);
 2976 
 2977         db_printf("unp_refs:\n");
 2978         db_print_unprefs(2, &unp->unp_refs);
 2979 
 2980         /* XXXRW: Would be nice to print the full address, if any. */
 2981         db_printf("unp_addr: %p\n", unp->unp_addr);
 2982 
 2983         db_printf("unp_gencnt: %llu\n",
 2984             (unsigned long long)unp->unp_gencnt);
 2985 
 2986         db_printf("unp_flags: %x (", unp->unp_flags);
 2987         db_print_unpflags(unp->unp_flags);
 2988         db_printf(")\n");
 2989 
 2990         db_printf("unp_peercred:\n");
 2991         db_print_xucred(2, &unp->unp_peercred);
 2992 
 2993         db_printf("unp_refcount: %u\n", unp->unp_refcount);
 2994 }
 2995 #endif

Cache object: 6aa64c5ee138190b6d9f88e013f78cdd


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.