FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    5  *      The Regents of the University of California. All Rights Reserved.
    6  * Copyright (c) 2004-2009 Robert N. M. Watson All Rights Reserved.
    7  * Copyright (c) 2018 Matthew Macy
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   34  */
   35 
   36 /*
   37  * UNIX Domain (Local) Sockets
   38  *
   39  * This is an implementation of UNIX (local) domain sockets.  Each socket has
   40  * an associated struct unpcb (UNIX protocol control block).  Stream sockets
   41  * may be connected to 0 or 1 other socket.  Datagram sockets may be
   42  * connected to 0, 1, or many other sockets.  Sockets may be created and
   43  * connected in pairs (socketpair(2)), or bound/connected to using the file
   44  * system name space.  For most purposes, only the receive socket buffer is
   45  * used, as sending on one socket delivers directly to the receive socket
   46  * buffer of a second socket.
   47  *
   48  * The implementation is substantially complicated by the fact that
   49  * "ancillary data", such as file descriptors or credentials, may be passed
   50  * across UNIX domain sockets.  The potential for passing UNIX domain sockets
   51  * over other UNIX domain sockets requires the implementation of a simple
   52  * garbage collector to find and tear down cycles of disconnected sockets.
   53  *
   54  * TODO:
   55  *      RDM
   56  *      rethink name space problems
   57  *      need a proper out-of-band
   58  */
   59 
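/*
 * For illustration, a minimal userspace sketch of the socketpair(2) case
 * mentioned above: the kernel creates two stream sockets and connects them
 * to each other, so data written on one end is delivered straight into the
 * other end's receive buffer.  Error handling is omitted for brevity.
 *
 *        #include <sys/socket.h>
 *        #include <unistd.h>
 *
 *        int fds[2];
 *        char c;
 *
 *        if (socketpair(PF_LOCAL, SOCK_STREAM, 0, fds) == 0) {
 *                (void)write(fds[0], "x", 1);
 *                (void)read(fds[1], &c, 1);
 *        }
 */
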
   60 #include <sys/cdefs.h>
   61 __FBSDID("$FreeBSD$");
   62 
   63 #include "opt_ddb.h"
   64 
   65 #include <sys/param.h>
   66 #include <sys/capsicum.h>
   67 #include <sys/domain.h>
   68 #include <sys/eventhandler.h>
   69 #include <sys/fcntl.h>
   70 #include <sys/file.h>
   71 #include <sys/filedesc.h>
   72 #include <sys/kernel.h>
   73 #include <sys/lock.h>
   74 #include <sys/malloc.h>
   75 #include <sys/mbuf.h>
   76 #include <sys/mount.h>
   77 #include <sys/mutex.h>
   78 #include <sys/namei.h>
   79 #include <sys/proc.h>
   80 #include <sys/protosw.h>
   81 #include <sys/queue.h>
   82 #include <sys/resourcevar.h>
   83 #include <sys/rwlock.h>
   84 #include <sys/socket.h>
   85 #include <sys/socketvar.h>
   86 #include <sys/signalvar.h>
   87 #include <sys/stat.h>
   88 #include <sys/sx.h>
   89 #include <sys/sysctl.h>
   90 #include <sys/systm.h>
   91 #include <sys/taskqueue.h>
   92 #include <sys/un.h>
   93 #include <sys/unpcb.h>
   94 #include <sys/vnode.h>
   95 
   96 #include <net/vnet.h>
   97 
   98 #ifdef DDB
   99 #include <ddb/ddb.h>
  100 #endif
  101 
  102 #include <security/mac/mac_framework.h>
  103 
  104 #include <vm/uma.h>
  105 
  106 MALLOC_DECLARE(M_FILECAPS);
  107 
  108 /*
  109  * See unpcb.h for the locking key.
  110  */
  111 
  112 static uma_zone_t       unp_zone;
  113 static unp_gen_t        unp_gencnt;     /* (l) */
  114 static u_int            unp_count;      /* (l) Count of local sockets. */
  115 static ino_t            unp_ino;        /* Prototype for fake inode numbers. */
  116 static int              unp_rights;     /* (g) File descriptors in flight. */
  117 static struct unp_head  unp_shead;      /* (l) List of stream sockets. */
  118 static struct unp_head  unp_dhead;      /* (l) List of datagram sockets. */
  119 static struct unp_head  unp_sphead;     /* (l) List of seqpacket sockets. */
  120 
  121 struct unp_defer {
  122         SLIST_ENTRY(unp_defer) ud_link;
  123         struct file *ud_fp;
  124 };
  125 static SLIST_HEAD(, unp_defer) unp_defers;
  126 static int unp_defers_count;
  127 
  128 static const struct sockaddr    sun_noname = { sizeof(sun_noname), AF_LOCAL };
  129 
  130 /*
  131  * Garbage collection of cyclic file descriptor/socket references occurs
  132  * asynchronously in a taskqueue context in order to avoid recursion and
  133  * reentrance in the UNIX domain socket, file descriptor, and socket layer
  134  * code.  See unp_gc() for a full description.
  135  */
  136 static struct timeout_task unp_gc_task;
  137 
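/*
 * For illustration, a minimal userspace sketch of the descriptor passing
 * that creates the in-flight references handled by the garbage collector
 * and the deferred-close task: a descriptor is attached to a message as
 * SCM_RIGHTS ancillary data and handed to sendmsg(2) on a connected local
 * socket.  "s" and "fd_to_pass" are assumed to exist already; error
 * handling is omitted.
 *
 *        #include <sys/socket.h>
 *        #include <sys/uio.h>
 *        #include <string.h>
 *
 *        char data = 'x';
 *        struct iovec iov = { .iov_base = &data, .iov_len = 1 };
 *        union { struct cmsghdr hdr; char buf[CMSG_SPACE(sizeof(int))]; } cm;
 *        struct msghdr msg;
 *        struct cmsghdr *cmsg;
 *
 *        memset(&msg, 0, sizeof(msg));
 *        msg.msg_iov = &iov;
 *        msg.msg_iovlen = 1;
 *        msg.msg_control = cm.buf;
 *        msg.msg_controllen = sizeof(cm.buf);
 *        cmsg = CMSG_FIRSTHDR(&msg);
 *        cmsg->cmsg_level = SOL_SOCKET;
 *        cmsg->cmsg_type = SCM_RIGHTS;
 *        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *        memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
 *        (void)sendmsg(s, &msg, 0);
 */
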
  138 /*
  139  * The close of unix domain sockets attached as SCM_RIGHTS is
  140  * postponed to the taskqueue, to avoid arbitrary recursion depth.
  141  * The attached sockets might have other sockets attached.
  142  */
  143 static struct task      unp_defer_task;
  144 
  145 /*
  146  * Both the send and receive buffers of a stream socket are allocated PIPSIZ
  147  * bytes, but only the receive buffer is actually used, so the effective
  148  * buffering between a sender and a receiver is only PIPSIZ.
  149  *
  150  * Datagram sockets really use the sendspace as the maximum datagram size,
  151  * and don't really want to reserve the sendspace.  Their recvspace should be
  152  * large enough for at least one max-size datagram plus address.
  153  */
  154 #ifndef PIPSIZ
  155 #define PIPSIZ  8192
  156 #endif
  157 static u_long   unpst_sendspace = PIPSIZ;
  158 static u_long   unpst_recvspace = PIPSIZ;
  159 static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
  160 static u_long   unpdg_recvspace = 16*1024;      /* support 8KB syslog msgs */
  161 static u_long   unpsp_sendspace = PIPSIZ;       /* really max datagram size */
  162 static u_long   unpsp_recvspace = PIPSIZ;
  163 
  164 static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  165     "Local domain");
  166 static SYSCTL_NODE(_net_local, SOCK_STREAM, stream,
  167     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  168     "SOCK_STREAM");
  169 static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram,
  170     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  171     "SOCK_DGRAM");
  172 static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket,
  173     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  174     "SOCK_SEQPACKET");
  175 
  176 SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
  177            &unpst_sendspace, 0, "Default stream send space.");
  178 SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
  179            &unpst_recvspace, 0, "Default stream receive space.");
  180 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
  181            &unpdg_sendspace, 0, "Default datagram send space.");
  182 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
  183            &unpdg_recvspace, 0, "Default datagram receive space.");
  184 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW,
  185            &unpsp_sendspace, 0, "Default seqpacket send space.");
  186 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW,
  187            &unpsp_recvspace, 0, "Default seqpacket receive space.");
  188 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
  189     "File descriptors in flight.");
  190 SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
  191     &unp_defers_count, 0,
  192     "File descriptors deferred to taskqueue for close.");
  193 
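/*
 * For illustration, a userspace sketch of reading one of the knobs declared
 * above with sysctlbyname(3); the same values can be inspected or changed
 * with sysctl(8), e.g. "sysctl net.local.stream.sendspace".
 *
 *        #include <sys/types.h>
 *        #include <sys/sysctl.h>
 *        #include <stdio.h>
 *
 *        u_long sendspace;
 *        size_t len = sizeof(sendspace);
 *
 *        if (sysctlbyname("net.local.stream.sendspace", &sendspace, &len,
 *            NULL, 0) == 0)
 *                printf("%lu\n", sendspace);
 */
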
  194 /*
  195  * Locking and synchronization:
  196  *
  197  * Several types of locks exist in the local domain socket implementation:
  198  * - a global linkage lock
  199  * - a global connection list lock
  200  * - the mtxpool lock
  201  * - per-unpcb mutexes
  202  *
  203  * The linkage lock protects the global socket lists, the generation number
  204  * counter and garbage collector state.
  205  *
  206  * The connection list lock protects the list of referring sockets in a datagram
  207  * socket PCB.  This lock is also overloaded to protect a global list of
  208  * sockets whose buffers contain socket references in the form of SCM_RIGHTS
  209  * messages.  To avoid recursion, such references are released by a dedicated
  210  * thread.
  211  *
  212  * The mtxpool lock protects the vnode from being modified while referenced.
  213  * Lock ordering rules require that it be acquired before any PCB locks.
  214  *
  215  * The unpcb lock (unp_mtx) protects the most commonly referenced fields in the
  216  * unpcb.  This includes the unp_conn field, which either links two connected
  217  * PCBs together (for connected socket types) or points at the destination
  218  * socket (for connectionless socket types).  The operations of creating or
  219  * destroying a connection therefore involve locking multiple PCBs.  To avoid
  220  * lock order reversals, in some cases this involves dropping a PCB lock and
  221  * using a reference counter to maintain liveness.
  222  *
  223  * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
  224  * allocated in pru_attach() and freed in pru_detach().  The validity of that
  225  * pointer is an invariant, so no lock is required to dereference the so_pcb
  226  * pointer if a valid socket reference is held by the caller.  In practice,
  227  * this is always true during operations performed on a socket.  Each unpcb
  228  * has a back-pointer to its socket, unp_socket, which will be stable under
  229  * the same circumstances.
  230  *
  231  * This pointer may only be safely dereferenced as long as a valid reference
  232  * to the unpcb is held.  Typically, this reference will be from the socket,
  233  * or from another unpcb when the referring unpcb's lock is held (in order
  234  * that the reference not be invalidated during use).  For example, to follow
  235  * unp->unp_conn->unp_socket, you need to hold a lock on unp_conn to guarantee
  236  * that detach is not run clearing unp_socket.
  237  *
  238  * Blocking with UNIX domain sockets is a tricky issue: unlike most network
  239  * protocols, bind() is a non-atomic operation, and connect() may need to
  240  * sleep in the protocol while waiting on local or distributed file systems.
  241  * We try to separate "lookup" operations, which
  242  * may sleep, and the IPC operations themselves, which typically can occur
  243  * with relative atomicity as locks can be held over the entire operation.
  244  *
  245  * Another tricky issue is simultaneous multi-threaded or multi-process
  246  * access to a single UNIX domain socket.  These are handled by the flags
  247  * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
  248  * binding, both of which involve dropping UNIX domain socket locks in order
  249  * to perform namei() and other file system operations.
  250  */
  251 static struct rwlock    unp_link_rwlock;
  252 static struct mtx       unp_defers_lock;
  253 
  254 #define UNP_LINK_LOCK_INIT()            rw_init(&unp_link_rwlock,       \
  255                                             "unp_link_rwlock")
  256 
  257 #define UNP_LINK_LOCK_ASSERT()          rw_assert(&unp_link_rwlock,     \
  258                                             RA_LOCKED)
  259 #define UNP_LINK_UNLOCK_ASSERT()        rw_assert(&unp_link_rwlock,     \
  260                                             RA_UNLOCKED)
  261 
  262 #define UNP_LINK_RLOCK()                rw_rlock(&unp_link_rwlock)
  263 #define UNP_LINK_RUNLOCK()              rw_runlock(&unp_link_rwlock)
  264 #define UNP_LINK_WLOCK()                rw_wlock(&unp_link_rwlock)
  265 #define UNP_LINK_WUNLOCK()              rw_wunlock(&unp_link_rwlock)
  266 #define UNP_LINK_WLOCK_ASSERT()         rw_assert(&unp_link_rwlock,     \
  267                                             RA_WLOCKED)
  268 #define UNP_LINK_WOWNED()               rw_wowned(&unp_link_rwlock)
  269 
  270 #define UNP_DEFERRED_LOCK_INIT()        mtx_init(&unp_defers_lock, \
  271                                             "unp_defer", NULL, MTX_DEF)
  272 #define UNP_DEFERRED_LOCK()             mtx_lock(&unp_defers_lock)
  273 #define UNP_DEFERRED_UNLOCK()           mtx_unlock(&unp_defers_lock)
  274 
  275 #define UNP_REF_LIST_LOCK()             UNP_DEFERRED_LOCK();
  276 #define UNP_REF_LIST_UNLOCK()           UNP_DEFERRED_UNLOCK();
  277 
  278 #define UNP_PCB_LOCK_INIT(unp)          mtx_init(&(unp)->unp_mtx,       \
  279                                             "unp", "unp",       \
  280                                             MTX_DUPOK|MTX_DEF)
  281 #define UNP_PCB_LOCK_DESTROY(unp)       mtx_destroy(&(unp)->unp_mtx)
  282 #define UNP_PCB_LOCKPTR(unp)            (&(unp)->unp_mtx)
  283 #define UNP_PCB_LOCK(unp)               mtx_lock(&(unp)->unp_mtx)
  284 #define UNP_PCB_TRYLOCK(unp)            mtx_trylock(&(unp)->unp_mtx)
  285 #define UNP_PCB_UNLOCK(unp)             mtx_unlock(&(unp)->unp_mtx)
  286 #define UNP_PCB_OWNED(unp)              mtx_owned(&(unp)->unp_mtx)
  287 #define UNP_PCB_LOCK_ASSERT(unp)        mtx_assert(&(unp)->unp_mtx, MA_OWNED)
  288 #define UNP_PCB_UNLOCK_ASSERT(unp)      mtx_assert(&(unp)->unp_mtx, MA_NOTOWNED)
  289 
  290 static int      uipc_connect2(struct socket *, struct socket *);
  291 static int      uipc_ctloutput(struct socket *, struct sockopt *);
  292 static int      unp_connect(struct socket *, struct sockaddr *,
  293                     struct thread *);
  294 static int      unp_connectat(int, struct socket *, struct sockaddr *,
  295                     struct thread *);
  296 static int      unp_connect2(struct socket *so, struct socket *so2, int);
  297 static void     unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
  298 static void     unp_dispose(struct socket *so);
  299 static void     unp_dispose_mbuf(struct mbuf *);
  300 static void     unp_shutdown(struct unpcb *);
  301 static void     unp_drop(struct unpcb *);
  302 static void     unp_gc(__unused void *, int);
  303 static void     unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
  304 static void     unp_discard(struct file *);
  305 static void     unp_freerights(struct filedescent **, int);
  306 static void     unp_init(void);
  307 static int      unp_internalize(struct mbuf **, struct thread *);
  308 static void     unp_internalize_fp(struct file *);
  309 static int      unp_externalize(struct mbuf *, struct mbuf **, int);
  310 static int      unp_externalize_fp(struct file *);
  311 static struct mbuf      *unp_addsockcred(struct thread *, struct mbuf *, int);
  312 static void     unp_process_defers(void * __unused, int);
  313 
  314 static void
  315 unp_pcb_hold(struct unpcb *unp)
  316 {
  317         u_int old __unused;
  318 
  319         old = refcount_acquire(&unp->unp_refcount);
  320         KASSERT(old > 0, ("%s: unpcb %p has no references", __func__, unp));
  321 }
  322 
  323 static __result_use_check bool
  324 unp_pcb_rele(struct unpcb *unp)
  325 {
  326         bool ret;
  327 
  328         UNP_PCB_LOCK_ASSERT(unp);
  329 
  330         if ((ret = refcount_release(&unp->unp_refcount))) {
  331                 UNP_PCB_UNLOCK(unp);
  332                 UNP_PCB_LOCK_DESTROY(unp);
  333                 uma_zfree(unp_zone, unp);
  334         }
  335         return (ret);
  336 }
  337 
  338 static void
  339 unp_pcb_rele_notlast(struct unpcb *unp)
  340 {
  341         bool ret __unused;
  342 
  343         ret = refcount_release(&unp->unp_refcount);
  344         KASSERT(!ret, ("%s: unpcb %p has no references", __func__, unp));
  345 }
  346 
  347 static void
  348 unp_pcb_lock_pair(struct unpcb *unp, struct unpcb *unp2)
  349 {
  350         UNP_PCB_UNLOCK_ASSERT(unp);
  351         UNP_PCB_UNLOCK_ASSERT(unp2);
  352 
  353         if (unp == unp2) {
  354                 UNP_PCB_LOCK(unp);
  355         } else if ((uintptr_t)unp2 > (uintptr_t)unp) {
  356                 UNP_PCB_LOCK(unp);
  357                 UNP_PCB_LOCK(unp2);
  358         } else {
  359                 UNP_PCB_LOCK(unp2);
  360                 UNP_PCB_LOCK(unp);
  361         }
  362 }
  363 
  364 static void
  365 unp_pcb_unlock_pair(struct unpcb *unp, struct unpcb *unp2)
  366 {
  367         UNP_PCB_UNLOCK(unp);
  368         if (unp != unp2)
  369                 UNP_PCB_UNLOCK(unp2);
  370 }
  371 
  372 /*
  373  * Try to lock the connected peer of an already locked socket.  In some cases
  374  * this requires that we unlock the current socket.  The pairbusy counter is
  375  * used to block concurrent connection attempts while the lock is dropped.  The
  376  * caller must be careful to revalidate PCB state.
  377  */
  378 static struct unpcb *
  379 unp_pcb_lock_peer(struct unpcb *unp)
  380 {
  381         struct unpcb *unp2;
  382 
  383         UNP_PCB_LOCK_ASSERT(unp);
  384         unp2 = unp->unp_conn;
  385         if (unp2 == NULL)
  386                 return (NULL);
  387         if (__predict_false(unp == unp2))
  388                 return (unp);
  389 
  390         UNP_PCB_UNLOCK_ASSERT(unp2);
  391 
  392         if (__predict_true(UNP_PCB_TRYLOCK(unp2)))
  393                 return (unp2);
  394         if ((uintptr_t)unp2 > (uintptr_t)unp) {
  395                 UNP_PCB_LOCK(unp2);
  396                 return (unp2);
  397         }
  398         unp->unp_pairbusy++;
  399         unp_pcb_hold(unp2);
  400         UNP_PCB_UNLOCK(unp);
  401 
  402         UNP_PCB_LOCK(unp2);
  403         UNP_PCB_LOCK(unp);
  404         KASSERT(unp->unp_conn == unp2 || unp->unp_conn == NULL,
  405             ("%s: socket %p was reconnected", __func__, unp));
  406         if (--unp->unp_pairbusy == 0 && (unp->unp_flags & UNP_WAITING) != 0) {
  407                 unp->unp_flags &= ~UNP_WAITING;
  408                 wakeup(unp);
  409         }
  410         if (unp_pcb_rele(unp2)) {
  411                 /* unp2 is unlocked. */
  412                 return (NULL);
  413         }
  414         if (unp->unp_conn == NULL) {
  415                 UNP_PCB_UNLOCK(unp2);
  416                 return (NULL);
  417         }
  418         return (unp2);
  419 }
  420 
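/*
 * A sketch of the typical caller pattern for the helper above, as used by
 * uipc_close() and uipc_disconnect() later in this file: lock the local PCB,
 * try to lock its peer, and remember that the connection may have gone away
 * while the local lock was transiently dropped.
 *
 *        UNP_PCB_LOCK(unp);
 *        if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
 *                ... operate on the connected pair ...
 *                unp_pcb_unlock_pair(unp, unp2);
 *        } else
 *                UNP_PCB_UNLOCK(unp);
 */
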
  421 /*
  422  * Definitions of protocols supported in the LOCAL domain.
  423  */
  424 static struct domain localdomain;
  425 static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream;
  426 static struct pr_usrreqs uipc_usrreqs_seqpacket;
  427 static struct protosw localsw[] = {
  428 {
  429         .pr_type =              SOCK_STREAM,
  430         .pr_domain =            &localdomain,
  431         .pr_flags =             PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
  432         .pr_ctloutput =         &uipc_ctloutput,
  433         .pr_usrreqs =           &uipc_usrreqs_stream
  434 },
  435 {
  436         .pr_type =              SOCK_DGRAM,
  437         .pr_domain =            &localdomain,
  438         .pr_flags =             PR_ATOMIC|PR_ADDR|PR_RIGHTS,
  439         .pr_ctloutput =         &uipc_ctloutput,
  440         .pr_usrreqs =           &uipc_usrreqs_dgram
  441 },
  442 {
  443         .pr_type =              SOCK_SEQPACKET,
  444         .pr_domain =            &localdomain,
  445 
  446         /*
  447          * XXXRW: For now, PR_ADDR because soreceive will bump into them
  448          * due to our use of sbappendaddr.  A new sbappend variant is needed
  449          * that supports both atomic record writes and control data.
  450          */
  451         .pr_flags =             PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD|
  452                                     PR_RIGHTS,
  453         .pr_ctloutput =         &uipc_ctloutput,
  454         .pr_usrreqs =           &uipc_usrreqs_seqpacket,
  455 },
  456 };
  457 
  458 static struct domain localdomain = {
  459         .dom_family =           AF_LOCAL,
  460         .dom_name =             "local",
  461         .dom_init =             unp_init,
  462         .dom_externalize =      unp_externalize,
  463         .dom_dispose =          unp_dispose,
  464         .dom_protosw =          localsw,
  465         .dom_protoswNPROTOSW =  &localsw[nitems(localsw)]
  466 };
  467 DOMAIN_SET(local);
  468 
  469 static void
  470 uipc_abort(struct socket *so)
  471 {
  472         struct unpcb *unp, *unp2;
  473 
  474         unp = sotounpcb(so);
  475         KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
  476         UNP_PCB_UNLOCK_ASSERT(unp);
  477 
  478         UNP_PCB_LOCK(unp);
  479         unp2 = unp->unp_conn;
  480         if (unp2 != NULL) {
  481                 unp_pcb_hold(unp2);
  482                 UNP_PCB_UNLOCK(unp);
  483                 unp_drop(unp2);
  484         } else
  485                 UNP_PCB_UNLOCK(unp);
  486 }
  487 
  488 static int
  489 uipc_accept(struct socket *so, struct sockaddr **nam)
  490 {
  491         struct unpcb *unp, *unp2;
  492         const struct sockaddr *sa;
  493 
  494         /*
  495          * Pass back name of connected socket, if it was bound and we are
  496          * still connected (our peer may have closed already!).
  497          */
  498         unp = sotounpcb(so);
  499         KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));
  500 
  501         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  502         UNP_PCB_LOCK(unp);
  503         unp2 = unp_pcb_lock_peer(unp);
  504         if (unp2 != NULL && unp2->unp_addr != NULL)
  505                 sa = (struct sockaddr *)unp2->unp_addr;
  506         else
  507                 sa = &sun_noname;
  508         bcopy(sa, *nam, sa->sa_len);
  509         if (unp2 != NULL)
  510                 unp_pcb_unlock_pair(unp, unp2);
  511         else
  512                 UNP_PCB_UNLOCK(unp);
  513         return (0);
  514 }
  515 
  516 static int
  517 uipc_attach(struct socket *so, int proto, struct thread *td)
  518 {
  519         u_long sendspace, recvspace;
  520         struct unpcb *unp;
  521         int error;
  522         bool locked;
  523 
  524         KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
  525         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  526                 switch (so->so_type) {
  527                 case SOCK_STREAM:
  528                         sendspace = unpst_sendspace;
  529                         recvspace = unpst_recvspace;
  530                         break;
  531 
  532                 case SOCK_DGRAM:
  533                         sendspace = unpdg_sendspace;
  534                         recvspace = unpdg_recvspace;
  535                         break;
  536 
  537                 case SOCK_SEQPACKET:
  538                         sendspace = unpsp_sendspace;
  539                         recvspace = unpsp_recvspace;
  540                         break;
  541 
  542                 default:
  543                         panic("uipc_attach");
  544                 }
  545                 error = soreserve(so, sendspace, recvspace);
  546                 if (error)
  547                         return (error);
  548         }
  549         unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
  550         if (unp == NULL)
  551                 return (ENOBUFS);
  552         LIST_INIT(&unp->unp_refs);
  553         UNP_PCB_LOCK_INIT(unp);
  554         unp->unp_socket = so;
  555         so->so_pcb = unp;
  556         refcount_init(&unp->unp_refcount, 1);
  557 
  558         if ((locked = UNP_LINK_WOWNED()) == false)
  559                 UNP_LINK_WLOCK();
  560 
  561         unp->unp_gencnt = ++unp_gencnt;
  562         unp->unp_ino = ++unp_ino;
  563         unp_count++;
  564         switch (so->so_type) {
  565         case SOCK_STREAM:
  566                 LIST_INSERT_HEAD(&unp_shead, unp, unp_link);
  567                 break;
  568 
  569         case SOCK_DGRAM:
  570                 LIST_INSERT_HEAD(&unp_dhead, unp, unp_link);
  571                 break;
  572 
  573         case SOCK_SEQPACKET:
  574                 LIST_INSERT_HEAD(&unp_sphead, unp, unp_link);
  575                 break;
  576 
  577         default:
  578                 panic("uipc_attach");
  579         }
  580 
  581         if (locked == false)
  582                 UNP_LINK_WUNLOCK();
  583 
  584         return (0);
  585 }
  586 
  587 static int
  588 uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
  589 {
  590         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  591         struct vattr vattr;
  592         int error, namelen;
  593         struct nameidata nd;
  594         struct unpcb *unp;
  595         struct vnode *vp;
  596         struct mount *mp;
  597         cap_rights_t rights;
  598         char *buf;
  599 
  600         if (nam->sa_family != AF_UNIX)
  601                 return (EAFNOSUPPORT);
  602 
  603         unp = sotounpcb(so);
  604         KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
  605 
  606         if (soun->sun_len > sizeof(struct sockaddr_un))
  607                 return (EINVAL);
  608         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  609         if (namelen <= 0)
  610                 return (EINVAL);
  611 
  612         /*
  613          * We don't allow simultaneous bind() calls on a single UNIX domain
  614          * socket, so flag in-progress operations, and return an error if an
  615          * operation is already in progress.
  616          *
  617          * Historically, we have not allowed a socket to be rebound, so this
  618          * also returns an error.  Not allowing re-binding simplifies the
  619          * implementation and avoids a great many possible failure modes.
  620          */
  621         UNP_PCB_LOCK(unp);
  622         if (unp->unp_vnode != NULL) {
  623                 UNP_PCB_UNLOCK(unp);
  624                 return (EINVAL);
  625         }
  626         if (unp->unp_flags & UNP_BINDING) {
  627                 UNP_PCB_UNLOCK(unp);
  628                 return (EALREADY);
  629         }
  630         unp->unp_flags |= UNP_BINDING;
  631         UNP_PCB_UNLOCK(unp);
  632 
  633         buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
  634         bcopy(soun->sun_path, buf, namelen);
  635         buf[namelen] = 0;
  636 
  637 restart:
  638         NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
  639             UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_BINDAT),
  640             td);
  641 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  642         error = namei(&nd);
  643         if (error)
  644                 goto error;
  645         vp = nd.ni_vp;
  646         if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
  647                 NDFREE(&nd, NDF_ONLY_PNBUF);
  648                 if (nd.ni_dvp == vp)
  649                         vrele(nd.ni_dvp);
  650                 else
  651                         vput(nd.ni_dvp);
  652                 if (vp != NULL) {
  653                         vrele(vp);
  654                         error = EADDRINUSE;
  655                         goto error;
  656                 }
  657                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
  658                 if (error)
  659                         goto error;
  660                 goto restart;
  661         }
  662         VATTR_NULL(&vattr);
  663         vattr.va_type = VSOCK;
  664         vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_pd->pd_cmask);
  665 #ifdef MAC
  666         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  667             &vattr);
  668 #endif
  669         if (error == 0)
  670                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  671         NDFREE(&nd, NDF_ONLY_PNBUF);
  672         if (error) {
  673                 VOP_VPUT_PAIR(nd.ni_dvp, NULL, true);
  674                 vn_finished_write(mp);
  675                 if (error == ERELOOKUP)
  676                         goto restart;
  677                 goto error;
  678         }
  679         vp = nd.ni_vp;
  680         ASSERT_VOP_ELOCKED(vp, "uipc_bind");
  681         soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
  682 
  683         UNP_PCB_LOCK(unp);
  684         VOP_UNP_BIND(vp, unp);
  685         unp->unp_vnode = vp;
  686         unp->unp_addr = soun;
  687         unp->unp_flags &= ~UNP_BINDING;
  688         UNP_PCB_UNLOCK(unp);
  689         vref(vp);
  690         VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
  691         vn_finished_write(mp);
  692         free(buf, M_TEMP);
  693         return (0);
  694 
  695 error:
  696         UNP_PCB_LOCK(unp);
  697         unp->unp_flags &= ~UNP_BINDING;
  698         UNP_PCB_UNLOCK(unp);
  699         free(buf, M_TEMP);
  700         return (error);
  701 }
  702 
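/*
 * For illustration, a userspace sketch of the idiom implied by the
 * EADDRINUSE handling above: an existing file system node is never adopted,
 * so servers normally remove any stale node before binding.  The path
 * "/var/run/example.sock" is only an example; error handling is omitted.
 *
 *        #include <sys/socket.h>
 *        #include <sys/un.h>
 *        #include <string.h>
 *        #include <unistd.h>
 *
 *        struct sockaddr_un sun;
 *        int s = socket(PF_LOCAL, SOCK_STREAM, 0);
 *
 *        memset(&sun, 0, sizeof(sun));
 *        sun.sun_family = AF_LOCAL;
 *        (void)strlcpy(sun.sun_path, "/var/run/example.sock",
 *            sizeof(sun.sun_path));
 *        sun.sun_len = SUN_LEN(&sun);
 *        (void)unlink(sun.sun_path);
 *        if (bind(s, (struct sockaddr *)&sun, SUN_LEN(&sun)) == 0)
 *                (void)listen(s, 5);
 */
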
  703 static int
  704 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  705 {
  706 
  707         return (uipc_bindat(AT_FDCWD, so, nam, td));
  708 }
  709 
  710 static int
  711 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  712 {
  713         int error;
  714 
  715         KASSERT(td == curthread, ("uipc_connect: td != curthread"));
  716         error = unp_connect(so, nam, td);
  717         return (error);
  718 }
  719 
  720 static int
  721 uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
  722     struct thread *td)
  723 {
  724         int error;
  725 
  726         KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
  727         error = unp_connectat(fd, so, nam, td);
  728         return (error);
  729 }
  730 
  731 static void
  732 uipc_close(struct socket *so)
  733 {
  734         struct unpcb *unp, *unp2;
  735         struct vnode *vp = NULL;
  736         struct mtx *vplock;
  737 
  738         unp = sotounpcb(so);
  739         KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
  740 
  741         vplock = NULL;
  742         if ((vp = unp->unp_vnode) != NULL) {
  743                 vplock = mtx_pool_find(mtxpool_sleep, vp);
  744                 mtx_lock(vplock);
  745         }
  746         UNP_PCB_LOCK(unp);
  747         if (vp && unp->unp_vnode == NULL) {
  748                 mtx_unlock(vplock);
  749                 vp = NULL;
  750         }
  751         if (vp != NULL) {
  752                 VOP_UNP_DETACH(vp);
  753                 unp->unp_vnode = NULL;
  754         }
  755         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
  756                 unp_disconnect(unp, unp2);
  757         else
  758                 UNP_PCB_UNLOCK(unp);
  759         if (vp) {
  760                 mtx_unlock(vplock);
  761                 vrele(vp);
  762         }
  763 }
  764 
  765 static int
  766 uipc_connect2(struct socket *so1, struct socket *so2)
  767 {
  768         struct unpcb *unp, *unp2;
  769         int error;
  770 
  771         unp = so1->so_pcb;
  772         KASSERT(unp != NULL, ("uipc_connect2: unp == NULL"));
  773         unp2 = so2->so_pcb;
  774         KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL"));
  775         unp_pcb_lock_pair(unp, unp2);
  776         error = unp_connect2(so1, so2, PRU_CONNECT2);
  777         unp_pcb_unlock_pair(unp, unp2);
  778         return (error);
  779 }
  780 
  781 static void
  782 uipc_detach(struct socket *so)
  783 {
  784         struct unpcb *unp, *unp2;
  785         struct mtx *vplock;
  786         struct vnode *vp;
  787         int local_unp_rights;
  788 
  789         unp = sotounpcb(so);
  790         KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
  791 
  792         vp = NULL;
  793         vplock = NULL;
  794 
  795         SOCK_LOCK(so);
  796         if (!SOLISTENING(so)) {
  797                 /*
  798                  * Once the socket is removed from the global lists,
  799                  * uipc_ready() will not be able to locate its socket buffer, so
  800                  * clear the buffer now.  At this point internalized rights have
  801                  * already been disposed of.
  802                  */
  803                 sbrelease(&so->so_rcv, so);
  804         }
  805         SOCK_UNLOCK(so);
  806 
  807         UNP_LINK_WLOCK();
  808         LIST_REMOVE(unp, unp_link);
  809         if (unp->unp_gcflag & UNPGC_DEAD)
  810                 LIST_REMOVE(unp, unp_dead);
  811         unp->unp_gencnt = ++unp_gencnt;
  812         --unp_count;
  813         UNP_LINK_WUNLOCK();
  814 
  815         UNP_PCB_UNLOCK_ASSERT(unp);
  816  restart:
  817         if ((vp = unp->unp_vnode) != NULL) {
  818                 vplock = mtx_pool_find(mtxpool_sleep, vp);
  819                 mtx_lock(vplock);
  820         }
  821         UNP_PCB_LOCK(unp);
  822         if (unp->unp_vnode != vp && unp->unp_vnode != NULL) {
  823                 if (vplock)
  824                         mtx_unlock(vplock);
  825                 UNP_PCB_UNLOCK(unp);
  826                 goto restart;
  827         }
  828         if ((vp = unp->unp_vnode) != NULL) {
  829                 VOP_UNP_DETACH(vp);
  830                 unp->unp_vnode = NULL;
  831         }
  832         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
  833                 unp_disconnect(unp, unp2);
  834         else
  835                 UNP_PCB_UNLOCK(unp);
  836 
  837         UNP_REF_LIST_LOCK();
  838         while (!LIST_EMPTY(&unp->unp_refs)) {
  839                 struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
  840 
  841                 unp_pcb_hold(ref);
  842                 UNP_REF_LIST_UNLOCK();
  843 
  844                 MPASS(ref != unp);
  845                 UNP_PCB_UNLOCK_ASSERT(ref);
  846                 unp_drop(ref);
  847                 UNP_REF_LIST_LOCK();
  848         }
  849         UNP_REF_LIST_UNLOCK();
  850 
  851         UNP_PCB_LOCK(unp);
  852         local_unp_rights = unp_rights;
  853         unp->unp_socket->so_pcb = NULL;
  854         unp->unp_socket = NULL;
  855         free(unp->unp_addr, M_SONAME);
  856         unp->unp_addr = NULL;
  857         if (!unp_pcb_rele(unp))
  858                 UNP_PCB_UNLOCK(unp);
  859         if (vp) {
  860                 mtx_unlock(vplock);
  861                 vrele(vp);
  862         }
  863         if (local_unp_rights)
  864                 taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
  865 }
  866 
  867 static int
  868 uipc_disconnect(struct socket *so)
  869 {
  870         struct unpcb *unp, *unp2;
  871 
  872         unp = sotounpcb(so);
  873         KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
  874 
  875         UNP_PCB_LOCK(unp);
  876         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
  877                 unp_disconnect(unp, unp2);
  878         else
  879                 UNP_PCB_UNLOCK(unp);
  880         return (0);
  881 }
  882 
  883 static int
  884 uipc_listen(struct socket *so, int backlog, struct thread *td)
  885 {
  886         struct unpcb *unp;
  887         int error;
  888 
  889         if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET)
  890                 return (EOPNOTSUPP);
  891 
  892         unp = sotounpcb(so);
  893         KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));
  894 
  895         UNP_PCB_LOCK(unp);
  896         if (unp->unp_vnode == NULL) {
  897                 /* Already connected or not bound to an address. */
  898                 error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
  899                 UNP_PCB_UNLOCK(unp);
  900                 return (error);
  901         }
  902 
  903         SOCK_LOCK(so);
  904         error = solisten_proto_check(so);
  905         if (error == 0) {
  906                 cru2xt(td, &unp->unp_peercred);
  907                 solisten_proto(so, backlog);
  908         }
  909         SOCK_UNLOCK(so);
  910         UNP_PCB_UNLOCK(unp);
  911         return (error);
  912 }
  913 
  914 static int
  915 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  916 {
  917         struct unpcb *unp, *unp2;
  918         const struct sockaddr *sa;
  919 
  920         unp = sotounpcb(so);
  921         KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));
  922 
  923         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  924         UNP_LINK_RLOCK();
  925         /*
   926          * XXX: It seems that this test always fails even when the connection
   927          * is established, so the else clause below is added as a workaround to
   928          * return a PF_LOCAL sockaddr.
  929          */
  930         unp2 = unp->unp_conn;
  931         if (unp2 != NULL) {
  932                 UNP_PCB_LOCK(unp2);
  933                 if (unp2->unp_addr != NULL)
  934                         sa = (struct sockaddr *) unp2->unp_addr;
  935                 else
  936                         sa = &sun_noname;
  937                 bcopy(sa, *nam, sa->sa_len);
  938                 UNP_PCB_UNLOCK(unp2);
  939         } else {
  940                 sa = &sun_noname;
  941                 bcopy(sa, *nam, sa->sa_len);
  942         }
  943         UNP_LINK_RUNLOCK();
  944         return (0);
  945 }
  946 
  947 static int
  948 uipc_rcvd(struct socket *so, int flags)
  949 {
  950         struct unpcb *unp, *unp2;
  951         struct socket *so2;
  952         u_int mbcnt, sbcc;
  953 
  954         unp = sotounpcb(so);
  955         KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
  956         KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
  957             ("%s: socktype %d", __func__, so->so_type));
  958 
  959         /*
  960          * Adjust backpressure on sender and wakeup any waiting to write.
  961          *
  962          * The unp lock is acquired to maintain the validity of the unp_conn
  963          * pointer; no lock on unp2 is required as unp2->unp_socket will be
  964          * static as long as we don't permit unp2 to disconnect from unp,
  965          * which is prevented by the lock on unp.  We cache values from
  966          * so_rcv to avoid holding the so_rcv lock over the entire
  967          * transaction on the remote so_snd.
  968          */
  969         SOCKBUF_LOCK(&so->so_rcv);
  970         mbcnt = so->so_rcv.sb_mbcnt;
  971         sbcc = sbavail(&so->so_rcv);
  972         SOCKBUF_UNLOCK(&so->so_rcv);
  973         /*
  974          * There is a benign race condition at this point.  If we're planning to
  975          * clear SB_STOP, but uipc_send is called on the connected socket at
  976          * this instant, it might add data to the sockbuf and set SB_STOP.  Then
  977          * we would erroneously clear SB_STOP below, even though the sockbuf is
  978          * full.  The race is benign because the only ill effect is to allow the
  979          * sockbuf to exceed its size limit, and the size limits are not
  980          * strictly guaranteed anyway.
  981          */
  982         UNP_PCB_LOCK(unp);
  983         unp2 = unp->unp_conn;
  984         if (unp2 == NULL) {
  985                 UNP_PCB_UNLOCK(unp);
  986                 return (0);
  987         }
  988         so2 = unp2->unp_socket;
  989         SOCKBUF_LOCK(&so2->so_snd);
  990         if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
  991                 so2->so_snd.sb_flags &= ~SB_STOP;
  992         sowwakeup_locked(so2);
  993         UNP_PCB_UNLOCK(unp);
  994         return (0);
  995 }
  996 
  997 static int
  998 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  999     struct mbuf *control, struct thread *td)
 1000 {
 1001         struct unpcb *unp, *unp2;
 1002         struct socket *so2;
 1003         u_int mbcnt, sbcc;
 1004         int error;
 1005 
 1006         unp = sotounpcb(so);
 1007         KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
 1008         KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM ||
 1009             so->so_type == SOCK_SEQPACKET,
 1010             ("%s: socktype %d", __func__, so->so_type));
 1011 
 1012         error = 0;
 1013         if (flags & PRUS_OOB) {
 1014                 error = EOPNOTSUPP;
 1015                 goto release;
 1016         }
 1017         if (control != NULL && (error = unp_internalize(&control, td)))
 1018                 goto release;
 1019 
 1020         unp2 = NULL;
 1021         switch (so->so_type) {
 1022         case SOCK_DGRAM:
 1023         {
 1024                 const struct sockaddr *from;
 1025 
 1026                 if (nam != NULL) {
 1027                         error = unp_connect(so, nam, td);
 1028                         if (error != 0)
 1029                                 break;
 1030                 }
 1031                 UNP_PCB_LOCK(unp);
 1032 
 1033                 /*
 1034                  * Because connect() and send() are non-atomic in a sendto()
 1035                  * with a target address, it's possible that the socket will
 1036                  * have disconnected before the send() can run.  In that case
 1037                  * return the slightly counter-intuitive but otherwise
 1038                  * correct error that the socket is not connected.
 1039                  */
 1040                 unp2 = unp_pcb_lock_peer(unp);
 1041                 if (unp2 == NULL) {
 1042                         UNP_PCB_UNLOCK(unp);
 1043                         error = ENOTCONN;
 1044                         break;
 1045                 }
 1046 
 1047                 if (unp2->unp_flags & UNP_WANTCRED_MASK)
 1048                         control = unp_addsockcred(td, control,
 1049                             unp2->unp_flags);
 1050                 if (unp->unp_addr != NULL)
 1051                         from = (struct sockaddr *)unp->unp_addr;
 1052                 else
 1053                         from = &sun_noname;
 1054                 so2 = unp2->unp_socket;
 1055                 SOCKBUF_LOCK(&so2->so_rcv);
 1056                 if (sbappendaddr_locked(&so2->so_rcv, from, m,
 1057                     control)) {
 1058                         sorwakeup_locked(so2);
 1059                         m = NULL;
 1060                         control = NULL;
 1061                 } else {
 1062                         soroverflow_locked(so2);
 1063                         error = ENOBUFS;
 1064                 }
 1065                 if (nam != NULL)
 1066                         unp_disconnect(unp, unp2);
 1067                 else
 1068                         unp_pcb_unlock_pair(unp, unp2);
 1069                 break;
 1070         }
 1071 
 1072         case SOCK_SEQPACKET:
 1073         case SOCK_STREAM:
 1074                 if ((so->so_state & SS_ISCONNECTED) == 0) {
 1075                         if (nam != NULL) {
 1076                                 error = unp_connect(so, nam, td);
 1077                                 if (error != 0)
 1078                                         break;
 1079                         } else {
 1080                                 error = ENOTCONN;
 1081                                 break;
 1082                         }
 1083                 }
 1084 
 1085                 UNP_PCB_LOCK(unp);
 1086                 if ((unp2 = unp_pcb_lock_peer(unp)) == NULL) {
 1087                         UNP_PCB_UNLOCK(unp);
 1088                         error = ENOTCONN;
 1089                         break;
 1090                 } else if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 1091                         unp_pcb_unlock_pair(unp, unp2);
 1092                         error = EPIPE;
 1093                         break;
 1094                 }
 1095                 UNP_PCB_UNLOCK(unp);
 1096                 if ((so2 = unp2->unp_socket) == NULL) {
 1097                         UNP_PCB_UNLOCK(unp2);
 1098                         error = ENOTCONN;
 1099                         break;
 1100                 }
 1101                 SOCKBUF_LOCK(&so2->so_rcv);
 1102                 if (unp2->unp_flags & UNP_WANTCRED_MASK) {
 1103                         /*
 1104                          * Credentials are passed only once on SOCK_STREAM and
 1105                          * SOCK_SEQPACKET (LOCAL_CREDS => WANTCRED_ONESHOT), or
 1106                          * forever (LOCAL_CREDS_PERSISTENT => WANTCRED_ALWAYS).
 1107                          */
 1108                         control = unp_addsockcred(td, control, unp2->unp_flags);
 1109                         unp2->unp_flags &= ~UNP_WANTCRED_ONESHOT;
 1110                 }
 1111 
 1112                 /*
 1113                  * Send to paired receive port and wake up readers.  Don't
 1114                  * check for space available in the receive buffer if we're
 1115                  * attaching ancillary data; Unix domain sockets only check
 1116                  * for space in the sending sockbuf, and that check is
 1117                  * performed one level up the stack.  At that level we cannot
 1118                  * precisely account for the amount of buffer space used
 1119                  * (e.g., because control messages are not yet internalized).
 1120                  */
 1121                 switch (so->so_type) {
 1122                 case SOCK_STREAM:
 1123                         if (control != NULL) {
 1124                                 sbappendcontrol_locked(&so2->so_rcv, m,
 1125                                     control, flags);
 1126                                 control = NULL;
 1127                         } else
 1128                                 sbappend_locked(&so2->so_rcv, m, flags);
 1129                         break;
 1130 
 1131                 case SOCK_SEQPACKET:
 1132                         if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
 1133                             &sun_noname, m, control))
 1134                                 control = NULL;
 1135                         break;
 1136                 }
 1137 
 1138                 mbcnt = so2->so_rcv.sb_mbcnt;
 1139                 sbcc = sbavail(&so2->so_rcv);
 1140                 if (sbcc)
 1141                         sorwakeup_locked(so2);
 1142                 else
 1143                         SOCKBUF_UNLOCK(&so2->so_rcv);
 1144 
 1145                 /*
 1146                  * The PCB lock on unp2 protects the SB_STOP flag.  Without it,
 1147                  * it would be possible for uipc_rcvd to be called at this
 1148                  * point, drain the receiving sockbuf, clear SB_STOP, and then
 1149                  * we would set SB_STOP below.  That could lead to an empty
  1150          * sockbuf having SB_STOP set.
 1151                  */
 1152                 SOCKBUF_LOCK(&so->so_snd);
 1153                 if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
 1154                         so->so_snd.sb_flags |= SB_STOP;
 1155                 SOCKBUF_UNLOCK(&so->so_snd);
 1156                 UNP_PCB_UNLOCK(unp2);
 1157                 m = NULL;
 1158                 break;
 1159         }
 1160 
 1161         /*
 1162          * PRUS_EOF is equivalent to pru_send followed by pru_shutdown.
 1163          */
 1164         if (flags & PRUS_EOF) {
 1165                 UNP_PCB_LOCK(unp);
 1166                 socantsendmore(so);
 1167                 unp_shutdown(unp);
 1168                 UNP_PCB_UNLOCK(unp);
 1169         }
 1170         if (control != NULL && error != 0)
 1171                 unp_dispose_mbuf(control);
 1172 
 1173 release:
 1174         if (control != NULL)
 1175                 m_freem(control);
 1176         /*
 1177          * In case of PRUS_NOTREADY, uipc_ready() is responsible
 1178          * for freeing memory.
 1179          */   
 1180         if (m != NULL && (flags & PRUS_NOTREADY) == 0)
 1181                 m_freem(m);
 1182         return (error);
 1183 }
 1184 
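/*
 * For illustration, a userspace sketch of the SOCK_DGRAM path in uipc_send()
 * above: a sendto(2) with an explicit destination performs a transient
 * connect, delivers the datagram directly into the peer's receive buffer,
 * then disconnects again.  The path "/var/run/example.sock" is only an
 * example; error handling is omitted.
 *
 *        #include <sys/socket.h>
 *        #include <sys/un.h>
 *        #include <string.h>
 *
 *        struct sockaddr_un sun;
 *        int s = socket(PF_LOCAL, SOCK_DGRAM, 0);
 *
 *        memset(&sun, 0, sizeof(sun));
 *        sun.sun_family = AF_LOCAL;
 *        (void)strlcpy(sun.sun_path, "/var/run/example.sock",
 *            sizeof(sun.sun_path));
 *        sun.sun_len = SUN_LEN(&sun);
 *        (void)sendto(s, "hello", 5, 0, (struct sockaddr *)&sun,
 *            SUN_LEN(&sun));
 */
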
 1185 static bool
 1186 uipc_ready_scan(struct socket *so, struct mbuf *m, int count, int *errorp)
 1187 {
 1188         struct mbuf *mb, *n;
 1189         struct sockbuf *sb;
 1190 
 1191         SOCK_LOCK(so);
 1192         if (SOLISTENING(so)) {
 1193                 SOCK_UNLOCK(so);
 1194                 return (false);
 1195         }
 1196         mb = NULL;
 1197         sb = &so->so_rcv;
 1198         SOCKBUF_LOCK(sb);
 1199         if (sb->sb_fnrdy != NULL) {
 1200                 for (mb = sb->sb_mb, n = mb->m_nextpkt; mb != NULL;) {
 1201                         if (mb == m) {
 1202                                 *errorp = sbready(sb, m, count);
 1203                                 break;
 1204                         }
 1205                         mb = mb->m_next;
 1206                         if (mb == NULL) {
 1207                                 mb = n;
 1208                                 if (mb != NULL)
 1209                                         n = mb->m_nextpkt;
 1210                         }
 1211                 }
 1212         }
 1213         SOCKBUF_UNLOCK(sb);
 1214         SOCK_UNLOCK(so);
 1215         return (mb != NULL);
 1216 }
 1217 
 1218 static int
 1219 uipc_ready(struct socket *so, struct mbuf *m, int count)
 1220 {
 1221         struct unpcb *unp, *unp2;
 1222         struct socket *so2;
 1223         int error, i;
 1224 
 1225         unp = sotounpcb(so);
 1226 
 1227         KASSERT(so->so_type == SOCK_STREAM,
 1228             ("%s: unexpected socket type for %p", __func__, so));
 1229 
 1230         UNP_PCB_LOCK(unp);
 1231         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
 1232                 UNP_PCB_UNLOCK(unp);
 1233                 so2 = unp2->unp_socket;
 1234                 SOCKBUF_LOCK(&so2->so_rcv);
 1235                 if ((error = sbready(&so2->so_rcv, m, count)) == 0)
 1236                         sorwakeup_locked(so2);
 1237                 else
 1238                         SOCKBUF_UNLOCK(&so2->so_rcv);
 1239                 UNP_PCB_UNLOCK(unp2);
 1240                 return (error);
 1241         }
 1242         UNP_PCB_UNLOCK(unp);
 1243 
 1244         /*
 1245          * The receiving socket has been disconnected, but may still be valid.
 1246          * In this case, the now-ready mbufs are still present in its socket
 1247          * buffer, so perform an exhaustive search before giving up and freeing
 1248          * the mbufs.
 1249          */
 1250         UNP_LINK_RLOCK();
 1251         LIST_FOREACH(unp, &unp_shead, unp_link) {
 1252                 if (uipc_ready_scan(unp->unp_socket, m, count, &error))
 1253                         break;
 1254         }
 1255         UNP_LINK_RUNLOCK();
 1256 
 1257         if (unp == NULL) {
 1258                 for (i = 0; i < count; i++)
 1259                         m = m_free(m);
 1260                 error = ECONNRESET;
 1261         }
 1262         return (error);
 1263 }
 1264 
 1265 static int
 1266 uipc_sense(struct socket *so, struct stat *sb)
 1267 {
 1268         struct unpcb *unp;
 1269 
 1270         unp = sotounpcb(so);
 1271         KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
 1272 
 1273         sb->st_blksize = so->so_snd.sb_hiwat;
 1274         sb->st_dev = NODEV;
 1275         sb->st_ino = unp->unp_ino;
 1276         return (0);
 1277 }
 1278 
 1279 static int
 1280 uipc_shutdown(struct socket *so)
 1281 {
 1282         struct unpcb *unp;
 1283 
 1284         unp = sotounpcb(so);
 1285         KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));
 1286 
 1287         UNP_PCB_LOCK(unp);
 1288         socantsendmore(so);
 1289         unp_shutdown(unp);
 1290         UNP_PCB_UNLOCK(unp);
 1291         return (0);
 1292 }
 1293 
 1294 static int
 1295 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 1296 {
 1297         struct unpcb *unp;
 1298         const struct sockaddr *sa;
 1299 
 1300         unp = sotounpcb(so);
 1301         KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL"));
 1302 
 1303         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 1304         UNP_PCB_LOCK(unp);
 1305         if (unp->unp_addr != NULL)
 1306                 sa = (struct sockaddr *) unp->unp_addr;
 1307         else
 1308                 sa = &sun_noname;
 1309         bcopy(sa, *nam, sa->sa_len);
 1310         UNP_PCB_UNLOCK(unp);
 1311         return (0);
 1312 }
 1313 
 1314 static struct pr_usrreqs uipc_usrreqs_dgram = {
 1315         .pru_abort =            uipc_abort,
 1316         .pru_accept =           uipc_accept,
 1317         .pru_attach =           uipc_attach,
 1318         .pru_bind =             uipc_bind,
 1319         .pru_bindat =           uipc_bindat,
 1320         .pru_connect =          uipc_connect,
 1321         .pru_connectat =        uipc_connectat,
 1322         .pru_connect2 =         uipc_connect2,
 1323         .pru_detach =           uipc_detach,
 1324         .pru_disconnect =       uipc_disconnect,
 1325         .pru_listen =           uipc_listen,
 1326         .pru_peeraddr =         uipc_peeraddr,
 1327         .pru_rcvd =             uipc_rcvd,
 1328         .pru_send =             uipc_send,
 1329         .pru_sense =            uipc_sense,
 1330         .pru_shutdown =         uipc_shutdown,
 1331         .pru_sockaddr =         uipc_sockaddr,
 1332         .pru_soreceive =        soreceive_dgram,
 1333         .pru_close =            uipc_close,
 1334 };
 1335 
 1336 static struct pr_usrreqs uipc_usrreqs_seqpacket = {
 1337         .pru_abort =            uipc_abort,
 1338         .pru_accept =           uipc_accept,
 1339         .pru_attach =           uipc_attach,
 1340         .pru_bind =             uipc_bind,
 1341         .pru_bindat =           uipc_bindat,
 1342         .pru_connect =          uipc_connect,
 1343         .pru_connectat =        uipc_connectat,
 1344         .pru_connect2 =         uipc_connect2,
 1345         .pru_detach =           uipc_detach,
 1346         .pru_disconnect =       uipc_disconnect,
 1347         .pru_listen =           uipc_listen,
 1348         .pru_peeraddr =         uipc_peeraddr,
 1349         .pru_rcvd =             uipc_rcvd,
 1350         .pru_send =             uipc_send,
 1351         .pru_sense =            uipc_sense,
 1352         .pru_shutdown =         uipc_shutdown,
 1353         .pru_sockaddr =         uipc_sockaddr,
 1354         .pru_soreceive =        soreceive_generic,      /* XXX: or...? */
 1355         .pru_close =            uipc_close,
 1356 };
 1357 
 1358 static struct pr_usrreqs uipc_usrreqs_stream = {
 1359         .pru_abort =            uipc_abort,
 1360         .pru_accept =           uipc_accept,
 1361         .pru_attach =           uipc_attach,
 1362         .pru_bind =             uipc_bind,
 1363         .pru_bindat =           uipc_bindat,
 1364         .pru_connect =          uipc_connect,
 1365         .pru_connectat =        uipc_connectat,
 1366         .pru_connect2 =         uipc_connect2,
 1367         .pru_detach =           uipc_detach,
 1368         .pru_disconnect =       uipc_disconnect,
 1369         .pru_listen =           uipc_listen,
 1370         .pru_peeraddr =         uipc_peeraddr,
 1371         .pru_rcvd =             uipc_rcvd,
 1372         .pru_send =             uipc_send,
 1373         .pru_ready =            uipc_ready,
 1374         .pru_sense =            uipc_sense,
 1375         .pru_shutdown =         uipc_shutdown,
 1376         .pru_sockaddr =         uipc_sockaddr,
 1377         .pru_soreceive =        soreceive_generic,
 1378         .pru_close =            uipc_close,
 1379 };
 1380 
 1381 static int
 1382 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 1383 {
 1384         struct unpcb *unp;
 1385         struct xucred xu;
 1386         int error, optval;
 1387 
 1388         if (sopt->sopt_level != SOL_LOCAL)
 1389                 return (EINVAL);
 1390 
 1391         unp = sotounpcb(so);
 1392         KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
 1393         error = 0;
 1394         switch (sopt->sopt_dir) {
 1395         case SOPT_GET:
 1396                 switch (sopt->sopt_name) {
 1397                 case LOCAL_PEERCRED:
 1398                         UNP_PCB_LOCK(unp);
 1399                         if (unp->unp_flags & UNP_HAVEPC)
 1400                                 xu = unp->unp_peercred;
 1401                         else {
 1402                                 if (so->so_type == SOCK_STREAM)
 1403                                         error = ENOTCONN;
 1404                                 else
 1405                                         error = EINVAL;
 1406                         }
 1407                         UNP_PCB_UNLOCK(unp);
 1408                         if (error == 0)
 1409                                 error = sooptcopyout(sopt, &xu, sizeof(xu));
 1410                         break;
 1411 
 1412                 case LOCAL_CREDS:
 1413                         /* Unlocked read. */
 1414                         optval = unp->unp_flags & UNP_WANTCRED_ONESHOT ? 1 : 0;
 1415                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 1416                         break;
 1417 
 1418                 case LOCAL_CREDS_PERSISTENT:
 1419                         /* Unlocked read. */
 1420                         optval = unp->unp_flags & UNP_WANTCRED_ALWAYS ? 1 : 0;
 1421                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 1422                         break;
 1423 
 1424                 case LOCAL_CONNWAIT:
 1425                         /* Unlocked read. */
 1426                         optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
 1427                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 1428                         break;
 1429 
 1430                 default:
 1431                         error = EOPNOTSUPP;
 1432                         break;
 1433                 }
 1434                 break;
 1435 
 1436         case SOPT_SET:
 1437                 switch (sopt->sopt_name) {
 1438                 case LOCAL_CREDS:
 1439                 case LOCAL_CREDS_PERSISTENT:
 1440                 case LOCAL_CONNWAIT:
 1441                         error = sooptcopyin(sopt, &optval, sizeof(optval),
 1442                                             sizeof(optval));
 1443                         if (error)
 1444                                 break;
 1445 
 1446 #define OPTSET(bit, exclusive) do {                                     \
 1447         UNP_PCB_LOCK(unp);                                              \
 1448         if (optval) {                                                   \
 1449                 if ((unp->unp_flags & (exclusive)) != 0) {              \
 1450                         UNP_PCB_UNLOCK(unp);                            \
 1451                         error = EINVAL;                                 \
 1452                         break;                                          \
 1453                 }                                                       \
 1454                 unp->unp_flags |= (bit);                                \
 1455         } else                                                          \
 1456                 unp->unp_flags &= ~(bit);                               \
 1457         UNP_PCB_UNLOCK(unp);                                            \
 1458 } while (0)
 1459 
 1460                         switch (sopt->sopt_name) {
 1461                         case LOCAL_CREDS:
 1462                                 OPTSET(UNP_WANTCRED_ONESHOT, UNP_WANTCRED_ALWAYS);
 1463                                 break;
 1464 
 1465                         case LOCAL_CREDS_PERSISTENT:
 1466                                 OPTSET(UNP_WANTCRED_ALWAYS, UNP_WANTCRED_ONESHOT);
 1467                                 break;
 1468 
 1469                         case LOCAL_CONNWAIT:
 1470                                 OPTSET(UNP_CONNWAIT, 0);
 1471                                 break;
 1472 
 1473                         default:
 1474                                 break;
 1475                         }
 1476                         break;
 1477 #undef  OPTSET
 1478                 default:
 1479                         error = ENOPROTOOPT;
 1480                         break;
 1481                 }
 1482                 break;
 1483 
 1484         default:
 1485                 error = EOPNOTSUPP;
 1486                 break;
 1487         }
 1488         return (error);
 1489 }
 1490 
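/*
 * Illustrative userland sketch, not part of this source file: once a stream
 * socket is connected, the peer credentials can be read back through the
 * LOCAL_PEERCRED case of uipc_ctloutput() above.  The function name is a
 * placeholder; error handling is abbreviated.
 *
 *      #include <sys/types.h>
 *      #include <sys/socket.h>
 *      #include <sys/ucred.h>
 *      #include <sys/un.h>
 *      #include <stdio.h>
 *
 *      void
 *      show_peer_cred(int s)
 *      {
 *              struct xucred xuc;
 *              socklen_t len = sizeof(xuc);
 *
 *              if (getsockopt(s, SOL_LOCAL, LOCAL_PEERCRED, &xuc, &len) == 0)
 *                      printf("peer euid %u, %d groups\n",
 *                          (unsigned)xuc.cr_uid, xuc.cr_ngroups);
 *      }
 */
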
 1491 static int
 1492 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 1493 {
 1494 
 1495         return (unp_connectat(AT_FDCWD, so, nam, td));
 1496 }
 1497 
 1498 static int
 1499 unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
 1500     struct thread *td)
 1501 {
 1502         struct mtx *vplock;
 1503         struct sockaddr_un *soun;
 1504         struct vnode *vp;
 1505         struct socket *so2;
 1506         struct unpcb *unp, *unp2, *unp3;
 1507         struct nameidata nd;
 1508         char buf[SOCK_MAXADDRLEN];
 1509         struct sockaddr *sa;
 1510         cap_rights_t rights;
 1511         int error, len;
 1512         bool connreq;
 1513 
 1514         if (nam->sa_family != AF_UNIX)
 1515                 return (EAFNOSUPPORT);
 1516         if (nam->sa_len > sizeof(struct sockaddr_un))
 1517                 return (EINVAL);
 1518         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 1519         if (len <= 0)
 1520                 return (EINVAL);
 1521         soun = (struct sockaddr_un *)nam;
 1522         bcopy(soun->sun_path, buf, len);
 1523         buf[len] = 0;
 1524 
 1525         unp = sotounpcb(so);
 1526         UNP_PCB_LOCK(unp);
 1527         for (;;) {
 1528                 /*
 1529                  * Wait for connection state to stabilize.  If a connection
 1530                  * already exists, give up.  For datagram sockets, which permit
 1531                  * multiple consecutive connect(2) calls, upper layers are
 1532                  * responsible for disconnecting in advance of a subsequent
 1533                  * connect(2), but this is not synchronized with PCB connection
 1534                  * state.
 1535                  *
 1536                  * Also make sure that no threads are currently attempting to
 1537                  * lock the peer socket, to ensure that unp_conn cannot
 1538                  * transition between two valid sockets while locks are dropped.
 1539                  */
 1540                 if (unp->unp_conn != NULL) {
 1541                         UNP_PCB_UNLOCK(unp);
 1542                         return (EISCONN);
 1543                 }
 1544                 if ((unp->unp_flags & UNP_CONNECTING) != 0) {
 1545                         UNP_PCB_UNLOCK(unp);
 1546                         return (EALREADY);
 1547                 }
 1548                 if (unp->unp_pairbusy > 0) {
 1549                         unp->unp_flags |= UNP_WAITING;
 1550                         mtx_sleep(unp, UNP_PCB_LOCKPTR(unp), 0, "unpeer", 0);
 1551                         continue;
 1552                 }
 1553                 break;
 1554         }
 1555         unp->unp_flags |= UNP_CONNECTING;
 1556         UNP_PCB_UNLOCK(unp);
 1557 
 1558         connreq = (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0;
 1559         if (connreq)
 1560                 sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 1561         else
 1562                 sa = NULL;
 1563         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 1564             UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_CONNECTAT),
 1565             td);
 1566         error = namei(&nd);
 1567         if (error)
 1568                 vp = NULL;
 1569         else
 1570                 vp = nd.ni_vp;
 1571         ASSERT_VOP_LOCKED(vp, "unp_connect");
 1572         NDFREE_NOTHING(&nd);
 1573         if (error)
 1574                 goto bad;
 1575 
 1576         if (vp->v_type != VSOCK) {
 1577                 error = ENOTSOCK;
 1578                 goto bad;
 1579         }
 1580 #ifdef MAC
 1581         error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD);
 1582         if (error)
 1583                 goto bad;
 1584 #endif
 1585         error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
 1586         if (error)
 1587                 goto bad;
 1588 
 1589         unp = sotounpcb(so);
 1590         KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
 1591 
 1592         vplock = mtx_pool_find(mtxpool_sleep, vp);
 1593         mtx_lock(vplock);
 1594         VOP_UNP_CONNECT(vp, &unp2);
 1595         if (unp2 == NULL) {
 1596                 error = ECONNREFUSED;
 1597                 goto bad2;
 1598         }
 1599         so2 = unp2->unp_socket;
 1600         if (so->so_type != so2->so_type) {
 1601                 error = EPROTOTYPE;
 1602                 goto bad2;
 1603         }
 1604         if (connreq) {
 1605                 if (SOLISTENING(so2)) {
 1606                         CURVNET_SET(so2->so_vnet);
 1607                         so2 = sonewconn(so2, 0);
 1608                         CURVNET_RESTORE();
 1609                 } else
 1610                         so2 = NULL;
 1611                 if (so2 == NULL) {
 1612                         error = ECONNREFUSED;
 1613                         goto bad2;
 1614                 }
 1615                 unp3 = sotounpcb(so2);
 1616                 unp_pcb_lock_pair(unp2, unp3);
 1617                 if (unp2->unp_addr != NULL) {
 1618                         bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
 1619                         unp3->unp_addr = (struct sockaddr_un *) sa;
 1620                         sa = NULL;
 1621                 }
 1622 
 1623                 unp_copy_peercred(td, unp3, unp, unp2);
 1624 
 1625                 UNP_PCB_UNLOCK(unp2);
 1626                 unp2 = unp3;
 1627 
 1628                 /*
 1629                  * It is safe to block on the PCB lock here since unp2 is
 1630                  * nascent and cannot be connected to any other sockets.
 1631                  */
 1632                 UNP_PCB_LOCK(unp);
 1633 #ifdef MAC
 1634                 mac_socketpeer_set_from_socket(so, so2);
 1635                 mac_socketpeer_set_from_socket(so2, so);
 1636 #endif
 1637         } else {
 1638                 unp_pcb_lock_pair(unp, unp2);
 1639         }
 1640         KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
 1641             sotounpcb(so2) == unp2,
 1642             ("%s: unp2 %p so2 %p", __func__, unp2, so2));
 1643         error = unp_connect2(so, so2, PRU_CONNECT);
 1644         unp_pcb_unlock_pair(unp, unp2);
 1645 bad2:
 1646         mtx_unlock(vplock);
 1647 bad:
 1648         if (vp != NULL) {
 1649                 vput(vp);
 1650         }
 1651         free(sa, M_SONAME);
 1652         UNP_PCB_LOCK(unp);
 1653         KASSERT((unp->unp_flags & UNP_CONNECTING) != 0,
 1654             ("%s: unp %p has UNP_CONNECTING clear", __func__, unp));
 1655         unp->unp_flags &= ~UNP_CONNECTING;
 1656         UNP_PCB_UNLOCK(unp);
 1657         return (error);
 1658 }
 1659 
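/*
 * Illustrative userland sketch, not part of this source file: the client-side
 * connect(2) call that reaches unp_connectat() above with fd == AT_FDCWD.
 * The function name and path argument are placeholders; error handling is
 * abbreviated.
 *
 *      #include <sys/types.h>
 *      #include <sys/socket.h>
 *      #include <sys/un.h>
 *      #include <string.h>
 *      #include <unistd.h>
 *
 *      int
 *      connect_local(const char *path)
 *      {
 *              struct sockaddr_un sun;
 *              int s;
 *
 *              s = socket(PF_LOCAL, SOCK_STREAM, 0);
 *              if (s < 0)
 *                      return (-1);
 *              memset(&sun, 0, sizeof(sun));
 *              sun.sun_family = AF_LOCAL;
 *              sun.sun_len = sizeof(sun);
 *              strlcpy(sun.sun_path, path, sizeof(sun.sun_path));
 *              if (connect(s, (struct sockaddr *)&sun, sizeof(sun)) != 0) {
 *                      close(s);
 *                      return (-1);
 *              }
 *              return (s);
 *      }
 */
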
 1660 /*
 1661  * Set socket peer credentials at connection time.
 1662  *
 1663  * The client's PCB credentials are copied from its process structure.  The
 1664  * server's PCB credentials are copied from the socket on which it called
 1665  * listen(2).  uipc_listen cached that process's credentials at the time.
 1666  */
 1667 void
 1668 unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
 1669     struct unpcb *server_unp, struct unpcb *listen_unp)
 1670 {
 1671         cru2xt(td, &client_unp->unp_peercred);
 1672         client_unp->unp_flags |= UNP_HAVEPC;
 1673 
 1674         memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred,
 1675             sizeof(server_unp->unp_peercred));
 1676         server_unp->unp_flags |= UNP_HAVEPC;
 1677         client_unp->unp_flags |= (listen_unp->unp_flags & UNP_WANTCRED_MASK);
 1678 }
 1679 
 1680 static int
 1681 unp_connect2(struct socket *so, struct socket *so2, int req)
 1682 {
 1683         struct unpcb *unp;
 1684         struct unpcb *unp2;
 1685 
 1686         unp = sotounpcb(so);
 1687         KASSERT(unp != NULL, ("unp_connect2: unp == NULL"));
 1688         unp2 = sotounpcb(so2);
 1689         KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL"));
 1690 
 1691         UNP_PCB_LOCK_ASSERT(unp);
 1692         UNP_PCB_LOCK_ASSERT(unp2);
 1693         KASSERT(unp->unp_conn == NULL,
 1694             ("%s: socket %p is already connected", __func__, unp));
 1695 
 1696         if (so2->so_type != so->so_type)
 1697                 return (EPROTOTYPE);
 1698         unp->unp_conn = unp2;
 1699         unp_pcb_hold(unp2);
 1700         unp_pcb_hold(unp);
 1701         switch (so->so_type) {
 1702         case SOCK_DGRAM:
 1703                 UNP_REF_LIST_LOCK();
 1704                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
 1705                 UNP_REF_LIST_UNLOCK();
 1706                 soisconnected(so);
 1707                 break;
 1708 
 1709         case SOCK_STREAM:
 1710         case SOCK_SEQPACKET:
 1711                 KASSERT(unp2->unp_conn == NULL,
 1712                     ("%s: socket %p is already connected", __func__, unp2));
 1713                 unp2->unp_conn = unp;
 1714                 if (req == PRU_CONNECT &&
 1715                     ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
 1716                         soisconnecting(so);
 1717                 else
 1718                         soisconnected(so);
 1719                 soisconnected(so2);
 1720                 break;
 1721 
 1722         default:
 1723                 panic("unp_connect2");
 1724         }
 1725         return (0);
 1726 }
 1727 
 1728 static void
 1729 unp_disconnect(struct unpcb *unp, struct unpcb *unp2)
 1730 {
 1731         struct socket *so, *so2;
 1732 #ifdef INVARIANTS
 1733         struct unpcb *unptmp;
 1734 #endif
 1735 
 1736         UNP_PCB_LOCK_ASSERT(unp);
 1737         UNP_PCB_LOCK_ASSERT(unp2);
 1738         KASSERT(unp->unp_conn == unp2,
 1739             ("%s: unpcb %p is not connected to %p", __func__, unp, unp2));
 1740 
 1741         unp->unp_conn = NULL;
 1742         so = unp->unp_socket;
 1743         so2 = unp2->unp_socket;
 1744         switch (unp->unp_socket->so_type) {
 1745         case SOCK_DGRAM:
 1746                 UNP_REF_LIST_LOCK();
 1747 #ifdef INVARIANTS
 1748                 LIST_FOREACH(unptmp, &unp2->unp_refs, unp_reflink) {
 1749                         if (unptmp == unp)
 1750                                 break;
 1751                 }
 1752                 KASSERT(unptmp != NULL,
 1753                     ("%s: %p not found in reflist of %p", __func__, unp, unp2));
 1754 #endif
 1755                 LIST_REMOVE(unp, unp_reflink);
 1756                 UNP_REF_LIST_UNLOCK();
 1757                 if (so) {
 1758                         SOCK_LOCK(so);
 1759                         so->so_state &= ~SS_ISCONNECTED;
 1760                         SOCK_UNLOCK(so);
 1761                 }
 1762                 break;
 1763 
 1764         case SOCK_STREAM:
 1765         case SOCK_SEQPACKET:
 1766                 if (so)
 1767                         soisdisconnected(so);
 1768                 MPASS(unp2->unp_conn == unp);
 1769                 unp2->unp_conn = NULL;
 1770                 if (so2)
 1771                         soisdisconnected(so2);
 1772                 break;
 1773         }
 1774 
 1775         if (unp == unp2) {
 1776                 unp_pcb_rele_notlast(unp);
 1777                 if (!unp_pcb_rele(unp))
 1778                         UNP_PCB_UNLOCK(unp);
 1779         } else {
 1780                 if (!unp_pcb_rele(unp))
 1781                         UNP_PCB_UNLOCK(unp);
 1782                 if (!unp_pcb_rele(unp2))
 1783                         UNP_PCB_UNLOCK(unp2);
 1784         }
 1785 }
 1786 
 1787 /*
 1788  * unp_pcblist() walks the global list of struct unpcb's to generate a
 1789  * pointer list, bumping the refcount on each unpcb.  It then copies them out
 1790  * sequentially, validating the generation number on each to see if it has
 1791  * been detached.  All of this is necessary because copyout() may sleep on
 1792  * disk I/O.
 1793  */
 1794 static int
 1795 unp_pcblist(SYSCTL_HANDLER_ARGS)
 1796 {
 1797         struct unpcb *unp, **unp_list;
 1798         unp_gen_t gencnt;
 1799         struct xunpgen *xug;
 1800         struct unp_head *head;
 1801         struct xunpcb *xu;
 1802         u_int i;
 1803         int error, n;
 1804 
 1805         switch ((intptr_t)arg1) {
 1806         case SOCK_STREAM:
 1807                 head = &unp_shead;
 1808                 break;
 1809 
 1810         case SOCK_DGRAM:
 1811                 head = &unp_dhead;
 1812                 break;
 1813 
 1814         case SOCK_SEQPACKET:
 1815                 head = &unp_sphead;
 1816                 break;
 1817 
 1818         default:
 1819                 panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1);
 1820         }
 1821 
 1822         /*
 1823          * Preparing the PCB list is too time-consuming and resource-intensive
 1824          * to repeat for both the sizing pass and the copy-out pass of a request.
 1825          */
 1826         if (req->oldptr == NULL) {
 1827                 n = unp_count;
 1828                 req->oldidx = 2 * (sizeof *xug)
 1829                         + (n + n/8) * sizeof(struct xunpcb);
 1830                 return (0);
 1831         }
 1832 
 1833         if (req->newptr != NULL)
 1834                 return (EPERM);
 1835 
 1836         /*
 1837          * OK, now we're committed to doing something.
 1838          */
 1839         xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK | M_ZERO);
 1840         UNP_LINK_RLOCK();
 1841         gencnt = unp_gencnt;
 1842         n = unp_count;
 1843         UNP_LINK_RUNLOCK();
 1844 
 1845         xug->xug_len = sizeof *xug;
 1846         xug->xug_count = n;
 1847         xug->xug_gen = gencnt;
 1848         xug->xug_sogen = so_gencnt;
 1849         error = SYSCTL_OUT(req, xug, sizeof *xug);
 1850         if (error) {
 1851                 free(xug, M_TEMP);
 1852                 return (error);
 1853         }
 1854 
 1855         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 1856 
 1857         UNP_LINK_RLOCK();
 1858         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 1859              unp = LIST_NEXT(unp, unp_link)) {
 1860                 UNP_PCB_LOCK(unp);
 1861                 if (unp->unp_gencnt <= gencnt) {
 1862                         if (cr_cansee(req->td->td_ucred,
 1863                             unp->unp_socket->so_cred)) {
 1864                                 UNP_PCB_UNLOCK(unp);
 1865                                 continue;
 1866                         }
 1867                         unp_list[i++] = unp;
 1868                         unp_pcb_hold(unp);
 1869                 }
 1870                 UNP_PCB_UNLOCK(unp);
 1871         }
 1872         UNP_LINK_RUNLOCK();
 1873         n = i;                  /* In case we lost some during malloc. */
 1874 
 1875         error = 0;
 1876         xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 1877         for (i = 0; i < n; i++) {
 1878                 unp = unp_list[i];
 1879                 UNP_PCB_LOCK(unp);
 1880                 if (unp_pcb_rele(unp))
 1881                         continue;
 1882 
 1883                 if (unp->unp_gencnt <= gencnt) {
 1884                         xu->xu_len = sizeof *xu;
 1885                         xu->xu_unpp = (uintptr_t)unp;
 1886                         /*
 1887                          * XXX - need more locking here to protect against
 1888                          * connect/disconnect races for SMP.
 1889                          */
 1890                         if (unp->unp_addr != NULL)
 1891                                 bcopy(unp->unp_addr, &xu->xu_addr,
 1892                                       unp->unp_addr->sun_len);
 1893                         else
 1894                                 bzero(&xu->xu_addr, sizeof(xu->xu_addr));
 1895                         if (unp->unp_conn != NULL &&
 1896                             unp->unp_conn->unp_addr != NULL)
 1897                                 bcopy(unp->unp_conn->unp_addr,
 1898                                       &xu->xu_caddr,
 1899                                       unp->unp_conn->unp_addr->sun_len);
 1900                         else
 1901                                 bzero(&xu->xu_caddr, sizeof(xu->xu_caddr));
 1902                         xu->unp_vnode = (uintptr_t)unp->unp_vnode;
 1903                         xu->unp_conn = (uintptr_t)unp->unp_conn;
 1904                         xu->xu_firstref = (uintptr_t)LIST_FIRST(&unp->unp_refs);
 1905                         xu->xu_nextref = (uintptr_t)LIST_NEXT(unp, unp_reflink);
 1906                         xu->unp_gencnt = unp->unp_gencnt;
 1907                         sotoxsocket(unp->unp_socket, &xu->xu_socket);
 1908                         UNP_PCB_UNLOCK(unp);
 1909                         error = SYSCTL_OUT(req, xu, sizeof *xu);
 1910                 } else {
 1911                         UNP_PCB_UNLOCK(unp);
 1912                 }
 1913         }
 1914         free(xu, M_TEMP);
 1915         if (!error) {
 1916                 /*
 1917                  * Give the user an updated idea of our state.  If the
 1918                  * generation differs from what we told her before, she knows
 1919                  * that something happened while we were processing this
 1920                  * request, and it might be necessary to retry.
 1921                  */
 1922                 xug->xug_gen = unp_gencnt;
 1923                 xug->xug_sogen = so_gencnt;
 1924                 xug->xug_count = unp_count;
 1925                 error = SYSCTL_OUT(req, xug, sizeof *xug);
 1926         }
 1927         free(unp_list, M_TEMP);
 1928         free(xug, M_TEMP);
 1929         return (error);
 1930 }
 1931 
 1932 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
 1933     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1934     (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
 1935     "List of active local datagram sockets");
 1936 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
 1937     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1938     (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
 1939     "List of active local stream sockets");
 1940 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist,
 1941     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
 1942     (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
 1943     "List of active local seqpacket sockets");
 1944 
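/*
 * Illustrative userland sketch, not part of this source file: the lists
 * exported by the sysctl nodes above can be fetched by name with
 * sysctlbyname(3), sizing the buffer first as unp_pcblist() expects.  The
 * function name is a placeholder; error handling is abbreviated.
 *
 *      #include <sys/types.h>
 *      #include <sys/sysctl.h>
 *      #include <stdlib.h>
 *
 *      void *
 *      fetch_stream_pcblist(size_t *lenp)
 *      {
 *              void *buf;
 *
 *              if (sysctlbyname("net.local.stream.pcblist", NULL, lenp,
 *                  NULL, 0) != 0)
 *                      return (NULL);
 *              buf = malloc(*lenp);
 *              if (buf != NULL && sysctlbyname("net.local.stream.pcblist",
 *                  buf, lenp, NULL, 0) != 0) {
 *                      free(buf);
 *                      buf = NULL;
 *              }
 *              return (buf);
 *      }
 */
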
 1945 static void
 1946 unp_shutdown(struct unpcb *unp)
 1947 {
 1948         struct unpcb *unp2;
 1949         struct socket *so;
 1950 
 1951         UNP_PCB_LOCK_ASSERT(unp);
 1952 
 1953         unp2 = unp->unp_conn;
 1954         if ((unp->unp_socket->so_type == SOCK_STREAM ||
 1955             (unp->unp_socket->so_type == SOCK_SEQPACKET)) && unp2 != NULL) {
 1956                 so = unp2->unp_socket;
 1957                 if (so != NULL)
 1958                         socantrcvmore(so);
 1959         }
 1960 }
 1961 
 1962 static void
 1963 unp_drop(struct unpcb *unp)
 1964 {
 1965         struct socket *so;
 1966         struct unpcb *unp2;
 1967 
 1968         /*
 1969          * Regardless of whether the socket's peer dropped the connection
 1970          * with this socket by aborting or disconnecting, POSIX requires
 1971          * that ECONNRESET be returned.
 1972          */
 1973 
 1974         UNP_PCB_LOCK(unp);
 1975         so = unp->unp_socket;
 1976         if (so)
 1977                 so->so_error = ECONNRESET;
 1978         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
 1979                 /* Last reference dropped in unp_disconnect(). */
 1980                 unp_pcb_rele_notlast(unp);
 1981                 unp_disconnect(unp, unp2);
 1982         } else if (!unp_pcb_rele(unp)) {
 1983                 UNP_PCB_UNLOCK(unp);
 1984         }
 1985 }
 1986 
 1987 static void
 1988 unp_freerights(struct filedescent **fdep, int fdcount)
 1989 {
 1990         struct file *fp;
 1991         int i;
 1992 
 1993         KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
 1994 
 1995         for (i = 0; i < fdcount; i++) {
 1996                 fp = fdep[i]->fde_file;
 1997                 filecaps_free(&fdep[i]->fde_caps);
 1998                 unp_discard(fp);
 1999         }
 2000         free(fdep[0], M_FILECAPS);
 2001 }
 2002 
 2003 static int
 2004 unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
 2005 {
 2006         struct thread *td = curthread;          /* XXX */
 2007         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 2008         int i;
 2009         int *fdp;
 2010         struct filedesc *fdesc = td->td_proc->p_fd;
 2011         struct filedescent **fdep;
 2012         void *data;
 2013         socklen_t clen = control->m_len, datalen;
 2014         int error, newfds;
 2015         u_int newlen;
 2016 
 2017         UNP_LINK_UNLOCK_ASSERT();
 2018 
 2019         error = 0;
 2020         if (controlp != NULL) /* controlp == NULL => free control messages */
 2021                 *controlp = NULL;
 2022         while (cm != NULL) {
 2023                 if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
 2024                         error = EINVAL;
 2025                         break;
 2026                 }
 2027                 data = CMSG_DATA(cm);
 2028                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 2029                 if (cm->cmsg_level == SOL_SOCKET
 2030                     && cm->cmsg_type == SCM_RIGHTS) {
 2031                         newfds = datalen / sizeof(*fdep);
 2032                         if (newfds == 0)
 2033                                 goto next;
 2034                         fdep = data;
 2035 
 2036                         /* If we're not outputting the descriptors, free them. */
 2037                         if (error || controlp == NULL) {
 2038                                 unp_freerights(fdep, newfds);
 2039                                 goto next;
 2040                         }
 2041                         FILEDESC_XLOCK(fdesc);
 2042 
 2043                         /*
 2044                          * Now change each pointer to an fd in the global
 2045                          * table to an integer that is the index to the local
 2046                          * fd table entry that we set up to point to the
 2047                          * global one we are transferring.
 2048                          */
 2049                         newlen = newfds * sizeof(int);
 2050                         *controlp = sbcreatecontrol(NULL, newlen,
 2051                             SCM_RIGHTS, SOL_SOCKET);
 2052                         if (*controlp == NULL) {
 2053                                 FILEDESC_XUNLOCK(fdesc);
 2054                                 error = E2BIG;
 2055                                 unp_freerights(fdep, newfds);
 2056                                 goto next;
 2057                         }
 2058 
 2059                         fdp = (int *)
 2060                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2061                         if (fdallocn(td, 0, fdp, newfds) != 0) {
 2062                                 FILEDESC_XUNLOCK(fdesc);
 2063                                 error = EMSGSIZE;
 2064                                 unp_freerights(fdep, newfds);
 2065                                 m_freem(*controlp);
 2066                                 *controlp = NULL;
 2067                                 goto next;
 2068                         }
 2069                         for (i = 0; i < newfds; i++, fdp++) {
 2070                                 _finstall(fdesc, fdep[i]->fde_file, *fdp,
 2071                                     (flags & MSG_CMSG_CLOEXEC) != 0 ? O_CLOEXEC : 0,
 2072                                     &fdep[i]->fde_caps);
 2073                                 unp_externalize_fp(fdep[i]->fde_file);
 2074                         }
 2075 
 2076                         /*
 2077                          * The new type indicates that the mbuf data refers to
 2078                          * kernel resources that may need to be released before
 2079                          * the mbuf is freed.
 2080                          */
 2081                         m_chtype(*controlp, MT_EXTCONTROL);
 2082                         FILEDESC_XUNLOCK(fdesc);
 2083                         free(fdep[0], M_FILECAPS);
 2084                 } else {
 2085                         /* We can just copy anything else across. */
 2086                         if (error || controlp == NULL)
 2087                                 goto next;
 2088                         *controlp = sbcreatecontrol(NULL, datalen,
 2089                             cm->cmsg_type, cm->cmsg_level);
 2090                         if (*controlp == NULL) {
 2091                                 error = ENOBUFS;
 2092                                 goto next;
 2093                         }
 2094                         bcopy(data,
 2095                             CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 2096                             datalen);
 2097                 }
 2098                 controlp = &(*controlp)->m_next;
 2099 
 2100 next:
 2101                 if (CMSG_SPACE(datalen) < clen) {
 2102                         clen -= CMSG_SPACE(datalen);
 2103                         cm = (struct cmsghdr *)
 2104                             ((caddr_t)cm + CMSG_SPACE(datalen));
 2105                 } else {
 2106                         clen = 0;
 2107                         cm = NULL;
 2108                 }
 2109         }
 2110 
 2111         m_freem(control);
 2112         return (error);
 2113 }
 2114 
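/*
 * Illustrative userland sketch, not part of this source file: the receiving
 * side of descriptor passing serviced by unp_externalize() above, pulling a
 * single descriptor out of an SCM_RIGHTS control message.  The function name
 * is a placeholder; error handling is abbreviated.
 *
 *      #include <sys/types.h>
 *      #include <sys/socket.h>
 *      #include <sys/uio.h>
 *      #include <string.h>
 *
 *      int
 *      recv_fd(int s)
 *      {
 *              union {
 *                      struct cmsghdr hdr;
 *                      char buf[CMSG_SPACE(sizeof(int))];
 *              } cmsgbuf;
 *              struct msghdr msg;
 *              struct cmsghdr *cm;
 *              char byte;
 *              struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
 *              int fd = -1;
 *
 *              memset(&msg, 0, sizeof(msg));
 *              msg.msg_iov = &iov;
 *              msg.msg_iovlen = 1;
 *              msg.msg_control = cmsgbuf.buf;
 *              msg.msg_controllen = sizeof(cmsgbuf.buf);
 *              if (recvmsg(s, &msg, 0) < 0)
 *                      return (-1);
 *              for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
 *                  cm = CMSG_NXTHDR(&msg, cm))
 *                      if (cm->cmsg_level == SOL_SOCKET &&
 *                          cm->cmsg_type == SCM_RIGHTS)
 *                              memcpy(&fd, CMSG_DATA(cm), sizeof(fd));
 *              return (fd);
 *      }
 */
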
 2115 static void
 2116 unp_zone_change(void *tag)
 2117 {
 2118 
 2119         uma_zone_set_max(unp_zone, maxsockets);
 2120 }
 2121 
 2122 #ifdef INVARIANTS
 2123 static void
 2124 unp_zdtor(void *mem, int size __unused, void *arg __unused)
 2125 {
 2126         struct unpcb *unp;
 2127 
 2128         unp = mem;
 2129 
 2130         KASSERT(LIST_EMPTY(&unp->unp_refs),
 2131             ("%s: unpcb %p has lingering refs", __func__, unp));
 2132         KASSERT(unp->unp_socket == NULL,
 2133             ("%s: unpcb %p has socket backpointer", __func__, unp));
 2134         KASSERT(unp->unp_vnode == NULL,
 2135             ("%s: unpcb %p has vnode references", __func__, unp));
 2136         KASSERT(unp->unp_conn == NULL,
 2137             ("%s: unpcb %p is still connected", __func__, unp));
 2138         KASSERT(unp->unp_addr == NULL,
 2139             ("%s: unpcb %p has leaked addr", __func__, unp));
 2140 }
 2141 #endif
 2142 
 2143 static void
 2144 unp_init(void)
 2145 {
 2146         uma_dtor dtor;
 2147 
 2148 #ifdef VIMAGE
 2149         if (!IS_DEFAULT_VNET(curvnet))
 2150                 return;
 2151 #endif
 2152 
 2153 #ifdef INVARIANTS
 2154         dtor = unp_zdtor;
 2155 #else
 2156         dtor = NULL;
 2157 #endif
 2158         unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, dtor,
 2159             NULL, NULL, UMA_ALIGN_CACHE, 0);
 2160         uma_zone_set_max(unp_zone, maxsockets);
 2161         uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
 2162         EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
 2163             NULL, EVENTHANDLER_PRI_ANY);
 2164         LIST_INIT(&unp_dhead);
 2165         LIST_INIT(&unp_shead);
 2166         LIST_INIT(&unp_sphead);
 2167         SLIST_INIT(&unp_defers);
 2168         TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
 2169         TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
 2170         UNP_LINK_LOCK_INIT();
 2171         UNP_DEFERRED_LOCK_INIT();
 2172 }
 2173 
 2174 static void
 2175 unp_internalize_cleanup_rights(struct mbuf *control)
 2176 {
 2177         struct cmsghdr *cp;
 2178         struct mbuf *m;
 2179         void *data;
 2180         socklen_t datalen;
 2181 
 2182         for (m = control; m != NULL; m = m->m_next) {
 2183                 cp = mtod(m, struct cmsghdr *);
 2184                 if (cp->cmsg_level != SOL_SOCKET ||
 2185                     cp->cmsg_type != SCM_RIGHTS)
 2186                         continue;
 2187                 data = CMSG_DATA(cp);
 2188                 datalen = (caddr_t)cp + cp->cmsg_len - (caddr_t)data;
 2189                 unp_freerights(data, datalen / sizeof(struct filedesc *));
 2190         }
 2191 }
 2192 
 2193 static int
 2194 unp_internalize(struct mbuf **controlp, struct thread *td)
 2195 {
 2196         struct mbuf *control, **initial_controlp;
 2197         struct proc *p;
 2198         struct filedesc *fdesc;
 2199         struct bintime *bt;
 2200         struct cmsghdr *cm;
 2201         struct cmsgcred *cmcred;
 2202         struct filedescent *fde, **fdep, *fdev;
 2203         struct file *fp;
 2204         struct timeval *tv;
 2205         struct timespec *ts;
 2206         void *data;
 2207         socklen_t clen, datalen;
 2208         int i, j, error, *fdp, oldfds;
 2209         u_int newlen;
 2210 
 2211         UNP_LINK_UNLOCK_ASSERT();
 2212 
 2213         p = td->td_proc;
 2214         fdesc = p->p_fd;
 2215         error = 0;
 2216         control = *controlp;
 2217         clen = control->m_len;
 2218         *controlp = NULL;
 2219         initial_controlp = controlp;
 2220         for (cm = mtod(control, struct cmsghdr *); cm != NULL;) {
 2221                 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 2222                     || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
 2223                         error = EINVAL;
 2224                         goto out;
 2225                 }
 2226                 data = CMSG_DATA(cm);
 2227                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 2228 
 2229                 switch (cm->cmsg_type) {
 2230                 /*
 2231                  * Fill in credential information.
 2232                  */
 2233                 case SCM_CREDS:
 2234                         *controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 2235                             SCM_CREDS, SOL_SOCKET);
 2236                         if (*controlp == NULL) {
 2237                                 error = ENOBUFS;
 2238                                 goto out;
 2239                         }
 2240                         cmcred = (struct cmsgcred *)
 2241                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2242                         cmcred->cmcred_pid = p->p_pid;
 2243                         cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 2244                         cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 2245                         cmcred->cmcred_euid = td->td_ucred->cr_uid;
 2246                         cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 2247                             CMGROUP_MAX);
 2248                         for (i = 0; i < cmcred->cmcred_ngroups; i++)
 2249                                 cmcred->cmcred_groups[i] =
 2250                                     td->td_ucred->cr_groups[i];
 2251                         break;
 2252 
 2253                 case SCM_RIGHTS:
 2254                         oldfds = datalen / sizeof (int);
 2255                         if (oldfds == 0)
 2256                                 break;
 2257                         /*
 2258                          * Check that all the FDs passed in refer to legal
 2259                          * files.  If not, reject the entire operation.
 2260                          */
 2261                         fdp = data;
 2262                         FILEDESC_SLOCK(fdesc);
 2263                         for (i = 0; i < oldfds; i++, fdp++) {
 2264                                 fp = fget_locked(fdesc, *fdp);
 2265                                 if (fp == NULL) {
 2266                                         FILEDESC_SUNLOCK(fdesc);
 2267                                         error = EBADF;
 2268                                         goto out;
 2269                                 }
 2270                                 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 2271                                         FILEDESC_SUNLOCK(fdesc);
 2272                                         error = EOPNOTSUPP;
 2273                                         goto out;
 2274                                 }
 2275                         }
 2276 
 2277                         /*
 2278                          * Now replace the integer FDs with pointers to the
 2279                          * file structure and capability rights.
 2280                          */
 2281                         newlen = oldfds * sizeof(fdep[0]);
 2282                         *controlp = sbcreatecontrol(NULL, newlen,
 2283                             SCM_RIGHTS, SOL_SOCKET);
 2284                         if (*controlp == NULL) {
 2285                                 FILEDESC_SUNLOCK(fdesc);
 2286                                 error = E2BIG;
 2287                                 goto out;
 2288                         }
 2289                         fdp = data;
 2290                         for (i = 0; i < oldfds; i++, fdp++) {
 2291                                 if (!fhold(fdesc->fd_ofiles[*fdp].fde_file)) {
 2292                                         fdp = data;
 2293                                         for (j = 0; j < i; j++, fdp++) {
 2294                                                 fdrop(fdesc->fd_ofiles[*fdp].
 2295                                                     fde_file, td);
 2296                                         }
 2297                                         FILEDESC_SUNLOCK(fdesc);
 2298                                         error = EBADF;
 2299                                         goto out;
 2300                                 }
 2301                         }
 2302                         fdp = data;
 2303                         fdep = (struct filedescent **)
 2304                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2305                         fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
 2306                             M_WAITOK);
 2307                         for (i = 0; i < oldfds; i++, fdev++, fdp++) {
 2308                                 fde = &fdesc->fd_ofiles[*fdp];
 2309                                 fdep[i] = fdev;
 2310                                 fdep[i]->fde_file = fde->fde_file;
 2311                                 filecaps_copy(&fde->fde_caps,
 2312                                     &fdep[i]->fde_caps, true);
 2313                                 unp_internalize_fp(fdep[i]->fde_file);
 2314                         }
 2315                         FILEDESC_SUNLOCK(fdesc);
 2316                         break;
 2317 
 2318                 case SCM_TIMESTAMP:
 2319                         *controlp = sbcreatecontrol(NULL, sizeof(*tv),
 2320                             SCM_TIMESTAMP, SOL_SOCKET);
 2321                         if (*controlp == NULL) {
 2322                                 error = ENOBUFS;
 2323                                 goto out;
 2324                         }
 2325                         tv = (struct timeval *)
 2326                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2327                         microtime(tv);
 2328                         break;
 2329 
 2330                 case SCM_BINTIME:
 2331                         *controlp = sbcreatecontrol(NULL, sizeof(*bt),
 2332                             SCM_BINTIME, SOL_SOCKET);
 2333                         if (*controlp == NULL) {
 2334                                 error = ENOBUFS;
 2335                                 goto out;
 2336                         }
 2337                         bt = (struct bintime *)
 2338                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2339                         bintime(bt);
 2340                         break;
 2341 
 2342                 case SCM_REALTIME:
 2343                         *controlp = sbcreatecontrol(NULL, sizeof(*ts),
 2344                             SCM_REALTIME, SOL_SOCKET);
 2345                         if (*controlp == NULL) {
 2346                                 error = ENOBUFS;
 2347                                 goto out;
 2348                         }
 2349                         ts = (struct timespec *)
 2350                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2351                         nanotime(ts);
 2352                         break;
 2353 
 2354                 case SCM_MONOTONIC:
 2355                         *controlp = sbcreatecontrol(NULL, sizeof(*ts),
 2356                             SCM_MONOTONIC, SOL_SOCKET);
 2357                         if (*controlp == NULL) {
 2358                                 error = ENOBUFS;
 2359                                 goto out;
 2360                         }
 2361                         ts = (struct timespec *)
 2362                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2363                         nanouptime(ts);
 2364                         break;
 2365 
 2366                 default:
 2367                         error = EINVAL;
 2368                         goto out;
 2369                 }
 2370 
 2371                 if (*controlp != NULL)
 2372                         controlp = &(*controlp)->m_next;
 2373                 if (CMSG_SPACE(datalen) < clen) {
 2374                         clen -= CMSG_SPACE(datalen);
 2375                         cm = (struct cmsghdr *)
 2376                             ((caddr_t)cm + CMSG_SPACE(datalen));
 2377                 } else {
 2378                         clen = 0;
 2379                         cm = NULL;
 2380                 }
 2381         }
 2382 
 2383 out:
 2384         if (error != 0 && initial_controlp != NULL)
 2385                 unp_internalize_cleanup_rights(*initial_controlp);
 2386         m_freem(control);
 2387         return (error);
 2388 }
 2389 
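/*
 * Illustrative userland sketch, not part of this source file: the sending
 * side of descriptor passing serviced by unp_internalize() above, packing one
 * descriptor into an SCM_RIGHTS control message.  The function name is a
 * placeholder; error handling is abbreviated.
 *
 *      #include <sys/types.h>
 *      #include <sys/socket.h>
 *      #include <sys/uio.h>
 *      #include <string.h>
 *
 *      int
 *      send_fd(int s, int fd)
 *      {
 *              union {
 *                      struct cmsghdr hdr;
 *                      char buf[CMSG_SPACE(sizeof(int))];
 *              } cmsgbuf;
 *              struct msghdr msg;
 *              struct cmsghdr *cm;
 *              char byte = 0;
 *              struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
 *
 *              memset(&msg, 0, sizeof(msg));
 *              msg.msg_iov = &iov;
 *              msg.msg_iovlen = 1;
 *              msg.msg_control = cmsgbuf.buf;
 *              msg.msg_controllen = sizeof(cmsgbuf.buf);
 *              cm = CMSG_FIRSTHDR(&msg);
 *              cm->cmsg_level = SOL_SOCKET;
 *              cm->cmsg_type = SCM_RIGHTS;
 *              cm->cmsg_len = CMSG_LEN(sizeof(int));
 *              memcpy(CMSG_DATA(cm), &fd, sizeof(fd));
 *              return (sendmsg(s, &msg, 0) < 0 ? -1 : 0);
 *      }
 */
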
 2390 static struct mbuf *
 2391 unp_addsockcred(struct thread *td, struct mbuf *control, int mode)
 2392 {
 2393         struct mbuf *m, *n, *n_prev;
 2394         const struct cmsghdr *cm;
 2395         int ngroups, i, cmsgtype;
 2396         size_t ctrlsz;
 2397 
 2398         ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
 2399         if (mode & UNP_WANTCRED_ALWAYS) {
 2400                 ctrlsz = SOCKCRED2SIZE(ngroups);
 2401                 cmsgtype = SCM_CREDS2;
 2402         } else {
 2403                 ctrlsz = SOCKCREDSIZE(ngroups);
 2404                 cmsgtype = SCM_CREDS;
 2405         }
 2406 
 2407         m = sbcreatecontrol(NULL, ctrlsz, cmsgtype, SOL_SOCKET);
 2408         if (m == NULL)
 2409                 return (control);
 2410 
 2411         if (mode & UNP_WANTCRED_ALWAYS) {
 2412                 struct sockcred2 *sc;
 2413 
 2414                 sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *));
 2415                 sc->sc_version = 0;
 2416                 sc->sc_pid = td->td_proc->p_pid;
 2417                 sc->sc_uid = td->td_ucred->cr_ruid;
 2418                 sc->sc_euid = td->td_ucred->cr_uid;
 2419                 sc->sc_gid = td->td_ucred->cr_rgid;
 2420                 sc->sc_egid = td->td_ucred->cr_gid;
 2421                 sc->sc_ngroups = ngroups;
 2422                 for (i = 0; i < sc->sc_ngroups; i++)
 2423                         sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 2424         } else {
 2425                 struct sockcred *sc;
 2426 
 2427                 sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *));
 2428                 sc->sc_uid = td->td_ucred->cr_ruid;
 2429                 sc->sc_euid = td->td_ucred->cr_uid;
 2430                 sc->sc_gid = td->td_ucred->cr_rgid;
 2431                 sc->sc_egid = td->td_ucred->cr_gid;
 2432                 sc->sc_ngroups = ngroups;
 2433                 for (i = 0; i < sc->sc_ngroups; i++)
 2434                         sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 2435         }
 2436 
 2437         /*
 2438          * Unlink any existing SCM_CREDS control messages (struct cmsgcred),
 2439          * since the SCM_CREDS control message just created (struct sockcred)
 2440          * has a different format.
 2441          */
 2442         if (control != NULL && cmsgtype == SCM_CREDS)
 2443                 for (n = control, n_prev = NULL; n != NULL;) {
 2444                         cm = mtod(n, struct cmsghdr *);
 2445                         if (cm->cmsg_level == SOL_SOCKET &&
 2446                             cm->cmsg_type == SCM_CREDS) {
 2447                                 if (n_prev == NULL)
 2448                                         control = n->m_next;
 2449                                 else
 2450                                         n_prev->m_next = n->m_next;
 2451                                 n = m_free(n);
 2452                         } else {
 2453                                 n_prev = n;
 2454                                 n = n->m_next;
 2455                         }
 2456                 }
 2457 
 2458         /* Prepend it to the head. */
 2459         m->m_next = control;
 2460         return (m);
 2461 }
 2462 
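/*
 * Illustrative userland sketch, not part of this source file: a receiver that
 * sets LOCAL_CREDS can pull the struct sockcred built by unp_addsockcred()
 * above out of the control data accompanying the next message.  The function
 * name is a placeholder; error handling is abbreviated.
 *
 *      #include <sys/types.h>
 *      #include <sys/socket.h>
 *      #include <sys/un.h>
 *      #include <sys/uio.h>
 *      #include <stdio.h>
 *      #include <string.h>
 *
 *      void
 *      recv_with_creds(int s)
 *      {
 *              union {
 *                      struct cmsghdr hdr;
 *                      char buf[CMSG_SPACE(SOCKCREDSIZE(CMGROUP_MAX))];
 *              } cmsgbuf;
 *              char data[128];
 *              struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *              struct msghdr msg;
 *              struct cmsghdr *cm;
 *              const struct sockcred *sc;
 *              int on = 1;
 *
 *              (void)setsockopt(s, SOL_LOCAL, LOCAL_CREDS, &on, sizeof(on));
 *              memset(&msg, 0, sizeof(msg));
 *              msg.msg_iov = &iov;
 *              msg.msg_iovlen = 1;
 *              msg.msg_control = cmsgbuf.buf;
 *              msg.msg_controllen = sizeof(cmsgbuf.buf);
 *              if (recvmsg(s, &msg, 0) < 0)
 *                      return;
 *              for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
 *                  cm = CMSG_NXTHDR(&msg, cm))
 *                      if (cm->cmsg_level == SOL_SOCKET &&
 *                          cm->cmsg_type == SCM_CREDS) {
 *                              sc = (const struct sockcred *)CMSG_DATA(cm);
 *                              printf("sender uid %u\n", (unsigned)sc->sc_uid);
 *                      }
 *      }
 */
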
 2463 static struct unpcb *
 2464 fptounp(struct file *fp)
 2465 {
 2466         struct socket *so;
 2467 
 2468         if (fp->f_type != DTYPE_SOCKET)
 2469                 return (NULL);
 2470         if ((so = fp->f_data) == NULL)
 2471                 return (NULL);
 2472         if (so->so_proto->pr_domain != &localdomain)
 2473                 return (NULL);
 2474         return sotounpcb(so);
 2475 }
 2476 
 2477 static void
 2478 unp_discard(struct file *fp)
 2479 {
 2480         struct unp_defer *dr;
 2481 
 2482         if (unp_externalize_fp(fp)) {
 2483                 dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK);
 2484                 dr->ud_fp = fp;
 2485                 UNP_DEFERRED_LOCK();
 2486                 SLIST_INSERT_HEAD(&unp_defers, dr, ud_link);
 2487                 UNP_DEFERRED_UNLOCK();
 2488                 atomic_add_int(&unp_defers_count, 1);
 2489                 taskqueue_enqueue(taskqueue_thread, &unp_defer_task);
 2490         } else
 2491                 closef_nothread(fp);
 2492 }
 2493 
 2494 static void
 2495 unp_process_defers(void *arg __unused, int pending)
 2496 {
 2497         struct unp_defer *dr;
 2498         SLIST_HEAD(, unp_defer) drl;
 2499         int count;
 2500 
 2501         SLIST_INIT(&drl);
 2502         for (;;) {
 2503                 UNP_DEFERRED_LOCK();
 2504                 if (SLIST_FIRST(&unp_defers) == NULL) {
 2505                         UNP_DEFERRED_UNLOCK();
 2506                         break;
 2507                 }
 2508                 SLIST_SWAP(&unp_defers, &drl, unp_defer);
 2509                 UNP_DEFERRED_UNLOCK();
 2510                 count = 0;
 2511                 while ((dr = SLIST_FIRST(&drl)) != NULL) {
 2512                         SLIST_REMOVE_HEAD(&drl, ud_link);
 2513                         closef_nothread(dr->ud_fp);
 2514                         free(dr, M_TEMP);
 2515                         count++;
 2516                 }
 2517                 atomic_add_int(&unp_defers_count, -count);
 2518         }
 2519 }
 2520 
 2521 static void
 2522 unp_internalize_fp(struct file *fp)
 2523 {
 2524         struct unpcb *unp;
 2525 
 2526         UNP_LINK_WLOCK();
 2527         if ((unp = fptounp(fp)) != NULL) {
 2528                 unp->unp_file = fp;
 2529                 unp->unp_msgcount++;
 2530         }
 2531         unp_rights++;
 2532         UNP_LINK_WUNLOCK();
 2533 }
 2534 
 2535 static int
 2536 unp_externalize_fp(struct file *fp)
 2537 {
 2538         struct unpcb *unp;
 2539         int ret;
 2540 
 2541         UNP_LINK_WLOCK();
 2542         if ((unp = fptounp(fp)) != NULL) {
 2543                 unp->unp_msgcount--;
 2544                 ret = 1;
 2545         } else
 2546                 ret = 0;
 2547         unp_rights--;
 2548         UNP_LINK_WUNLOCK();
 2549         return (ret);
 2550 }
 2551 
 2552 /*
 2553  * unp_marked indicates whether an additional pass through unp_gc()'s scan
 2554  * is required.  It is only accessed from the gc task and does not require
 2555  * explicit synchronization.
 2556  */
 2557 static int      unp_marked;
 2558 
 2559 static void
 2560 unp_remove_dead_ref(struct filedescent **fdep, int fdcount)
 2561 {
 2562         struct unpcb *unp;
 2563         struct file *fp;
 2564         int i;
 2565 
 2566         /*
 2567          * This function can only be called from the gc task.
 2568          */
 2569         KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
 2570             ("%s: not on gc callout", __func__));
 2571         UNP_LINK_LOCK_ASSERT();
 2572 
 2573         for (i = 0; i < fdcount; i++) {
 2574                 fp = fdep[i]->fde_file;
 2575                 if ((unp = fptounp(fp)) == NULL)
 2576                         continue;
 2577                 if ((unp->unp_gcflag & UNPGC_DEAD) == 0)
 2578                         continue;
 2579                 unp->unp_gcrefs--;
 2580         }
 2581 }
 2582 
 2583 static void
 2584 unp_restore_undead_ref(struct filedescent **fdep, int fdcount)
 2585 {
 2586         struct unpcb *unp;
 2587         struct file *fp;
 2588         int i;
 2589 
 2590         /*
 2591          * This function can only be called from the gc task.
 2592          */
 2593         KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
 2594             ("%s: not on gc callout", __func__));
 2595         UNP_LINK_LOCK_ASSERT();
 2596 
 2597         for (i = 0; i < fdcount; i++) {
 2598                 fp = fdep[i]->fde_file;
 2599                 if ((unp = fptounp(fp)) == NULL)
 2600                         continue;
 2601                 if ((unp->unp_gcflag & UNPGC_DEAD) == 0)
 2602                         continue;
 2603                 unp->unp_gcrefs++;
 2604                 unp_marked++;
 2605         }
 2606 }
 2607 
 2608 static void
 2609 unp_gc_scan(struct unpcb *unp, void (*op)(struct filedescent **, int))
 2610 {
 2611         struct socket *so, *soa;
 2612 
 2613         so = unp->unp_socket;
 2614         SOCK_LOCK(so);
 2615         if (SOLISTENING(so)) {
 2616                 /*
 2617                  * Mark all sockets in our accept queue.
 2618                  */
 2619                 TAILQ_FOREACH(soa, &so->sol_comp, so_list) {
 2620                         if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
 2621                                 continue;
 2622                         SOCKBUF_LOCK(&soa->so_rcv);
 2623                         unp_scan(soa->so_rcv.sb_mb, op);
 2624                         SOCKBUF_UNLOCK(&soa->so_rcv);
 2625                 }
 2626         } else {
 2627                 /*
 2628                  * Mark all sockets we reference with RIGHTS.
 2629                  */
 2630                 if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
 2631                         SOCKBUF_LOCK(&so->so_rcv);
 2632                         unp_scan(so->so_rcv.sb_mb, op);
 2633                         SOCKBUF_UNLOCK(&so->so_rcv);
 2634                 }
 2635         }
 2636         SOCK_UNLOCK(so);
 2637 }
 2638 
 2639 static int unp_recycled;
 2640 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 
 2641     "Number of unreachable sockets claimed by the garbage collector.");
 2642 
 2643 static int unp_taskcount;
 2644 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 
 2645     "Number of times the garbage collector has run.");
 2646 
 2647 SYSCTL_UINT(_net_local, OID_AUTO, sockcount, CTLFLAG_RD, &unp_count, 0, 
 2648     "Number of active local sockets.");
 2649 
 2650 static void
 2651 unp_gc(__unused void *arg, int pending)
 2652 {
 2653         struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead,
 2654                                     NULL };
 2655         struct unp_head **head;
 2656         struct unp_head unp_deadhead;   /* List of potentially-dead sockets. */
 2657         struct file *f, **unref;
 2658         struct unpcb *unp, *unptmp;
 2659         int i, total, unp_unreachable;
 2660 
 2661         LIST_INIT(&unp_deadhead);
 2662         unp_taskcount++;
 2663         UNP_LINK_RLOCK();
 2664         /*
 2665          * First determine which sockets may be in cycles.
 2666          */
 2667         unp_unreachable = 0;
 2668 
 2669         for (head = heads; *head != NULL; head++)
 2670                 LIST_FOREACH(unp, *head, unp_link) {
 2671                         KASSERT((unp->unp_gcflag & ~UNPGC_IGNORE_RIGHTS) == 0,
 2672                             ("%s: unp %p has unexpected gc flags 0x%x",
 2673                             __func__, unp, (unsigned int)unp->unp_gcflag));
 2674 
 2675                         f = unp->unp_file;
 2676 
 2677                         /*
 2678                          * Check for an unreachable socket potentially in a
 2679                          * cycle.  It must be in a queue as indicated by
 2680                          * msgcount, and this must equal the file reference
 2681                          * count.  Note that when msgcount is 0 the file is
 2682                          * NULL.
 2683                          */
 2684                         if (f != NULL && unp->unp_msgcount != 0 &&
 2685                             refcount_load(&f->f_count) == unp->unp_msgcount) {
 2686                                 LIST_INSERT_HEAD(&unp_deadhead, unp, unp_dead);
 2687                                 unp->unp_gcflag |= UNPGC_DEAD;
 2688                                 unp->unp_gcrefs = unp->unp_msgcount;
 2689                                 unp_unreachable++;
 2690                         }
 2691                 }
 2692 
 2693         /*
 2694          * Scan all sockets previously marked as potentially being in a cycle
 2695          * and remove the references each socket holds on any UNPGC_DEAD
 2696          * sockets in its queue.  After this step, all remaining references on
 2697          * sockets marked UNPGC_DEAD should not be part of any cycle.
 2698          */
 2699         LIST_FOREACH(unp, &unp_deadhead, unp_dead)
 2700                 unp_gc_scan(unp, unp_remove_dead_ref);
 2701 
 2702         /*
 2703          * If a socket still has a positive refcount, it cannot be in a
 2704          * cycle.  In this case increment refcount of all children iteratively.
 2705          * Stop the scan once we do a complete loop without discovering
 2706          * a new reachable socket.
 2707          */
 2708         do {
 2709                 unp_marked = 0;
 2710                 LIST_FOREACH_SAFE(unp, &unp_deadhead, unp_dead, unptmp)
 2711                         if (unp->unp_gcrefs > 0) {
 2712                                 unp->unp_gcflag &= ~UNPGC_DEAD;
 2713                                 LIST_REMOVE(unp, unp_dead);
 2714                                 KASSERT(unp_unreachable > 0,
 2715                                     ("%s: unp_unreachable underflow.",
 2716                                     __func__));
 2717                                 unp_unreachable--;
 2718                                 unp_gc_scan(unp, unp_restore_undead_ref);
 2719                         }
 2720         } while (unp_marked);
 2721 
 2722         UNP_LINK_RUNLOCK();
 2723 
 2724         if (unp_unreachable == 0)
 2725                 return;
 2726 
 2727         /*
 2728          * Allocate space for a local array of dead unpcbs.
 2729          * TODO: can this path be simplified by instead using the local
 2730          * dead list at unp_deadhead, after taking out references
 2731          * on the file object and/or unpcb and dropping the link lock?
 2732          */
 2733         unref = malloc(unp_unreachable * sizeof(struct file *),
 2734             M_TEMP, M_WAITOK);
 2735 
 2736         /*
 2737          * Iterate looking for sockets which have been specifically marked
 2738          * as unreachable and store them locally.
 2739          */
 2740         UNP_LINK_RLOCK();
 2741         total = 0;
 2742         LIST_FOREACH(unp, &unp_deadhead, unp_dead) {
 2743                 KASSERT((unp->unp_gcflag & UNPGC_DEAD) != 0,
 2744                     ("%s: unp %p not marked UNPGC_DEAD", __func__, unp));
 2745                 unp->unp_gcflag &= ~UNPGC_DEAD;
 2746                 f = unp->unp_file;
 2747                 if (unp->unp_msgcount == 0 || f == NULL ||
 2748                     refcount_load(&f->f_count) != unp->unp_msgcount ||
 2749                     !fhold(f))
 2750                         continue;
 2751                 unref[total++] = f;
 2752                 KASSERT(total <= unp_unreachable,
 2753                     ("%s: incorrect unreachable count.", __func__));
 2754         }
 2755         UNP_LINK_RUNLOCK();
 2756 
 2757         /*
 2758          * Now flush all sockets, freeing rights.  This will free the
 2759          * struct files associated with these sockets but leave each socket
 2760          * with one remaining ref.
 2761          */
 2762         for (i = 0; i < total; i++) {
 2763                 struct socket *so;
 2764 
 2765                 so = unref[i]->f_data;
 2766                 CURVNET_SET(so->so_vnet);
 2767                 sorflush(so);
 2768                 CURVNET_RESTORE();
 2769         }
 2770 
 2771         /*
 2772          * And finally release the sockets so they can be reclaimed.
 2773          */
 2774         for (i = 0; i < total; i++)
 2775                 fdrop(unref[i], NULL);
 2776         unp_recycled += total;
 2777         free(unref, M_TEMP);
 2778 }
 2779 
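/*
 * Editor's note (hypothetical userland sketch, not part of this file): the
 * simplest garbage the collector above reclaims is a socket whose only
 * remaining reference is an in-flight copy of itself.  The program below
 * builds such a cycle -- it passes one end of a socketpair over the pair
 * and then closes both descriptors, leaving f_count equal to unp_msgcount,
 * which is the condition the first pass of unp_gc() marks as UNPGC_DEAD.
 */

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        int sv[2];
        struct msghdr msg;
        struct iovec iov;
        struct cmsghdr *cm;
        char buf[1] = { 0 };
        char cbuf[CMSG_SPACE(sizeof(int))];

        if (socketpair(PF_LOCAL, SOCK_STREAM, 0, sv) == -1)
                return (1);

        /*
         * Send sv[0] across the pair; the SCM_RIGHTS message lands in
         * sv[0]'s own receive buffer, so the socket now references itself.
         */
        memset(&msg, 0, sizeof(msg));
        memset(cbuf, 0, sizeof(cbuf));
        iov.iov_base = buf;
        iov.iov_len = sizeof(buf);
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cbuf;
        msg.msg_controllen = sizeof(cbuf);
        cm = CMSG_FIRSTHDR(&msg);
        cm->cmsg_level = SOL_SOCKET;
        cm->cmsg_type = SCM_RIGHTS;
        cm->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cm), &sv[0], sizeof(int));
        (void)sendmsg(sv[1], &msg, 0);

        /*
         * Dropping both descriptors leaves no userland reference; only the
         * garbage collector can now free the cycle held by sv[0].
         */
        close(sv[0]);
        close(sv[1]);
        return (0);
}
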
 2780 static void
 2781 unp_dispose_mbuf(struct mbuf *m)
 2782 {
 2783 
 2784         if (m)
 2785                 unp_scan(m, unp_freerights);
 2786 }
 2787 
 2788 /*
 2789  * Synchronize against unp_gc, which can trip over data as we are freeing it.
 2790  */
 2791 static void
 2792 unp_dispose(struct socket *so)
 2793 {
 2794         struct unpcb *unp;
 2795 
 2796         unp = sotounpcb(so);
 2797         UNP_LINK_WLOCK();
 2798         unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
 2799         UNP_LINK_WUNLOCK();
 2800         if (!SOLISTENING(so))
 2801                 unp_dispose_mbuf(so->so_rcv.sb_mb);
 2802 }
 2803 
 2804 static void
 2805 unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
 2806 {
 2807         struct mbuf *m;
 2808         struct cmsghdr *cm;
 2809         void *data;
 2810         socklen_t clen, datalen;
 2811 
 2812         while (m0 != NULL) {
 2813                 for (m = m0; m; m = m->m_next) {
 2814                         if (m->m_type != MT_CONTROL)
 2815                                 continue;
 2816 
 2817                         cm = mtod(m, struct cmsghdr *);
 2818                         clen = m->m_len;
 2819 
 2820                         while (cm != NULL) {
 2821                                 if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 2822                                         break;
 2823 
 2824                                 data = CMSG_DATA(cm);
 2825                                 datalen = (caddr_t)cm + cm->cmsg_len
 2826                                     - (caddr_t)data;
 2827 
 2828                                 if (cm->cmsg_level == SOL_SOCKET &&
 2829                                     cm->cmsg_type == SCM_RIGHTS) {
 2830                                         (*op)(data, datalen /
 2831                                             sizeof(struct filedescent *));
 2832                                 }
 2833 
 2834                                 if (CMSG_SPACE(datalen) < clen) {
 2835                                         clen -= CMSG_SPACE(datalen);
 2836                                         cm = (struct cmsghdr *)
 2837                                             ((caddr_t)cm + CMSG_SPACE(datalen));
 2838                                 } else {
 2839                                         clen = 0;
 2840                                         cm = NULL;
 2841                                 }
 2842                         }
 2843                 }
 2844                 m0 = m0->m_nextpkt;
 2845         }
 2846 }
 2847 
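/*
 * Editor's note (hypothetical userland counterpart, not part of this file):
 * unp_scan() above walks MT_CONTROL mbufs by hand because the kernel keeps
 * SCM_RIGHTS payloads as arrays of struct filedescent pointers.  The same
 * walk over a recvmsg(2) control buffer in userland -- where the payload is
 * an array of plain file descriptors -- uses the standard CMSG_FIRSTHDR()
 * and CMSG_NXTHDR() macros, sketched below.
 */

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <stdio.h>
#include <string.h>

static void
print_received_fds(int sock)
{
        struct msghdr msg;
        struct iovec iov;
        struct cmsghdr *cm;
        char data[64];
        char cbuf[CMSG_SPACE(sizeof(int) * 8)];         /* room for 8 fds */
        size_t datalen, off;
        int fd;

        memset(&msg, 0, sizeof(msg));
        iov.iov_base = data;
        iov.iov_len = sizeof(data);
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cbuf;
        msg.msg_controllen = sizeof(cbuf);
        if (recvmsg(sock, &msg, 0) == -1)
                return;

        for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
                if (cm->cmsg_level != SOL_SOCKET || cm->cmsg_type != SCM_RIGHTS)
                        continue;
                /* Same arithmetic as unp_scan(): payload excludes the header. */
                datalen = cm->cmsg_len - ((char *)CMSG_DATA(cm) - (char *)cm);
                for (off = 0; off + sizeof(int) <= datalen; off += sizeof(int)) {
                        memcpy(&fd, (char *)CMSG_DATA(cm) + off, sizeof(fd));
                        printf("received fd %d\n", fd);
                }
        }
}
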
 2848 /*
 2849  * A helper function called by VFS before socket-type vnode reclamation.
 2850  * For an active vnode it clears the unp_vnode pointer and releases the
 2851  * vnode use count held by the bound socket.
 2852  */
 2853 void
 2854 vfs_unp_reclaim(struct vnode *vp)
 2855 {
 2856         struct unpcb *unp;
 2857         int active;
 2858         struct mtx *vplock;
 2859 
 2860         ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim");
 2861         KASSERT(vp->v_type == VSOCK,
 2862             ("vfs_unp_reclaim: vp->v_type != VSOCK"));
 2863 
 2864         active = 0;
 2865         vplock = mtx_pool_find(mtxpool_sleep, vp);
 2866         mtx_lock(vplock);
 2867         VOP_UNP_CONNECT(vp, &unp);
 2868         if (unp == NULL)
 2869                 goto done;
 2870         UNP_PCB_LOCK(unp);
 2871         if (unp->unp_vnode == vp) {
 2872                 VOP_UNP_DETACH(vp);
 2873                 unp->unp_vnode = NULL;
 2874                 active = 1;
 2875         }
 2876         UNP_PCB_UNLOCK(unp);
 2877  done:
 2878         mtx_unlock(vplock);
 2879         if (active)
 2880                 vunref(vp);
 2881 }
 2882 
 2883 #ifdef DDB
 2884 static void
 2885 db_print_indent(int indent)
 2886 {
 2887         int i;
 2888 
 2889         for (i = 0; i < indent; i++)
 2890                 db_printf(" ");
 2891 }
 2892 
 2893 static void
 2894 db_print_unpflags(int unp_flags)
 2895 {
 2896         int comma;
 2897 
 2898         comma = 0;
 2899         if (unp_flags & UNP_HAVEPC) {
 2900                 db_printf("%sUNP_HAVEPC", comma ? ", " : "");
 2901                 comma = 1;
 2902         }
 2903         if (unp_flags & UNP_WANTCRED_ALWAYS) {
 2904                 db_printf("%sUNP_WANTCRED_ALWAYS", comma ? ", " : "");
 2905                 comma = 1;
 2906         }
 2907         if (unp_flags & UNP_WANTCRED_ONESHOT) {
 2908                 db_printf("%sUNP_WANTCRED_ONESHOT", comma ? ", " : "");
 2909                 comma = 1;
 2910         }
 2911         if (unp_flags & UNP_CONNWAIT) {
 2912                 db_printf("%sUNP_CONNWAIT", comma ? ", " : "");
 2913                 comma = 1;
 2914         }
 2915         if (unp_flags & UNP_CONNECTING) {
 2916                 db_printf("%sUNP_CONNECTING", comma ? ", " : "");
 2917                 comma = 1;
 2918         }
 2919         if (unp_flags & UNP_BINDING) {
 2920                 db_printf("%sUNP_BINDING", comma ? ", " : "");
 2921                 comma = 1;
 2922         }
 2923 }
 2924 
 2925 static void
 2926 db_print_xucred(int indent, struct xucred *xu)
 2927 {
 2928         int comma, i;
 2929 
 2930         db_print_indent(indent);
 2931         db_printf("cr_version: %u   cr_uid: %u   cr_pid: %d   cr_ngroups: %d\n",
 2932             xu->cr_version, xu->cr_uid, xu->cr_pid, xu->cr_ngroups);
 2933         db_print_indent(indent);
 2934         db_printf("cr_groups: ");
 2935         comma = 0;
 2936         for (i = 0; i < xu->cr_ngroups; i++) {
 2937                 db_printf("%s%u", comma ? ", " : "", xu->cr_groups[i]);
 2938                 comma = 1;
 2939         }
 2940         db_printf("\n");
 2941 }
 2942 
 2943 static void
 2944 db_print_unprefs(int indent, struct unp_head *uh)
 2945 {
 2946         struct unpcb *unp;
 2947         int counter;
 2948 
 2949         counter = 0;
 2950         LIST_FOREACH(unp, uh, unp_reflink) {
 2951                 if (counter % 4 == 0)
 2952                         db_print_indent(indent);
 2953                 db_printf("%p  ", unp);
 2954                 if (counter % 4 == 3)
 2955                         db_printf("\n");
 2956                 counter++;
 2957         }
 2958         if (counter != 0 && counter % 4 != 0)
 2959                 db_printf("\n");
 2960 }
 2961 
 2962 DB_SHOW_COMMAND(unpcb, db_show_unpcb)
 2963 {
 2964         struct unpcb *unp;
 2965 
 2966         if (!have_addr) {
 2967                 db_printf("usage: show unpcb <addr>\n");
 2968                 return;
 2969         }
 2970         unp = (struct unpcb *)addr;
 2971 
 2972         db_printf("unp_socket: %p   unp_vnode: %p\n", unp->unp_socket,
 2973             unp->unp_vnode);
 2974 
 2975         db_printf("unp_ino: %ju   unp_conn: %p\n", (uintmax_t)unp->unp_ino,
 2976             unp->unp_conn);
 2977 
 2978         db_printf("unp_refs:\n");
 2979         db_print_unprefs(2, &unp->unp_refs);
 2980 
 2981         /* XXXRW: Would be nice to print the full address, if any. */
 2982         db_printf("unp_addr: %p\n", unp->unp_addr);
 2983 
 2984         db_printf("unp_gencnt: %llu\n",
 2985             (unsigned long long)unp->unp_gencnt);
 2986 
 2987         db_printf("unp_flags: %x (", unp->unp_flags);
 2988         db_print_unpflags(unp->unp_flags);
 2989         db_printf(")\n");
 2990 
 2991         db_printf("unp_peercred:\n");
 2992         db_print_xucred(2, &unp->unp_peercred);
 2993 
 2994         db_printf("unp_refcount: %u\n", unp->unp_refcount);
 2995 }
 2996 #endif
