FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    5  *      The Regents of the University of California. All Rights Reserved.
    6  * Copyright (c) 2004-2009 Robert N. M. Watson All Rights Reserved.
    7  * Copyright (c) 2018 Matthew Macy
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   34  */
   35 
   36 /*
   37  * UNIX Domain (Local) Sockets
   38  *
   39  * This is an implementation of UNIX (local) domain sockets.  Each socket has
   40  * an associated struct unpcb (UNIX protocol control block).  Stream sockets
   41  * may be connected to 0 or 1 other socket.  Datagram sockets may be
   42  * connected to 0, 1, or many other sockets.  Sockets may be created and
   43  * connected in pairs (socketpair(2)), or bound/connected to using the file
   44  * system name space.  For most purposes, only the receive socket buffer is
   45  * used, as sending on one socket delivers directly to the receive socket
   46  * buffer of a second socket.
   47  *
   48  * The implementation is substantially complicated by the fact that
   49  * "ancillary data", such as file descriptors or credentials, may be passed
   50  * across UNIX domain sockets.  The potential for passing UNIX domain sockets
   51  * over other UNIX domain sockets requires the implementation of a simple
   52  * garbage collector to find and tear down cycles of disconnected sockets.
   53  *
   54  * TODO:
   55  *      RDM
   56  *      rethink name space problems
   57  *      need a proper out-of-band
   58  */
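      /*
       * Userland sketch for illustration: passing a file descriptor as
       * SCM_RIGHTS ancillary data over a connected local socket (e.g. one
       * end of a socketpair(2)).  The internalize/externalize and garbage
       * collection machinery described above exists to support this kind of
       * message.  The function name and error handling are hypothetical.
       */
      #include <sys/types.h>
      #include <sys/socket.h>
      #include <sys/uio.h>
      #include <string.h>

      static int
      example_send_fd(int sock, int fd_to_pass)
      {
              struct msghdr msg;
              struct iovec iov;
              struct cmsghdr *cmsg;
              union {
                      struct cmsghdr hdr;
                      char buf[CMSG_SPACE(sizeof(int))];
              } cmsgbuf;
              char byte = 0;

              /* A one-byte payload carries the control message. */
              memset(&msg, 0, sizeof(msg));
              iov.iov_base = &byte;
              iov.iov_len = sizeof(byte);
              msg.msg_iov = &iov;
              msg.msg_iovlen = 1;
              msg.msg_control = cmsgbuf.buf;
              msg.msg_controllen = sizeof(cmsgbuf.buf);

              /* Attach the descriptor as SCM_RIGHTS ancillary data. */
              cmsg = CMSG_FIRSTHDR(&msg);
              cmsg->cmsg_level = SOL_SOCKET;
              cmsg->cmsg_type = SCM_RIGHTS;
              cmsg->cmsg_len = CMSG_LEN(sizeof(int));
              memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));

              return (sendmsg(sock, &msg, 0) == -1 ? -1 : 0);
      }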
   59 
   60 #include <sys/cdefs.h>
   61 __FBSDID("$FreeBSD$");
   62 
   63 #include "opt_ddb.h"
   64 
   65 #include <sys/param.h>
   66 #include <sys/capsicum.h>
   67 #include <sys/domain.h>
   68 #include <sys/eventhandler.h>
   69 #include <sys/fcntl.h>
   70 #include <sys/file.h>
   71 #include <sys/filedesc.h>
   72 #include <sys/kernel.h>
   73 #include <sys/lock.h>
   74 #include <sys/malloc.h>
   75 #include <sys/mbuf.h>
   76 #include <sys/mount.h>
   77 #include <sys/mutex.h>
   78 #include <sys/namei.h>
   79 #include <sys/proc.h>
   80 #include <sys/protosw.h>
   81 #include <sys/queue.h>
   82 #include <sys/resourcevar.h>
   83 #include <sys/rwlock.h>
   84 #include <sys/socket.h>
   85 #include <sys/socketvar.h>
   86 #include <sys/signalvar.h>
   87 #include <sys/stat.h>
   88 #include <sys/sx.h>
   89 #include <sys/sysctl.h>
   90 #include <sys/systm.h>
   91 #include <sys/taskqueue.h>
   92 #include <sys/un.h>
   93 #include <sys/unpcb.h>
   94 #include <sys/vnode.h>
   95 
   96 #include <net/vnet.h>
   97 
   98 #ifdef DDB
   99 #include <ddb/ddb.h>
  100 #endif
  101 
  102 #include <security/mac/mac_framework.h>
  103 
  104 #include <vm/uma.h>
  105 
  106 MALLOC_DECLARE(M_FILECAPS);
  107 
  108 /*
  109  * See unpcb.h for the locking key.
  110  */
  111 
  112 static uma_zone_t       unp_zone;
  113 static unp_gen_t        unp_gencnt;     /* (l) */
  114 static u_int            unp_count;      /* (l) Count of local sockets. */
  115 static ino_t            unp_ino;        /* Prototype for fake inode numbers. */
  116 static int              unp_rights;     /* (g) File descriptors in flight. */
  117 static struct unp_head  unp_shead;      /* (l) List of stream sockets. */
  118 static struct unp_head  unp_dhead;      /* (l) List of datagram sockets. */
  119 static struct unp_head  unp_sphead;     /* (l) List of seqpacket sockets. */
  120 
  121 struct unp_defer {
  122         SLIST_ENTRY(unp_defer) ud_link;
  123         struct file *ud_fp;
  124 };
  125 static SLIST_HEAD(, unp_defer) unp_defers;
  126 static int unp_defers_count;
  127 
  128 static const struct sockaddr    sun_noname = { sizeof(sun_noname), AF_LOCAL };
  129 
  130 /*
  131  * Garbage collection of cyclic file descriptor/socket references occurs
  132  * asynchronously in a taskqueue context in order to avoid recursion and
  133  * reentrance in the UNIX domain socket, file descriptor, and socket layer
  134  * code.  See unp_gc() for a full description.
  135  */
  136 static struct timeout_task unp_gc_task;
  137 
  138 /*
  139  * The close of unix domain sockets attached as SCM_RIGHTS is
  140  * postponed to the taskqueue, to avoid arbitrary recursion depth.
  141  * The attached sockets might themselves have other sockets attached.
  142  */
  143 static struct task      unp_defer_task;
  144 
  145 /*
  146  * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
  147  * stream sockets; since data written on one socket is delivered directly into
  148  * the peer's receive buffer, the effective buffering between the two is PIPSIZ.
  149  *
  150  * Datagram sockets use the sendspace only as the maximum datagram size, and
  151  * do not actually reserve send buffer space.  Their recvspace should be
  152  * large enough for at least one max-size datagram plus address.
  153  */
  154 #ifndef PIPSIZ
  155 #define PIPSIZ  8192
  156 #endif
  157 static u_long   unpst_sendspace = PIPSIZ;
  158 static u_long   unpst_recvspace = PIPSIZ;
  159 static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
  160 static u_long   unpdg_recvspace = 4*1024;
  161 static u_long   unpsp_sendspace = PIPSIZ;       /* really max datagram size */
  162 static u_long   unpsp_recvspace = PIPSIZ;
  163 
  164 static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain");
  165 static SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0,
  166     "SOCK_STREAM");
  167 static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM");
  168 static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket, CTLFLAG_RW, 0,
  169     "SOCK_SEQPACKET");
  170 
  171 SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
  172            &unpst_sendspace, 0, "Default stream send space.");
  173 SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
  174            &unpst_recvspace, 0, "Default stream receive space.");
  175 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
  176            &unpdg_sendspace, 0, "Default datagram send space.");
  177 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
  178            &unpdg_recvspace, 0, "Default datagram receive space.");
  179 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW,
  180            &unpsp_sendspace, 0, "Default seqpacket send space.");
  181 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW,
  182            &unpsp_recvspace, 0, "Default seqpacket receive space.");
  183 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
  184     "File descriptors in flight.");
  185 SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
  186     &unp_defers_count, 0,
  187     "File descriptors deferred to taskqueue for close.");
  188 
  189 /*
  190  * Locking and synchronization:
  191  *
  192  * Several types of locks exist in the local domain socket implementation:
  193  * - a global linkage lock
  194  * - a global connection list lock
  195  * - the mtxpool lock
  196  * - per-unpcb mutexes
  197  *
  198  * The linkage lock protects the global socket lists, the generation number
  199  * counter and garbage collector state.
  200  *
  201  * The connection list lock protects the list of referring sockets in a datagram
  202  * socket PCB.  This lock is also overloaded to protect a global list of
  203  * sockets whose buffers contain socket references in the form of SCM_RIGHTS
  204  * messages.  To avoid recursion, such references are released by a dedicated
  205  * thread.
  206  *
  207  * The mtxpool lock protects the vnode from being modified while referenced.
  208  * Lock ordering rules require that it be acquired before any PCB locks.
  209  *
  210  * The unpcb lock (unp_mtx) protects the most commonly referenced fields in the
  211  * unpcb.  This includes the unp_conn field, which either links two connected
  212  * PCBs together (for connected socket types) or points at the destination
  213  * socket (for connectionless socket types).  The operations of creating or
  214  * destroying a connection therefore involve locking multiple PCBs.  To avoid
  215  * lock order reversals, in some cases this involves dropping a PCB lock and
  216  * using a reference counter to maintain liveness.
  217  *
  218  * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
  219  * allocated in pru_attach() and freed in pru_detach().  The validity of that
  220  * pointer is an invariant, so no lock is required to dereference the so_pcb
  221  * pointer if a valid socket reference is held by the caller.  In practice,
  222  * this is always true during operations performed on a socket.  Each unpcb
  223  * has a back-pointer to its socket, unp_socket, which will be stable under
  224  * the same circumstances.
  225  *
  226  * This pointer may only be safely dereferenced as long as a valid reference
  227  * to the unpcb is held.  Typically, this reference will be from the socket,
  228  * or from another unpcb when the referring unpcb's lock is held (in order
  229  * that the reference not be invalidated during use).  For example, to follow
  230  * unp->unp_conn->unp_socket, you need to hold a lock on unp_conn to guarantee
  231  * that detach is not run clearing unp_socket.
  232  *
  233  * Blocking with UNIX domain sockets is a tricky issue: unlike most network
  234  * protocols, bind() is a non-atomic operation, and connect() requires
  235  * potential sleeping in the protocol, due to potentially waiting on local or
  236  * distributed file systems.  We try to separate "lookup" operations, which
  237  * may sleep, and the IPC operations themselves, which typically can occur
  238  * with relative atomicity as locks can be held over the entire operation.
  239  *
  240  * Another tricky issue is simultaneous multi-threaded or multi-process
  241  * access to a single UNIX domain socket.  These are handled by the flags
  242  * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
  243  * binding, both of which involve dropping UNIX domain socket locks in order
  244  * to perform namei() and other file system operations.
  245  */
  246 static struct rwlock    unp_link_rwlock;
  247 static struct mtx       unp_defers_lock;
  248 
  249 #define UNP_LINK_LOCK_INIT()            rw_init(&unp_link_rwlock,       \
  250                                             "unp_link_rwlock")
  251 
  252 #define UNP_LINK_LOCK_ASSERT()  rw_assert(&unp_link_rwlock,     \
  253                                             RA_LOCKED)
  254 #define UNP_LINK_UNLOCK_ASSERT()        rw_assert(&unp_link_rwlock,     \
  255                                             RA_UNLOCKED)
  256 
  257 #define UNP_LINK_RLOCK()                rw_rlock(&unp_link_rwlock)
  258 #define UNP_LINK_RUNLOCK()              rw_runlock(&unp_link_rwlock)
  259 #define UNP_LINK_WLOCK()                rw_wlock(&unp_link_rwlock)
  260 #define UNP_LINK_WUNLOCK()              rw_wunlock(&unp_link_rwlock)
  261 #define UNP_LINK_WLOCK_ASSERT()         rw_assert(&unp_link_rwlock,     \
  262                                             RA_WLOCKED)
  263 #define UNP_LINK_WOWNED()               rw_wowned(&unp_link_rwlock)
  264 
  265 #define UNP_DEFERRED_LOCK_INIT()        mtx_init(&unp_defers_lock, \
  266                                             "unp_defer", NULL, MTX_DEF)
  267 #define UNP_DEFERRED_LOCK()             mtx_lock(&unp_defers_lock)
  268 #define UNP_DEFERRED_UNLOCK()           mtx_unlock(&unp_defers_lock)
  269 
  270 #define UNP_REF_LIST_LOCK()             UNP_DEFERRED_LOCK();
  271 #define UNP_REF_LIST_UNLOCK()           UNP_DEFERRED_UNLOCK();
  272 
  273 #define UNP_PCB_LOCK_INIT(unp)          mtx_init(&(unp)->unp_mtx,       \
  274                                             "unp", "unp",       \
  275                                             MTX_DUPOK|MTX_DEF)
  276 #define UNP_PCB_LOCK_DESTROY(unp)       mtx_destroy(&(unp)->unp_mtx)
  277 #define UNP_PCB_LOCKPTR(unp)            (&(unp)->unp_mtx)
  278 #define UNP_PCB_LOCK(unp)               mtx_lock(&(unp)->unp_mtx)
  279 #define UNP_PCB_TRYLOCK(unp)            mtx_trylock(&(unp)->unp_mtx)
  280 #define UNP_PCB_UNLOCK(unp)             mtx_unlock(&(unp)->unp_mtx)
  281 #define UNP_PCB_OWNED(unp)              mtx_owned(&(unp)->unp_mtx)
  282 #define UNP_PCB_LOCK_ASSERT(unp)        mtx_assert(&(unp)->unp_mtx, MA_OWNED)
  283 #define UNP_PCB_UNLOCK_ASSERT(unp)      mtx_assert(&(unp)->unp_mtx, MA_NOTOWNED)
  284 
  285 static int      uipc_connect2(struct socket *, struct socket *);
  286 static int      uipc_ctloutput(struct socket *, struct sockopt *);
  287 static int      unp_connect(struct socket *, struct sockaddr *,
  288                     struct thread *);
  289 static int      unp_connectat(int, struct socket *, struct sockaddr *,
  290                     struct thread *);
  291 static int      unp_connect2(struct socket *so, struct socket *so2, int);
  292 static void     unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
  293 static void     unp_dispose(struct socket *so);
  294 static void     unp_dispose_mbuf(struct mbuf *);
  295 static void     unp_shutdown(struct unpcb *);
  296 static void     unp_drop(struct unpcb *);
  297 static void     unp_gc(__unused void *, int);
  298 static void     unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
  299 static void     unp_discard(struct file *);
  300 static void     unp_freerights(struct filedescent **, int);
  301 static void     unp_init(void);
  302 static int      unp_internalize(struct mbuf **, struct thread *);
  303 static void     unp_internalize_fp(struct file *);
  304 static int      unp_externalize(struct mbuf *, struct mbuf **, int);
  305 static int      unp_externalize_fp(struct file *);
  306 static struct mbuf      *unp_addsockcred(struct thread *, struct mbuf *);
  307 static void     unp_process_defers(void * __unused, int);
  308 
  309 
  310 static void
  311 unp_pcb_hold(struct unpcb *unp)
  312 {
  313         refcount_acquire(&unp->unp_refcount);
  314 }
  315 
  316 static __result_use_check bool
  317 unp_pcb_rele(struct unpcb *unp)
  318 {
  319         bool ret;
  320 
  321         UNP_PCB_LOCK_ASSERT(unp);
  322 
  323         if ((ret = refcount_release(&unp->unp_refcount))) {
  324                 UNP_PCB_UNLOCK(unp);
  325                 UNP_PCB_LOCK_DESTROY(unp);
  326                 uma_zfree(unp_zone, unp);
  327         }
  328         return (ret);
  329 }
  330 
  331 static void
  332 unp_pcb_rele_notlast(struct unpcb *unp)
  333 {
  334         bool ret __unused;
  335 
  336         ret = refcount_release(&unp->unp_refcount);
  337         KASSERT(!ret, ("%s: unpcb %p has no references", __func__, unp));
  338 }
  339 
  340 static void
  341 unp_pcb_lock_pair(struct unpcb *unp, struct unpcb *unp2)
  342 {
  343         UNP_PCB_UNLOCK_ASSERT(unp);
  344         UNP_PCB_UNLOCK_ASSERT(unp2);
  345 
  346         if (unp == unp2) {
  347                 UNP_PCB_LOCK(unp);
  348         } else if ((uintptr_t)unp2 > (uintptr_t)unp) {
  349                 UNP_PCB_LOCK(unp);
  350                 UNP_PCB_LOCK(unp2);
  351         } else {
  352                 UNP_PCB_LOCK(unp2);
  353                 UNP_PCB_LOCK(unp);
  354         }
  355 }
  356 
  357 static void
  358 unp_pcb_unlock_pair(struct unpcb *unp, struct unpcb *unp2)
  359 {
  360         UNP_PCB_UNLOCK(unp);
  361         if (unp != unp2)
  362                 UNP_PCB_UNLOCK(unp2);
  363 }
  364 
  365 /*
  366  * Try to lock the connected peer of an already locked socket.  In some cases
  367  * this requires that we unlock the current socket.  The pairbusy counter is
  368  * used to block concurrent connection attempts while the lock is dropped.  The
  369  * caller must be careful to revalidate PCB state.
  370  */
  371 static struct unpcb *
  372 unp_pcb_lock_peer(struct unpcb *unp)
  373 {
  374         struct unpcb *unp2;
  375 
  376         UNP_PCB_LOCK_ASSERT(unp);
  377         unp2 = unp->unp_conn;
  378         if (__predict_false(unp2 == NULL))
  379                 return (NULL);
  380         if (__predict_false(unp == unp2))
  381                 return (unp);
  382 
  383         UNP_PCB_UNLOCK_ASSERT(unp2);
  384 
  385         if (__predict_true(UNP_PCB_TRYLOCK(unp2)))
  386                 return (unp2);
  387         if ((uintptr_t)unp2 > (uintptr_t)unp) {
  388                 UNP_PCB_LOCK(unp2);
  389                 return (unp2);
  390         }
  391         unp->unp_pairbusy++;
  392         unp_pcb_hold(unp2);
  393         UNP_PCB_UNLOCK(unp);
  394 
  395         UNP_PCB_LOCK(unp2);
  396         UNP_PCB_LOCK(unp);
  397         KASSERT(unp->unp_conn == unp2 || unp->unp_conn == NULL,
  398             ("%s: socket %p was reconnected", __func__, unp));
  399         if (--unp->unp_pairbusy == 0 && (unp->unp_flags & UNP_WAITING) != 0) {
  400                 unp->unp_flags &= ~UNP_WAITING;
  401                 wakeup(unp);
  402         }
  403         if (unp_pcb_rele(unp2)) {
  404                 /* unp2 is unlocked. */
  405                 return (NULL);
  406         }
  407         if (unp->unp_conn == NULL) {
  408                 UNP_PCB_UNLOCK(unp2);
  409                 return (NULL);
  410         }
  411         return (unp2);
  412 }
  413 
  414 
  415 /*
  416  * Definitions of protocols supported in the LOCAL domain.
  417  */
  418 static struct domain localdomain;
  419 static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream;
  420 static struct pr_usrreqs uipc_usrreqs_seqpacket;
  421 static struct protosw localsw[] = {
  422 {
  423         .pr_type =              SOCK_STREAM,
  424         .pr_domain =            &localdomain,
  425         .pr_flags =             PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
  426         .pr_ctloutput =         &uipc_ctloutput,
  427         .pr_usrreqs =           &uipc_usrreqs_stream
  428 },
  429 {
  430         .pr_type =              SOCK_DGRAM,
  431         .pr_domain =            &localdomain,
  432         .pr_flags =             PR_ATOMIC|PR_ADDR|PR_RIGHTS,
  433         .pr_ctloutput =         &uipc_ctloutput,
  434         .pr_usrreqs =           &uipc_usrreqs_dgram
  435 },
  436 {
  437         .pr_type =              SOCK_SEQPACKET,
  438         .pr_domain =            &localdomain,
  439 
  440         /*
  441          * XXXRW: For now, PR_ADDR because soreceive will bump into them
  442          * due to our use of sbappendaddr.  A new sbappend variant is needed
  443          * that supports both atomic record writes and control data.
  444          */
  445         .pr_flags =             PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD|
  446                                     PR_RIGHTS,
  447         .pr_ctloutput =         &uipc_ctloutput,
  448         .pr_usrreqs =           &uipc_usrreqs_seqpacket,
  449 },
  450 };
  451 
  452 static struct domain localdomain = {
  453         .dom_family =           AF_LOCAL,
  454         .dom_name =             "local",
  455         .dom_init =             unp_init,
  456         .dom_externalize =      unp_externalize,
  457         .dom_dispose =          unp_dispose,
  458         .dom_protosw =          localsw,
  459         .dom_protoswNPROTOSW =  &localsw[nitems(localsw)]
  460 };
  461 DOMAIN_SET(local);
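      /*
       * Userland sketch for illustration: the three protosw entries above
       * are selected by the type argument to socket(2)/socketpair(2) in the
       * PF_LOCAL domain.  The function and parameter names are hypothetical.
       */
      #include <sys/socket.h>

      static int
      example_create_local_pairs(int sv_stream[2], int sv_seqpacket[2])
      {
              /* A socketpair(2) yields two already-connected sockets. */
              if (socketpair(PF_LOCAL, SOCK_STREAM, 0, sv_stream) == -1)
                      return (-1);
              if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sv_seqpacket) == -1)
                      return (-1);
              return (0);
      }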
  462 
  463 static void
  464 uipc_abort(struct socket *so)
  465 {
  466         struct unpcb *unp, *unp2;
  467 
  468         unp = sotounpcb(so);
  469         KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
  470         UNP_PCB_UNLOCK_ASSERT(unp);
  471 
  472         UNP_PCB_LOCK(unp);
  473         unp2 = unp->unp_conn;
  474         if (unp2 != NULL) {
  475                 unp_pcb_hold(unp2);
  476                 UNP_PCB_UNLOCK(unp);
  477                 unp_drop(unp2);
  478         } else
  479                 UNP_PCB_UNLOCK(unp);
  480 }
  481 
  482 static int
  483 uipc_accept(struct socket *so, struct sockaddr **nam)
  484 {
  485         struct unpcb *unp, *unp2;
  486         const struct sockaddr *sa;
  487 
  488         /*
  489          * Pass back name of connected socket, if it was bound and we are
  490          * still connected (our peer may have closed already!).
  491          */
  492         unp = sotounpcb(so);
  493         KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));
  494 
  495         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  496         UNP_PCB_LOCK(unp);
  497         unp2 = unp_pcb_lock_peer(unp);
  498         if (unp2 != NULL && unp2->unp_addr != NULL)
  499                 sa = (struct sockaddr *)unp2->unp_addr;
  500         else
  501                 sa = &sun_noname;
  502         bcopy(sa, *nam, sa->sa_len);
  503         if (unp2 != NULL)
  504                 unp_pcb_unlock_pair(unp, unp2);
  505         else
  506                 UNP_PCB_UNLOCK(unp);
  507         return (0);
  508 }
  509 
  510 static int
  511 uipc_attach(struct socket *so, int proto, struct thread *td)
  512 {
  513         u_long sendspace, recvspace;
  514         struct unpcb *unp;
  515         int error;
  516         bool locked;
  517 
  518         KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
  519         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  520                 switch (so->so_type) {
  521                 case SOCK_STREAM:
  522                         sendspace = unpst_sendspace;
  523                         recvspace = unpst_recvspace;
  524                         break;
  525 
  526                 case SOCK_DGRAM:
  527                         sendspace = unpdg_sendspace;
  528                         recvspace = unpdg_recvspace;
  529                         break;
  530 
  531                 case SOCK_SEQPACKET:
  532                         sendspace = unpsp_sendspace;
  533                         recvspace = unpsp_recvspace;
  534                         break;
  535 
  536                 default:
  537                         panic("uipc_attach");
  538                 }
  539                 error = soreserve(so, sendspace, recvspace);
  540                 if (error)
  541                         return (error);
  542         }
  543         unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
  544         if (unp == NULL)
  545                 return (ENOBUFS);
  546         LIST_INIT(&unp->unp_refs);
  547         UNP_PCB_LOCK_INIT(unp);
  548         unp->unp_socket = so;
  549         so->so_pcb = unp;
  550         refcount_init(&unp->unp_refcount, 1);
  551 
  552         if ((locked = UNP_LINK_WOWNED()) == false)
  553                 UNP_LINK_WLOCK();
  554 
  555         unp->unp_gencnt = ++unp_gencnt;
  556         unp->unp_ino = ++unp_ino;
  557         unp_count++;
  558         switch (so->so_type) {
  559         case SOCK_STREAM:
  560                 LIST_INSERT_HEAD(&unp_shead, unp, unp_link);
  561                 break;
  562 
  563         case SOCK_DGRAM:
  564                 LIST_INSERT_HEAD(&unp_dhead, unp, unp_link);
  565                 break;
  566 
  567         case SOCK_SEQPACKET:
  568                 LIST_INSERT_HEAD(&unp_sphead, unp, unp_link);
  569                 break;
  570 
  571         default:
  572                 panic("uipc_attach");
  573         }
  574 
  575         if (locked == false)
  576                 UNP_LINK_WUNLOCK();
  577 
  578         return (0);
  579 }
  580 
  581 static int
  582 uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
  583 {
  584         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  585         struct vattr vattr;
  586         int error, namelen;
  587         struct nameidata nd;
  588         struct unpcb *unp;
  589         struct vnode *vp;
  590         struct mount *mp;
  591         cap_rights_t rights;
  592         char *buf;
  593 
  594         if (nam->sa_family != AF_UNIX)
  595                 return (EAFNOSUPPORT);
  596 
  597         unp = sotounpcb(so);
  598         KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
  599 
  600         if (soun->sun_len > sizeof(struct sockaddr_un))
  601                 return (EINVAL);
  602         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  603         if (namelen <= 0)
  604                 return (EINVAL);
  605 
  606         /*
  607          * We don't allow simultaneous bind() calls on a single UNIX domain
  608          * socket, so flag in-progress operations, and return an error if an
  609          * operation is already in progress.
  610          *
  611          * Historically, we have not allowed a socket to be rebound, so this
  612          * also returns an error.  Not allowing re-binding simplifies the
  613          * implementation and avoids a great many possible failure modes.
  614          */
  615         UNP_PCB_LOCK(unp);
  616         if (unp->unp_vnode != NULL) {
  617                 UNP_PCB_UNLOCK(unp);
  618                 return (EINVAL);
  619         }
  620         if (unp->unp_flags & UNP_BINDING) {
  621                 UNP_PCB_UNLOCK(unp);
  622                 return (EALREADY);
  623         }
  624         unp->unp_flags |= UNP_BINDING;
  625         UNP_PCB_UNLOCK(unp);
  626 
  627         buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
  628         bcopy(soun->sun_path, buf, namelen);
  629         buf[namelen] = 0;
  630 
  631 restart:
  632         NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
  633             UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_BINDAT), td);
  634 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  635         error = namei(&nd);
  636         if (error)
  637                 goto error;
  638         vp = nd.ni_vp;
  639         if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
  640                 NDFREE(&nd, NDF_ONLY_PNBUF);
  641                 if (nd.ni_dvp == vp)
  642                         vrele(nd.ni_dvp);
  643                 else
  644                         vput(nd.ni_dvp);
  645                 if (vp != NULL) {
  646                         vrele(vp);
  647                         error = EADDRINUSE;
  648                         goto error;
  649                 }
  650                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
  651                 if (error)
  652                         goto error;
  653                 goto restart;
  654         }
  655         VATTR_NULL(&vattr);
  656         vattr.va_type = VSOCK;
  657         vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
  658 #ifdef MAC
  659         error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  660             &vattr);
  661 #endif
  662         if (error == 0)
  663                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  664         NDFREE(&nd, NDF_ONLY_PNBUF);
  665         vput(nd.ni_dvp);
  666         if (error) {
  667                 vn_finished_write(mp);
  668                 goto error;
  669         }
  670         vp = nd.ni_vp;
  671         ASSERT_VOP_ELOCKED(vp, "uipc_bind");
  672         soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
  673 
  674         UNP_PCB_LOCK(unp);
  675         VOP_UNP_BIND(vp, unp);
  676         unp->unp_vnode = vp;
  677         unp->unp_addr = soun;
  678         unp->unp_flags &= ~UNP_BINDING;
  679         UNP_PCB_UNLOCK(unp);
  680         VOP_UNLOCK(vp, 0);
  681         vn_finished_write(mp);
  682         free(buf, M_TEMP);
  683         return (0);
  684 
  685 error:
  686         UNP_PCB_LOCK(unp);
  687         unp->unp_flags &= ~UNP_BINDING;
  688         UNP_PCB_UNLOCK(unp);
  689         free(buf, M_TEMP);
  690         return (error);
  691 }
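      /*
       * Userland sketch for illustration: binding a local socket to a path
       * in the file system name space.  As uipc_bindat() above shows, an
       * existing file at the path yields EADDRINUSE and rebinding a socket
       * yields EINVAL, so a stale path is usually removed with unlink(2)
       * first.  The function name is hypothetical.
       */
      #include <sys/socket.h>
      #include <sys/un.h>
      #include <string.h>
      #include <unistd.h>

      static int
      example_bind_local(int s, const char *path)
      {
              struct sockaddr_un sun;

              memset(&sun, 0, sizeof(sun));
              sun.sun_family = AF_LOCAL;
              if (strlcpy(sun.sun_path, path, sizeof(sun.sun_path)) >=
                  sizeof(sun.sun_path))
                      return (-1);
              sun.sun_len = SUN_LEN(&sun);

              (void)unlink(path);
              return (bind(s, (struct sockaddr *)&sun, SUN_LEN(&sun)));
      }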
  692 
  693 static int
  694 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  695 {
  696 
  697         return (uipc_bindat(AT_FDCWD, so, nam, td));
  698 }
  699 
  700 static int
  701 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  702 {
  703         int error;
  704 
  705         KASSERT(td == curthread, ("uipc_connect: td != curthread"));
  706         error = unp_connect(so, nam, td);
  707         return (error);
  708 }
  709 
  710 static int
  711 uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
  712     struct thread *td)
  713 {
  714         int error;
  715 
  716         KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
  717         error = unp_connectat(fd, so, nam, td);
  718         return (error);
  719 }
  720 
  721 static void
  722 uipc_close(struct socket *so)
  723 {
  724         struct unpcb *unp, *unp2;
  725         struct vnode *vp = NULL;
  726         struct mtx *vplock;
  727 
  728         unp = sotounpcb(so);
  729         KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
  730 
  731 
  732         vplock = NULL;
  733         if ((vp = unp->unp_vnode) != NULL) {
  734                 vplock = mtx_pool_find(mtxpool_sleep, vp);
  735                 mtx_lock(vplock);
  736         }
  737         UNP_PCB_LOCK(unp);
  738         if (vp && unp->unp_vnode == NULL) {
  739                 mtx_unlock(vplock);
  740                 vp = NULL;
  741         }
  742         if (vp != NULL) {
  743                 VOP_UNP_DETACH(vp);
  744                 unp->unp_vnode = NULL;
  745         }
  746         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
  747                 unp_disconnect(unp, unp2);
  748         else
  749                 UNP_PCB_UNLOCK(unp);
  750         if (vp) {
  751                 mtx_unlock(vplock);
  752                 vrele(vp);
  753         }
  754 }
  755 
  756 static int
  757 uipc_connect2(struct socket *so1, struct socket *so2)
  758 {
  759         struct unpcb *unp, *unp2;
  760         int error;
  761 
  762         unp = so1->so_pcb;
  763         KASSERT(unp != NULL, ("uipc_connect2: unp == NULL"));
  764         unp2 = so2->so_pcb;
  765         KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL"));
  766         unp_pcb_lock_pair(unp, unp2);
  767         error = unp_connect2(so1, so2, PRU_CONNECT2);
  768         unp_pcb_unlock_pair(unp, unp2);
  769         return (error);
  770 }
  771 
  772 static void
  773 uipc_detach(struct socket *so)
  774 {
  775         struct unpcb *unp, *unp2;
  776         struct mtx *vplock;
  777         struct vnode *vp;
  778         int local_unp_rights;
  779 
  780         unp = sotounpcb(so);
  781         KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
  782 
  783         vp = NULL;
  784         vplock = NULL;
  785 
  786         SOCK_LOCK(so);
  787         if (!SOLISTENING(so)) {
  788                 /*
  789                  * Once the socket is removed from the global lists,
  790                  * uipc_ready() will not be able to locate its socket buffer, so
  791                  * clear the buffer now.  At this point internalized rights have
  792                  * already been disposed of.
  793                  */
  794                 sbrelease(&so->so_rcv, so);
  795         }
  796         SOCK_UNLOCK(so);
  797 
  798         UNP_LINK_WLOCK();
  799         LIST_REMOVE(unp, unp_link);
  800         unp->unp_gencnt = ++unp_gencnt;
  801         --unp_count;
  802         UNP_LINK_WUNLOCK();
  803 
  804         UNP_PCB_UNLOCK_ASSERT(unp);
  805  restart:
  806         if ((vp = unp->unp_vnode) != NULL) {
  807                 vplock = mtx_pool_find(mtxpool_sleep, vp);
  808                 mtx_lock(vplock);
  809         }
  810         UNP_PCB_LOCK(unp);
  811         if (unp->unp_vnode != vp && unp->unp_vnode != NULL) {
  812                 if (vplock)
  813                         mtx_unlock(vplock);
  814                 UNP_PCB_UNLOCK(unp);
  815                 goto restart;
  816         }
  817         if ((vp = unp->unp_vnode) != NULL) {
  818                 VOP_UNP_DETACH(vp);
  819                 unp->unp_vnode = NULL;
  820         }
  821         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
  822                 unp_disconnect(unp, unp2);
  823         else
  824                 UNP_PCB_UNLOCK(unp);
  825 
  826         UNP_REF_LIST_LOCK();
  827         while (!LIST_EMPTY(&unp->unp_refs)) {
  828                 struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
  829 
  830                 unp_pcb_hold(ref);
  831                 UNP_REF_LIST_UNLOCK();
  832 
  833                 MPASS(ref != unp);
  834                 UNP_PCB_UNLOCK_ASSERT(ref);
  835                 unp_drop(ref);
  836                 UNP_REF_LIST_LOCK();
  837         }
  838         UNP_REF_LIST_UNLOCK();
  839 
  840         UNP_PCB_LOCK(unp);
  841         local_unp_rights = unp_rights;
  842         unp->unp_socket->so_pcb = NULL;
  843         unp->unp_socket = NULL;
  844         free(unp->unp_addr, M_SONAME);
  845         unp->unp_addr = NULL;
  846         if (!unp_pcb_rele(unp))
  847                 UNP_PCB_UNLOCK(unp);
  848         if (vp) {
  849                 mtx_unlock(vplock);
  850                 vrele(vp);
  851         }
  852         if (local_unp_rights)
  853                 taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
  854 }
  855 
  856 static int
  857 uipc_disconnect(struct socket *so)
  858 {
  859         struct unpcb *unp, *unp2;
  860 
  861         unp = sotounpcb(so);
  862         KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
  863 
  864         UNP_PCB_LOCK(unp);
  865         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
  866                 unp_disconnect(unp, unp2);
  867         else
  868                 UNP_PCB_UNLOCK(unp);
  869         return (0);
  870 }
  871 
  872 static int
  873 uipc_listen(struct socket *so, int backlog, struct thread *td)
  874 {
  875         struct unpcb *unp;
  876         int error;
  877 
  878         if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET)
  879                 return (EOPNOTSUPP);
  880 
  881         unp = sotounpcb(so);
  882         KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));
  883 
  884         UNP_PCB_LOCK(unp);
  885         if (unp->unp_vnode == NULL) {
  886                 /* Already connected or not bound to an address. */
  887                 error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
  888                 UNP_PCB_UNLOCK(unp);
  889                 return (error);
  890         }
  891 
  892         SOCK_LOCK(so);
  893         error = solisten_proto_check(so);
  894         if (error == 0) {
  895                 cru2xt(td, &unp->unp_peercred);
  896                 solisten_proto(so, backlog);
  897         }
  898         SOCK_UNLOCK(so);
  899         UNP_PCB_UNLOCK(unp);
  900         return (error);
  901 }
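      /*
       * Userland sketch for illustration: as the EDESTADDRREQ case above
       * implies, a local stream socket must be bound to a path before
       * listen(2) succeeds.  example_bind_local() is the hypothetical helper
       * sketched after uipc_bindat() above; the includes given there are
       * assumed here as well.
       */
      static int
      example_listen_local(const char *path)
      {
              int s;

              s = socket(PF_LOCAL, SOCK_STREAM, 0);
              if (s == -1)
                      return (-1);
              if (example_bind_local(s, path) == -1 || listen(s, 5) == -1) {
                      close(s);
                      return (-1);
              }
              return (s);
      }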
  902 
  903 static int
  904 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  905 {
  906         struct unpcb *unp, *unp2;
  907         const struct sockaddr *sa;
  908 
  909         unp = sotounpcb(so);
  910         KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));
  911 
  912         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  913         UNP_LINK_RLOCK();
  914         /*
  915          * XXX: It seems that this test always fails even when the
  916          * connection is established.  So, this else clause is added as a
  917          * workaround to return a PF_LOCAL sockaddr.
  918          */
  919         unp2 = unp->unp_conn;
  920         if (unp2 != NULL) {
  921                 UNP_PCB_LOCK(unp2);
  922                 if (unp2->unp_addr != NULL)
  923                         sa = (struct sockaddr *) unp2->unp_addr;
  924                 else
  925                         sa = &sun_noname;
  926                 bcopy(sa, *nam, sa->sa_len);
  927                 UNP_PCB_UNLOCK(unp2);
  928         } else {
  929                 sa = &sun_noname;
  930                 bcopy(sa, *nam, sa->sa_len);
  931         }
  932         UNP_LINK_RUNLOCK();
  933         return (0);
  934 }
  935 
  936 static int
  937 uipc_rcvd(struct socket *so, int flags)
  938 {
  939         struct unpcb *unp, *unp2;
  940         struct socket *so2;
  941         u_int mbcnt, sbcc;
  942 
  943         unp = sotounpcb(so);
  944         KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
  945         KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
  946             ("%s: socktype %d", __func__, so->so_type));
  947 
  948         /*
  949          * Adjust backpressure on sender and wakeup any waiting to write.
  950          *
  951          * The unp lock is acquired to maintain the validity of the unp_conn
  952          * pointer; no lock on unp2 is required as unp2->unp_socket will be
  953          * static as long as we don't permit unp2 to disconnect from unp,
  954          * which is prevented by the lock on unp.  We cache values from
  955          * so_rcv to avoid holding the so_rcv lock over the entire
  956          * transaction on the remote so_snd.
  957          */
  958         SOCKBUF_LOCK(&so->so_rcv);
  959         mbcnt = so->so_rcv.sb_mbcnt;
  960         sbcc = sbavail(&so->so_rcv);
  961         SOCKBUF_UNLOCK(&so->so_rcv);
  962         /*
  963          * There is a benign race condition at this point.  If we're planning to
  964          * clear SB_STOP, but uipc_send is called on the connected socket at
  965          * this instant, it might add data to the sockbuf and set SB_STOP.  Then
  966          * we would erroneously clear SB_STOP below, even though the sockbuf is
  967          * full.  The race is benign because the only ill effect is to allow the
  968          * sockbuf to exceed its size limit, and the size limits are not
  969          * strictly guaranteed anyway.
  970          */
  971         UNP_PCB_LOCK(unp);
  972         unp2 = unp->unp_conn;
  973         if (unp2 == NULL) {
  974                 UNP_PCB_UNLOCK(unp);
  975                 return (0);
  976         }
  977         so2 = unp2->unp_socket;
  978         SOCKBUF_LOCK(&so2->so_snd);
  979         if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
  980                 so2->so_snd.sb_flags &= ~SB_STOP;
  981         sowwakeup_locked(so2);
  982         UNP_PCB_UNLOCK(unp);
  983         return (0);
  984 }
  985 
  986 static int
  987 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  988     struct mbuf *control, struct thread *td)
  989 {
  990         struct unpcb *unp, *unp2;
  991         struct socket *so2;
  992         u_int mbcnt, sbcc;
  993         int freed, error;
  994 
  995         unp = sotounpcb(so);
  996         KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
  997         KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM ||
  998             so->so_type == SOCK_SEQPACKET,
  999             ("%s: socktype %d", __func__, so->so_type));
 1000 
 1001         freed = error = 0;
 1002         if (flags & PRUS_OOB) {
 1003                 error = EOPNOTSUPP;
 1004                 goto release;
 1005         }
 1006         if (control != NULL && (error = unp_internalize(&control, td)))
 1007                 goto release;
 1008 
 1009         unp2 = NULL;
 1010         switch (so->so_type) {
 1011         case SOCK_DGRAM:
 1012         {
 1013                 const struct sockaddr *from;
 1014 
 1015                 if (nam != NULL) {
 1016                         error = unp_connect(so, nam, td);
 1017                         if (error != 0)
 1018                                 break;
 1019                 }
 1020                 UNP_PCB_LOCK(unp);
 1021 
 1022                 /*
 1023                  * Because connect() and send() are non-atomic in a sendto()
 1024                  * with a target address, it's possible that the socket will
 1025                  * have disconnected before the send() can run.  In that case
 1026                  * return the slightly counter-intuitive but otherwise
 1027                  * correct error that the socket is not connected.
 1028                  */
 1029                 unp2 = unp_pcb_lock_peer(unp);
 1030                 if (unp2 == NULL) {
 1031                         UNP_PCB_UNLOCK(unp);
 1032                         error = ENOTCONN;
 1033                         break;
 1034                 }
 1035 
 1036                 if (unp2->unp_flags & UNP_WANTCRED)
 1037                         control = unp_addsockcred(td, control);
 1038                 if (unp->unp_addr != NULL)
 1039                         from = (struct sockaddr *)unp->unp_addr;
 1040                 else
 1041                         from = &sun_noname;
 1042                 so2 = unp2->unp_socket;
 1043                 SOCKBUF_LOCK(&so2->so_rcv);
 1044                 if (sbappendaddr_locked(&so2->so_rcv, from, m,
 1045                     control)) {
 1046                         sorwakeup_locked(so2);
 1047                         m = NULL;
 1048                         control = NULL;
 1049                 } else {
 1050                         soroverflow_locked(so2);
 1051                         error = ENOBUFS;
 1052                 }
 1053                 if (nam != NULL)
 1054                         unp_disconnect(unp, unp2);
 1055                 else
 1056                         unp_pcb_unlock_pair(unp, unp2);
 1057                 break;
 1058         }
 1059 
 1060         case SOCK_SEQPACKET:
 1061         case SOCK_STREAM:
 1062                 if ((so->so_state & SS_ISCONNECTED) == 0) {
 1063                         if (nam != NULL) {
 1064                                 error = unp_connect(so, nam, td);
 1065                                 if (error != 0)
 1066                                         break;
 1067                         } else {
 1068                                 error = ENOTCONN;
 1069                                 break;
 1070                         }
 1071                 }
 1072 
 1073                 UNP_PCB_LOCK(unp);
 1074                 if ((unp2 = unp_pcb_lock_peer(unp)) == NULL) {
 1075                         UNP_PCB_UNLOCK(unp);
 1076                         error = ENOTCONN;
 1077                         break;
 1078                 } else if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 1079                         unp_pcb_unlock_pair(unp, unp2);
 1080                         error = EPIPE;
 1081                         break;
 1082                 }
 1083                 UNP_PCB_UNLOCK(unp);
 1084                 if ((so2 = unp2->unp_socket) == NULL) {
 1085                         UNP_PCB_UNLOCK(unp2);
 1086                         error = ENOTCONN;
 1087                         break;
 1088                 }
 1089                 SOCKBUF_LOCK(&so2->so_rcv);
 1090                 if (unp2->unp_flags & UNP_WANTCRED) {
 1091                         /*
 1092                          * Credentials are passed only once on SOCK_STREAM
 1093                          * and SOCK_SEQPACKET.
 1094                          */
 1095                         unp2->unp_flags &= ~UNP_WANTCRED;
 1096                         control = unp_addsockcred(td, control);
 1097                 }
 1098 
 1099                 /*
 1100                  * Send to paired receive port and wake up readers.  Don't
 1101                  * check for space available in the receive buffer if we're
 1102                  * attaching ancillary data; Unix domain sockets only check
 1103                  * for space in the sending sockbuf, and that check is
 1104                  * performed one level up the stack.  At that level we cannot
 1105                  * precisely account for the amount of buffer space used
 1106                  * (e.g., because control messages are not yet internalized).
 1107                  */
 1108                 switch (so->so_type) {
 1109                 case SOCK_STREAM:
 1110                         if (control != NULL) {
 1111                                 sbappendcontrol_locked(&so2->so_rcv, m,
 1112                                     control, flags);
 1113                                 control = NULL;
 1114                         } else
 1115                                 sbappend_locked(&so2->so_rcv, m, flags);
 1116                         break;
 1117 
 1118                 case SOCK_SEQPACKET:
 1119                         if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
 1120                             &sun_noname, m, control))
 1121                                 control = NULL;
 1122                         break;
 1123                 }
 1124 
 1125                 mbcnt = so2->so_rcv.sb_mbcnt;
 1126                 sbcc = sbavail(&so2->so_rcv);
 1127                 if (sbcc)
 1128                         sorwakeup_locked(so2);
 1129                 else
 1130                         SOCKBUF_UNLOCK(&so2->so_rcv);
 1131 
 1132                 /*
 1133                  * The PCB lock on unp2 protects the SB_STOP flag.  Without it,
 1134                  * it would be possible for uipc_rcvd to be called at this
 1135                  * point, drain the receiving sockbuf, clear SB_STOP, and then
 1136                  * we would set SB_STOP below.  That could lead to an empty
 1137          * sockbuf having SB_STOP set.
 1138                  */
 1139                 SOCKBUF_LOCK(&so->so_snd);
 1140                 if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
 1141                         so->so_snd.sb_flags |= SB_STOP;
 1142                 SOCKBUF_UNLOCK(&so->so_snd);
 1143                 UNP_PCB_UNLOCK(unp2);
 1144                 m = NULL;
 1145                 break;
 1146         }
 1147 
 1148         /*
 1149          * PRUS_EOF is equivalent to pru_send followed by pru_shutdown.
 1150          */
 1151         if (flags & PRUS_EOF) {
 1152                 UNP_PCB_LOCK(unp);
 1153                 socantsendmore(so);
 1154                 unp_shutdown(unp);
 1155                 UNP_PCB_UNLOCK(unp);
 1156         }
 1157         if (control != NULL && error != 0)
 1158                 unp_dispose_mbuf(control);
 1159 
 1160 release:
 1161         if (control != NULL)
 1162                 m_freem(control);
 1163         /*
 1164          * In case of PRUS_NOTREADY, uipc_ready() is responsible
 1165          * for freeing memory.
 1166          */   
 1167         if (m != NULL && (flags & PRUS_NOTREADY) == 0)
 1168                 m_freem(m);
 1169         return (error);
 1170 }
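      /*
       * Userland sketch for illustration: the UNP_WANTCRED handling above is
       * enabled with the LOCAL_CREDS socket option, after which received
       * messages carry the sender's credentials as an SCM_CREDS control
       * message holding a struct cmsgcred.  The function name and buffer
       * size are hypothetical.
       */
      #include <sys/types.h>
      #include <sys/socket.h>
      #include <sys/un.h>
      #include <sys/uio.h>
      #include <string.h>

      static int
      example_recv_creds(int s, pid_t *pidp)
      {
              struct msghdr msg;
              struct iovec iov;
              struct cmsghdr *cmsg;
              struct cmsgcred cred;
              union {
                      struct cmsghdr hdr;
                      char buf[CMSG_SPACE(sizeof(struct cmsgcred))];
              } cmsgbuf;
              char data[64];
              int on = 1;

              /* Ask the kernel to append sender credentials. */
              if (setsockopt(s, SOL_LOCAL, LOCAL_CREDS, &on, sizeof(on)) == -1)
                      return (-1);

              memset(&msg, 0, sizeof(msg));
              iov.iov_base = data;
              iov.iov_len = sizeof(data);
              msg.msg_iov = &iov;
              msg.msg_iovlen = 1;
              msg.msg_control = cmsgbuf.buf;
              msg.msg_controllen = sizeof(cmsgbuf.buf);
              if (recvmsg(s, &msg, 0) == -1)
                      return (-1);

              /* Walk the control messages looking for SCM_CREDS. */
              for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
                  cmsg = CMSG_NXTHDR(&msg, cmsg)) {
                      if (cmsg->cmsg_level == SOL_SOCKET &&
                          cmsg->cmsg_type == SCM_CREDS) {
                              memcpy(&cred, CMSG_DATA(cmsg), sizeof(cred));
                              *pidp = cred.cmcred_pid;
                              return (0);
                      }
              }
              return (-1);
      }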
 1171 
 1172 static bool
 1173 uipc_ready_scan(struct socket *so, struct mbuf *m, int count, int *errorp)
 1174 {
 1175         struct mbuf *mb, *n;
 1176         struct sockbuf *sb;
 1177 
 1178         SOCK_LOCK(so);
 1179         if (SOLISTENING(so)) {
 1180                 SOCK_UNLOCK(so);
 1181                 return (false);
 1182         }
 1183         mb = NULL;
 1184         sb = &so->so_rcv;
 1185         SOCKBUF_LOCK(sb);
 1186         if (sb->sb_fnrdy != NULL) {
 1187                 for (mb = sb->sb_mb, n = mb->m_nextpkt; mb != NULL;) {
 1188                         if (mb == m) {
 1189                                 *errorp = sbready(sb, m, count);
 1190                                 break;
 1191                         }
 1192                         mb = mb->m_next;
 1193                         if (mb == NULL) {
 1194                                 mb = n;
 1195                                 if (mb != NULL)
 1196                                         n = mb->m_nextpkt;
 1197                         }
 1198                 }
 1199         }
 1200         SOCKBUF_UNLOCK(sb);
 1201         SOCK_UNLOCK(so);
 1202         return (mb != NULL);
 1203 }
 1204 
 1205 static int
 1206 uipc_ready(struct socket *so, struct mbuf *m, int count)
 1207 {
 1208         struct unpcb *unp, *unp2;
 1209         struct socket *so2;
 1210         int error, i;
 1211 
 1212         unp = sotounpcb(so);
 1213 
 1214         KASSERT(so->so_type == SOCK_STREAM,
 1215             ("%s: unexpected socket type for %p", __func__, so));
 1216 
 1217         UNP_PCB_LOCK(unp);
 1218         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
 1219                 UNP_PCB_UNLOCK(unp);
 1220                 so2 = unp2->unp_socket;
 1221                 SOCKBUF_LOCK(&so2->so_rcv);
 1222                 if ((error = sbready(&so2->so_rcv, m, count)) == 0)
 1223                         sorwakeup_locked(so2);
 1224                 else
 1225                         SOCKBUF_UNLOCK(&so2->so_rcv);
 1226                 UNP_PCB_UNLOCK(unp2);
 1227                 return (error);
 1228         }
 1229         UNP_PCB_UNLOCK(unp);
 1230 
 1231         /*
 1232          * The receiving socket has been disconnected, but may still be valid.
 1233          * In this case, the now-ready mbufs are still present in its socket
 1234          * buffer, so perform an exhaustive search before giving up and freeing
 1235          * the mbufs.
 1236          */
 1237         UNP_LINK_RLOCK();
 1238         LIST_FOREACH(unp, &unp_shead, unp_link) {
 1239                 if (uipc_ready_scan(unp->unp_socket, m, count, &error))
 1240                         break;
 1241         }
 1242         UNP_LINK_RUNLOCK();
 1243 
 1244         if (unp == NULL) {
 1245                 for (i = 0; i < count; i++)
 1246                         m = m_free(m);
 1247                 error = ECONNRESET;
 1248         }
 1249         return (error);
 1250 }
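      /*
       * Userland sketch for illustration: the PRUS_NOTREADY/pru_ready path
       * handled above is exercised by transmissions such as sendfile(2) over
       * a local stream socket, where mbufs are appended to the peer's buffer
       * before their backing pages are filled and are marked ready once the
       * I/O completes.  The function name is hypothetical.
       */
      #include <sys/types.h>
      #include <sys/socket.h>
      #include <sys/uio.h>

      static int
      example_sendfile_local(int filefd, int sock, size_t nbytes)
      {
              off_t sbytes;

              if (sendfile(filefd, sock, 0, nbytes, NULL, &sbytes, 0) == -1)
                      return (-1);
              return (sbytes == (off_t)nbytes ? 0 : -1);
      }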
 1251 
 1252 static int
 1253 uipc_sense(struct socket *so, struct stat *sb)
 1254 {
 1255         struct unpcb *unp;
 1256 
 1257         unp = sotounpcb(so);
 1258         KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
 1259 
 1260         sb->st_blksize = so->so_snd.sb_hiwat;
 1261         sb->st_dev = NODEV;
 1262         sb->st_ino = unp->unp_ino;
 1263         return (0);
 1264 }
 1265 
 1266 static int
 1267 uipc_shutdown(struct socket *so)
 1268 {
 1269         struct unpcb *unp;
 1270 
 1271         unp = sotounpcb(so);
 1272         KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));
 1273 
 1274         UNP_PCB_LOCK(unp);
 1275         socantsendmore(so);
 1276         unp_shutdown(unp);
 1277         UNP_PCB_UNLOCK(unp);
 1278         return (0);
 1279 }
 1280 
 1281 static int
 1282 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 1283 {
 1284         struct unpcb *unp;
 1285         const struct sockaddr *sa;
 1286 
 1287         unp = sotounpcb(so);
 1288         KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL"));
 1289 
 1290         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 1291         UNP_PCB_LOCK(unp);
 1292         if (unp->unp_addr != NULL)
 1293                 sa = (struct sockaddr *) unp->unp_addr;
 1294         else
 1295                 sa = &sun_noname;
 1296         bcopy(sa, *nam, sa->sa_len);
 1297         UNP_PCB_UNLOCK(unp);
 1298         return (0);
 1299 }
 1300 
 1301 static struct pr_usrreqs uipc_usrreqs_dgram = {
 1302         .pru_abort =            uipc_abort,
 1303         .pru_accept =           uipc_accept,
 1304         .pru_attach =           uipc_attach,
 1305         .pru_bind =             uipc_bind,
 1306         .pru_bindat =           uipc_bindat,
 1307         .pru_connect =          uipc_connect,
 1308         .pru_connectat =        uipc_connectat,
 1309         .pru_connect2 =         uipc_connect2,
 1310         .pru_detach =           uipc_detach,
 1311         .pru_disconnect =       uipc_disconnect,
 1312         .pru_listen =           uipc_listen,
 1313         .pru_peeraddr =         uipc_peeraddr,
 1314         .pru_rcvd =             uipc_rcvd,
 1315         .pru_send =             uipc_send,
 1316         .pru_sense =            uipc_sense,
 1317         .pru_shutdown =         uipc_shutdown,
 1318         .pru_sockaddr =         uipc_sockaddr,
 1319         .pru_soreceive =        soreceive_dgram,
 1320         .pru_close =            uipc_close,
 1321 };
 1322 
 1323 static struct pr_usrreqs uipc_usrreqs_seqpacket = {
 1324         .pru_abort =            uipc_abort,
 1325         .pru_accept =           uipc_accept,
 1326         .pru_attach =           uipc_attach,
 1327         .pru_bind =             uipc_bind,
 1328         .pru_bindat =           uipc_bindat,
 1329         .pru_connect =          uipc_connect,
 1330         .pru_connectat =        uipc_connectat,
 1331         .pru_connect2 =         uipc_connect2,
 1332         .pru_detach =           uipc_detach,
 1333         .pru_disconnect =       uipc_disconnect,
 1334         .pru_listen =           uipc_listen,
 1335         .pru_peeraddr =         uipc_peeraddr,
 1336         .pru_rcvd =             uipc_rcvd,
 1337         .pru_send =             uipc_send,
 1338         .pru_sense =            uipc_sense,
 1339         .pru_shutdown =         uipc_shutdown,
 1340         .pru_sockaddr =         uipc_sockaddr,
 1341         .pru_soreceive =        soreceive_generic,      /* XXX: or...? */
 1342         .pru_close =            uipc_close,
 1343 };
 1344 
 1345 static struct pr_usrreqs uipc_usrreqs_stream = {
 1346         .pru_abort =            uipc_abort,
 1347         .pru_accept =           uipc_accept,
 1348         .pru_attach =           uipc_attach,
 1349         .pru_bind =             uipc_bind,
 1350         .pru_bindat =           uipc_bindat,
 1351         .pru_connect =          uipc_connect,
 1352         .pru_connectat =        uipc_connectat,
 1353         .pru_connect2 =         uipc_connect2,
 1354         .pru_detach =           uipc_detach,
 1355         .pru_disconnect =       uipc_disconnect,
 1356         .pru_listen =           uipc_listen,
 1357         .pru_peeraddr =         uipc_peeraddr,
 1358         .pru_rcvd =             uipc_rcvd,
 1359         .pru_send =             uipc_send,
 1360         .pru_ready =            uipc_ready,
 1361         .pru_sense =            uipc_sense,
 1362         .pru_shutdown =         uipc_shutdown,
 1363         .pru_sockaddr =         uipc_sockaddr,
 1364         .pru_soreceive =        soreceive_generic,
 1365         .pru_close =            uipc_close,
 1366 };
 1367 
 1368 static int
 1369 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 1370 {
 1371         struct unpcb *unp;
 1372         struct xucred xu;
 1373         int error, optval;
 1374 
 1375         if (sopt->sopt_level != SOL_LOCAL)
 1376                 return (EINVAL);
 1377 
 1378         unp = sotounpcb(so);
 1379         KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
 1380         error = 0;
 1381         switch (sopt->sopt_dir) {
 1382         case SOPT_GET:
 1383                 switch (sopt->sopt_name) {
 1384                 case LOCAL_PEERCRED:
 1385                         UNP_PCB_LOCK(unp);
 1386                         if (unp->unp_flags & UNP_HAVEPC)
 1387                                 xu = unp->unp_peercred;
 1388                         else {
 1389                                 if (so->so_type == SOCK_STREAM)
 1390                                         error = ENOTCONN;
 1391                                 else
 1392                                         error = EINVAL;
 1393                         }
 1394                         UNP_PCB_UNLOCK(unp);
 1395                         if (error == 0)
 1396                                 error = sooptcopyout(sopt, &xu, sizeof(xu));
 1397                         break;
 1398 
 1399                 case LOCAL_CREDS:
 1400                         /* Unlocked read. */
 1401                         optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0;
 1402                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 1403                         break;
 1404 
 1405                 case LOCAL_CONNWAIT:
 1406                         /* Unlocked read. */
 1407                         optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
 1408                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 1409                         break;
 1410 
 1411                 default:
 1412                         error = EOPNOTSUPP;
 1413                         break;
 1414                 }
 1415                 break;
 1416 
 1417         case SOPT_SET:
 1418                 switch (sopt->sopt_name) {
 1419                 case LOCAL_CREDS:
 1420                 case LOCAL_CONNWAIT:
 1421                         error = sooptcopyin(sopt, &optval, sizeof(optval),
 1422                                             sizeof(optval));
 1423                         if (error)
 1424                                 break;
 1425 
 1426 #define OPTSET(bit) do {                                                \
 1427         UNP_PCB_LOCK(unp);                                              \
 1428         if (optval)                                                     \
 1429                 unp->unp_flags |= bit;                                  \
 1430         else                                                            \
 1431                 unp->unp_flags &= ~bit;                                 \
 1432         UNP_PCB_UNLOCK(unp);                                            \
 1433 } while (0)
 1434 
 1435                         switch (sopt->sopt_name) {
 1436                         case LOCAL_CREDS:
 1437                                 OPTSET(UNP_WANTCRED);
 1438                                 break;
 1439 
 1440                         case LOCAL_CONNWAIT:
 1441                                 OPTSET(UNP_CONNWAIT);
 1442                                 break;
 1443 
 1444                         default:
 1445                                 break;
 1446                         }
 1447                         break;
 1448 #undef  OPTSET
 1449                 default:
 1450                         error = ENOPROTOOPT;
 1451                         break;
 1452                 }
 1453                 break;
 1454 
 1455         default:
 1456                 error = EOPNOTSUPP;
 1457                 break;
 1458         }
 1459         return (error);
 1460 }
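
/*
 * Example (illustrative only, not part of this file): a minimal userspace
 * sketch of the socket options handled by uipc_ctloutput() above.  The
 * helper name and the assumption that "s" is a connected PF_LOCAL stream
 * socket are ours; error handling is abbreviated.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>             /* SOL_LOCAL, LOCAL_PEERCRED, LOCAL_CREDS */
#include <sys/ucred.h>          /* struct xucred, XUCRED_VERSION */
#include <stdio.h>

static void
query_local_options(int s)
{
        struct xucred xu;
        socklen_t len = sizeof(xu);
        int on = 1;

        /* LOCAL_PEERCRED returns the credentials cached at connect time. */
        if (getsockopt(s, SOL_LOCAL, LOCAL_PEERCRED, &xu, &len) == 0 &&
            xu.cr_version == XUCRED_VERSION)
                printf("peer euid %u, ngroups %d\n", (unsigned)xu.cr_uid,
                    xu.cr_ngroups);

        /* LOCAL_CREDS asks for SCM_CREDS control data on future receives. */
        (void)setsockopt(s, SOL_LOCAL, LOCAL_CREDS, &on, sizeof(on));
}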
 1461 
 1462 static int
 1463 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 1464 {
 1465 
 1466         return (unp_connectat(AT_FDCWD, so, nam, td));
 1467 }
 1468 
 1469 static int
 1470 unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
 1471     struct thread *td)
 1472 {
 1473         struct mtx *vplock;
 1474         struct sockaddr_un *soun;
 1475         struct vnode *vp;
 1476         struct socket *so2;
 1477         struct unpcb *unp, *unp2, *unp3;
 1478         struct nameidata nd;
 1479         char buf[SOCK_MAXADDRLEN];
 1480         struct sockaddr *sa;
 1481         cap_rights_t rights;
 1482         int error, len;
 1483         bool connreq;
 1484 
 1485         if (nam->sa_family != AF_UNIX)
 1486                 return (EAFNOSUPPORT);
 1487         if (nam->sa_len > sizeof(struct sockaddr_un))
 1488                 return (EINVAL);
 1489         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 1490         if (len <= 0)
 1491                 return (EINVAL);
 1492         soun = (struct sockaddr_un *)nam;
 1493         bcopy(soun->sun_path, buf, len);
 1494         buf[len] = 0;
 1495 
 1496         unp = sotounpcb(so);
 1497         UNP_PCB_LOCK(unp);
 1498         for (;;) {
 1499                 /*
 1500                  * Wait for connection state to stabilize.  If a connection
 1501                  * already exists, give up.  For datagram sockets, which permit
 1502                  * multiple consecutive connect(2) calls, upper layers are
 1503                  * responsible for disconnecting in advance of a subsequent
 1504                  * connect(2), but this is not synchronized with PCB connection
 1505                  * state.
 1506                  *
 1507                  * Also make sure that no threads are currently attempting to
 1508                  * lock the peer socket, to ensure that unp_conn cannot
 1509                  * transition between two valid sockets while locks are dropped.
 1510                  */
 1511                 if (unp->unp_conn != NULL) {
 1512                         UNP_PCB_UNLOCK(unp);
 1513                         return (EISCONN);
 1514                 }
 1515                 if ((unp->unp_flags & UNP_CONNECTING) != 0) {
 1516                         UNP_PCB_UNLOCK(unp);
 1517                         return (EALREADY);
 1518                 }
 1519                 if (unp->unp_pairbusy > 0) {
 1520                         unp->unp_flags |= UNP_WAITING;
 1521                         mtx_sleep(unp, UNP_PCB_LOCKPTR(unp), 0, "unpeer", 0);
 1522                         continue;
 1523                 }
 1524                 break;
 1525         }
 1526         unp->unp_flags |= UNP_CONNECTING;
 1527         UNP_PCB_UNLOCK(unp);
 1528 
 1529         connreq = (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0;
 1530         if (connreq)
 1531                 sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 1532         else
 1533                 sa = NULL;
 1534         NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 1535             UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_CONNECTAT), td);
 1536         error = namei(&nd);
 1537         if (error)
 1538                 vp = NULL;
 1539         else
 1540                 vp = nd.ni_vp;
 1541         ASSERT_VOP_LOCKED(vp, "unp_connect");
 1542         NDFREE(&nd, NDF_ONLY_PNBUF);
 1543         if (error)
 1544                 goto bad;
 1545 
 1546         if (vp->v_type != VSOCK) {
 1547                 error = ENOTSOCK;
 1548                 goto bad;
 1549         }
 1550 #ifdef MAC
 1551         error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD);
 1552         if (error)
 1553                 goto bad;
 1554 #endif
 1555         error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
 1556         if (error)
 1557                 goto bad;
 1558 
 1559         unp = sotounpcb(so);
 1560         KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
 1561 
 1562         vplock = mtx_pool_find(mtxpool_sleep, vp);
 1563         mtx_lock(vplock);
 1564         VOP_UNP_CONNECT(vp, &unp2);
 1565         if (unp2 == NULL) {
 1566                 error = ECONNREFUSED;
 1567                 goto bad2;
 1568         }
 1569         so2 = unp2->unp_socket;
 1570         if (so->so_type != so2->so_type) {
 1571                 error = EPROTOTYPE;
 1572                 goto bad2;
 1573         }
 1574         if (connreq) {
 1575                 if (so2->so_options & SO_ACCEPTCONN) {
 1576                         CURVNET_SET(so2->so_vnet);
 1577                         so2 = sonewconn(so2, 0);
 1578                         CURVNET_RESTORE();
 1579                 } else
 1580                         so2 = NULL;
 1581                 if (so2 == NULL) {
 1582                         error = ECONNREFUSED;
 1583                         goto bad2;
 1584                 }
 1585                 unp3 = sotounpcb(so2);
 1586                 unp_pcb_lock_pair(unp2, unp3);
 1587                 if (unp2->unp_addr != NULL) {
 1588                         bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
 1589                         unp3->unp_addr = (struct sockaddr_un *) sa;
 1590                         sa = NULL;
 1591                 }
 1592 
 1593                 unp_copy_peercred(td, unp3, unp, unp2);
 1594 
 1595                 UNP_PCB_UNLOCK(unp2);
 1596                 unp2 = unp3;
 1597 
 1598                 /*
 1599                  * It is safe to block on the PCB lock here since unp2 is
 1600                  * nascent and cannot be connected to any other sockets.
 1601                  */
 1602                 UNP_PCB_LOCK(unp);
 1603 #ifdef MAC
 1604                 mac_socketpeer_set_from_socket(so, so2);
 1605                 mac_socketpeer_set_from_socket(so2, so);
 1606 #endif
 1607         } else {
 1608                 unp_pcb_lock_pair(unp, unp2);
 1609         }
 1610         KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
 1611             sotounpcb(so2) == unp2,
 1612             ("%s: unp2 %p so2 %p", __func__, unp2, so2));
 1613         error = unp_connect2(so, so2, PRU_CONNECT);
 1614         unp_pcb_unlock_pair(unp, unp2);
 1615 bad2:
 1616         mtx_unlock(vplock);
 1617 bad:
 1618         if (vp != NULL) {
 1619                 vput(vp);
 1620         }
 1621         free(sa, M_SONAME);
 1622         UNP_PCB_LOCK(unp);
 1623         KASSERT((unp->unp_flags & UNP_CONNECTING) != 0,
 1624             ("%s: unp %p has UNP_CONNECTING clear", __func__, unp));
 1625         unp->unp_flags &= ~UNP_CONNECTING;
 1626         UNP_PCB_UNLOCK(unp);
 1627         return (error);
 1628 }
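
/*
 * Example (illustrative only, not part of this file): the userspace view of
 * the path-based connect handled by unp_connectat() above.  The helper name
 * and the socket path are made up for illustration; a server must already
 * have bound and listened on the path, and error handling is abbreviated.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <string.h>
#include <unistd.h>

static int
connect_local(const char *path)
{
        struct sockaddr_un sun;
        int s;

        if ((s = socket(PF_LOCAL, SOCK_STREAM, 0)) < 0)
                return (-1);
        memset(&sun, 0, sizeof(sun));
        sun.sun_family = AF_UNIX;
        (void)strlcpy(sun.sun_path, path, sizeof(sun.sun_path));
        sun.sun_len = SUN_LEN(&sun);
        /*
         * connect(2) enters through unp_connect(); connectat(2) can supply a
         * directory descriptor in place of AT_FDCWD.
         */
        if (connect(s, (struct sockaddr *)&sun, SUN_LEN(&sun)) != 0) {
                close(s);
                return (-1);
        }
        return (s);
}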
 1629 
 1630 /*
 1631  * Set socket peer credentials at connection time.
 1632  *
 1633  * The client's PCB credentials are copied from its process structure.  The
 1634  * server's PCB credentials are copied from the socket on which it called
 1635  * listen(2).  uipc_listen cached that process's credentials at the time.
 1636  */
 1637 void
 1638 unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
 1639     struct unpcb *server_unp, struct unpcb *listen_unp)
 1640 {
 1641         cru2xt(td, &client_unp->unp_peercred);
 1642         client_unp->unp_flags |= UNP_HAVEPC;
 1643 
 1644         memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred,
 1645             sizeof(server_unp->unp_peercred));
 1646         server_unp->unp_flags |= UNP_HAVEPC;
 1647         if (listen_unp->unp_flags & UNP_WANTCRED)
 1648                 client_unp->unp_flags |= UNP_WANTCRED;
 1649 }
 1650 
 1651 static int
 1652 unp_connect2(struct socket *so, struct socket *so2, int req)
 1653 {
 1654         struct unpcb *unp;
 1655         struct unpcb *unp2;
 1656 
 1657         unp = sotounpcb(so);
 1658         KASSERT(unp != NULL, ("unp_connect2: unp == NULL"));
 1659         unp2 = sotounpcb(so2);
 1660         KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL"));
 1661 
 1662         UNP_PCB_LOCK_ASSERT(unp);
 1663         UNP_PCB_LOCK_ASSERT(unp2);
 1664         KASSERT(unp->unp_conn == NULL,
 1665             ("%s: socket %p is already connected", __func__, unp));
 1666 
 1667         if (so2->so_type != so->so_type)
 1668                 return (EPROTOTYPE);
 1669         unp->unp_conn = unp2;
 1670         unp_pcb_hold(unp2);
 1671         unp_pcb_hold(unp);
 1672         switch (so->so_type) {
 1673         case SOCK_DGRAM:
 1674                 UNP_REF_LIST_LOCK();
 1675                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
 1676                 UNP_REF_LIST_UNLOCK();
 1677                 soisconnected(so);
 1678                 break;
 1679 
 1680         case SOCK_STREAM:
 1681         case SOCK_SEQPACKET:
 1682                 KASSERT(unp2->unp_conn == NULL,
 1683                     ("%s: socket %p is already connected", __func__, unp2));
 1684                 unp2->unp_conn = unp;
 1685                 if (req == PRU_CONNECT &&
 1686                     ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
 1687                         soisconnecting(so);
 1688                 else
 1689                         soisconnected(so);
 1690                 soisconnected(so2);
 1691                 break;
 1692 
 1693         default:
 1694                 panic("unp_connect2");
 1695         }
 1696         return (0);
 1697 }
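
/*
 * Example (illustrative only, not part of this file): socketpair(2) reaches
 * unp_connect2() via PRU_CONNECT2 and returns two anonymous, already
 * connected local sockets.  The helper name is ours; error handling is
 * abbreviated.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>

static int
demo_socketpair(void)
{
        char c;
        int sv[2];

        /* SOCK_STREAM, SOCK_DGRAM, and SOCK_SEQPACKET pairs are supported. */
        if (socketpair(PF_LOCAL, SOCK_STREAM, 0, sv) != 0)
                return (-1);
        (void)write(sv[0], "x", 1);     /* readable on the other end */
        (void)read(sv[1], &c, 1);
        close(sv[0]);
        close(sv[1]);
        return (0);
}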
 1698 
 1699 static void
 1700 unp_disconnect(struct unpcb *unp, struct unpcb *unp2)
 1701 {
 1702         struct socket *so, *so2;
 1703 #ifdef INVARIANTS
 1704         struct unpcb *unptmp;
 1705 #endif
 1706 
 1707         UNP_PCB_LOCK_ASSERT(unp);
 1708         UNP_PCB_LOCK_ASSERT(unp2);
 1709         KASSERT(unp->unp_conn == unp2,
 1710             ("%s: unpcb %p is not connected to %p", __func__, unp, unp2));
 1711 
 1712         unp->unp_conn = NULL;
 1713         so = unp->unp_socket;
 1714         so2 = unp2->unp_socket;
 1715         switch (unp->unp_socket->so_type) {
 1716         case SOCK_DGRAM:
 1717                 UNP_REF_LIST_LOCK();
 1718 #ifdef INVARIANTS
 1719                 LIST_FOREACH(unptmp, &unp2->unp_refs, unp_reflink) {
 1720                         if (unptmp == unp)
 1721                                 break;
 1722                 }
 1723                 KASSERT(unptmp != NULL,
 1724                     ("%s: %p not found in reflist of %p", __func__, unp, unp2));
 1725 #endif
 1726                 LIST_REMOVE(unp, unp_reflink);
 1727                 UNP_REF_LIST_UNLOCK();
 1728                 if (so) {
 1729                         SOCK_LOCK(so);
 1730                         so->so_state &= ~SS_ISCONNECTED;
 1731                         SOCK_UNLOCK(so);
 1732                 }
 1733                 break;
 1734 
 1735         case SOCK_STREAM:
 1736         case SOCK_SEQPACKET:
 1737                 if (so)
 1738                         soisdisconnected(so);
 1739                 MPASS(unp2->unp_conn == unp);
 1740                 unp2->unp_conn = NULL;
 1741                 if (so2)
 1742                         soisdisconnected(so2);
 1743                 break;
 1744         }
 1745 
 1746         if (unp == unp2) {
 1747                 unp_pcb_rele_notlast(unp);
 1748                 if (!unp_pcb_rele(unp))
 1749                         UNP_PCB_UNLOCK(unp);
 1750         } else {
 1751                 if (!unp_pcb_rele(unp))
 1752                         UNP_PCB_UNLOCK(unp);
 1753                 if (!unp_pcb_rele(unp2))
 1754                         UNP_PCB_UNLOCK(unp2);
 1755         }
 1756 }
 1757 
 1758 /*
 1759  * unp_pcblist() walks the global list of struct unpcb's to generate a
 1760  * pointer list, bumping the refcount on each unpcb.  It then copies them out
 1761  * sequentially, validating the generation number on each to see if it has
 1762  * been detached.  All of this is necessary because copyout() may sleep on
 1763  * disk I/O.
 1764  */
 1765 static int
 1766 unp_pcblist(SYSCTL_HANDLER_ARGS)
 1767 {
 1768         struct unpcb *unp, **unp_list;
 1769         unp_gen_t gencnt;
 1770         struct xunpgen *xug;
 1771         struct unp_head *head;
 1772         struct xunpcb *xu;
 1773         u_int i;
 1774         int error, n;
 1775 
 1776         switch ((intptr_t)arg1) {
 1777         case SOCK_STREAM:
 1778                 head = &unp_shead;
 1779                 break;
 1780 
 1781         case SOCK_DGRAM:
 1782                 head = &unp_dhead;
 1783                 break;
 1784 
 1785         case SOCK_SEQPACKET:
 1786                 head = &unp_sphead;
 1787                 break;
 1788 
 1789         default:
 1790                 panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1);
 1791         }
 1792 
 1793         /*
 1794          * The process of preparing the PCB list is too time-consuming and
 1795          * resource-intensive to repeat twice on every request.
 1796          */
 1797         if (req->oldptr == NULL) {
 1798                 n = unp_count;
 1799                 req->oldidx = 2 * (sizeof *xug)
 1800                         + (n + n/8) * sizeof(struct xunpcb);
 1801                 return (0);
 1802         }
 1803 
 1804         if (req->newptr != NULL)
 1805                 return (EPERM);
 1806 
 1807         /*
 1808          * OK, now we're committed to doing something.
 1809          */
 1810         xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK | M_ZERO);
 1811         UNP_LINK_RLOCK();
 1812         gencnt = unp_gencnt;
 1813         n = unp_count;
 1814         UNP_LINK_RUNLOCK();
 1815 
 1816         xug->xug_len = sizeof *xug;
 1817         xug->xug_count = n;
 1818         xug->xug_gen = gencnt;
 1819         xug->xug_sogen = so_gencnt;
 1820         error = SYSCTL_OUT(req, xug, sizeof *xug);
 1821         if (error) {
 1822                 free(xug, M_TEMP);
 1823                 return (error);
 1824         }
 1825 
 1826         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 1827 
 1828         UNP_LINK_RLOCK();
 1829         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 1830              unp = LIST_NEXT(unp, unp_link)) {
 1831                 UNP_PCB_LOCK(unp);
 1832                 if (unp->unp_gencnt <= gencnt) {
 1833                         if (cr_cansee(req->td->td_ucred,
 1834                             unp->unp_socket->so_cred)) {
 1835                                 UNP_PCB_UNLOCK(unp);
 1836                                 continue;
 1837                         }
 1838                         unp_list[i++] = unp;
 1839                         unp_pcb_hold(unp);
 1840                 }
 1841                 UNP_PCB_UNLOCK(unp);
 1842         }
 1843         UNP_LINK_RUNLOCK();
 1844         n = i;                  /* In case we lost some during malloc. */
 1845 
 1846         error = 0;
 1847         xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 1848         for (i = 0; i < n; i++) {
 1849                 unp = unp_list[i];
 1850                 UNP_PCB_LOCK(unp);
 1851                 if (unp_pcb_rele(unp))
 1852                         continue;
 1853 
 1854                 if (unp->unp_gencnt <= gencnt) {
 1855                         xu->xu_len = sizeof *xu;
 1856                         xu->xu_unpp = (uintptr_t)unp;
 1857                         /*
 1858                          * XXX - need more locking here to protect against
 1859                          * connect/disconnect races for SMP.
 1860                          */
 1861                         if (unp->unp_addr != NULL)
 1862                                 bcopy(unp->unp_addr, &xu->xu_addr,
 1863                                       unp->unp_addr->sun_len);
 1864                         else
 1865                                 bzero(&xu->xu_addr, sizeof(xu->xu_addr));
 1866                         if (unp->unp_conn != NULL &&
 1867                             unp->unp_conn->unp_addr != NULL)
 1868                                 bcopy(unp->unp_conn->unp_addr,
 1869                                       &xu->xu_caddr,
 1870                                       unp->unp_conn->unp_addr->sun_len);
 1871                         else
 1872                                 bzero(&xu->xu_caddr, sizeof(xu->xu_caddr));
 1873                         xu->unp_vnode = (uintptr_t)unp->unp_vnode;
 1874                         xu->unp_conn = (uintptr_t)unp->unp_conn;
 1875                         xu->xu_firstref = (uintptr_t)LIST_FIRST(&unp->unp_refs);
 1876                         xu->xu_nextref = (uintptr_t)LIST_NEXT(unp, unp_reflink);
 1877                         xu->unp_gencnt = unp->unp_gencnt;
 1878                         sotoxsocket(unp->unp_socket, &xu->xu_socket);
 1879                         UNP_PCB_UNLOCK(unp);
 1880                         error = SYSCTL_OUT(req, xu, sizeof *xu);
 1881                 } else {
 1882                         UNP_PCB_UNLOCK(unp);
 1883                 }
 1884         }
 1885         free(xu, M_TEMP);
 1886         if (!error) {
 1887                 /*
 1888                  * Give the user an updated idea of our state.  If the
 1889                  * generation differs from what we told her before, she knows
 1890                  * that something happened while we were processing this
 1891                  * request, and it might be necessary to retry.
 1892                  */
 1893                 xug->xug_gen = unp_gencnt;
 1894                 xug->xug_sogen = so_gencnt;
 1895                 xug->xug_count = unp_count;
 1896                 error = SYSCTL_OUT(req, xug, sizeof *xug);
 1897         }
 1898         free(unp_list, M_TEMP);
 1899         free(xug, M_TEMP);
 1900         return (error);
 1901 }
 1902 
 1903 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD,
 1904     (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
 1905     "List of active local datagram sockets");
 1906 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD,
 1907     (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
 1908     "List of active local stream sockets");
 1909 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist,
 1910     CTLTYPE_OPAQUE | CTLFLAG_RD,
 1911     (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
 1912     "List of active local seqpacket sockets");
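
/*
 * Example (illustrative only, not part of this file): reading one of the
 * pcblist sysctls above from userspace, in the style of netstat(1).  The
 * buffer begins with a struct xunpgen, is followed by struct xunpcb entries
 * that each start with their own length, and ends with a second xunpgen.
 * The helper name is ours; error handling is abbreviated.
 */
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/un.h>
#include <sys/unpcb.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static void
list_local_stream_pcbs(void)
{
        struct xunpgen *xug;
        struct xunpcb *xunp;
        char *buf;
        size_t len;

        if (sysctlbyname("net.local.stream.pcblist", NULL, &len, NULL, 0) != 0)
                return;
        if ((buf = malloc(len)) == NULL)
                return;
        if (sysctlbyname("net.local.stream.pcblist", buf, &len, NULL, 0) != 0) {
                free(buf);
                return;
        }
        xug = (struct xunpgen *)buf;
        for (xug = (struct xunpgen *)((char *)xug + xug->xug_len);
            (char *)xug < buf + len && xug->xug_len > sizeof(*xug);
            xug = (struct xunpgen *)((char *)xug + xug->xug_len)) {
                xunp = (struct xunpcb *)xug;
                printf("unpcb %#jx vnode %#jx conn %#jx\n",
                    (uintmax_t)xunp->xu_unpp, (uintmax_t)xunp->unp_vnode,
                    (uintmax_t)xunp->unp_conn);
        }
        free(buf);
}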
 1913 
 1914 static void
 1915 unp_shutdown(struct unpcb *unp)
 1916 {
 1917         struct unpcb *unp2;
 1918         struct socket *so;
 1919 
 1920         UNP_PCB_LOCK_ASSERT(unp);
 1921 
 1922         unp2 = unp->unp_conn;
 1923         if ((unp->unp_socket->so_type == SOCK_STREAM ||
 1924             (unp->unp_socket->so_type == SOCK_SEQPACKET)) && unp2 != NULL) {
 1925                 so = unp2->unp_socket;
 1926                 if (so != NULL)
 1927                         socantrcvmore(so);
 1928         }
 1929 }
 1930 
 1931 static void
 1932 unp_drop(struct unpcb *unp)
 1933 {
 1934         struct socket *so = unp->unp_socket;
 1935         struct unpcb *unp2;
 1936 
 1937         /*
 1938          * Regardless of whether the socket's peer dropped the connection
 1939          * with this socket by aborting or disconnecting, POSIX requires
 1940          * that ECONNRESET is returned.
 1941          */
 1942 
 1943         UNP_PCB_LOCK(unp);
 1944         if (so)
 1945                 so->so_error = ECONNRESET;
 1946         if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
 1947                 /* Last reference dropped in unp_disconnect(). */
 1948                 unp_pcb_rele_notlast(unp);
 1949                 unp_disconnect(unp, unp2);
 1950         } else if (!unp_pcb_rele(unp)) {
 1951                 UNP_PCB_UNLOCK(unp);
 1952         }
 1953 }
 1954 
 1955 static void
 1956 unp_freerights(struct filedescent **fdep, int fdcount)
 1957 {
 1958         struct file *fp;
 1959         int i;
 1960 
 1961         KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
 1962 
 1963         for (i = 0; i < fdcount; i++) {
 1964                 fp = fdep[i]->fde_file;
 1965                 filecaps_free(&fdep[i]->fde_caps);
 1966                 unp_discard(fp);
 1967         }
 1968         free(fdep[0], M_FILECAPS);
 1969 }
 1970 
 1971 static int
 1972 unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
 1973 {
 1974         struct thread *td = curthread;          /* XXX */
 1975         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1976         int i;
 1977         int *fdp;
 1978         struct filedesc *fdesc = td->td_proc->p_fd;
 1979         struct filedescent **fdep;
 1980         void *data;
 1981         socklen_t clen = control->m_len, datalen;
 1982         int error, newfds;
 1983         u_int newlen;
 1984 
 1985         UNP_LINK_UNLOCK_ASSERT();
 1986 
 1987         error = 0;
 1988         if (controlp != NULL) /* controlp == NULL => free control messages */
 1989                 *controlp = NULL;
 1990         while (cm != NULL) {
 1991                 if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
 1992                         error = EINVAL;
 1993                         break;
 1994                 }
 1995                 data = CMSG_DATA(cm);
 1996                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1997                 if (cm->cmsg_level == SOL_SOCKET
 1998                     && cm->cmsg_type == SCM_RIGHTS) {
 1999                         newfds = datalen / sizeof(*fdep);
 2000                         if (newfds == 0)
 2001                                 goto next;
 2002                         fdep = data;
 2003 
 2004                         /* If we're not outputting the descriptors, free them. */
 2005                         if (error || controlp == NULL) {
 2006                                 unp_freerights(fdep, newfds);
 2007                                 goto next;
 2008                         }
 2009                         FILEDESC_XLOCK(fdesc);
 2010 
 2011                         /*
 2012                          * Now change each pointer to an fd in the global
 2013                          * table to an integer that is the index to the local
 2014                          * fd table entry that we set up to point to the
 2015                          * global one we are transferring.
 2016                          */
 2017                         newlen = newfds * sizeof(int);
 2018                         *controlp = sbcreatecontrol(NULL, newlen,
 2019                             SCM_RIGHTS, SOL_SOCKET);
 2020                         if (*controlp == NULL) {
 2021                                 FILEDESC_XUNLOCK(fdesc);
 2022                                 error = E2BIG;
 2023                                 unp_freerights(fdep, newfds);
 2024                                 goto next;
 2025                         }
 2026 
 2027                         fdp = (int *)
 2028                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2029                         if (fdallocn(td, 0, fdp, newfds) != 0) {
 2030                                 FILEDESC_XUNLOCK(fdesc);
 2031                                 error = EMSGSIZE;
 2032                                 unp_freerights(fdep, newfds);
 2033                                 m_freem(*controlp);
 2034                                 *controlp = NULL;
 2035                                 goto next;
 2036                         }
 2037                         for (i = 0; i < newfds; i++, fdp++) {
 2038                                 _finstall(fdesc, fdep[i]->fde_file, *fdp,
 2039                                     (flags & MSG_CMSG_CLOEXEC) != 0 ? UF_EXCLOSE : 0,
 2040                                     &fdep[i]->fde_caps);
 2041                                 unp_externalize_fp(fdep[i]->fde_file);
 2042                         }
 2043 
 2044                         /*
 2045                          * The new type indicates that the mbuf data refers to
 2046                          * kernel resources that may need to be released before
 2047                          * the mbuf is freed.
 2048                          */
 2049                         m_chtype(*controlp, MT_EXTCONTROL);
 2050                         FILEDESC_XUNLOCK(fdesc);
 2051                         free(fdep[0], M_FILECAPS);
 2052                 } else {
 2053                         /* We can just copy anything else across. */
 2054                         if (error || controlp == NULL)
 2055                                 goto next;
 2056                         *controlp = sbcreatecontrol(NULL, datalen,
 2057                             cm->cmsg_type, cm->cmsg_level);
 2058                         if (*controlp == NULL) {
 2059                                 error = ENOBUFS;
 2060                                 goto next;
 2061                         }
 2062                         bcopy(data,
 2063                             CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 2064                             datalen);
 2065                 }
 2066                 controlp = &(*controlp)->m_next;
 2067 
 2068 next:
 2069                 if (CMSG_SPACE(datalen) < clen) {
 2070                         clen -= CMSG_SPACE(datalen);
 2071                         cm = (struct cmsghdr *)
 2072                             ((caddr_t)cm + CMSG_SPACE(datalen));
 2073                 } else {
 2074                         clen = 0;
 2075                         cm = NULL;
 2076                 }
 2077         }
 2078 
 2079         m_freem(control);
 2080         return (error);
 2081 }
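
/*
 * Example (illustrative only, not part of this file): the receive side of
 * descriptor passing, which unp_externalize() above services.  Receives a
 * single descriptor alongside one byte of data; passing MSG_CMSG_CLOEXEC to
 * recvmsg() installs the new descriptor close-on-exec (see the UF_EXCLOSE
 * handling above).  The helper name is ours; error handling is abbreviated.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

static int
recv_fd(int s)
{
        struct msghdr msg;
        struct iovec iov;
        struct cmsghdr *cm;
        union {
                struct cmsghdr hdr;
                char buf[CMSG_SPACE(sizeof(int))];
        } cmsgbuf;
        char byte;
        int fd = -1;

        memset(&msg, 0, sizeof(msg));
        iov.iov_base = &byte;
        iov.iov_len = sizeof(byte);
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cmsgbuf.buf;
        msg.msg_controllen = sizeof(cmsgbuf.buf);
        if (recvmsg(s, &msg, 0) < 0)
                return (-1);
        for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm))
                if (cm->cmsg_level == SOL_SOCKET &&
                    cm->cmsg_type == SCM_RIGHTS)
                        memcpy(&fd, CMSG_DATA(cm), sizeof(fd));
        return (fd);
}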
 2082 
 2083 static void
 2084 unp_zone_change(void *tag)
 2085 {
 2086 
 2087         uma_zone_set_max(unp_zone, maxsockets);
 2088 }
 2089 
 2090 #ifdef INVARIANTS
 2091 static void
 2092 unp_zdtor(void *mem, int size __unused, void *arg __unused)
 2093 {
 2094         struct unpcb *unp;
 2095 
 2096         unp = mem;
 2097 
 2098         KASSERT(LIST_EMPTY(&unp->unp_refs),
 2099             ("%s: unpcb %p has lingering refs", __func__, unp));
 2100         KASSERT(unp->unp_socket == NULL,
 2101             ("%s: unpcb %p has socket backpointer", __func__, unp));
 2102         KASSERT(unp->unp_vnode == NULL,
 2103             ("%s: unpcb %p has vnode references", __func__, unp));
 2104         KASSERT(unp->unp_conn == NULL,
 2105             ("%s: unpcb %p is still connected", __func__, unp));
 2106         KASSERT(unp->unp_addr == NULL,
 2107             ("%s: unpcb %p has leaked addr", __func__, unp));
 2108 }
 2109 #endif
 2110 
 2111 static void
 2112 unp_init(void)
 2113 {
 2114         uma_dtor dtor;
 2115 
 2116 #ifdef VIMAGE
 2117         if (!IS_DEFAULT_VNET(curvnet))
 2118                 return;
 2119 #endif
 2120 
 2121 #ifdef INVARIANTS
 2122         dtor = unp_zdtor;
 2123 #else
 2124         dtor = NULL;
 2125 #endif
 2126         unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, dtor,
 2127             NULL, NULL, UMA_ALIGN_CACHE, 0);
 2128         uma_zone_set_max(unp_zone, maxsockets);
 2129         uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
 2130         EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
 2131             NULL, EVENTHANDLER_PRI_ANY);
 2132         LIST_INIT(&unp_dhead);
 2133         LIST_INIT(&unp_shead);
 2134         LIST_INIT(&unp_sphead);
 2135         SLIST_INIT(&unp_defers);
 2136         TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
 2137         TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
 2138         UNP_LINK_LOCK_INIT();
 2139         UNP_DEFERRED_LOCK_INIT();
 2140 }
 2141 
 2142 static void
 2143 unp_internalize_cleanup_rights(struct mbuf *control)
 2144 {
 2145         struct cmsghdr *cp;
 2146         struct mbuf *m;
 2147         void *data;
 2148         socklen_t datalen;
 2149 
 2150         for (m = control; m != NULL; m = m->m_next) {
 2151                 cp = mtod(m, struct cmsghdr *);
 2152                 if (cp->cmsg_level != SOL_SOCKET ||
 2153                     cp->cmsg_type != SCM_RIGHTS)
 2154                         continue;
 2155                 data = CMSG_DATA(cp);
 2156                 datalen = (caddr_t)cp + cp->cmsg_len - (caddr_t)data;
 2157                 unp_freerights(data, datalen / sizeof(struct filedesc *));
 2158         }
 2159 }
 2160 
 2161 static int
 2162 unp_internalize(struct mbuf **controlp, struct thread *td)
 2163 {
 2164         struct mbuf *control, **initial_controlp;
 2165         struct proc *p;
 2166         struct filedesc *fdesc;
 2167         struct bintime *bt;
 2168         struct cmsghdr *cm;
 2169         struct cmsgcred *cmcred;
 2170         struct filedescent *fde, **fdep, *fdev;
 2171         struct file *fp;
 2172         struct timeval *tv;
 2173         struct timespec *ts;
 2174         void *data;
 2175         socklen_t clen, datalen;
 2176         int i, j, error, *fdp, oldfds;
 2177         u_int newlen;
 2178 
 2179         UNP_LINK_UNLOCK_ASSERT();
 2180 
 2181         p = td->td_proc;
 2182         fdesc = p->p_fd;
 2183         error = 0;
 2184         control = *controlp;
 2185         clen = control->m_len;
 2186         *controlp = NULL;
 2187         initial_controlp = controlp;
 2188         for (cm = mtod(control, struct cmsghdr *); cm != NULL;) {
 2189                 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 2190                     || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
 2191                         error = EINVAL;
 2192                         goto out;
 2193                 }
 2194                 data = CMSG_DATA(cm);
 2195                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 2196 
 2197                 switch (cm->cmsg_type) {
 2198                 /*
 2199                  * Fill in credential information.
 2200                  */
 2201                 case SCM_CREDS:
 2202                         *controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 2203                             SCM_CREDS, SOL_SOCKET);
 2204                         if (*controlp == NULL) {
 2205                                 error = ENOBUFS;
 2206                                 goto out;
 2207                         }
 2208                         cmcred = (struct cmsgcred *)
 2209                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2210                         cmcred->cmcred_pid = p->p_pid;
 2211                         cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 2212                         cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 2213                         cmcred->cmcred_euid = td->td_ucred->cr_uid;
 2214                         cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 2215                             CMGROUP_MAX);
 2216                         for (i = 0; i < cmcred->cmcred_ngroups; i++)
 2217                                 cmcred->cmcred_groups[i] =
 2218                                     td->td_ucred->cr_groups[i];
 2219                         break;
 2220 
 2221                 case SCM_RIGHTS:
 2222                         oldfds = datalen / sizeof (int);
 2223                         if (oldfds == 0)
 2224                                 break;
 2225                         /*
 2226                          * Check that all the FDs passed in refer to legal
 2227                          * files.  If not, reject the entire operation.
 2228                          */
 2229                         fdp = data;
 2230                         FILEDESC_SLOCK(fdesc);
 2231                         for (i = 0; i < oldfds; i++, fdp++) {
 2232                                 fp = fget_locked(fdesc, *fdp);
 2233                                 if (fp == NULL) {
 2234                                         FILEDESC_SUNLOCK(fdesc);
 2235                                         error = EBADF;
 2236                                         goto out;
 2237                                 }
 2238                                 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 2239                                         FILEDESC_SUNLOCK(fdesc);
 2240                                         error = EOPNOTSUPP;
 2241                                         goto out;
 2242                                 }
 2243 
 2244                         }
 2245 
 2246                         /*
 2247                          * Now replace the integer FDs with pointers to the
 2248                          * file structure and capability rights.
 2249                          */
 2250                         newlen = oldfds * sizeof(fdep[0]);
 2251                         *controlp = sbcreatecontrol(NULL, newlen,
 2252                             SCM_RIGHTS, SOL_SOCKET);
 2253                         if (*controlp == NULL) {
 2254                                 FILEDESC_SUNLOCK(fdesc);
 2255                                 error = E2BIG;
 2256                                 goto out;
 2257                         }
 2258                         fdp = data;
 2259                         for (i = 0; i < oldfds; i++, fdp++) {
 2260                                 if (!fhold(fdesc->fd_ofiles[*fdp].fde_file)) {
 2261                                         fdp = data;
 2262                                         for (j = 0; j < i; j++, fdp++) {
 2263                                                 fdrop(fdesc->fd_ofiles[*fdp].
 2264                                                     fde_file, td);
 2265                                         }
 2266                                         FILEDESC_SUNLOCK(fdesc);
 2267                                         error = EBADF;
 2268                                         goto out;
 2269                                 }
 2270                         }
 2271                         fdp = data;
 2272                         fdep = (struct filedescent **)
 2273                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2274                         fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
 2275                             M_WAITOK);
 2276                         for (i = 0; i < oldfds; i++, fdev++, fdp++) {
 2277                                 fde = &fdesc->fd_ofiles[*fdp];
 2278                                 fdep[i] = fdev;
 2279                                 fdep[i]->fde_file = fde->fde_file;
 2280                                 filecaps_copy(&fde->fde_caps,
 2281                                     &fdep[i]->fde_caps, true);
 2282                                 unp_internalize_fp(fdep[i]->fde_file);
 2283                         }
 2284                         FILEDESC_SUNLOCK(fdesc);
 2285                         break;
 2286 
 2287                 case SCM_TIMESTAMP:
 2288                         *controlp = sbcreatecontrol(NULL, sizeof(*tv),
 2289                             SCM_TIMESTAMP, SOL_SOCKET);
 2290                         if (*controlp == NULL) {
 2291                                 error = ENOBUFS;
 2292                                 goto out;
 2293                         }
 2294                         tv = (struct timeval *)
 2295                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2296                         microtime(tv);
 2297                         break;
 2298 
 2299                 case SCM_BINTIME:
 2300                         *controlp = sbcreatecontrol(NULL, sizeof(*bt),
 2301                             SCM_BINTIME, SOL_SOCKET);
 2302                         if (*controlp == NULL) {
 2303                                 error = ENOBUFS;
 2304                                 goto out;
 2305                         }
 2306                         bt = (struct bintime *)
 2307                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2308                         bintime(bt);
 2309                         break;
 2310 
 2311                 case SCM_REALTIME:
 2312                         *controlp = sbcreatecontrol(NULL, sizeof(*ts),
 2313                             SCM_REALTIME, SOL_SOCKET);
 2314                         if (*controlp == NULL) {
 2315                                 error = ENOBUFS;
 2316                                 goto out;
 2317                         }
 2318                         ts = (struct timespec *)
 2319                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2320                         nanotime(ts);
 2321                         break;
 2322 
 2323                 case SCM_MONOTONIC:
 2324                         *controlp = sbcreatecontrol(NULL, sizeof(*ts),
 2325                             SCM_MONOTONIC, SOL_SOCKET);
 2326                         if (*controlp == NULL) {
 2327                                 error = ENOBUFS;
 2328                                 goto out;
 2329                         }
 2330                         ts = (struct timespec *)
 2331                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 2332                         nanouptime(ts);
 2333                         break;
 2334 
 2335                 default:
 2336                         error = EINVAL;
 2337                         goto out;
 2338                 }
 2339 
 2340                 if (*controlp != NULL)
 2341                         controlp = &(*controlp)->m_next;
 2342                 if (CMSG_SPACE(datalen) < clen) {
 2343                         clen -= CMSG_SPACE(datalen);
 2344                         cm = (struct cmsghdr *)
 2345                             ((caddr_t)cm + CMSG_SPACE(datalen));
 2346                 } else {
 2347                         clen = 0;
 2348                         cm = NULL;
 2349                 }
 2350         }
 2351 
 2352 out:
 2353         if (error != 0 && initial_controlp != NULL)
 2354                 unp_internalize_cleanup_rights(*initial_controlp);
 2355         m_freem(control);
 2356         return (error);
 2357 }
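
/*
 * Example (illustrative only, not part of this file): the send side of
 * descriptor passing, which unp_internalize() above services by replacing
 * the integer descriptors with file and capability pointers.  The helper
 * name is ours; error handling is abbreviated.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

static int
send_fd(int s, int fd)
{
        struct msghdr msg;
        struct iovec iov;
        struct cmsghdr *cm;
        union {
                struct cmsghdr hdr;
                char buf[CMSG_SPACE(sizeof(int))];
        } cmsgbuf;
        char byte = 0;

        memset(&msg, 0, sizeof(msg));
        memset(&cmsgbuf, 0, sizeof(cmsgbuf));
        iov.iov_base = &byte;
        iov.iov_len = sizeof(byte);
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cmsgbuf.buf;
        msg.msg_controllen = sizeof(cmsgbuf.buf);
        cm = CMSG_FIRSTHDR(&msg);
        cm->cmsg_len = CMSG_LEN(sizeof(int));
        cm->cmsg_level = SOL_SOCKET;
        cm->cmsg_type = SCM_RIGHTS;
        memcpy(CMSG_DATA(cm), &fd, sizeof(fd));
        return (sendmsg(s, &msg, 0) < 0 ? -1 : 0);
}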
 2358 
 2359 static struct mbuf *
 2360 unp_addsockcred(struct thread *td, struct mbuf *control)
 2361 {
 2362         struct mbuf *m, *n, *n_prev;
 2363         struct sockcred *sc;
 2364         const struct cmsghdr *cm;
 2365         int ngroups;
 2366         int i;
 2367 
 2368         ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
 2369         m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET);
 2370         if (m == NULL)
 2371                 return (control);
 2372 
 2373         sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *));
 2374         sc->sc_uid = td->td_ucred->cr_ruid;
 2375         sc->sc_euid = td->td_ucred->cr_uid;
 2376         sc->sc_gid = td->td_ucred->cr_rgid;
 2377         sc->sc_egid = td->td_ucred->cr_gid;
 2378         sc->sc_ngroups = ngroups;
 2379         for (i = 0; i < sc->sc_ngroups; i++)
 2380                 sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 2381 
 2382         /*
 2383          * Unlink any SCM_CREDS control messages (struct cmsgcred), since
 2384          * the just-created SCM_CREDS control message (struct sockcred) has
 2385          * a different format.
 2386          */
 2387         if (control != NULL)
 2388                 for (n = control, n_prev = NULL; n != NULL;) {
 2389                         cm = mtod(n, struct cmsghdr *);
 2390                         if (cm->cmsg_level == SOL_SOCKET &&
 2391                             cm->cmsg_type == SCM_CREDS) {
 2392                                 if (n_prev == NULL)
 2393                                         control = n->m_next;
 2394                                 else
 2395                                         n_prev->m_next = n->m_next;
 2396                                 n = m_free(n);
 2397                         } else {
 2398                                 n_prev = n;
 2399                                 n = n->m_next;
 2400                         }
 2401                 }
 2402 
 2403         /* Prepend it to the head. */
 2404         m->m_next = control;
 2405         return (m);
 2406 }
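
/*
 * Example (illustrative only, not part of this file): consuming the SCM_CREDS
 * message that unp_addsockcred() above prepends once the receiving socket has
 * enabled LOCAL_CREDS.  The helper name is ours; error handling is
 * abbreviated.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <stdio.h>
#include <string.h>

static void
recv_with_creds(int s)
{
        struct msghdr msg;
        struct cmsghdr *cm;
        struct iovec iov;
        struct sockcred *sc;
        char data[128];
        char cbuf[CMSG_SPACE(SOCKCREDSIZE(CMGROUP_MAX))];

        memset(&msg, 0, sizeof(msg));
        iov.iov_base = data;
        iov.iov_len = sizeof(data);
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cbuf;
        msg.msg_controllen = sizeof(cbuf);
        if (recvmsg(s, &msg, 0) < 0)
                return;
        for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm))
                if (cm->cmsg_level == SOL_SOCKET &&
                    cm->cmsg_type == SCM_CREDS) {
                        sc = (struct sockcred *)CMSG_DATA(cm);
                        printf("sender euid %u egid %u\n",
                            (unsigned)sc->sc_euid, (unsigned)sc->sc_egid);
                }
}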
 2407 
 2408 static struct unpcb *
 2409 fptounp(struct file *fp)
 2410 {
 2411         struct socket *so;
 2412 
 2413         if (fp->f_type != DTYPE_SOCKET)
 2414                 return (NULL);
 2415         if ((so = fp->f_data) == NULL)
 2416                 return (NULL);
 2417         if (so->so_proto->pr_domain != &localdomain)
 2418                 return (NULL);
 2419         return sotounpcb(so);
 2420 }
 2421 
 2422 static void
 2423 unp_discard(struct file *fp)
 2424 {
 2425         struct unp_defer *dr;
 2426 
 2427         if (unp_externalize_fp(fp)) {
 2428                 dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK);
 2429                 dr->ud_fp = fp;
 2430                 UNP_DEFERRED_LOCK();
 2431                 SLIST_INSERT_HEAD(&unp_defers, dr, ud_link);
 2432                 UNP_DEFERRED_UNLOCK();
 2433                 atomic_add_int(&unp_defers_count, 1);
 2434                 taskqueue_enqueue(taskqueue_thread, &unp_defer_task);
 2435         } else
 2436                 (void) closef(fp, (struct thread *)NULL);
 2437 }
 2438 
 2439 static void
 2440 unp_process_defers(void *arg __unused, int pending)
 2441 {
 2442         struct unp_defer *dr;
 2443         SLIST_HEAD(, unp_defer) drl;
 2444         int count;
 2445 
 2446         SLIST_INIT(&drl);
 2447         for (;;) {
 2448                 UNP_DEFERRED_LOCK();
 2449                 if (SLIST_FIRST(&unp_defers) == NULL) {
 2450                         UNP_DEFERRED_UNLOCK();
 2451                         break;
 2452                 }
 2453                 SLIST_SWAP(&unp_defers, &drl, unp_defer);
 2454                 UNP_DEFERRED_UNLOCK();
 2455                 count = 0;
 2456                 while ((dr = SLIST_FIRST(&drl)) != NULL) {
 2457                         SLIST_REMOVE_HEAD(&drl, ud_link);
 2458                         closef(dr->ud_fp, NULL);
 2459                         free(dr, M_TEMP);
 2460                         count++;
 2461                 }
 2462                 atomic_add_int(&unp_defers_count, -count);
 2463         }
 2464 }
 2465 
 2466 static void
 2467 unp_internalize_fp(struct file *fp)
 2468 {
 2469         struct unpcb *unp;
 2470 
 2471         UNP_LINK_WLOCK();
 2472         if ((unp = fptounp(fp)) != NULL) {
 2473                 unp->unp_file = fp;
 2474                 unp->unp_msgcount++;
 2475         }
 2476         unp_rights++;
 2477         UNP_LINK_WUNLOCK();
 2478 }
 2479 
 2480 static int
 2481 unp_externalize_fp(struct file *fp)
 2482 {
 2483         struct unpcb *unp;
 2484         int ret;
 2485 
 2486         UNP_LINK_WLOCK();
 2487         if ((unp = fptounp(fp)) != NULL) {
 2488                 unp->unp_msgcount--;
 2489                 ret = 1;
 2490         } else
 2491                 ret = 0;
 2492         unp_rights--;
 2493         UNP_LINK_WUNLOCK();
 2494         return (ret);
 2495 }
 2496 
 2497 /*
 2498  * unp_marked and unp_unreachable track progress through unp_gc(): sockets
 2499  * newly marked reachable and sockets found potentially unreachable.  They
 2500  * are used only by the GC task and do not require explicit synchronization.
 2501  */
 2502 static int      unp_marked;
 2503 static int      unp_unreachable;
 2504 
 2505 static void
 2506 unp_accessable(struct filedescent **fdep, int fdcount)
 2507 {
 2508         struct unpcb *unp;
 2509         struct file *fp;
 2510         int i;
 2511 
 2512         for (i = 0; i < fdcount; i++) {
 2513                 fp = fdep[i]->fde_file;
 2514                 if ((unp = fptounp(fp)) == NULL)
 2515                         continue;
 2516                 if (unp->unp_gcflag & UNPGC_REF)
 2517                         continue;
 2518                 unp->unp_gcflag &= ~UNPGC_DEAD;
 2519                 unp->unp_gcflag |= UNPGC_REF;
 2520                 unp_marked++;
 2521         }
 2522 }
 2523 
 2524 static void
 2525 unp_gc_process(struct unpcb *unp)
 2526 {
 2527         struct socket *so, *soa;
 2528         struct file *fp;
 2529 
 2530         /* Already processed. */
 2531         if (unp->unp_gcflag & UNPGC_SCANNED)
 2532                 return;
 2533         fp = unp->unp_file;
 2534 
 2535         /*
 2536          * Check for a socket potentially in a cycle.  It must be in a
 2537          * queue as indicated by msgcount, and this must equal the file
 2538          * reference count.  Note that when msgcount is 0 the file is NULL.
 2539          */
 2540         if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp &&
 2541             unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) {
 2542                 unp->unp_gcflag |= UNPGC_DEAD;
 2543                 unp_unreachable++;
 2544                 return;
 2545         }
 2546 
 2547         so = unp->unp_socket;
 2548         SOCK_LOCK(so);
 2549         if (SOLISTENING(so)) {
 2550                 /*
 2551                  * Mark all sockets in our accept queue.
 2552                  */
 2553                 TAILQ_FOREACH(soa, &so->sol_comp, so_list) {
 2554                         if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
 2555                                 continue;
 2556                         SOCKBUF_LOCK(&soa->so_rcv);
 2557                         unp_scan(soa->so_rcv.sb_mb, unp_accessable);
 2558                         SOCKBUF_UNLOCK(&soa->so_rcv);
 2559                 }
 2560         } else {
 2561                 /*
 2562                  * Mark all sockets we reference with RIGHTS.
 2563                  */
 2564                 if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
 2565                         SOCKBUF_LOCK(&so->so_rcv);
 2566                         unp_scan(so->so_rcv.sb_mb, unp_accessable);
 2567                         SOCKBUF_UNLOCK(&so->so_rcv);
 2568                 }
 2569         }
 2570         SOCK_UNLOCK(so);
 2571         unp->unp_gcflag |= UNPGC_SCANNED;
 2572 }
 2573 
 2574 static int unp_recycled;
 2575 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 
 2576     "Number of unreachable sockets claimed by the garbage collector.");
 2577 
 2578 static int unp_taskcount;
 2579 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 
 2580     "Number of times the garbage collector has run.");
 2581 
 2582 static void
 2583 unp_gc(__unused void *arg, int pending)
 2584 {
 2585         struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead,
 2586                                     NULL };
 2587         struct unp_head **head;
 2588         struct file *f, **unref;
 2589         struct unpcb *unp;
 2590         int i, total;
 2591 
 2592         unp_taskcount++;
 2593         UNP_LINK_RLOCK();
 2594         /*
 2595          * First clear all gc flags from previous runs, apart from
 2596          * UNPGC_IGNORE_RIGHTS.
 2597          */
 2598         for (head = heads; *head != NULL; head++)
 2599                 LIST_FOREACH(unp, *head, unp_link)
 2600                         unp->unp_gcflag =
 2601                             (unp->unp_gcflag & UNPGC_IGNORE_RIGHTS);
 2602 
 2603         /*
 2604          * Scan marking all reachable sockets with UNPGC_REF.  Once a socket
 2605          * is reachable all of the sockets it references are reachable.
 2606          * Stop the scan once we do a complete loop without discovering
 2607          * a new reachable socket.
 2608          */
 2609         do {
 2610                 unp_unreachable = 0;
 2611                 unp_marked = 0;
 2612                 for (head = heads; *head != NULL; head++)
 2613                         LIST_FOREACH(unp, *head, unp_link)
 2614                                 unp_gc_process(unp);
 2615         } while (unp_marked);
 2616         UNP_LINK_RUNLOCK();
 2617         if (unp_unreachable == 0)
 2618                 return;
 2619 
 2620         /*
 2621          * Allocate space for a local list of dead unpcbs.
 2622          */
 2623         unref = malloc(unp_unreachable * sizeof(struct file *),
 2624             M_TEMP, M_WAITOK);
 2625 
 2626         /*
 2627          * Iterate looking for sockets which have been specifically marked
 2628          * as unreachable and store them locally.
 2629          */
 2630         UNP_LINK_RLOCK();
 2631         for (total = 0, head = heads; *head != NULL; head++)
 2632                 LIST_FOREACH(unp, *head, unp_link)
 2633                         if ((unp->unp_gcflag & UNPGC_DEAD) != 0) {
 2634                                 f = unp->unp_file;
 2635                                 if (unp->unp_msgcount == 0 || f == NULL ||
 2636                                     f->f_count != unp->unp_msgcount ||
 2637                                     !fhold(f))
 2638                                         continue;
 2639                                 unref[total++] = f;
 2640                                 KASSERT(total <= unp_unreachable,
 2641                                     ("unp_gc: incorrect unreachable count."));
 2642                         }
 2643         UNP_LINK_RUNLOCK();
 2644 
 2645         /*
 2646          * Now flush all sockets, freeing rights.  This will free the
 2647          * struct files associated with these sockets but leave each socket
 2648          * with one remaining ref.
 2649          */
 2650         for (i = 0; i < total; i++) {
 2651                 struct socket *so;
 2652 
 2653                 so = unref[i]->f_data;
 2654                 CURVNET_SET(so->so_vnet);
 2655                 sorflush(so);
 2656                 CURVNET_RESTORE();
 2657         }
 2658 
 2659         /*
 2660          * And finally release the sockets so they can be reclaimed.
 2661          */
 2662         for (i = 0; i < total; i++)
 2663                 fdrop(unref[i], NULL);
 2664         unp_recycled += total;
 2665         free(unref, M_TEMP);
 2666 }
 2667 
 2668 static void
 2669 unp_dispose_mbuf(struct mbuf *m)
 2670 {
 2671 
 2672         if (m)
 2673                 unp_scan(m, unp_freerights);
 2674 }
 2675 
 2676 /*
 2677  * Synchronize against unp_gc, which can trip over data as we are freeing it.
 2678  */
 2679 static void
 2680 unp_dispose(struct socket *so)
 2681 {
 2682         struct unpcb *unp;
 2683 
 2684         unp = sotounpcb(so);
 2685         UNP_LINK_WLOCK();
 2686         unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
 2687         UNP_LINK_WUNLOCK();
 2688         if (!SOLISTENING(so))
 2689                 unp_dispose_mbuf(so->so_rcv.sb_mb);
 2690 }
 2691 
 2692 static void
 2693 unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
 2694 {
 2695         struct mbuf *m;
 2696         struct cmsghdr *cm;
 2697         void *data;
 2698         socklen_t clen, datalen;
 2699 
 2700         while (m0 != NULL) {
 2701                 for (m = m0; m; m = m->m_next) {
 2702                         if (m->m_type != MT_CONTROL)
 2703                                 continue;
 2704 
 2705                         cm = mtod(m, struct cmsghdr *);
 2706                         clen = m->m_len;
 2707 
 2708                         while (cm != NULL) {
 2709                                 if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 2710                                         break;
 2711 
 2712                                 data = CMSG_DATA(cm);
 2713                                 datalen = (caddr_t)cm + cm->cmsg_len
 2714                                     - (caddr_t)data;
 2715 
 2716                                 if (cm->cmsg_level == SOL_SOCKET &&
 2717                                     cm->cmsg_type == SCM_RIGHTS) {
 2718                                         (*op)(data, datalen /
 2719                                             sizeof(struct filedescent *));
 2720                                 }
 2721 
 2722                                 if (CMSG_SPACE(datalen) < clen) {
 2723                                         clen -= CMSG_SPACE(datalen);
 2724                                         cm = (struct cmsghdr *)
 2725                                             ((caddr_t)cm + CMSG_SPACE(datalen));
 2726                                 } else {
 2727                                         clen = 0;
 2728                                         cm = NULL;
 2729                                 }
 2730                         }
 2731                 }
 2732                 m0 = m0->m_nextpkt;
 2733         }
 2734 }
 2735 
 2736 /*
 2737  * A helper function called by VFS before socket-type vnode reclamation.
 2738  * For an active vnode it clears the unp_vnode pointer and drops the
 2739  * vnode's use count.
 2740  */
 2741 void
 2742 vfs_unp_reclaim(struct vnode *vp)
 2743 {
 2744         struct unpcb *unp;
 2745         int active;
 2746         struct mtx *vplock;
 2747 
 2748         ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim");
 2749         KASSERT(vp->v_type == VSOCK,
 2750             ("vfs_unp_reclaim: vp->v_type != VSOCK"));
 2751 
 2752         active = 0;
 2753         vplock = mtx_pool_find(mtxpool_sleep, vp);
 2754         mtx_lock(vplock);
 2755         VOP_UNP_CONNECT(vp, &unp);
 2756         if (unp == NULL)
 2757                 goto done;
 2758         UNP_PCB_LOCK(unp);
 2759         if (unp->unp_vnode == vp) {
 2760                 VOP_UNP_DETACH(vp);
 2761                 unp->unp_vnode = NULL;
 2762                 active = 1;
 2763         }
 2764         UNP_PCB_UNLOCK(unp);
 2765  done:
 2766         mtx_unlock(vplock);
 2767         if (active)
 2768                 vunref(vp);
 2769 }
 2770 
 2771 #ifdef DDB
 2772 static void
 2773 db_print_indent(int indent)
 2774 {
 2775         int i;
 2776 
 2777         for (i = 0; i < indent; i++)
 2778                 db_printf(" ");
 2779 }
 2780 
 2781 static void
 2782 db_print_unpflags(int unp_flags)
 2783 {
 2784         int comma;
 2785 
 2786         comma = 0;
 2787         if (unp_flags & UNP_HAVEPC) {
 2788                 db_printf("%sUNP_HAVEPC", comma ? ", " : "");
 2789                 comma = 1;
 2790         }
 2791         if (unp_flags & UNP_WANTCRED) {
 2792                 db_printf("%sUNP_WANTCRED", comma ? ", " : "");
 2793                 comma = 1;
 2794         }
 2795         if (unp_flags & UNP_CONNWAIT) {
 2796                 db_printf("%sUNP_CONNWAIT", comma ? ", " : "");
 2797                 comma = 1;
 2798         }
 2799         if (unp_flags & UNP_CONNECTING) {
 2800                 db_printf("%sUNP_CONNECTING", comma ? ", " : "");
 2801                 comma = 1;
 2802         }
 2803         if (unp_flags & UNP_BINDING) {
 2804                 db_printf("%sUNP_BINDING", comma ? ", " : "");
 2805                 comma = 1;
 2806         }
 2807 }
 2808 
 2809 static void
 2810 db_print_xucred(int indent, struct xucred *xu)
 2811 {
 2812         int comma, i;
 2813 
 2814         db_print_indent(indent);
 2815         db_printf("cr_version: %u   cr_uid: %u   cr_pid: %d   cr_ngroups: %d\n",
 2816             xu->cr_version, xu->cr_uid, xu->cr_pid, xu->cr_ngroups);
 2817         db_print_indent(indent);
 2818         db_printf("cr_groups: ");
 2819         comma = 0;
 2820         for (i = 0; i < xu->cr_ngroups; i++) {
 2821                 db_printf("%s%u", comma ? ", " : "", xu->cr_groups[i]);
 2822                 comma = 1;
 2823         }
 2824         db_printf("\n");
 2825 }
 2826 
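      /*
       * Print the unpcbs linked onto the given reference list through
       * unp_reflink, four pointers per output line.
       */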
 2827 static void
 2828 db_print_unprefs(int indent, struct unp_head *uh)
 2829 {
 2830         struct unpcb *unp;
 2831         int counter;
 2832 
 2833         counter = 0;
 2834         LIST_FOREACH(unp, uh, unp_reflink) {
 2835                 if (counter % 4 == 0)
 2836                         db_print_indent(indent);
 2837                 db_printf("%p  ", unp);
 2838                 if (counter % 4 == 3)
 2839                         db_printf("\n");
 2840                 counter++;
 2841         }
 2842         if (counter != 0 && counter % 4 != 0)
 2843                 db_printf("\n");
 2844 }
 2845 
 2846 DB_SHOW_COMMAND(unpcb, db_show_unpcb)
 2847 {
 2848         struct unpcb *unp;
 2849 
 2850         if (!have_addr) {
 2851                 db_printf("usage: show unpcb <addr>\n");
 2852                 return;
 2853         }
 2854         unp = (struct unpcb *)addr;
 2855 
 2856         db_printf("unp_socket: %p   unp_vnode: %p\n", unp->unp_socket,
 2857             unp->unp_vnode);
 2858 
 2859         db_printf("unp_ino: %ju   unp_conn: %p\n", (uintmax_t)unp->unp_ino,
 2860             unp->unp_conn);
 2861 
 2862         db_printf("unp_refs:\n");
 2863         db_print_unprefs(2, &unp->unp_refs);
 2864 
 2865         /* XXXRW: Would be nice to print the full address, if any. */
 2866         db_printf("unp_addr: %p\n", unp->unp_addr);
 2867 
 2868         db_printf("unp_gencnt: %llu\n",
 2869             (unsigned long long)unp->unp_gencnt);
 2870 
 2871         db_printf("unp_flags: %x (", unp->unp_flags);
 2872         db_print_unpflags(unp->unp_flags);
 2873         db_printf(")\n");
 2874 
 2875         db_printf("unp_peercred:\n");
 2876         db_print_xucred(2, &unp->unp_peercred);
 2877 
 2878         db_printf("unp_refcount: %u\n", unp->unp_refcount);
 2879 }
 2880 #endif
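
The DB_SHOW_COMMAND above is compiled in only when the kernel is built with
DDB. It is invoked from the in-kernel debugger with the raw address of a
struct unpcb, exactly as the usage string printed by the command indicates;
a hypothetical invocation (the address is a placeholder, not real output)
would be:

        db> show unpcb 0xfffff80003a2b000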
