The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: uipc_usrreq.c,v 1.203 2022/05/28 22:08:46 andvar Exp $ */
    2 
    3 /*-
    4  * Copyright (c) 1998, 2000, 2004, 2008, 2009, 2020 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
    9  * NASA Ames Research Center, and by Andrew Doran.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30  * POSSIBILITY OF SUCH DAMAGE.
   31  */
   32 
   33 /*
   34  * Copyright (c) 1982, 1986, 1989, 1991, 1993
   35  *      The Regents of the University of California.  All rights reserved.
   36  *
   37  * Redistribution and use in source and binary forms, with or without
   38  * modification, are permitted provided that the following conditions
   39  * are met:
   40  * 1. Redistributions of source code must retain the above copyright
   41  *    notice, this list of conditions and the following disclaimer.
   42  * 2. Redistributions in binary form must reproduce the above copyright
   43  *    notice, this list of conditions and the following disclaimer in the
   44  *    documentation and/or other materials provided with the distribution.
   45  * 3. Neither the name of the University nor the names of its contributors
   46  *    may be used to endorse or promote products derived from this software
   47  *    without specific prior written permission.
   48  *
   49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   59  * SUCH DAMAGE.
   60  *
   61  *      @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
   62  */
   63 
   64 /*
   65  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
   66  *
   67  * Redistribution and use in source and binary forms, with or without
   68  * modification, are permitted provided that the following conditions
   69  * are met:
   70  * 1. Redistributions of source code must retain the above copyright
   71  *    notice, this list of conditions and the following disclaimer.
   72  * 2. Redistributions in binary form must reproduce the above copyright
   73  *    notice, this list of conditions and the following disclaimer in the
   74  *    documentation and/or other materials provided with the distribution.
   75  * 3. All advertising materials mentioning features or use of this software
   76  *    must display the following acknowledgement:
   77  *      This product includes software developed by the University of
   78  *      California, Berkeley and its contributors.
   79  * 4. Neither the name of the University nor the names of its contributors
   80  *    may be used to endorse or promote products derived from this software
   81  *    without specific prior written permission.
   82  *
   83  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   84  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   85  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   86  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   87  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   88  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   89  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   90  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   91  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   92  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   93  * SUCH DAMAGE.
   94  *
   95  *      @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
   96  */
   97 
   98 #include <sys/cdefs.h>
   99 __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.203 2022/05/28 22:08:46 andvar Exp $");
  100 
  101 #ifdef _KERNEL_OPT
  102 #include "opt_compat_netbsd.h"
  103 #endif
  104 
  105 #include <sys/param.h>
  106 #include <sys/systm.h>
  107 #include <sys/proc.h>
  108 #include <sys/filedesc.h>
  109 #include <sys/domain.h>
  110 #include <sys/protosw.h>
  111 #include <sys/socket.h>
  112 #include <sys/socketvar.h>
  113 #include <sys/unpcb.h>
  114 #include <sys/un.h>
  115 #include <sys/namei.h>
  116 #include <sys/vnode.h>
  117 #include <sys/file.h>
  118 #include <sys/stat.h>
  119 #include <sys/mbuf.h>
  120 #include <sys/kauth.h>
  121 #include <sys/kmem.h>
  122 #include <sys/atomic.h>
  123 #include <sys/uidinfo.h>
  124 #include <sys/kernel.h>
  125 #include <sys/kthread.h>
  126 #include <sys/compat_stub.h>
  127 
  128 #include <compat/sys/socket.h>
  129 #include <compat/net/route_70.h>
  130 
  131 /*
  132  * Unix communications domain.
  133  *
  134  * TODO:
  135  *      RDM
  136  *      rethink name space problems
  137  *      need a proper out-of-band
  138  *
  139  * Notes on locking:
  140  *
  141  * The generic rules noted in uipc_socket2.c apply.  In addition:
  142  *
  143  * o We have a global lock, uipc_lock.
  144  *
  145  * o All datagram sockets are locked by uipc_lock.
  146  *
  147  * o For stream socketpairs, the two endpoints are created sharing the same
  148  *   independent lock.  Sockets presented to PRU_CONNECT2 must already have
  149  *   matching locks.
  150  *
  151  * o Stream sockets created via socket() start life with their own
  152  *   independent lock.
  153  * 
  154  * o Stream connections to a named endpoint are slightly more complicated.
  155  *   Sockets that have called listen() have their lock pointer mutated to
  156  *   the global uipc_lock.  When establishing a connection, the connecting
  157  *   socket also has its lock mutated to uipc_lock, which matches the head
  158  *   (listening socket).  We create a new socket for accept() to return, and
  159  *   that also shares the head's lock.  Until the connection is completely
  160  *   done on both ends, all three sockets are locked by uipc_lock.  Once the
  161  *   connection is complete, the association with the head's lock is broken.
  162  *   The connecting socket and the socket returned from accept() have their
  163  *   lock pointers mutated away from uipc_lock, and back to the connecting
  164  *   socket's original, independent lock.  The head continues to be locked
  165  *   by uipc_lock.
  166  *
  167  * o If uipc_lock is determined to be a significant source of contention,
  168  *   it could easily be hashed out.  It is difficult to simply make it an
  169  *   independent lock because of visibility / garbage collection issues:
  170  *   if a socket has been associated with a lock at any point, that lock
  171  *   must remain valid until the socket is no longer visible in the system.
  172  *   The lock must not be freed or otherwise destroyed until any sockets
  173  *   that had referenced it have also been destroyed.
  174  */
  175 const struct sockaddr_un sun_noname = {
              /*
               * Address used by unp_output() and unp_setaddr() when a socket
               * has no bound name: sun_len covers only the fixed header, so
               * the path part is empty.
               */
  176         .sun_len = offsetof(struct sockaddr_un, sun_path),
  177         .sun_family = AF_LOCAL,
  178 };
  179 ino_t   unp_ino;                        /* prototype for fake inode numbers */
  180 
      /* Forward declarations for helpers private to this file. */
  181 static struct mbuf * unp_addsockcred(struct lwp *, struct mbuf *);
  182 static void   unp_discard_later(file_t *);
  183 static void   unp_discard_now(file_t *);
  184 static void   unp_disconnect1(struct unpcb *);
  185 static bool   unp_drop(struct unpcb *, int);
  186 static int    unp_internalize(struct mbuf **);
  187 static void   unp_mark(file_t *);
  188 static void   unp_scan(struct mbuf *, void (*)(file_t *), int);
  189 static void   unp_shutdown1(struct unpcb *);
  190 static void   unp_thread(void *);
  191 static void   unp_thread_kick(void);
  192 
      /* Domain-wide lock, allocated in uipc_init(); see "Notes on locking". */
  193 static kmutex_t *uipc_lock;
  194 
      /*
       * Condition variable and lwp of the "unpgc" kernel thread that
       * uipc_init() creates.
       */
  195 static kcondvar_t unp_thread_cv;
  196 static lwp_t *unp_thread_lwp;
      /*
       * NOTE(review): presumably the queue of files awaiting discard by
       * that thread and a count of deferred work; their users are outside
       * this part of the file -- confirm.
       */
  197 static SLIST_HEAD(,file) unp_thread_discard;
  198 static int unp_defer;
  199 static struct sysctllog *usrreq_sysctllog;
  200 static void unp_sysctl_create(void);
  201 
  202 /* Compat interface */
  203 
      /*
       * Default expression handed to MODULE_HOOK_CALL() together with
       * uipc_unp_70_hook in unp_output() and unp_send().  It returns the
       * control chain unchanged and ignores the lwp argument.
       * NOTE(review): presumably this is what runs when the compat hook is
       * not installed; confirm against the MODULE_HOOK_CALL() definition.
       */
  204 struct mbuf * stub_compat_70_unp_addsockcred(lwp_t *, struct mbuf *);
  205 
  206 struct mbuf * stub_compat_70_unp_addsockcred(struct lwp *lwp,
  207     struct mbuf *control)
  208 {
  209 
  210         /* Nothing to add: hand back the caller's control chain as-is. */
  211         return control;
  212 }
  213 
      /*
       * Gate for the LOCAL_OCREDS socket option: while this is false,
       * uipc_ctloutput() fails both set and get of LOCAL_OCREDS with
       * ENOPROTOOPT.
       */
  214 bool compat70_ocreds_valid = false;
  215 
  216 /*
  217  * Initialize Unix protocols.
       *
       * Calls unp_sysctl_create(), allocates the domain-wide uipc_lock,
       * initializes unp_thread_cv and starts the "unpgc" kernel thread
       * (unp_thread), keeping its lwp in unp_thread_lwp.  Panics if the
       * thread cannot be created.
  218  */
  219 void
  220 uipc_init(void)
  221 {
  222         int error;
  223 
  224         unp_sysctl_create();
  225 
  226         uipc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
  227         cv_init(&unp_thread_cv, "unpgc");
  228 
  229         error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, unp_thread,
  230             NULL, &unp_thread_lwp, "unpgc");
  231         if (error != 0)
  232                 panic("uipc_init %d", error);
  233 }
  234 
  235 static void
  236 unp_connid(struct lwp *l, struct unpcb *unp, int flags)
  237 {
              /*
               * Record the pid and the effective uid/gid of lwp `l' in
               * unp->unp_connid, then OR `flags' into unp->unp_flags.
               * uipc_ctloutput() returns unp_connid for LOCAL_PEEREID once
               * UNP_EIDSVALID is set in unp_flags.
               */
  238         unp->unp_connid.unp_pid = l->l_proc->p_pid;
  239         unp->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred);
  240         unp->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred);
  241         unp->unp_flags |= flags;
  242 }
  243 
  244 /*
  245  * A connection succeeded: disassociate both endpoints from the head's
  246  * lock, and make them share their own lock.  There is a race here: for
  247  * a very brief time one endpoint will be locked by a different lock
  248  * than the other end.  However, since the current thread holds the old
  249  * lock (the listening socket's lock, the head) access can still only be
  250  * made to one side of the connection.
       *
       * `so' is the initiating socket, whose unpcb still carries the saved
       * stream lock; `so2' is its peer.  Both must currently share one lock
       * (asserted below to be uipc_lock).  Nothing is changed while `so2'
       * is still attached to a listening head.
  251  */
  252 static void
  253 unp_setpeerlocks(struct socket *so, struct socket *so2)
  254 {
  255         struct unpcb *unp;
  256         kmutex_t *lock;
  257 
  258         KASSERT(solocked2(so, so2));
  259 
  260         /*
  261          * Bail out if either end of the socket is not yet fully
  262          * connected or accepted.  We only break the lock association
  263          * with the head when the pair of sockets stand completely
  264          * on their own.
  265          */
  266         KASSERT(so->so_head == NULL);
  267         if (so2->so_head != NULL)
  268                 return;
  269 
  270         /*
  271          * Drop references to old lock.  A third reference (from the
  272          * queue head) must be held as we still hold its lock.  Bonus:
  273          * we don't need to worry about garbage collecting the lock.
               * The two calls below release one reference per endpoint.
  274          */
  275         lock = so->so_lock;
  276         KASSERT(lock == uipc_lock);
  277         mutex_obj_free(lock);
  278         mutex_obj_free(lock);
  279 
  280         /*
  281          * Grab stream lock from the initiator and share between the two
  282          * endpoints.  Issue memory barrier to ensure all modifications
  283          * become globally visible before the lock change.  so2 is
  284          * assumed not to have a stream lock, because it was created
  285          * purely for the server side to accept this connection and
  286          * started out life using the domain-wide lock.
  287          */
  288         unp = sotounpcb(so);
  289         KASSERT(unp->unp_streamlock != NULL);
  290         KASSERT(sotounpcb(so2)->unp_streamlock == NULL);
  291         lock = unp->unp_streamlock;
  292         unp->unp_streamlock = NULL;
              /*
               * The reference stashed in unp_streamlock moves with the
               * pointer; take one more so that each of the two sockets
               * installed below has its own.
               */
  293         mutex_obj_hold(lock);
  294         /*
  295          * Ensure lock is initialized before publishing it with
  296          * solockreset.  Pairs with atomic_load_consume in solock and
  297          * various loops to reacquire lock after wakeup.
  298          */
  299         membar_release();
  300         /*
  301          * possible race if lock is not held - see comment in
  302          * uipc_usrreq(PRU_ACCEPT).
  303          */
  304         KASSERT(mutex_owned(lock));
  305         solockreset(so, lock);
  306         solockreset(so2, lock);
  307 }
  308 
  309 /*
  310  * Reset a socket's lock back to the domain-wide lock.
       *
       * Called with `so' locked.  If the socket already uses uipc_lock this
       * is a no-op.  Otherwise the current lock is parked in the unpcb's
       * unp_streamlock, uipc_lock is referenced and entered, the socket is
       * switched over to it, and only then is the old lock released, so the
       * socket is never left unlocked.  On return `so' is locked by
       * uipc_lock.
  311  */
  312 static void
  313 unp_resetlock(struct socket *so)
  314 {
  315         kmutex_t *olock, *nlock;
  316         struct unpcb *unp;
  317 
  318         KASSERT(solocked(so));
  319 
  320         olock = so->so_lock;
  321         nlock = uipc_lock;
  322         if (olock == nlock)
  323                 return;
  324         unp = sotounpcb(so);
  325         KASSERT(unp->unp_streamlock == NULL);
              /* Keep the old lock (and its reference) for later reuse. */
  326         unp->unp_streamlock = olock;
  327         mutex_obj_hold(nlock);
  328         mutex_enter(nlock);
  329         solockreset(so, nlock);
  330         mutex_exit(olock);
  331 }
  332 
  333 static void
  334 unp_free(struct unpcb *unp)
  335 {
              /*
               * Release a protocol control block: free the bound address if
               * there is one, drop the reference on a parked stream lock if
               * there is one, then free the unpcb itself.
               */
  336         if (unp->unp_addr)
  337                 free(unp->unp_addr, M_SONAME);
  338         if (unp->unp_streamlock != NULL)
  339                 mutex_obj_free(unp->unp_streamlock);
  340         kmem_free(unp, sizeof(*unp));
  341 }
  342 
  343 static int
  344 unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp)
  345 {
              /*
               * Queue data `m' plus optional `control' on the receive buffer
               * of the socket `unp' is connected to, tagged with the
               * sender's address (sun_noname when the sender is unbound).
               *
               * Returns 0 on success, ECONNREFUSED when `unp' has no peer,
               * or ENOBUFS when sbappendaddr() reports failure; in the
               * ENOBUFS case both chains are freed here.
               */
  346         struct socket *so2;
  347         const struct sockaddr_un *sun;
  348 
  349         /* XXX: server side closed the socket */
              /*
               * NOTE(review): neither m nor control is freed on this path;
               * confirm who owns them when ECONNREFUSED is returned.
               */
  350         if (unp->unp_conn == NULL)
  351                 return ECONNREFUSED;
  352         so2 = unp->unp_conn->unp_socket;
  353 
  354         KASSERT(solocked(so2));
  355 
  356         if (unp->unp_addr)
  357                 sun = unp->unp_addr;
  358         else
  359                 sun = &sun_noname;
              /* The receiver asked for credentials: add them to control. */
  360         if (unp->unp_conn->unp_flags & UNP_WANTCRED)
  361                 control = unp_addsockcred(curlwp, control);
  362         if (unp->unp_conn->unp_flags & UNP_OWANTCRED)
  363                 MODULE_HOOK_CALL(uipc_unp_70_hook, (curlwp, control),
  364                     stub_compat_70_unp_addsockcred(curlwp, control), control);
  365         if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
  366             control) == 0) {
                      /* Not queued: dispose of the control data, free both. */
  367                 unp_dispose(control);
  368                 m_freem(control);
  369                 m_freem(m);
  370                 /* Don't call soroverflow because we're returning this
  371                  * error directly to the sender. */
  372                 so2->so_rcv.sb_overflowed++;
  373                 return ENOBUFS;
  374         } else {
  375                 sorwakeup(so2);
  376                 return 0;
  377         }
  378 }
  379 
  380 static void
  381 unp_setaddr(struct socket *so, struct sockaddr *nam, bool peeraddr)
  382 {
              /*
               * Copy an address into `nam': the connected peer's bound
               * address when `peeraddr' is true, otherwise this socket's
               * own.  When the chosen address does not exist, sun_noname is
               * copied instead.  Exactly sun_len bytes are written; no
               * bound on `nam' is checked here, so the caller must supply
               * enough room.
               */
  383         const struct sockaddr_un *sun = NULL;
  384         struct unpcb *unp;
  385 
  386         KASSERT(solocked(so));
  387         unp = sotounpcb(so);
  388 
  389         if (peeraddr) {
  390                 if (unp->unp_conn && unp->unp_conn->unp_addr)
  391                         sun = unp->unp_conn->unp_addr;
  392         } else {
  393                 if (unp->unp_addr)
  394                         sun = unp->unp_addr;
  395         }
  396         if (sun == NULL)
  397                 sun = &sun_noname;
  398 
  399         memcpy(nam, sun, sun->sun_len);
  400 }
  401 
  402 static int
  403 unp_rcvd(struct socket *so, int flags, struct lwp *l)
  404 {
              /*
               * Re-open the peer's send window to reflect the current fill
               * of our receive buffer and wake any writer on the peer.
               * Only stream and seqpacket sockets are handled; any other
               * type panics.  `flags' and `l' are not used.  Always
               * returns 0.
               */
  405         struct unpcb *unp = sotounpcb(so);
  406         struct socket *so2;
  407         u_int newhiwat;
  408 
  409         KASSERT(solocked(so));
  410         KASSERT(unp != NULL);
  411 
  412         switch (so->so_type) {
  413 
  414         case SOCK_DGRAM:
  415                 panic("uipc 1");
  416                 /*NOTREACHED*/
  417 
  418         case SOCK_SEQPACKET: /* FALLTHROUGH */
  419         case SOCK_STREAM:
                      /* Shorthands: our receive buffer, the peer's send buffer. */
  420 #define rcv (&so->so_rcv)
  421 #define snd (&so2->so_snd)
                      /* Not connected: nothing to adjust. */
  422                 if (unp->unp_conn == 0)
  423                         break;
  424                 so2 = unp->unp_conn->unp_socket;
  425                 KASSERT(solocked2(so, so2));
  426                 /*
  427                  * Adjust backpressure on sender
  428                  * and wakeup any waiting to write.
                       * unp_mbcnt and unp_cc hold the receive buffer's
                       * sb_mbcnt and sb_cc as last recorded; the difference
                       * from the current values is credited to the sender's
                       * sb_mbmax and sb_hiwat.
  429                  */
  430                 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
  431                 unp->unp_mbcnt = rcv->sb_mbcnt;
  432                 newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc;
  433                 (void)chgsbsize(so2->so_uidinfo,
  434                     &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
  435                 unp->unp_cc = rcv->sb_cc;
  436                 sowwakeup(so2);
  437 #undef snd
  438 #undef rcv
  439                 break;
  440 
  441         default:
  442                 panic("uipc 2");
  443         }
  444 
  445         return 0;
  446 }
  447 
  448 static int
  449 unp_recvoob(struct socket *so, struct mbuf *m, int flags)
  450 {
              /*
               * Out-of-band receive is not implemented for this domain:
               * always returns EOPNOTSUPP and leaves `m' untouched.
               */
  451         KASSERT(solocked(so));
  452 
  453         return EOPNOTSUPP;
  454 }
  455 
  456 static int
  457 unp_send(struct socket *so, struct mbuf *m, struct sockaddr *nam,
  458     struct mbuf *control, struct lwp *l)
  459 {
              /*
               * Send data `m' with optional `control' on socket `so'.
               *
               * Datagram sockets: when `nam' is given the socket is
               * connected to it for this one message and disconnected
               * again afterwards; otherwise the socket must already be
               * connected.  Delivery is done by unp_output().
               *
               * Stream and seqpacket sockets: the data is appended directly
               * to the peer's receive buffer and our own send-buffer limits
               * are lowered by the amount queued, to apply backpressure.
               *
               * Returns 0 or an errno value.  The socket lock is released
               * and re-taken around unp_internalize().
               */
  460         struct unpcb *unp = sotounpcb(so);
  461         int error = 0;
  462         u_int newhiwat;
  463         struct socket *so2;
  464 
  465         KASSERT(solocked(so));
  466         KASSERT(unp != NULL);
  467         KASSERT(m != NULL);
  468 
  469         /*
  470          * Note: unp_internalize() rejects any control message
  471          * other than SCM_RIGHTS, and only allows one.  This
  472          * has the side-effect of preventing a caller from
  473          * forging SCM_CREDS.
  474          */
  475         if (control) {
  476                 sounlock(so);
  477                 error = unp_internalize(&control);
  478                 solock(so);
  479                 if (error != 0) {
  480                         m_freem(control);
  481                         m_freem(m);
  482                         return error;
  483                 }
  484         }
  485 
  486         switch (so->so_type) {
  487 
  488         case SOCK_DGRAM: {
  489                 KASSERT(so->so_lock == uipc_lock);
  490                 if (nam) {
  491                         if ((so->so_state & SS_ISCONNECTED) != 0)
  492                                 error = EISCONN;
  493                         else {
  494                                 /*
  495                                  * Note: once connected, the
  496                                  * socket's lock must not be
  497                                  * dropped until we have sent
  498                                  * the message and disconnected.
  499                                  * This is necessary to prevent
  500                                  * intervening control ops, like
  501                                  * another connection.
  502                                  */
  503                                 error = unp_connect(so, nam, l);
  504                         }
  505                 } else {
  506                         if ((so->so_state & SS_ISCONNECTED) == 0)
  507                                 error = ENOTCONN;
  508                 }
  509                 if (error) {
  510                         unp_dispose(control);
  511                         m_freem(control);
  512                         m_freem(m);
  513                         return error;
  514                 }
  515                 error = unp_output(m, control, unp);
                      /* Undo the temporary connection made for this message. */
  516                 if (nam)
  517                         unp_disconnect1(unp);
  518                 break;
  519         }
  520 
  521         case SOCK_SEQPACKET: /* FALLTHROUGH */
  522         case SOCK_STREAM:
                      /* Shorthands: the peer's receive buffer, our send buffer. */
  523 #define rcv (&so2->so_rcv)
  524 #define snd (&so->so_snd)
  525                 if (unp->unp_conn == NULL) {
  526                         error = ENOTCONN;
  527                         break;
  528                 }
  529                 so2 = unp->unp_conn->unp_socket;
  530                 KASSERT(solocked2(so, so2));
  531                 if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
  532                         /*
  533                          * Credentials are passed only once on
  534                          * SOCK_STREAM and SOCK_SEQPACKET.
  535                          */
  536                         unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
  537                         control = unp_addsockcred(l, control);
  538                 }
  539                 if (unp->unp_conn->unp_flags & UNP_OWANTCRED) {
  540                         /*
  541                          * Credentials are passed only once on
  542                          * SOCK_STREAM and SOCK_SEQPACKET.
  543                          */
  544                         unp->unp_conn->unp_flags &= ~UNP_OWANTCRED;
  545                         MODULE_HOOK_CALL(uipc_unp_70_hook, (curlwp, control),
  546                             stub_compat_70_unp_addsockcred(curlwp, control),
  547                             control);
  548                 }
  549                 /*
  550                  * Send to paired receive port, and then reduce
  551                  * send buffer hiwater marks to maintain backpressure.
  552                  * Wake up readers.
  553                  */
  554                 if (control) {
                              /*
                               * A non-zero return means control was queued
                               * with the data, so it must not be freed below.
                               * NOTE(review): on a zero return control is
                               * disposed of below, but m is neither queued
                               * nor freed in this function and error stays
                               * 0 -- confirm whether sbappendcontrol() can
                               * fail here and who then owns m.
                               */
  555                         if (sbappendcontrol(rcv, m, control) != 0)
  556                                 control = NULL;
  557                 } else {
  558                         switch(so->so_type) {
  559                         case SOCK_SEQPACKET:
  560                                 sbappendrecord(rcv, m);
  561                                 break;
  562                         case SOCK_STREAM:
  563                                 sbappend(rcv, m);
  564                                 break;
  565                         default:
  566                                 panic("uipc_usrreq");
  567                                 break;
  568                         }
  569                 }
                      /*
                       * Charge the growth of the peer's receive buffer since
                       * the last recorded unp_mbcnt/unp_cc against our send
                       * buffer limits, then record the new values.
                       */
  570                 snd->sb_mbmax -=
  571                     rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
  572                 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
  573                 newhiwat = snd->sb_hiwat -
  574                     (rcv->sb_cc - unp->unp_conn->unp_cc);
  575                 (void)chgsbsize(so->so_uidinfo,
  576                     &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
  577                 unp->unp_conn->unp_cc = rcv->sb_cc;
  578                 sorwakeup(so2);
  579 #undef snd
  580 #undef rcv
                      /* Control data that was not queued is released here. */
  581                 if (control != NULL) {
  582                         unp_dispose(control);
  583                         m_freem(control);
  584                 }
  585                 break;
  586 
  587         default:
  588                 panic("uipc 4");
  589         }
  590 
  591         return error;
  592 }
  593 
  594 static int
  595 unp_sendoob(struct socket *so, struct mbuf *m, struct mbuf * control)
  596 {
              /*
               * Out-of-band send is not implemented for this domain: both
               * mbuf chains are freed and EOPNOTSUPP is returned.
               */
  597         KASSERT(solocked(so));
  598 
  599         m_freem(m);
  600         m_freem(control);
  601 
  602         return EOPNOTSUPP;
  603 }
  604 
  605 /*
  606  * Unix domain socket option processing.
       *
       * Only options at level SOL_LOCAL are accepted; any other level
       * yields ENOPROTOOPT.
       *
       * PRCO_SETOPT: LOCAL_CREDS, LOCAL_CONNWAIT and (only while
       * compat70_ocreds_valid is true) LOCAL_OCREDS take an int and set or
       * clear the matching bit in unp_flags.
       *
       * PRCO_GETOPT: LOCAL_PEEREID returns unp_connid when UNP_EIDSVALID is
       * set and EINVAL otherwise; LOCAL_CREDS and LOCAL_OCREDS return the
       * current flag as 0 or 1.  The socket lock is released while the
       * reply is built and re-taken before returning.
       *
       * Any other `op' leaves error at 0.  Called with `so' locked.
  607  */
  608 int
  609 uipc_ctloutput(int op, struct socket *so, struct sockopt *sopt)
  610 {
  611         struct unpcb *unp = sotounpcb(so);
  612         int optval = 0, error = 0;
  613 
  614         KASSERT(solocked(so));
  615 
  616         if (sopt->sopt_level != SOL_LOCAL) {
  617                 error = ENOPROTOOPT;
  618         } else switch (op) {
  619 
  620         case PRCO_SETOPT:
  621                 switch (sopt->sopt_name) {
  622                 case LOCAL_OCREDS:
  623                         if (!compat70_ocreds_valid)  {
  624                                 error = ENOPROTOOPT;
  625                                 break;
  626                         }
  627                         /* FALLTHROUGH */
  628                 case LOCAL_CREDS:
  629                 case LOCAL_CONNWAIT:
  630                         error = sockopt_getint(sopt, &optval);
  631                         if (error)
  632                                 break;
  633                         switch (sopt->sopt_name) {
                              /* OPTSET: set `bit' if optval is non-zero, else clear it. */
  634 #define OPTSET(bit) \
  635         if (optval) \
  636                 unp->unp_flags |= (bit); \
  637         else \
  638                 unp->unp_flags &= ~(bit);
  639 
  640                         case LOCAL_CREDS:
  641                                 OPTSET(UNP_WANTCRED);
  642                                 break;
  643                         case LOCAL_CONNWAIT:
  644                                 OPTSET(UNP_CONNWAIT);
  645                                 break;
  646                         case LOCAL_OCREDS:
  647                                 OPTSET(UNP_OWANTCRED);
  648                                 break;
  649                         }
  650                         break;
  651 #undef OPTSET
  652 
  653                 default:
  654                         error = ENOPROTOOPT;
  655                         break;
  656                 }
  657                 break;
  658 
  659         case PRCO_GETOPT:
                      /* The lock is dropped here and re-taken after the switch. */
  660                 sounlock(so);
  661                 switch (sopt->sopt_name) {
  662                 case LOCAL_PEEREID:
  663                         if (unp->unp_flags & UNP_EIDSVALID) {
  664                                 error = sockopt_set(sopt, &unp->unp_connid,
  665                                     sizeof(unp->unp_connid));
  666                         } else {
  667                                 error = EINVAL;
  668                         }
  669                         break;
  670                 case LOCAL_CREDS:
                              /* OPTBIT: 1 if `bit' is set in unp_flags, else 0. */
  671 #define OPTBIT(bit)     (unp->unp_flags & (bit) ? 1 : 0)
  672 
  673                         optval = OPTBIT(UNP_WANTCRED);
  674                         error = sockopt_setint(sopt, optval);
  675                         break;
  676                 case LOCAL_OCREDS:
  677                         if (compat70_ocreds_valid) {
  678                                 optval = OPTBIT(UNP_OWANTCRED);
  679                                 error = sockopt_setint(sopt, optval);
  680                                 break;
  681                         }
  682 #undef OPTBIT
  683                         /* FALLTHROUGH */
  684                 default:
  685                         error = ENOPROTOOPT;
  686                         break;
  687                 }
  688                 solock(so);
  689                 break;
  690         }
  691         return (error);
  692 }
  693 
  694 /*
  695  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
  696  * for stream sockets, although the total for sender and receiver is
  697  * actually only PIPSIZ.
  698  * Datagram sockets really use the sendspace as the maximum datagram size,
  699  * and don't really want to reserve the sendspace.  Their recvspace should
  700  * be large enough for at least one max-size datagram plus address.
       *
       * unp_attach() passes the unpst_* values to soreserve() for
       * SOCK_STREAM and SOCK_SEQPACKET sockets and the unpdg_* values for
       * SOCK_DGRAM sockets.
  701  */
  702 #ifndef PIPSIZ
  703 #define PIPSIZ  8192
  704 #endif
  705 u_long  unpst_sendspace = PIPSIZ;
  706 u_long  unpst_recvspace = PIPSIZ;
  707 u_long  unpdg_sendspace = 2*1024;       /* really max datagram size */
  708 u_long  unpdg_recvspace = 16*1024;
  709 
      /* unp_detach() flushes the receive buffer early while unp_rights != 0. */
  710 u_int   unp_rights;                     /* files in flight */
  711 u_int   unp_rights_ratio = 2;           /* limit, fraction of maxfiles */
  712 
  713 static int
  714 unp_attach(struct socket *so, int proto)
  715 {
              /*
               * Attach a fresh protocol control block to `so'.
               *
               * If the socket has no lock yet, one is installed and taken:
               * a newly allocated mutex for stream/seqpacket sockets, or a
               * new reference to uipc_lock for datagram sockets.  Buffer
               * space is reserved when either high-water mark is still 0.
               * Any other socket type panics.  `proto' is not used.
               *
               * Returns 0 with so->so_pcb set and the socket locked, or the
               * error from soreserve().
               */
  716         struct unpcb *unp = sotounpcb(so);
  717         u_long sndspc, rcvspc;
  718         int error;
  719 
  720         KASSERT(unp == NULL);
  721 
  722         switch (so->so_type) {
  723         case SOCK_SEQPACKET:
  724                 /* FALLTHROUGH */
  725         case SOCK_STREAM:
  726                 if (so->so_lock == NULL) {
  727                         so->so_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
  728                         solock(so);
  729                 }
  730                 sndspc = unpst_sendspace;
  731                 rcvspc = unpst_recvspace;
  732                 break;
  733 
  734         case SOCK_DGRAM:
  735                 if (so->so_lock == NULL) {
  736                         mutex_obj_hold(uipc_lock);
  737                         so->so_lock = uipc_lock;
  738                         solock(so);
  739                 }
  740                 sndspc = unpdg_sendspace;
  741                 rcvspc = unpdg_recvspace;
  742                 break;
  743 
  744         default:
  745                 panic("unp_attach");
  746         }
  747 
  748         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  749                 error = soreserve(so, sndspc, rcvspc);
                      /*
                       * NOTE(review): a lock installed above stays in place
                       * and held on this return; presumably the caller
                       * cleans up -- confirm.
                       */
  750                 if (error) {
  751                         return error;
  752                 }
  753         }
  754 
              /* KM_SLEEP is passed and the result is used unchecked. */
  755         unp = kmem_zalloc(sizeof(*unp), KM_SLEEP);
  756         nanotime(&unp->unp_ctime);
  757         unp->unp_socket = so;
  758         so->so_pcb = unp;
  759 
  760         KASSERT(solocked(so));
  761         return 0;
  762 }
  763 
      /*
       * unp_detach: tear down the unpcb attached to `so'.
       *
       * Called with the socket locked.  Unbinds the socket from its vnode
       * (if any), disconnects it from its peer, drops every socket still
       * on its unp_refs list with ECONNRESET, marks the socket
       * disconnected and frees the unpcb.  When files are in flight
       * (unp_rights != 0) the receive buffer is flushed first and the
       * GC thread is kicked.
       */
  764 static void
  765 unp_detach(struct socket *so)
  766 {
  767         struct unpcb *unp;
  768         vnode_t *vp;
  769 
  770         unp = sotounpcb(so);
  771         KASSERT(unp != NULL);
  772         KASSERT(solocked(so));
  773  retry:
  774         if ((vp = unp->unp_vnode) != NULL) {
                      /*
                       * The socket lock is dropped while the vnode is
                       * unhooked and released, then re-taken before
                       * unp_vnode is cleared.
                       */
  775                 sounlock(so);
  776                 /* Acquire v_interlock to protect against unp_connect(). */
  777                 /* XXXAD racy */
  778                 mutex_enter(vp->v_interlock);
  779                 vp->v_socket = NULL;
  780                 mutex_exit(vp->v_interlock);
  781                 vrele(vp);
  782                 solock(so);
  783                 unp->unp_vnode = NULL;
  784         }
  785         if (unp->unp_conn)
  786                 unp_disconnect1(unp);
  787         while (unp->unp_refs) {
  788                 KASSERT(solocked2(so, unp->unp_refs->unp_socket));
                      /*
                       * A true return means unp_drop() freed the referencing
                       * socket via sofree(), which drops the socket lock:
                       * re-take it and start over.
                       */
  789                 if (unp_drop(unp->unp_refs, ECONNRESET)) {
  790                         solock(so);
  791                         goto retry;
  792                 }
  793         }
  794         soisdisconnected(so);
  795         so->so_pcb = NULL;
  796         if (unp_rights) {
  797                 /*
  798                  * Normally the receive buffer is flushed later, in sofree,
  799                  * but if our receive buffer holds references to files that
  800                  * are now garbage, we will enqueue those file references to
  801                  * the garbage collector and kick it into action.
  802                  */
  803                 sorflush(so);
  804                 unp_free(unp);
  805                 unp_thread_kick();
  806         } else
  807                 unp_free(unp);
  808 }
  809 
      /*
       * unp_accept: finish accept() on `so' and report the peer's address.
       *
       * Called with the socket locked.  Returns EINVAL if no unpcb is
       * attached, otherwise 0.  With no peer, only the address is filled in
       * through unp_setaddr().  Otherwise a peer still in SS_ISCONNECTING is
       * marked connected and the pair is switched to its own lock with
       * unp_setpeerlocks() before the address is returned.
       */
  810 static int
  811 unp_accept(struct socket *so, struct sockaddr *nam)
  812 {
  813         struct unpcb *unp = sotounpcb(so);
  814         struct socket *so2;
  815 
  816         KASSERT(solocked(so));
  817         KASSERT(nam != NULL);
  818 
  819         /* XXX code review required to determine if unp can ever be NULL */
  820         if (unp == NULL)
  821                 return EINVAL;
  822 
  823         KASSERT(so->so_lock == uipc_lock);
  824         /*
  825          * Mark the initiating STREAM socket as connected *ONLY*
  826          * after it's been accepted.  This prevents a client from
  827          * overrunning a server and receiving ECONNREFUSED.
  828          */
  829         if (unp->unp_conn == NULL) {
  830                 /*
  831                  * This will use the empty socket and will not
  832                  * allocate.
  833                  */
  834                 unp_setaddr(so, nam, true);
  835                 return 0;
  836         }
  837         so2 = unp->unp_conn->unp_socket;
  838         if (so2->so_state & SS_ISCONNECTING) {
  839                 KASSERT(solocked2(so, so->so_head));
  840                 KASSERT(solocked2(so2, so->so_head));
  841                 soisconnected(so2);
  842         }
  843         /*
  844          * If the connection is fully established, break the
  845          * association with uipc_lock and give the connected
  846          * pair a separate lock to share.
  847          * There is a race here: sotounpcb(so2)->unp_streamlock
  848          * is not locked, so when changing so2->so_lock
  849          * another thread can grab it while so->so_lock is still
  850          * pointing to the (locked) uipc_lock.
  851          * This should be harmless, except that this makes
  852          * solocked2() and solocked() unreliable.
  853          * Another problem is that unp_setaddr() expects the
  854          * socket locked.  Grabbing sotounpcb(so2)->unp_streamlock
  855          * fixes both issues.
  856          */
  857         mutex_enter(sotounpcb(so2)->unp_streamlock);
  858         unp_setpeerlocks(so2, so);
  859         /*
  860          * Only now return peer's address, as we may need to
  861          * block in order to allocate memory.
  862          *
  863          * XXX Minor race: connection can be broken while
  864          * lock is dropped in unp_setaddr().  We will return
  865          * error == 0 and sun_noname as the peer address.
  866          */
  867         unp_setaddr(so, nam, true);
  868         /* so_lock now points to unp_streamlock */
  869         mutex_exit(so2->so_lock);
  870         return 0;
  871 }
  872 
  873 static int
  874 unp_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
  875 {
  876         return EOPNOTSUPP;
  877 }
  878 
  879 static int
  880 unp_stat(struct socket *so, struct stat *ub)
  881 {
  882         struct unpcb *unp;
  883         struct socket *so2;
  884 
  885         KASSERT(solocked(so));
  886 
  887         unp = sotounpcb(so);
  888         if (unp == NULL)
  889                 return EINVAL;
  890 
  891         ub->st_blksize = so->so_snd.sb_hiwat;
  892         switch (so->so_type) {
  893         case SOCK_SEQPACKET: /* FALLTHROUGH */
  894         case SOCK_STREAM:
  895                 if (unp->unp_conn == 0) 
  896                         break;
  897 
  898                 so2 = unp->unp_conn->unp_socket;
  899                 KASSERT(solocked2(so, so2));
  900                 ub->st_blksize += so2->so_rcv.sb_cc;
  901                 break;
  902         default:
  903                 break;
  904         }
  905         ub->st_dev = NODEV;
  906         if (unp->unp_ino == 0)
  907                 unp->unp_ino = unp_ino++;
  908         ub->st_atimespec = ub->st_mtimespec = ub->st_ctimespec = unp->unp_ctime;
  909         ub->st_ino = unp->unp_ino;
  910         ub->st_uid = so->so_uidinfo->ui_uid;
  911         ub->st_gid = so->so_egid;
  912         return (0);
  913 }
  914 
  915 static int
  916 unp_peeraddr(struct socket *so, struct sockaddr *nam)
  917 {
  918         KASSERT(solocked(so));
  919         KASSERT(sotounpcb(so) != NULL);
  920         KASSERT(nam != NULL);
  921 
  922         unp_setaddr(so, nam, true);
  923         return 0;
  924 }
  925 
  926 static int
  927 unp_sockaddr(struct socket *so, struct sockaddr *nam)
  928 {
  929         KASSERT(solocked(so));
  930         KASSERT(sotounpcb(so) != NULL);
  931         KASSERT(nam != NULL);
  932 
  933         unp_setaddr(so, nam, false);
  934         return 0;
  935 }
  936 
  937 /*
  938  * we only need to perform this allocation until syscalls other than
  939  * bind are adjusted to use sockaddr_big.
  940  */
  941 static struct sockaddr_un *
  942 makeun_sb(struct sockaddr *nam, size_t *addrlen)
  943 {
  944         struct sockaddr_un *sun;
  945 
  946         *addrlen = nam->sa_len + 1;
  947         sun = malloc(*addrlen, M_SONAME, M_WAITOK);
  948         memcpy(sun, nam, nam->sa_len);
  949         *(((char *)sun) + nam->sa_len) = '\0';
  950         return sun;
  951 }
  952 
      /*
       * unp_bind: bind `so' to the filesystem path carried in `nam'.
       *
       * Called with the socket locked.  The lock is dropped for the pathname
       * lookup and vnode creation and is held again on return; UNP_BUSY is
       * set across that window and cleared before returning.
       *
       * Returns 0 on success, EINVAL if the socket already has a vnode,
       * EALREADY if UNP_BUSY is already set, ENOMEM if the pathbuf cannot
       * be created, EADDRINUSE if the path already exists, or the error
       * from namei() or VOP_CREATE().
       */
  953 static int
  954 unp_bind(struct socket *so, struct sockaddr *nam, struct lwp *l)
  955 {
  956         struct sockaddr_un *sun;
  957         struct unpcb *unp;
  958         vnode_t *vp;
  959         struct vattr vattr;
  960         size_t addrlen;
  961         int error;
  962         struct pathbuf *pb;
  963         struct nameidata nd;
  964         proc_t *p;
  965 
  966         unp = sotounpcb(so);
  967 
  968         KASSERT(solocked(so));
  969         KASSERT(unp != NULL);
  970         KASSERT(nam != NULL);
  971 
  972         if (unp->unp_vnode != NULL)
  973                 return (EINVAL);
  974         if ((unp->unp_flags & UNP_BUSY) != 0) {
  975                 /*
  976                  * EALREADY may not be strictly accurate, but since this
  977                  * is a major application error it's hardly a big deal.
  978                  */
  979                 return (EALREADY);
  980         }
  981         unp->unp_flags |= UNP_BUSY;
  982         sounlock(so);
  983 
  984         p = l->l_proc;
              /* NUL-terminated private copy of the address; freed at `bad'. */
  985         sun = makeun_sb(nam, &addrlen);
  986 
  987         pb = pathbuf_create(sun->sun_path);
  988         if (pb == NULL) {
  989                 error = ENOMEM;
  990                 goto bad;
  991         }
  992         NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT | TRYEMULROOT, pb);
  993 
  994 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  995         if ((error = namei(&nd)) != 0) {
  996                 pathbuf_destroy(pb);
  997                 goto bad;
  998         }
  999         vp = nd.ni_vp;
 1000         if (vp != NULL) {
                      /* The name already exists: abort the create, release vnodes. */
 1001                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
 1002                 if (nd.ni_dvp == vp)
 1003                         vrele(nd.ni_dvp);
 1004                 else
 1005                         vput(nd.ni_dvp);
 1006                 vrele(vp);
 1007                 pathbuf_destroy(pb);
 1008                 error = EADDRINUSE;
 1009                 goto bad;
 1010         }
              /* Create a VSOCK node with permissions filtered by the umask. */
 1011         vattr_null(&vattr);
 1012         vattr.va_type = VSOCK;
 1013         vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask);
 1014         error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 1015         if (error) {
 1016                 vput(nd.ni_dvp);
 1017                 pathbuf_destroy(pb);
 1018                 goto bad;
 1019         }
 1020         vp = nd.ni_vp;
 1021         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 1022         solock(so);
              /* Link vnode and socket; the pcb takes over the `sun' copy. */
 1023         vp->v_socket = unp->unp_socket;
 1024         unp->unp_vnode = vp;
 1025         unp->unp_addrlen = addrlen;
 1026         unp->unp_addr = sun;
 1027         VOP_UNLOCK(vp);
 1028         vput(nd.ni_dvp);
 1029         unp->unp_flags &= ~UNP_BUSY;
 1030         pathbuf_destroy(pb);
 1031         return (0);
 1032 
 1033  bad:
              /* Failure: release the address copy and re-take the socket lock. */
 1034         free(sun, M_SONAME);
 1035         solock(so);
 1036         unp->unp_flags &= ~UNP_BUSY;
 1037         return (error);
 1038 }
 1039 
 1040 static int
 1041 unp_listen(struct socket *so, struct lwp *l)
 1042 {
 1043         struct unpcb *unp = sotounpcb(so);
 1044 
 1045         KASSERT(solocked(so));
 1046         KASSERT(unp != NULL);
 1047 
 1048         /*
 1049          * If the socket can accept a connection, it must be
 1050          * locked by uipc_lock.
 1051          */
 1052         unp_resetlock(so);
 1053         if (unp->unp_vnode == NULL)
 1054                 return EINVAL;
 1055 
 1056         unp_connid(l, unp, UNP_EIDSBIND);
 1057         return 0;
 1058 }
 1059 
 1060 static int
 1061 unp_disconnect(struct socket *so)
 1062 {
 1063         KASSERT(solocked(so));
 1064         KASSERT(sotounpcb(so) != NULL);
 1065 
 1066         unp_disconnect1(sotounpcb(so));
 1067         return 0;
 1068 }
 1069 
 1070 static int
 1071 unp_shutdown(struct socket *so)
 1072 {
 1073         KASSERT(solocked(so));
 1074         KASSERT(sotounpcb(so) != NULL);
 1075 
 1076         socantsendmore(so);
 1077         unp_shutdown1(sotounpcb(so));
 1078         return 0;
 1079 }
 1080 
      /*
       * unp_abort: abort the connection on `so' with ECONNABORTED and detach
       * its unpcb.  Called with the socket locked; always returns 0.
       */
 1081 static int
 1082 unp_abort(struct socket *so)
 1083 {
 1084         KASSERT(solocked(so));
 1085         KASSERT(sotounpcb(so) != NULL);
 1086 
              /*
               * The result of unp_drop() is ignored; the assertions below
               * check that so_head is NULL and that the pcb is still attached
               * before it is detached.
               */
 1087         (void)unp_drop(sotounpcb(so), ECONNABORTED);
 1088         KASSERT(so->so_head == NULL);
 1089         KASSERT(so->so_pcb != NULL);
 1090         unp_detach(so);
 1091         return 0;
 1092 }
 1093 
 1094 static int
 1095 unp_connect1(struct socket *so, struct socket *so2, struct lwp *l)
 1096 {
 1097         struct unpcb *unp = sotounpcb(so);
 1098         struct unpcb *unp2;
 1099 
 1100         if (so2->so_type != so->so_type)
 1101                 return EPROTOTYPE;
 1102 
 1103         /*
 1104          * All three sockets involved must be locked by same lock:
 1105          *
 1106          * local endpoint (so)
 1107          * remote endpoint (so2)
 1108          * queue head (so2->so_head, only if PR_CONNREQUIRED)
 1109          */
 1110         KASSERT(solocked2(so, so2));
 1111         KASSERT(so->so_head == NULL);
 1112         if (so2->so_head != NULL) {
 1113                 KASSERT(so2->so_lock == uipc_lock);
 1114                 KASSERT(solocked2(so2, so2->so_head));
 1115         }
 1116 
 1117         unp2 = sotounpcb(so2);
 1118         unp->unp_conn = unp2;
 1119 
 1120         switch (so->so_type) {
 1121 
 1122         case SOCK_DGRAM:
 1123                 unp->unp_nextref = unp2->unp_refs;
 1124                 unp2->unp_refs = unp;
 1125                 soisconnected(so);
 1126                 break;
 1127 
 1128         case SOCK_SEQPACKET: /* FALLTHROUGH */
 1129         case SOCK_STREAM:
 1130 
 1131                 /*
 1132                  * SOCK_SEQPACKET and SOCK_STREAM cases are handled by callers
 1133                  * which are unp_connect() or unp_connect2().
 1134                  */
 1135 
 1136                 break;
 1137 
 1138         default:
 1139                 panic("unp_connect1");
 1140         }
 1141 
 1142         return 0;
 1143 }
 1144 
      /*
       * unp_connect: connect `so' to the socket bound at the path in `nam'.
       *
       * The socket lock is released for the pathname lookup and is held
       * again on return; UNP_BUSY is set across that window.  For
       * PR_CONNREQUIRED protocols a new socket is spawned from the
       * listener with sonewconn() and becomes the peer.
       *
       * Returns 0 on success, EALREADY if UNP_BUSY is already set, ENOMEM if
       * the pathbuf cannot be created, ENOTSOCK if the path is not a socket,
       * ECONNREFUSED if nothing is listening there or sonewconn() fails,
       * EPROTOTYPE on a socket type mismatch, or the error from namei(),
       * VOP_ACCESS() or unp_connect1().
       */
 1145 int
 1146 unp_connect(struct socket *so, struct sockaddr *nam, struct lwp *l)
 1147 {
 1148         struct sockaddr_un *sun;
 1149         vnode_t *vp;
 1150         struct socket *so2, *so3;
 1151         struct unpcb *unp, *unp2, *unp3;
 1152         size_t addrlen;
 1153         int error;
 1154         struct pathbuf *pb;
 1155         struct nameidata nd;
 1156 
 1157         unp = sotounpcb(so);
 1158         if ((unp->unp_flags & UNP_BUSY) != 0) {
 1159                 /*
 1160                  * EALREADY may not be strictly accurate, but since this
 1161                  * is a major application error it's hardly a big deal.
 1162                  */
 1163                 return (EALREADY);
 1164         }
 1165         unp->unp_flags |= UNP_BUSY;
 1166         sounlock(so);
 1167 
              /* NUL-terminated private copy of the address; freed at `bad2'. */
 1168         sun = makeun_sb(nam, &addrlen);
 1169         pb = pathbuf_create(sun->sun_path);
 1170         if (pb == NULL) {
 1171                 error = ENOMEM;
 1172                 goto bad2;
 1173         }
 1174 
 1175         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
 1176 
 1177         if ((error = namei(&nd)) != 0) {
 1178                 pathbuf_destroy(pb);
 1179                 goto bad2;
 1180         }
              /* From here on failures go to `bad', which also releases vp. */
 1181         vp = nd.ni_vp;
 1182         pathbuf_destroy(pb);
 1183         if (vp->v_type != VSOCK) {
 1184                 error = ENOTSOCK;
 1185                 goto bad;
 1186         }
 1187         if ((error = VOP_ACCESS(vp, VWRITE, l->l_cred)) != 0)
 1188                 goto bad;
 1189         /* Acquire v_interlock to protect against unp_detach(). */
 1190         mutex_enter(vp->v_interlock);
 1191         so2 = vp->v_socket;
 1192         if (so2 == NULL) {
 1193                 mutex_exit(vp->v_interlock);
 1194                 error = ECONNREFUSED;
 1195                 goto bad;
 1196         }
 1197         if (so->so_type != so2->so_type) {
 1198                 mutex_exit(vp->v_interlock);
 1199                 error = EPROTOTYPE;
 1200                 goto bad;
 1201         }
 1202         solock(so);
 1203         unp_resetlock(so);
 1204         mutex_exit(vp->v_interlock);
 1205         if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
 1206                 /*
 1207                  * This may seem somewhat fragile but is OK: if we can
 1208                  * see SO_ACCEPTCONN set on the endpoint, then it must
 1209                  * be locked by the domain-wide uipc_lock.
 1210                  */
 1211                 KASSERT((so2->so_options & SO_ACCEPTCONN) == 0 ||
 1212                     so2->so_lock == uipc_lock);
 1213                 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
 1214                     (so3 = sonewconn(so2, false)) == NULL) {
 1215                         error = ECONNREFUSED;
 1216                         sounlock(so);
 1217                         goto bad;
 1218                 }
 1219                 unp2 = sotounpcb(so2);
 1220                 unp3 = sotounpcb(so3);
                      /* The new socket inherits a copy of the listener's address. */
 1221                 if (unp2->unp_addr) {
 1222                         unp3->unp_addr = malloc(unp2->unp_addrlen,
 1223                             M_SONAME, M_WAITOK);
 1224                         memcpy(unp3->unp_addr, unp2->unp_addr,
 1225                             unp2->unp_addrlen);
 1226                         unp3->unp_addrlen = unp2->unp_addrlen;
 1227                 }
 1228                 unp3->unp_flags = unp2->unp_flags;
                      /* From here on so2 is the newly created server-side socket. */
 1229                 so2 = so3;
 1230                 /*
 1231                  * The connector's (client's) credentials are copied from its
 1232                  * process structure at the time of connect() (which is now).
 1233                  */
 1234                 unp_connid(l, unp3, UNP_EIDSVALID);
 1235                  /*
 1236                   * The receiver's (server's) credentials are copied from the
 1237                   * unp_peercred member of socket on which the former called
 1238                   * listen(); unp_listen() cached that process's credentials
 1239                   * at that time so we can use them now.
 1240                   */
 1241                 if (unp2->unp_flags & UNP_EIDSBIND) {
 1242                         memcpy(&unp->unp_connid, &unp2->unp_connid,
 1243                             sizeof(unp->unp_connid));
 1244                         unp->unp_flags |= UNP_EIDSVALID;
 1245                 }
 1246         }
 1247         error = unp_connect1(so, so2, l);
 1248         if (error) {
 1249                 sounlock(so);
 1250                 goto bad;
 1251         }
 1252         unp2 = sotounpcb(so2);
 1253         switch (so->so_type) {
 1254 
 1255         /*
 1256          * SOCK_DGRAM and default cases are handled in prior call to
 1257          * unp_connect1(), do not add a default case without fixing
 1258          * unp_connect1().
 1259          */
 1260 
 1261         case SOCK_SEQPACKET: /* FALLTHROUGH */
 1262         case SOCK_STREAM:
 1263                 unp2->unp_conn = unp;
 1264                 if ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)
 1265                         soisconnecting(so);
 1266                 else
 1267                         soisconnected(so);
 1268                 soisconnected(so2);
 1269                 /*
 1270                  * If the connection is fully established, break the
 1271                  * association with uipc_lock and give the connected
 1272                  * pair a separate lock to share.
 1273                  */
 1274                 KASSERT(so2->so_head != NULL);
 1275                 unp_setpeerlocks(so, so2);
 1276                 break;
 1277 
 1278         }
              /*
               * The success path falls through the cleanup labels below with
               * error == 0; the socket lock is dropped here and re-taken
               * after `bad2'.
               */
 1279         sounlock(so);
 1280  bad:
 1281         vput(vp);
 1282  bad2:
 1283         free(sun, M_SONAME);
 1284         solock(so);
 1285         unp->unp_flags &= ~UNP_BUSY;
 1286         return (error);
 1287 }
 1288 
 1289 int
 1290 unp_connect2(struct socket *so, struct socket *so2)
 1291 {
 1292         struct unpcb *unp = sotounpcb(so);
 1293         struct unpcb *unp2;
 1294         int error = 0;
 1295 
 1296         KASSERT(solocked2(so, so2));
 1297 
 1298         error = unp_connect1(so, so2, curlwp);
 1299         if (error)
 1300                 return error;
 1301 
 1302         unp2 = sotounpcb(so2);
 1303         switch (so->so_type) {
 1304 
 1305         /*
 1306          * SOCK_DGRAM and default cases are handled in prior call to
 1307          * unp_connect1(), do not add a default case without fixing
 1308          * unp_connect1().
 1309          */
 1310 
 1311         case SOCK_SEQPACKET: /* FALLTHROUGH */
 1312         case SOCK_STREAM:
 1313                 unp2->unp_conn = unp;
 1314                 soisconnected(so);
 1315                 soisconnected(so2);
 1316                 break;
 1317 
 1318         }
 1319         return error;
 1320 }
 1321 
 1322 static void
 1323 unp_disconnect1(struct unpcb *unp)
 1324 {
 1325         struct unpcb *unp2 = unp->unp_conn;
 1326         struct socket *so;
 1327 
 1328         if (unp2 == 0)
 1329                 return;
 1330         unp->unp_conn = 0;
 1331         so = unp->unp_socket;
 1332         switch (so->so_type) {
 1333         case SOCK_DGRAM:
 1334                 if (unp2->unp_refs == unp)
 1335                         unp2->unp_refs = unp->unp_nextref;
 1336                 else {
 1337                         unp2 = unp2->unp_refs;
 1338                         for (;;) {
 1339                                 KASSERT(solocked2(so, unp2->unp_socket));
 1340                                 if (unp2 == 0)
 1341                                         panic("unp_disconnect1");
 1342                                 if (unp2->unp_nextref == unp)
 1343                                         break;
 1344                                 unp2 = unp2->unp_nextref;
 1345                         }
 1346                         unp2->unp_nextref = unp->unp_nextref;
 1347                 }
 1348                 unp->unp_nextref = 0;
 1349                 so->so_state &= ~SS_ISCONNECTED;
 1350                 break;
 1351 
 1352         case SOCK_SEQPACKET: /* FALLTHROUGH */
 1353         case SOCK_STREAM:
 1354                 KASSERT(solocked2(so, unp2->unp_socket));
 1355                 soisdisconnected(so);
 1356                 unp2->unp_conn = 0;
 1357                 soisdisconnected(unp2->unp_socket);
 1358                 break;
 1359         }
 1360 }
 1361 
 1362 static void
 1363 unp_shutdown1(struct unpcb *unp)
 1364 {
 1365         struct socket *so;
 1366 
 1367         switch(unp->unp_socket->so_type) {
 1368         case SOCK_SEQPACKET: /* FALLTHROUGH */
 1369         case SOCK_STREAM:
 1370                 if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
 1371                         socantrcvmore(so);
 1372                 break;
 1373         default:
 1374                 break;
 1375         }
 1376 }
 1377 
 1378 static bool
 1379 unp_drop(struct unpcb *unp, int errno)
 1380 {
 1381         struct socket *so = unp->unp_socket;
 1382 
 1383         KASSERT(solocked(so));
 1384 
 1385         so->so_error = errno;
 1386         unp_disconnect1(unp);
 1387         if (so->so_head) {
 1388                 so->so_pcb = NULL;
 1389                 /* sofree() drops the socket lock */
 1390                 sofree(so);
 1391                 unp_free(unp);
 1392                 return true;
 1393         }
 1394         return false;
 1395 }
 1396 
      /*
       * Empty drain stub, compiled only when `notdef' is defined.
       * NOTE(review): presumably `notdef' is never defined, making this
       * dead code -- confirm before relying on it.
       */
 1397 #ifdef notdef
 1398 unp_drain(void)
 1399 {
 1400 
 1401 }
 1402 #endif
 1403 
      /*
       * unp_externalize: turn the file pointers carried in a received
       * control message into descriptors of the receiving process.
       *
       * rights: mbuf holding a cmsghdr followed by an array of file_t *.
       * l:      receiving LWP; descriptors are allocated in l->l_proc.
       * flags:  if O_CLOEXEC is set, each new descriptor is close-on-exec.
       *
       * Returns 0 on success, EINVAL if a file pointer is NULL, EPERM if a
       * chroot'ed receiver is handed a directory outside its root, or
       * EMSGSIZE if descriptors cannot be allocated.  On error every passed
       * file is discarded and the message is truncated to an empty payload.
       */
 1404 int
 1405 unp_externalize(struct mbuf *rights, struct lwp *l, int flags)
 1406 {
 1407         struct cmsghdr * const cm = mtod(rights, struct cmsghdr *);
 1408         struct proc * const p = l->l_proc;
 1409         file_t **rp;
 1410         int error = 0;
 1411 
              /* Number of file pointers that follow the aligned header. */
 1412         const size_t nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
 1413             sizeof(file_t *);
 1414         if (nfds == 0)
 1415                 goto noop;
 1416 
              /* Scratch array for the descriptors allocated below. */
 1417         int * const fdp = kmem_alloc(nfds * sizeof(int), KM_SLEEP);
 1418         rw_enter(&p->p_cwdi->cwdi_lock, RW_READER);
 1419 
 1420         /* Make sure the recipient should be able to see the files.. */
 1421         rp = (file_t **)CMSG_DATA(cm);
 1422         for (size_t i = 0; i < nfds; i++) {
 1423                 file_t * const fp = *rp++;
 1424                 if (fp == NULL) {
 1425                         error = EINVAL;
 1426                         goto out;
 1427                 }
 1428                 /*
 1429                  * If we are in a chroot'ed directory, and
 1430                  * someone wants to pass us a directory, make
 1431                  * sure it's inside the subtree we're allowed
 1432                  * to access.
 1433                  */
 1434                 if (p->p_cwdi->cwdi_rdir != NULL && fp->f_type == DTYPE_VNODE) {
 1435                         vnode_t *vp = fp->f_vnode;
 1436                         if ((vp->v_type == VDIR) &&
 1437                             !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) {
 1438                                 error = EPERM;
 1439                                 goto out;
 1440                         }
 1441                 }
 1442         }
 1443 
 1444  restart:
 1445         /*
 1446          * First loop -- allocate file descriptor table slots for the
 1447          * new files.
 1448          */
 1449         for (size_t i = 0; i < nfds; i++) {
 1450                 if ((error = fd_alloc(p, 0, &fdp[i])) != 0) {
 1451                         /*
 1452                          * Back out what we've done so far.
 1453                          */
 1454                         while (i-- > 0) {
 1455                                 fd_abort(p, NULL, fdp[i]);
 1456                         }
                              /* ENOSPC: try to grow the table, then start over. */
 1457                         if (error == ENOSPC) {
 1458                                 fd_tryexpand(p);
 1459                                 error = 0;
 1460                                 goto restart;
 1461                         }
 1462                         /*
 1463                          * This is the error that has historically
 1464                          * been returned, and some callers may
 1465                          * expect it.
 1466                          */
 1467                         error = EMSGSIZE;
 1468                         goto out;
 1469                 }
 1470         }
 1471 
 1472         /*
 1473          * Now that adding them has succeeded, update all of the
 1474          * file passing state and affix the descriptors.
 1475          */
              /*
               * rp reads file pointers while ofdp writes ints over the same
               * payload, from the front.
               */
 1476         rp = (file_t **)CMSG_DATA(cm);
 1477         int *ofdp = (int *)CMSG_DATA(cm);
 1478         for (size_t i = 0; i < nfds; i++) {
 1479                 file_t * const fp = *rp++;
 1480                 const int fd = fdp[i];
 1481                 atomic_dec_uint(&unp_rights);
 1482                 fd_set_exclose(l, fd, (flags & O_CLOEXEC) != 0);
 1483                 fd_affix(p, fp, fd);
 1484                 /*
 1485                  * Done with this file pointer, replace it with a fd;
 1486                  */
 1487                 *ofdp++ = fd;
 1488                 mutex_enter(&fp->f_lock);
 1489                 fp->f_msgcount--;
 1490                 mutex_exit(&fp->f_lock);
 1491                 /*
 1492                  * Note that fd_affix() adds a reference to the file.
 1493                  * The file may already have been closed by another
 1494                  * LWP in the process, so we must drop the reference
 1495                  * added by unp_internalize() with closef().
 1496                  */
 1497                 closef(fp);
 1498         }
 1499 
 1500         /*
 1501          * Adjust length, in case of transition from large file_t
 1502          * pointers to ints.
 1503          */
 1504         if (sizeof(file_t *) != sizeof(int)) {
 1505                 cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
 1506                 rights->m_len = CMSG_SPACE(nfds * sizeof(int));
 1507         }
 1508  out:
 1509         if (__predict_false(error != 0)) {
                      /* Every error path arrives with the payload still file_t *. */
 1510                 file_t **const fpp = (file_t **)CMSG_DATA(cm);
 1511                 for (size_t i = 0; i < nfds; i++)
 1512                         unp_discard_now(fpp[i]);
 1513                 /*
 1514                  * Truncate the array so that nobody will try to interpret
 1515                  * what is now garbage in it.
 1516                  */
 1517                 cm->cmsg_len = CMSG_LEN(0);
 1518                 rights->m_len = CMSG_SPACE(0);
 1519         }
 1520         rw_exit(&p->p_cwdi->cwdi_lock);
 1521         kmem_free(fdp, nfds * sizeof(int));
 1522 
 1523  noop:
 1524         /*
 1525          * Don't disclose kernel memory in the alignment space.
 1526          */
 1527         KASSERT(cm->cmsg_len <= rights->m_len);
 1528         memset(&mtod(rights, char *)[cm->cmsg_len], 0, rights->m_len -
 1529             cm->cmsg_len);
 1530         return error;
 1531 }
 1532 
      /*
       * unp_internalize: turn the descriptors of an outgoing SCM_RIGHTS
       * control message into referenced file_t pointers.
       *
       * controlp: in/out; on success *controlp refers to the rewritten
       *           message (a new mbuf if the original had M_EXT set).
       *
       * Returns 0 on success, EINVAL for a malformed header, EAGAIN if
       * unp_rights would exceed maxfiles / unp_rights_ratio, EBADF for an
       * invalid descriptor or a kqueue, or E2BIG if the new buffer cannot be
       * allocated.  On error unp_rights is restored and the message is left
       * as it was.
       */
 1533 static int
 1534 unp_internalize(struct mbuf **controlp)
 1535 {
 1536         filedesc_t *fdescp = curlwp->l_fd;
 1537         fdtab_t *dt;
 1538         struct mbuf *control = *controlp;
 1539         struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
 1540         file_t **rp, **files;
 1541         file_t *fp;
 1542         int i, fd, *fdp;
 1543         int nfds, error;
 1544         u_int maxmsg;
 1545 
 1546         error = 0;
 1547         newcm = NULL;
 1548 
 1549         /* Sanity check the control message header. */
 1550         if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
 1551             cm->cmsg_len > control->m_len ||
 1552             cm->cmsg_len < CMSG_ALIGN(sizeof(*cm)))
 1553                 return (EINVAL);
 1554 
 1555         /*
 1556          * Verify that the file descriptors are valid, and acquire
 1557          * a reference to each.
 1558          */
 1559         nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
 1560         fdp = (int *)CMSG_DATA(cm);
 1561         maxmsg = maxfiles / unp_rights_ratio;
 1562         for (i = 0; i < nfds; i++) {
 1563                 fd = *fdp++;
                      /*
                       * On either failure below, nfds is cut back to the
                       * descriptors already processed so that the cleanup at
                       * `out' undoes exactly those.
                       */
 1564                 if (atomic_inc_uint_nv(&unp_rights) > maxmsg) {
 1565                         atomic_dec_uint(&unp_rights);
 1566                         nfds = i;
 1567                         error = EAGAIN;
 1568                         goto out;
 1569                 }
 1570                 if ((fp = fd_getfile(fd)) == NULL
 1571                     || fp->f_type == DTYPE_KQUEUE) {
 1572                         if (fp)
 1573                                 fd_putfile(fd);
 1574                         atomic_dec_uint(&unp_rights);
 1575                         nfds = i;
 1576                         error = EBADF;
 1577                         goto out;
 1578                 }
 1579         }
 1580 
 1581         /* Allocate new space and copy header into it. */
 1582         newcm = malloc(CMSG_SPACE(nfds * sizeof(file_t *)), M_MBUF, M_WAITOK);
              /*
               * NOTE(review): an M_WAITOK allocation presumably cannot return
               * NULL, which would make this check unreachable -- confirm.
               */
 1583         if (newcm == NULL) {
 1584                 error = E2BIG;
 1585                 goto out;
 1586         }
 1587         memcpy(newcm, cm, sizeof(struct cmsghdr));
              /* Clear the padding between the header and the payload. */
 1588         memset(newcm + 1, 0, CMSG_LEN(0) - sizeof(struct cmsghdr));
 1589         files = (file_t **)CMSG_DATA(newcm);
 1590 
 1591         /*
 1592          * Transform the file descriptors into file_t pointers, in
 1593          * reverse order so that if pointers are bigger than ints, the
 1594          * int won't get clobbered until we're done.  No need to lock, as
 1595          * we have already validated the descriptors with fd_getfile().
 1596          */
              /*
               * NOTE(review): the pointers are written into the separate
               * newcm buffer here, so the reverse order no longer looks
               * necessary -- confirm.
               */
 1597         fdp = (int *)CMSG_DATA(cm) + nfds;
 1598         rp = files + nfds;
 1599         for (i = 0; i < nfds; i++) {
 1600                 dt = atomic_load_consume(&fdescp->fd_dt);
 1601                 fp = atomic_load_consume(&dt->dt_ff[*--fdp]->ff_file);
 1602                 KASSERT(fp != NULL);
 1603                 mutex_enter(&fp->f_lock);
 1604                 *--rp = fp;
 1605                 fp->f_count++;
 1606                 fp->f_msgcount++;
 1607                 mutex_exit(&fp->f_lock);
 1608         }
 1609 
 1610  out:
 1611         /* Release descriptor references. */
 1612         fdp = (int *)CMSG_DATA(cm);
 1613         for (i = 0; i < nfds; i++) {
 1614                 fd_putfile(*fdp++);
                      /* On failure also give back the in-flight count taken above. */
 1615                 if (error != 0) {
 1616                         atomic_dec_uint(&unp_rights);
 1617                 }
 1618         }
 1619 
 1620         if (error == 0) {
                      /*
                       * An mbuf that already has M_EXT set is replaced with a
                       * fresh one before newcm is attached as external
                       * storage.
                       */
 1621                 if (control->m_flags & M_EXT) {
 1622                         m_freem(control);
 1623                         *controlp = control = m_get(M_WAIT, MT_CONTROL);
 1624                 }
 1625                 MEXTADD(control, newcm, CMSG_SPACE(nfds * sizeof(file_t *)),
 1626                     M_MBUF, NULL, NULL);
 1627                 cm = newcm;
 1628                 /*
 1629                  * Adjust message & mbuf to note amount of space
 1630                  * actually used.
 1631                  */
 1632                 cm->cmsg_len = CMSG_LEN(nfds * sizeof(file_t *));
 1633                 control->m_len = CMSG_SPACE(nfds * sizeof(file_t *));
 1634         }
 1635 
 1636         return error;
 1637 }
 1638 
 1639 struct mbuf *
 1640 unp_addsockcred(struct lwp *l, struct mbuf *control)
 1641 {
 1642         struct sockcred *sc;
 1643         struct mbuf *m;
 1644         void *p;
 1645 
 1646         m = sbcreatecontrol1(&p, SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)),
 1647                 SCM_CREDS, SOL_SOCKET, M_WAITOK);
 1648         if (m == NULL)
 1649                 return control;
 1650 
 1651         sc = p;
 1652         sc->sc_pid = l->l_proc->p_pid;
 1653         sc->sc_uid = kauth_cred_getuid(l->l_cred);
 1654         sc->sc_euid = kauth_cred_geteuid(l->l_cred);
 1655         sc->sc_gid = kauth_cred_getgid(l->l_cred);
 1656         sc->sc_egid = kauth_cred_getegid(l->l_cred);
 1657         sc->sc_ngroups = kauth_cred_ngroups(l->l_cred);
 1658 
 1659         for (int i = 0; i < sc->sc_ngroups; i++)
 1660                 sc->sc_groups[i] = kauth_cred_group(l->l_cred, i);
 1661 
 1662         return m_add(control, m);
 1663 }
 1664 
 1665 /*
 1666  * Do a mark-sweep GC of files in the system, to free up any which are
 1667  * caught in flight to an about-to-be-closed socket.  Additionally,
 1668  * process deferred file closures.
 1669  */
 1670 static void
 1671 unp_gc(file_t *dp)
 1672 {
 1673         extern  struct domain unixdomain;
 1674         file_t *fp, *np;
 1675         struct socket *so, *so1;
 1676         u_int i, oflags, rflags;
 1677         bool didwork;
 1678 
 1679         KASSERT(curlwp == unp_thread_lwp);
 1680         KASSERT(mutex_owned(&filelist_lock));
 1681 
 1682         /*
 1683          * First, process deferred file closures.
 1684          */
 1685         while (!SLIST_EMPTY(&unp_thread_discard)) {
 1686                 fp = SLIST_FIRST(&unp_thread_discard);
 1687                 KASSERT(fp->f_unpcount > 0);
 1688                 KASSERT(fp->f_count > 0);
 1689                 KASSERT(fp->f_msgcount > 0);
 1690                 KASSERT(fp->f_count >= fp->f_unpcount);
 1691                 KASSERT(fp->f_count >= fp->f_msgcount);
 1692                 KASSERT(fp->f_msgcount >= fp->f_unpcount);
 1693                 SLIST_REMOVE_HEAD(&unp_thread_discard, f_unplist);
 1694                 i = fp->f_unpcount;
 1695                 fp->f_unpcount = 0;
 1696                 mutex_exit(&filelist_lock);
 1697                 for (; i != 0; i--) {
 1698                         unp_discard_now(fp);
 1699                 }
 1700                 mutex_enter(&filelist_lock);
 1701         }
 1702 
 1703         /*
 1704          * Clear mark bits.  Ensure that we don't consider new files
 1705          * entering the file table during this loop (they will not have
 1706          * FSCAN set).
 1707          */
 1708         unp_defer = 0;
 1709         LIST_FOREACH(fp, &filehead, f_list) {
 1710                 for (oflags = fp->f_flag;; oflags = rflags) {
 1711                         rflags = atomic_cas_uint(&fp->f_flag, oflags,
 1712                             (oflags | FSCAN) & ~(FMARK|FDEFER));
 1713                         if (__predict_true(oflags == rflags)) {
 1714                                 break;
 1715                         }
 1716                 }
 1717         }
 1718 
 1719         /*
 1720          * Iterate over the set of sockets, marking ones believed (based on
 1721          * refcount) to be referenced from a process, and marking for rescan
 1722          * sockets which are queued on a socket.  Recan continues descending
 1723          * and searching for sockets referenced by sockets (FDEFER), until
 1724          * there are no more socket->socket references to be discovered.
 1725          */
 1726         do {
 1727                 didwork = false;
 1728                 for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
 1729                         KASSERT(mutex_owned(&filelist_lock));
 1730                         np = LIST_NEXT(fp, f_list);
 1731                         mutex_enter(&fp->f_lock);
 1732                         if ((fp->f_flag & FDEFER) != 0) {
 1733                                 atomic_and_uint(&fp->f_flag, ~FDEFER);
 1734                                 unp_defer--;
 1735                                 if (fp->f_count == 0) {
 1736                                         /*
 1737                                          * XXX: closef() doesn't pay attention
 1738                                          * to FDEFER
 1739                                          */
 1740                                         mutex_exit(&fp->f_lock);
 1741                                         continue;
 1742                                 }
 1743                         } else {
 1744                                 if (fp->f_count == 0 ||
 1745                                     (fp->f_flag & FMARK) != 0 ||
 1746                                     fp->f_count == fp->f_msgcount ||
 1747                                     fp->f_unpcount != 0) {
 1748                                         mutex_exit(&fp->f_lock);
 1749                                         continue;
 1750                                 }
 1751                         }
 1752                         atomic_or_uint(&fp->f_flag, FMARK);
 1753 
 1754                         if (fp->f_type != DTYPE_SOCKET ||
 1755                             (so = fp->f_socket) == NULL ||
 1756                             so->so_proto->pr_domain != &unixdomain ||
 1757                             (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
 1758                                 mutex_exit(&fp->f_lock);
 1759                                 continue;
 1760                         }
 1761 
 1762                         /* Gain file ref, mark our position, and unlock. */
 1763                         didwork = true;
 1764                         LIST_INSERT_AFTER(fp, dp, f_list);
 1765                         fp->f_count++;
 1766                         mutex_exit(&fp->f_lock);
 1767                         mutex_exit(&filelist_lock);
 1768 
 1769                         /*
 1770                          * Mark files referenced from sockets queued on the
 1771                          * accept queue as well.
 1772                          */
 1773                         solock(so);
 1774                         unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
 1775                         if ((so->so_options & SO_ACCEPTCONN) != 0) {
 1776                                 TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
 1777                                         unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
 1778                                 }
 1779                                 TAILQ_FOREACH(so1, &so->so_q, so_qe) {
 1780                                         unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
 1781                                 }
 1782                         }
 1783                         sounlock(so);
 1784 
 1785                         /* Re-lock and restart from where we left off. */
 1786                         closef(fp);
 1787                         mutex_enter(&filelist_lock);
 1788                         np = LIST_NEXT(dp, f_list);
 1789                         LIST_REMOVE(dp, f_list);
 1790                 }
 1791                 /*
 1792                  * Bail early if we did nothing in the loop above.  Could
 1793                  * happen because of concurrent activity causing unp_defer
 1794                  * to get out of sync.
 1795                  */
 1796         } while (unp_defer != 0 && didwork);
 1797 
 1798         /*
 1799          * Sweep pass.
 1800          *
 1801          * We grab an extra reference to each of the files that are
 1802          * not otherwise accessible and then free the rights that are
 1803          * stored in messages on them.
 1804          */
 1805         for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
 1806                 KASSERT(mutex_owned(&filelist_lock));
 1807                 np = LIST_NEXT(fp, f_list);
 1808                 mutex_enter(&fp->f_lock);
 1809 
 1810                 /*
 1811                  * Ignore non-sockets.
 1812                  * Ignore dead sockets, or sockets with pending close.
 1813                  * Ignore sockets obviously referenced elsewhere. 
 1814                  * Ignore sockets marked as referenced by our scan.
 1815                  * Ignore new sockets that did not exist during the scan.
 1816                  */
 1817                 if (fp->f_type != DTYPE_SOCKET ||
 1818                     fp->f_count == 0 || fp->f_unpcount != 0 ||
 1819                     fp->f_count != fp->f_msgcount ||
 1820                     (fp->f_flag & (FMARK | FSCAN)) != FSCAN) {
 1821                         mutex_exit(&fp->f_lock);
 1822                         continue;
 1823                 }
 1824 
 1825                 /* Gain file ref, mark our position, and unlock. */
 1826                 LIST_INSERT_AFTER(fp, dp, f_list);
 1827                 fp->f_count++;
 1828                 mutex_exit(&fp->f_lock);
 1829                 mutex_exit(&filelist_lock);
 1830 
 1831                 /*
 1832                  * Flush all data from the socket's receive buffer.
 1833                  * This will cause files referenced only by the
 1834                  * socket to be queued for close.
 1835                  */
 1836                 so = fp->f_socket;
 1837                 solock(so);
 1838                 sorflush(so);
 1839                 sounlock(so);
 1840 
 1841                 /* Re-lock and restart from where we left off. */
 1842                 closef(fp);
 1843                 mutex_enter(&filelist_lock);
 1844                 np = LIST_NEXT(dp, f_list);
 1845                 LIST_REMOVE(dp, f_list);
 1846         }
 1847 }
 1848 
 1849 /*
 1850  * Garbage collector thread.  While SCM_RIGHTS messages are in transit,
 1851  * wake once per second to garbage collect.  Run continually while we
 1852  * have deferred closes to process.
 1853  */
 1854 static void
 1855 unp_thread(void *cookie)
 1856 {
 1857         file_t *dp;
 1858 
 1859         /* Allocate a dummy file for our scans. */
 1860         if ((dp = fgetdummy()) == NULL) {
 1861                 panic("unp_thread");
 1862         }
 1863 
 1864         mutex_enter(&filelist_lock);
 1865         for (;;) {
 1866                 KASSERT(mutex_owned(&filelist_lock));
 1867                 if (SLIST_EMPTY(&unp_thread_discard)) {
 1868                         if (unp_rights != 0) {
 1869                                 (void)cv_timedwait(&unp_thread_cv,
 1870                                     &filelist_lock, hz);
 1871                         } else {
 1872                                 cv_wait(&unp_thread_cv, &filelist_lock);
 1873                         }
 1874                 }
 1875                 unp_gc(dp);
 1876         }
 1877         /* NOTREACHED */
 1878 }
 1879 
 1880 /*
 1881  * Kick the garbage collector into action if there is something for
 1882  * it to process.
 1883  */
 1884 static void
 1885 unp_thread_kick(void)
 1886 {
 1887 
 1888         if (!SLIST_EMPTY(&unp_thread_discard) || unp_rights != 0) {
 1889                 mutex_enter(&filelist_lock);
 1890                 cv_signal(&unp_thread_cv);
 1891                 mutex_exit(&filelist_lock);
 1892         }
 1893 }
 1894 
 1895 void
 1896 unp_dispose(struct mbuf *m)
 1897 {
 1898 
 1899         if (m)
 1900                 unp_scan(m, unp_discard_later, 1);
 1901 }
 1902 
 1903 void
 1904 unp_scan(struct mbuf *m0, void (*op)(file_t *), int discard)
 1905 {
 1906         struct mbuf *m;
 1907         file_t **rp, *fp;
 1908         struct cmsghdr *cm;
 1909         int i, qfds;
 1910 
 1911         while (m0) {
 1912                 for (m = m0; m; m = m->m_next) {
 1913                         if (m->m_type != MT_CONTROL ||
 1914                             m->m_len < sizeof(*cm)) {
 1915                                 continue;
 1916                         }
 1917                         cm = mtod(m, struct cmsghdr *);
 1918                         if (cm->cmsg_level != SOL_SOCKET ||
 1919                             cm->cmsg_type != SCM_RIGHTS)
 1920                                 continue;
 1921                         qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
 1922                             / sizeof(file_t *);
 1923                         rp = (file_t **)CMSG_DATA(cm);
 1924                         for (i = 0; i < qfds; i++) {
 1925                                 fp = *rp;
 1926                                 if (discard) {
 1927                                         *rp = 0;
 1928                                 }
 1929                                 (*op)(fp);
 1930                                 rp++;
 1931                         }
 1932                 }
 1933                 m0 = m0->m_nextpkt;
 1934         }
 1935 }
 1936 
 1937 void
 1938 unp_mark(file_t *fp)
 1939 {
 1940 
 1941         if (fp == NULL)
 1942                 return;
 1943 
 1944         /* If we're already deferred, don't screw up the defer count */
 1945         mutex_enter(&fp->f_lock);
 1946         if (fp->f_flag & (FMARK | FDEFER)) {
 1947                 mutex_exit(&fp->f_lock);
 1948                 return;
 1949         }
 1950 
 1951         /*
 1952          * Minimize the number of deferrals...  Sockets are the only type of
 1953          * file which can hold references to another file, so just mark
 1954          * other files, and defer unmarked sockets for the next pass.
 1955          */
 1956         if (fp->f_type == DTYPE_SOCKET) {
 1957                 unp_defer++;
 1958                 KASSERT(fp->f_count != 0);
 1959                 atomic_or_uint(&fp->f_flag, FDEFER);
 1960         } else {
 1961                 atomic_or_uint(&fp->f_flag, FMARK);
 1962         }
 1963         mutex_exit(&fp->f_lock);
 1964 }
 1965 
 1966 static void
 1967 unp_discard_now(file_t *fp)
 1968 {
 1969 
 1970         if (fp == NULL)
 1971                 return;
 1972 
 1973         KASSERT(fp->f_count > 0);
 1974         KASSERT(fp->f_msgcount > 0);
 1975 
 1976         mutex_enter(&fp->f_lock);
 1977         fp->f_msgcount--;
 1978         mutex_exit(&fp->f_lock);
 1979         atomic_dec_uint(&unp_rights);
 1980         (void)closef(fp);
 1981 }
 1982 
 1983 static void
 1984 unp_discard_later(file_t *fp)
 1985 {
 1986 
 1987         if (fp == NULL)
 1988                 return;
 1989 
 1990         KASSERT(fp->f_count > 0);
 1991         KASSERT(fp->f_msgcount > 0);
 1992 
 1993         mutex_enter(&filelist_lock);
 1994         if (fp->f_unpcount++ == 0) {
 1995                 SLIST_INSERT_HEAD(&unp_thread_discard, fp, f_unplist);
 1996         }
 1997         mutex_exit(&filelist_lock);
 1998 }
 1999 
 2000 static void
 2001 unp_sysctl_create(void)
 2002 {
 2003 
 2004         KASSERT(usrreq_sysctllog == NULL);
 2005         sysctl_createv(&usrreq_sysctllog, 0, NULL, NULL,
 2006                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 2007                        CTLTYPE_LONG, "sendspace",
 2008                        SYSCTL_DESCR("Default stream send space"),
 2009                        NULL, 0, &unpst_sendspace, 0,
 2010                        CTL_NET, PF_LOCAL, SOCK_STREAM, CTL_CREATE, CTL_EOL);
 2011         sysctl_createv(&usrreq_sysctllog, 0, NULL, NULL,
 2012                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 2013                        CTLTYPE_LONG, "recvspace",
 2014                        SYSCTL_DESCR("Default stream recv space"),
 2015                        NULL, 0, &unpst_recvspace, 0,
 2016                        CTL_NET, PF_LOCAL, SOCK_STREAM, CTL_CREATE, CTL_EOL);
 2017         sysctl_createv(&usrreq_sysctllog, 0, NULL, NULL,
 2018                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 2019                        CTLTYPE_LONG, "sendspace",
 2020                        SYSCTL_DESCR("Default datagram send space"),
 2021                        NULL, 0, &unpdg_sendspace, 0,
 2022                        CTL_NET, PF_LOCAL, SOCK_DGRAM, CTL_CREATE, CTL_EOL);
 2023         sysctl_createv(&usrreq_sysctllog, 0, NULL, NULL,
 2024                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 2025                        CTLTYPE_LONG, "recvspace",
 2026                        SYSCTL_DESCR("Default datagram recv space"),
 2027                        NULL, 0, &unpdg_recvspace, 0,
 2028                        CTL_NET, PF_LOCAL, SOCK_DGRAM, CTL_CREATE, CTL_EOL);
 2029         sysctl_createv(&usrreq_sysctllog, 0, NULL, NULL,
 2030                        CTLFLAG_PERMANENT|CTLFLAG_READONLY,
 2031                        CTLTYPE_INT, "inflight",
 2032                        SYSCTL_DESCR("File descriptors in flight"),
 2033                        NULL, 0, &unp_rights, 0,
 2034                        CTL_NET, PF_LOCAL, CTL_CREATE, CTL_EOL);
 2035         sysctl_createv(&usrreq_sysctllog, 0, NULL, NULL,
 2036                        CTLFLAG_PERMANENT|CTLFLAG_READONLY,
 2037                        CTLTYPE_INT, "deferred",
 2038                        SYSCTL_DESCR("File descriptors deferred for close"),
 2039                        NULL, 0, &unp_defer, 0,
 2040                        CTL_NET, PF_LOCAL, CTL_CREATE, CTL_EOL);
 2041 }
 2042 
 2043 const struct pr_usrreqs unp_usrreqs = {
 2044         .pr_attach      = unp_attach,
 2045         .pr_detach      = unp_detach,
 2046         .pr_accept      = unp_accept,
 2047         .pr_bind        = unp_bind,
 2048         .pr_listen      = unp_listen,
 2049         .pr_connect     = unp_connect,
 2050         .pr_connect2    = unp_connect2,
 2051         .pr_disconnect  = unp_disconnect,
 2052         .pr_shutdown    = unp_shutdown,
 2053         .pr_abort       = unp_abort,
 2054         .pr_ioctl       = unp_ioctl,
 2055         .pr_stat        = unp_stat,
 2056         .pr_peeraddr    = unp_peeraddr,
 2057         .pr_sockaddr    = unp_sockaddr,
 2058         .pr_rcvd        = unp_rcvd,
 2059         .pr_recvoob     = unp_recvoob,
 2060         .pr_send        = unp_send,
 2061         .pr_sendoob     = unp_sendoob,
 2062 };

Cache object: 4827218202d2f85b5c10ef0c62e056c7


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.