The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    3  *      The Regents of the University of California.
    4  * Copyright (c) 2004-2007 Robert N. M. Watson
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 4. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   32  */
   33 
   34 /*
   35  * UNIX Domain (Local) Sockets
   36  *
   37  * This is an implementation of UNIX (local) domain sockets.  Each socket has
   38  * an associated struct unpcb (UNIX protocol control block).  Stream sockets
   39  * may be connected to 0 or 1 other socket.  Datagram sockets may be
   40  * connected to 0, 1, or many other sockets.  Sockets may be created and
   41  * connected in pairs (socketpair(2)), or bound/connected to using the file
   42  * system name space.  For most purposes, only the receive socket buffer is
   43  * used, as sending on one socket delivers directly to the receive socket
   44  * buffer of a second socket.  The implementation is substantially
   45  * complicated by the fact that "ancillary data", such as file descriptors or
   46  * credentials, may be passed across UNIX domain sockets.  The potential for
   47  * passing UNIX domain sockets over other UNIX domain sockets requires the
   48  * implementation of a simple garbage collector to find and tear down cycles
   49  * of disconnected sockets.
   50  */
   51 
   52 #include <sys/cdefs.h>
   53 __FBSDID("$FreeBSD: releng/6.4/sys/kern/uipc_usrreq.c 172428 2007-10-03 21:06:05Z jhb $");
   54 
   55 #include "opt_mac.h"
   56 
   57 #include <sys/param.h>
   58 #include <sys/domain.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
   61 #include <sys/eventhandler.h>
   62 #include <sys/file.h>
   63 #include <sys/filedesc.h>
   64 #include <sys/jail.h>
   65 #include <sys/kernel.h>
   66 #include <sys/lock.h>
   67 #include <sys/mac.h>
   68 #include <sys/mbuf.h>
   69 #include <sys/mount.h>
   70 #include <sys/mutex.h>
   71 #include <sys/namei.h>
   72 #include <sys/proc.h>
   73 #include <sys/protosw.h>
   74 #include <sys/resourcevar.h>
   75 #include <sys/socket.h>
   76 #include <sys/socketvar.h>
   77 #include <sys/signalvar.h>
   78 #include <sys/stat.h>
   79 #include <sys/sx.h>
   80 #include <sys/sysctl.h>
   81 #include <sys/systm.h>
   82 #include <sys/taskqueue.h>
   83 #include <sys/un.h>
   84 #include <sys/unpcb.h>
   85 #include <sys/vnode.h>
   86 
   87 #include <vm/uma.h>
   88 
   89 /*
   90  * We allocate wrapper objects that add the reference count to an existing
   91  * unpcb in 6.x to preserve the ABI layout of unpcb.
   92  */
struct unpcb_wrapper {
        /* Kept as the first member: UNP_REFCOUNT() casts a unpcb pointer to the wrapper. */
        struct  unpcb unpw_unpcb;
        /* Reference count stored outside struct unpcb itself. */
        u_int   unpw_refcount;
};
   97 
/*
 * Access the reference count stored next to a unpcb.  The cast relies on the
 * unpcb being the first member of struct unpcb_wrapper, so it is only valid
 * for a unpcb that is embedded in a wrapper.
 */
#define UNP_REFCOUNT(unp)       (((struct unpcb_wrapper *)(unp))->unpw_refcount)
   99 
/* NOTE(review): UMA zone handle, presumably backing pcb allocation -- confirm in unp_attach(). */
static uma_zone_t unp_zone;
/* NOTE(review): generation counter and pcb count; their users are outside this excerpt. */
static  unp_gen_t unp_gencnt;
static  u_int unp_count;

/* NOTE(review): presumably the lists of stream and datagram pcbs -- confirm against users. */
static  struct unp_head unp_shead, unp_dhead;
  105 
  106 /*
  107  * Unix communications domain.
  108  *
  109  * TODO:
  110  *      SEQPACKET, RDM
  111  *      rethink name space problems
  112  *      need a proper out-of-band
  113  *      lock pushdown
  114  */
/*
 * Placeholder address (length and family only) handed back by the address
 * reporting and send paths when a socket has no bound address.
 */
static const struct     sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
static ino_t    unp_ino;                /* prototype for fake inode numbers */
/* Used by uipc_send() when the receiving peer has UNP_WANTCRED set. */
struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);
  118 
  119 /*
  120  * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
  121  * stream sockets, although the total for sender and receiver is actually
  122  * only PIPSIZ.
  123  *
  124  * Datagram sockets really use the sendspace as the maximum datagram size,
  125  * and don't really want to reserve the sendspace.  Their recvspace should be
  126  * large enough for at least one max-size datagram plus address.
  127  */
/* Default stream buffer size; may be overridden by defining PIPSIZ earlier. */
#ifndef PIPSIZ
#define PIPSIZ  8192
#endif
/* Exported below as net.local.stream.sendspace / recvspace. */
static u_long   unpst_sendspace = PIPSIZ;
static u_long   unpst_recvspace = PIPSIZ;
/* Exported below as net.local.dgram.maxdgram / recvspace. */
static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
static u_long   unpdg_recvspace = 4*1024;

/* Exported read-only below as net.local.inflight. */
static int      unp_rights;                     /* file descriptors in flight */
  137 
/* Sysctl tree: net.local, with net.local.stream and net.local.dgram below it. */
SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain");
SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0, "SOCK_STREAM");
SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM");

/* Read/write tunables for the stream and datagram buffer sizes. */
SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
           &unpst_sendspace, 0, "");
SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
           &unpst_recvspace, 0, "");
SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
           &unpdg_sendspace, 0, "");
SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
           &unpdg_recvspace, 0, "");
/* Read-only view of unp_rights. */
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
  151 
  152 /*
  153  * Currently, UNIX domain sockets are protected by a single subsystem lock,
  154  * which covers global data structures and variables, the contents of each
  155  * per-socket unpcb structure, and the so_pcb field in sockets attached to
  156  * the UNIX domain.  This provides for a moderate degree of parallelism, as
  157  * receive operations on UNIX domain sockets do not need to acquire the
  158  * subsystem lock.  Finer grained locking to permit send() without acquiring
  159  * a global lock would be a logical next step.
  160  *
  161  * The UNIX domain socket lock precedes all socket layer locks, including the
  162  * socket lock and socket buffer lock, permitting UNIX domain socket code to
  163  * call into socket support routines without releasing its locks.
  164  *
  165  * Some caution is required in areas where the UNIX domain socket code enters
  166  * VFS in order to create or find rendezvous points.  This results in
  167  * dropping of the UNIX domain socket subsystem lock, acquisition of the
  168  * Giant lock, and potential sleeping.  This increases the chances of races,
  169  * and exposes weaknesses in the socket->protocol API by offering poor
  170  * failure modes.
  171  */
/* The single subsystem mutex and its wrappers. */
static struct mtx unp_mtx;
/* Initialised as a default mutex that may be acquired recursively. */
#define UNP_LOCK_INIT() \
        mtx_init(&unp_mtx, "unp", NULL, MTX_DEF | MTX_RECURSE)
#define UNP_LOCK()              mtx_lock(&unp_mtx)
#define UNP_UNLOCK()            mtx_unlock(&unp_mtx)
/* Assert that the current thread does / does not hold the subsystem mutex. */
#define UNP_LOCK_ASSERT()       mtx_assert(&unp_mtx, MA_OWNED)
#define UNP_UNLOCK_ASSERT()     mtx_assert(&unp_mtx, MA_NOTOWNED)
  179 
  180 /*
  181  * Garbage collection of cyclic file descriptor/socket references occurs
  182  * asynchronously in a taskqueue context in order to avoid recursion and
  183  * reentrance in the UNIX domain socket, file descriptor, and socket layer
  184  * code.  See unp_gc() for a full description.
  185  */
/* Task used to run unp_gc() from a taskqueue. */
static struct task      unp_gc_task;

/* Forward declarations of the internal helpers used by the pru_* handlers. */
static int     unp_attach(struct socket *);
static void    unp_detach(struct unpcb *);
static int     unp_connect(struct socket *,struct sockaddr *, struct thread *);
static int     unp_connect2(struct socket *so, struct socket *so2, int);
static void    unp_disconnect(struct unpcb *);
static void    unp_shutdown(struct unpcb *);
static void    unp_drop(struct unpcb *, int);
static void    unp_gc(__unused void *, int);
static void    unp_scan(struct mbuf *, void (*)(struct file *));
static void    unp_mark(struct file *);
static void    unp_discard(struct file *);
static void    unp_freerights(struct file **, int);
static int     unp_internalize(struct mbuf **, struct thread *);
static int     unp_listen(struct socket *, struct unpcb *, struct thread *);
  202 
  203 static int
  204 uipc_abort(struct socket *so)
  205 {
  206         struct unpcb *unp;
  207 
  208         UNP_LOCK();
  209         unp = sotounpcb(so);
  210         if (unp == NULL) {
  211                 UNP_UNLOCK();
  212                 return (EINVAL);
  213         }
  214         unp_drop(unp, ECONNABORTED);
  215         unp_detach(unp);
  216         UNP_UNLOCK_ASSERT();
  217         ACCEPT_LOCK();
  218         SOCK_LOCK(so);
  219         sotryfree(so);
  220         return (0);
  221 }
  222 
  223 static int
  224 uipc_accept(struct socket *so, struct sockaddr **nam)
  225 {
  226         struct unpcb *unp;
  227         const struct sockaddr *sa;
  228 
  229         /*
  230          * Pass back name of connected socket, if it was bound and we are
  231          * still connected (our peer may have closed already!).
  232          */
  233         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  234         UNP_LOCK();
  235         unp = sotounpcb(so);
  236         if (unp == NULL) {
  237                 UNP_UNLOCK();
  238                 free(*nam, M_SONAME);
  239                 *nam = NULL;
  240                 return (EINVAL);
  241         }
  242         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL)
  243                 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
  244         else
  245                 sa = &sun_noname;
  246         bcopy(sa, *nam, sa->sa_len);
  247         UNP_UNLOCK();
  248         return (0);
  249 }
  250 
  251 static int
  252 uipc_attach(struct socket *so, int proto, struct thread *td)
  253 {
  254         struct unpcb *unp = sotounpcb(so);
  255 
  256         if (unp != NULL)
  257                 return (EISCONN);
  258         return (unp_attach(so));
  259 }
  260 
  261 static int
  262 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  263 {
  264         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  265         struct vattr vattr;
  266         int error, namelen;
  267         struct nameidata nd;
  268         struct unpcb *unp;
  269         struct vnode *vp;
  270         struct mount *mp;
  271         char *buf;
  272 
  273         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  274         if (namelen <= 0)
  275                 return (EINVAL);
  276 
  277         UNP_LOCK();
  278         unp = sotounpcb(so);
  279         if (unp == NULL) {
  280                 UNP_UNLOCK();
  281                 return (EINVAL);
  282         }
  283 
  284         /*
  285          * We don't allow simultaneous bind() calls on a single UNIX domain
  286          * socket, so flag in-progress operations, and return an error if an
  287          * operation is already in progress.
  288          *
  289          * Historically, we have not allowed a socket to be rebound, so this
  290          * also returns an error.  Not allowing re-binding certainly
  291          * simplifies the implementation and avoids a great many possible
  292          * failure modes.
  293          */
  294         if (unp->unp_vnode != NULL) {
  295                 UNP_UNLOCK();
  296                 return (EINVAL);
  297         }
  298         if (unp->unp_flags & UNP_BINDING) {
  299                 UNP_UNLOCK();
  300                 return (EALREADY);
  301         }
  302         unp->unp_flags |= UNP_BINDING;
  303         UNP_UNLOCK();
  304 
  305         buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
  306         strlcpy(buf, soun->sun_path, namelen + 1);
  307 
  308         mtx_lock(&Giant);
  309 restart:
  310         mtx_assert(&Giant, MA_OWNED);
  311         NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
  312             buf, td);
  313 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  314         error = namei(&nd);
  315         if (error)
  316                 goto error;
  317         vp = nd.ni_vp;
  318         if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
  319                 NDFREE(&nd, NDF_ONLY_PNBUF);
  320                 if (nd.ni_dvp == vp)
  321                         vrele(nd.ni_dvp);
  322                 else
  323                         vput(nd.ni_dvp);
  324                 if (vp != NULL) {
  325                         vrele(vp);
  326                         error = EADDRINUSE;
  327                         goto error;
  328                 }
  329                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
  330                 if (error)
  331                         goto error;
  332                 goto restart;
  333         }
  334         VATTR_NULL(&vattr);
  335         vattr.va_type = VSOCK;
  336         vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
  337 #ifdef MAC
  338         error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  339             &vattr);
  340 #endif
  341         if (error == 0) {
  342                 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
  343                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  344         }
  345         NDFREE(&nd, NDF_ONLY_PNBUF);
  346         vput(nd.ni_dvp);
  347         if (error) {
  348                 vn_finished_write(mp);
  349                 goto error;
  350         }
  351         vp = nd.ni_vp;
  352         ASSERT_VOP_ELOCKED(vp, "uipc_bind");
  353         soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
  354         UNP_LOCK();
  355         vp->v_socket = unp->unp_socket;
  356         unp->unp_vnode = vp;
  357         unp->unp_addr = soun;
  358         unp->unp_flags &= ~UNP_BINDING;
  359         UNP_UNLOCK();
  360         VOP_UNLOCK(vp, 0, td);
  361         vn_finished_write(mp);
  362         mtx_unlock(&Giant);
  363         free(buf, M_TEMP);
  364         return (0);
  365 error:
  366         UNP_LOCK();
  367         unp->unp_flags &= ~UNP_BINDING;
  368         UNP_UNLOCK();
  369         mtx_unlock(&Giant);
  370         free(buf, M_TEMP);
  371         return (error);
  372 }
  373 
  374 static int
  375 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  376 {
  377         struct unpcb *unp;
  378         int error;
  379 
  380         KASSERT(td == curthread, ("uipc_connect: td != curthread"));
  381 
  382         UNP_LOCK();
  383         unp = sotounpcb(so);
  384         if (unp == NULL) {
  385                 UNP_UNLOCK();
  386                 return (EINVAL);
  387         }
  388         error = unp_connect(so, nam, td);
  389         UNP_UNLOCK();
  390         return (error);
  391 }
  392 
  393 int
  394 uipc_connect2(struct socket *so1, struct socket *so2)
  395 {
  396         struct unpcb *unp;
  397         int error;
  398 
  399         UNP_LOCK();
  400         unp = sotounpcb(so1);
  401         if (unp == NULL) {
  402                 UNP_UNLOCK();
  403                 return (EINVAL);
  404         }
  405         error = unp_connect2(so1, so2, PRU_CONNECT2);
  406         UNP_UNLOCK();
  407         return (error);
  408 }
  409 
  410 /* control is EOPNOTSUPP */
  411 
  412 static int
  413 uipc_detach(struct socket *so)
  414 {
  415         struct unpcb *unp;
  416 
  417         UNP_LOCK();
  418         unp = sotounpcb(so);
  419         if (unp == NULL) {
  420                 UNP_UNLOCK();
  421                 return (EINVAL);
  422         }
  423         unp_detach(unp);
  424         UNP_UNLOCK_ASSERT();
  425         return (0);
  426 }
  427 
  428 static int
  429 uipc_disconnect(struct socket *so)
  430 {
  431         struct unpcb *unp;
  432 
  433         UNP_LOCK();
  434         unp = sotounpcb(so);
  435         if (unp == NULL) {
  436                 UNP_UNLOCK();
  437                 return (EINVAL);
  438         }
  439         unp_disconnect(unp);
  440         UNP_UNLOCK();
  441         return (0);
  442 }
  443 
  444 static int
  445 uipc_listen(struct socket *so, struct thread *td)
  446 {
  447         struct unpcb *unp;
  448         int error;
  449 
  450         UNP_LOCK();
  451         unp = sotounpcb(so);
  452         if (unp == NULL || unp->unp_vnode == NULL) {
  453                 UNP_UNLOCK();
  454                 return (EINVAL);
  455         }
  456         error = unp_listen(so, unp, td);
  457         UNP_UNLOCK();
  458         return (error);
  459 }
  460 
  461 static int
  462 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  463 {
  464         struct unpcb *unp;
  465         const struct sockaddr *sa;
  466 
  467         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  468         UNP_LOCK();
  469         unp = sotounpcb(so);
  470         if (unp == NULL) {
  471                 UNP_UNLOCK();
  472                 free(*nam, M_SONAME);
  473                 *nam = NULL;
  474                 return (EINVAL);
  475         }
  476         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL)
  477                 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
  478         else {
  479                 /*
  480                  * XXX: It seems that this test always fails even when
  481                  * connection is established.  So, this else clause is
  482                  * added as workaround to return PF_LOCAL sockaddr.
  483                  */
  484                 sa = &sun_noname;
  485         }
  486         bcopy(sa, *nam, sa->sa_len);
  487         UNP_UNLOCK();
  488         return (0);
  489 }
  490 
  491 static int
  492 uipc_rcvd(struct socket *so, int flags)
  493 {
  494         struct unpcb *unp;
  495         struct socket *so2;
  496         u_int mbcnt, sbcc;
  497         u_long newhiwat;
  498 
  499         UNP_LOCK();
  500         unp = sotounpcb(so);
  501         if (unp == NULL) {
  502                 UNP_UNLOCK();
  503                 return (EINVAL);
  504         }
  505         switch (so->so_type) {
  506         case SOCK_DGRAM:
  507                 panic("uipc_rcvd DGRAM?");
  508                 /*NOTREACHED*/
  509 
  510         case SOCK_STREAM:
  511                 /*
  512                  * Adjust backpressure on sender and wakeup any waiting to
  513                  * write.
  514                  */
  515                 SOCKBUF_LOCK(&so->so_rcv);
  516                 mbcnt = so->so_rcv.sb_mbcnt;
  517                 sbcc = so->so_rcv.sb_cc;
  518                 SOCKBUF_UNLOCK(&so->so_rcv);
  519                 if (unp->unp_conn == NULL)
  520                         break;
  521                 so2 = unp->unp_conn->unp_socket;
  522                 SOCKBUF_LOCK(&so2->so_snd);
  523                 so2->so_snd.sb_mbmax += unp->unp_mbcnt - mbcnt;
  524                 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - sbcc;
  525                 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
  526                     newhiwat, RLIM_INFINITY);
  527                 sowwakeup_locked(so2);
  528                 unp->unp_mbcnt = mbcnt;
  529                 unp->unp_cc = so->so_rcv.sb_cc;
  530                 break;
  531 
  532         default:
  533                 panic("uipc_rcvd unknown socktype");
  534         }
  535         UNP_UNLOCK();
  536         return (0);
  537 }
  538 
  539 /* pru_rcvoob is EOPNOTSUPP */
  540 
  541 static int
  542 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  543     struct mbuf *control, struct thread *td)
  544 {
  545         struct unpcb *unp, *unp2;
  546         struct socket *so2;
  547         u_int mbcnt, sbcc;
  548         u_long newhiwat;
  549         int error = 0;
  550 
  551         unp = sotounpcb(so);
  552         if (unp == NULL) {
  553                 error = EINVAL;
  554                 goto release;
  555         }
  556         if (flags & PRUS_OOB) {
  557                 error = EOPNOTSUPP;
  558                 goto release;
  559         }
  560 
  561         if (control != NULL && (error = unp_internalize(&control, td)))
  562                 goto release;
  563 
  564         UNP_LOCK();
  565         unp = sotounpcb(so);
  566         if (unp == NULL) {
  567                 UNP_UNLOCK();
  568                 error = EINVAL;
  569                 goto dispose_release;
  570         }
  571 
  572         switch (so->so_type) {
  573         case SOCK_DGRAM:
  574         {
  575                 const struct sockaddr *from;
  576 
  577                 if (nam != NULL) {
  578                         if (unp->unp_conn != NULL) {
  579                                 error = EISCONN;
  580                                 break;
  581                         }
  582                         error = unp_connect(so, nam, td);
  583                         if (error)
  584                                 break;
  585                 }
  586                 /*
  587                  * Because connect() and send() are non-atomic in a sendto()
  588                  * with a target address, it's possible that the socket will
  589                  * have disconnected before the send() can run.  In that case
  590                  * return the slightly counter-intuitive but otherwise
  591                  * correct error that the socket is not connected.
  592                  */
  593                 if (unp->unp_conn == NULL) {
  594                         error = ENOTCONN;
  595                         break;
  596                 }
  597                 unp2 = unp->unp_conn;
  598                 so2 = unp2->unp_socket;
  599                 if (unp->unp_addr != NULL)
  600                         from = (struct sockaddr *)unp->unp_addr;
  601                 else
  602                         from = &sun_noname;
  603                 if (unp2->unp_flags & UNP_WANTCRED)
  604                         control = unp_addsockcred(td, control);
  605                 SOCKBUF_LOCK(&so2->so_rcv);
  606                 if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
  607                         sorwakeup_locked(so2);
  608                         m = NULL;
  609                         control = NULL;
  610                 } else {
  611                         SOCKBUF_UNLOCK(&so2->so_rcv);
  612                         error = ENOBUFS;
  613                 }
  614                 if (nam != NULL)
  615                         unp_disconnect(unp);
  616                 break;
  617         }
  618 
  619         case SOCK_STREAM:
  620                 /*
  621                  * Connect if not connected yet.
  622                  *
  623                  * Note: A better implementation would complain if not equal
  624                  * to the peer's address.
  625                  */
  626                 if ((so->so_state & SS_ISCONNECTED) == 0) {
  627                         if (nam != NULL) {
  628                                 error = unp_connect(so, nam, td);
  629                                 if (error)
  630                                         break;  /* XXX */
  631                         } else {
  632                                 error = ENOTCONN;
  633                                 break;
  634                         }
  635                 }
  636 
  637                 /* Lockless read. */
  638                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
  639                         error = EPIPE;
  640                         break;
  641                 }
  642                 /*
  643                  * Because connect() and send() are non-atomic in a sendto()
  644                  * with a target address, it's possible that the socket will
  645                  * have disconnected before the send() can run.  In that case
  646                  * return the slightly counter-intuitive but otherwise
  647                  * correct error that the socket is not connected.
  648                  */
  649                 unp2 = unp->unp_conn;
  650                 if (unp2 == NULL) {
  651                         SOCKBUF_UNLOCK(&so->so_snd);
  652                         error = ENOTCONN;
  653                         break;
  654                 }
  655                 so2 = unp2->unp_socket;
  656                 SOCKBUF_LOCK(&so2->so_rcv);
  657                 if (unp2->unp_flags & UNP_WANTCRED) {
  658                         /*
  659                          * Credentials are passed only once on
  660                          * SOCK_STREAM.
  661                          */
  662                         unp2->unp_flags &= ~UNP_WANTCRED;
  663                         control = unp_addsockcred(td, control);
  664                 }
  665                 /*
  666                  * Send to paired receive port, and then reduce send buffer
  667                  * hiwater marks to maintain backpressure.  Wake up readers.
  668                  */
  669                 if (control != NULL) {
  670                         if (sbappendcontrol_locked(&so2->so_rcv, m, control))
  671                                 control = NULL;
  672                 } else {
  673                         sbappend_locked(&so2->so_rcv, m);
  674                 }
  675                 mbcnt = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt;
  676                 unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt;
  677                 sbcc = so2->so_rcv.sb_cc;
  678                 sorwakeup_locked(so2);
  679 
  680                 SOCKBUF_LOCK(&so->so_snd);
  681                 newhiwat = so->so_snd.sb_hiwat - (sbcc - unp2->unp_cc);
  682                 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
  683                     newhiwat, RLIM_INFINITY);
  684                 so->so_snd.sb_mbmax -= mbcnt;
  685                 SOCKBUF_UNLOCK(&so->so_snd);
  686 
  687                 unp2->unp_cc = sbcc;
  688                 m = NULL;
  689                 break;
  690 
  691         default:
  692                 panic("uipc_send unknown socktype");
  693         }
  694 
  695         /*
  696          * SEND_EOF is equivalent to a SEND followed by
  697          * a SHUTDOWN.
  698          */
  699         if (flags & PRUS_EOF) {
  700                 socantsendmore(so);
  701                 unp_shutdown(unp);
  702         }
  703         UNP_UNLOCK();
  704 
  705 dispose_release:
  706         if (control != NULL && error != 0)
  707                 unp_dispose(control);
  708 
  709 release:
  710         if (control != NULL)
  711                 m_freem(control);
  712         if (m != NULL)
  713                 m_freem(m);
  714         return (error);
  715 }
  716 
  717 static int
  718 uipc_sense(struct socket *so, struct stat *sb)
  719 {
  720         struct unpcb *unp;
  721         struct socket *so2;
  722 
  723         UNP_LOCK();
  724         unp = sotounpcb(so);
  725         if (unp == NULL) {
  726                 UNP_UNLOCK();
  727                 return (EINVAL);
  728         }
  729         sb->st_blksize = so->so_snd.sb_hiwat;
  730         if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
  731                 so2 = unp->unp_conn->unp_socket;
  732                 sb->st_blksize += so2->so_rcv.sb_cc;
  733         }
  734         sb->st_dev = NODEV;
  735         if (unp->unp_ino == 0)
  736                 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
  737         sb->st_ino = unp->unp_ino;
  738         UNP_UNLOCK();
  739         return (0);
  740 }
  741 
  742 static int
  743 uipc_shutdown(struct socket *so)
  744 {
  745         struct unpcb *unp;
  746 
  747         UNP_LOCK();
  748         unp = sotounpcb(so);
  749         if (unp == NULL) {
  750                 UNP_UNLOCK();
  751                 return (EINVAL);
  752         }
  753         socantsendmore(so);
  754         unp_shutdown(unp);
  755         UNP_UNLOCK();
  756         return (0);
  757 }
  758 
  759 static int
  760 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
  761 {
  762         struct unpcb *unp;
  763         const struct sockaddr *sa;
  764 
  765         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  766         UNP_LOCK();
  767         unp = sotounpcb(so);
  768         if (unp == NULL) {
  769                 UNP_UNLOCK();
  770                 free(*nam, M_SONAME);
  771                 *nam = NULL;
  772                 return (EINVAL);
  773         }
  774         if (unp->unp_addr != NULL)
  775                 sa = (struct sockaddr *) unp->unp_addr;
  776         else
  777                 sa = &sun_noname;
  778         bcopy(sa, *nam, sa->sa_len);
  779         UNP_UNLOCK();
  780         return (0);
  781 }
  782 
  783 struct pr_usrreqs uipc_usrreqs = {
  784         .pru_abort =            uipc_abort,
  785         .pru_accept =           uipc_accept,
  786         .pru_attach =           uipc_attach,
  787         .pru_bind =             uipc_bind,
  788         .pru_connect =          uipc_connect,
  789         .pru_connect2 =         uipc_connect2,
  790         .pru_detach =           uipc_detach,
  791         .pru_disconnect =       uipc_disconnect,
  792         .pru_listen =           uipc_listen,
  793         .pru_peeraddr =         uipc_peeraddr,
  794         .pru_rcvd =             uipc_rcvd,
  795         .pru_send =             uipc_send,
  796         .pru_sense =            uipc_sense,
  797         .pru_shutdown =         uipc_shutdown,
  798         .pru_sockaddr =         uipc_sockaddr,
  799         .pru_sosend =           sosend,
  800         .pru_soreceive =        soreceive,
  801         .pru_sopoll =           sopoll,
  802 };
  803 
  804 int
  805 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
  806 {
  807         struct unpcb *unp;
  808         struct xucred xu;
  809         int error, optval;
  810 
  811         if (sopt->sopt_level != 0)
  812                 return (EINVAL);
  813 
  814         UNP_LOCK();
  815         unp = sotounpcb(so);
  816         if (unp == NULL) {
  817                 UNP_UNLOCK();
  818                 return (EINVAL);
  819         }
  820         error = 0;
  821 
  822         switch (sopt->sopt_dir) {
  823         case SOPT_GET:
  824                 switch (sopt->sopt_name) {
  825                 case LOCAL_PEERCRED:
  826                         if (unp->unp_flags & UNP_HAVEPC)
  827                                 xu = unp->unp_peercred;
  828                         else {
  829                                 if (so->so_type == SOCK_STREAM)
  830                                         error = ENOTCONN;
  831                                 else
  832                                         error = EINVAL;
  833                         }
  834                         if (error == 0)
  835                                 error = sooptcopyout(sopt, &xu, sizeof(xu));
  836                         break;
  837                 case LOCAL_CREDS:
  838                         optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0;
  839                         error = sooptcopyout(sopt, &optval, sizeof(optval));
  840                         break;
  841                 case LOCAL_CONNWAIT:
  842                         optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
  843                         error = sooptcopyout(sopt, &optval, sizeof(optval));
  844                         break;
  845                 default:
  846                         error = EOPNOTSUPP;
  847                         break;
  848                 }
  849                 break;
  850         case SOPT_SET:
  851                 switch (sopt->sopt_name) {
  852                 case LOCAL_CREDS:
  853                 case LOCAL_CONNWAIT:
  854                         error = sooptcopyin(sopt, &optval, sizeof(optval),
  855                                             sizeof(optval));
  856                         if (error)
  857                                 break;
  858 
  859 #define OPTSET(bit) \
  860         if (optval) \
  861                 unp->unp_flags |= bit; \
  862         else \
  863                 unp->unp_flags &= ~bit;
  864 
  865                         switch (sopt->sopt_name) {
  866                         case LOCAL_CREDS:
  867                                 OPTSET(UNP_WANTCRED);
  868                                 break;
  869                         case LOCAL_CONNWAIT:
  870                                 OPTSET(UNP_CONNWAIT);
  871                                 break;
  872                         default:
  873                                 break;
  874                         }
  875                         break;
  876 #undef  OPTSET
  877                 default:
  878                         error = ENOPROTOOPT;
  879                         break;
  880                 }
  881                 break;
  882         default:
  883                 error = EOPNOTSUPP;
  884                 break;
  885         }
  886         UNP_UNLOCK();
  887         return (error);
  888 }
  889 
  890 static int
  891 unp_attach(struct socket *so)
  892 {
  893         struct unpcb *unp;
  894         int error;
  895 
  896         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  897                 switch (so->so_type) {
  898                 case SOCK_STREAM:
  899                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
  900                         break;
  901 
  902                 case SOCK_DGRAM:
  903                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
  904                         break;
  905 
  906                 default:
  907                         panic("unp_attach");
  908                 }
  909                 if (error)
  910                         return (error);
  911         }
  912         unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
  913         if (unp == NULL)
  914                 return (ENOBUFS);
  915         LIST_INIT(&unp->unp_refs);
  916         unp->unp_socket = so;
  917         so->so_pcb = unp;
  918 
  919         UNP_REFCOUNT(unp) = 1;
  920         UNP_LOCK();
  921         unp->unp_gencnt = ++unp_gencnt;
  922         unp_count++;
  923         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead : &unp_shead,
  924             unp, unp_link);
  925         UNP_UNLOCK();
  926 
  927         return (0);
  928 }
  929 
  930 /*
  931  * Definitions of protocols supported in the LOCAL domain.
  932  */
  933 static struct domain localdomain;
  934 static struct protosw localsw[] = {
  935 {
  936         .pr_type =              SOCK_STREAM,
  937         .pr_domain =            &localdomain,
  938         .pr_flags =             PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
  939         .pr_ctloutput =         &uipc_ctloutput,
  940         .pr_usrreqs =           &uipc_usrreqs
  941 },
  942 {
  943         .pr_type =              SOCK_DGRAM,
  944         .pr_domain =            &localdomain,
  945         .pr_flags =             PR_ATOMIC|PR_ADDR|PR_RIGHTS,
  946         .pr_usrreqs =           &uipc_usrreqs
  947 },
  948 };
  949 
  950 static struct domain localdomain = {
  951         .dom_family =           AF_LOCAL,
  952         .dom_name =             "local",
  953         .dom_init =             unp_init,
  954         .dom_externalize =      unp_externalize,
  955         .dom_dispose =          unp_dispose,
  956         .dom_protosw =          localsw,
  957         .dom_protoswNPROTOSW =  &localsw[sizeof(localsw)/sizeof(localsw[0])]
  958 };
  959 DOMAIN_SET(local);
  960 
  961 static void
  962 unp_detach(struct unpcb *unp)
  963 {
  964         struct sockaddr_un *saved_unp_addr;     
  965         struct vnode *vp;
  966         int local_unp_rights;
  967         int freeunp;
  968 
  969         UNP_LOCK_ASSERT();
  970 
  971         LIST_REMOVE(unp, unp_link);
  972         unp->unp_gencnt = ++unp_gencnt;
  973         --unp_count;
  974         if ((vp = unp->unp_vnode) != NULL) {
  975                 /*
  976                  * XXXRW: should v_socket be frobbed only while holding
  977                  * Giant?
  978                  */
  979                 unp->unp_vnode->v_socket = NULL;
  980                 unp->unp_vnode = NULL;
  981         }
  982         if (unp->unp_conn != NULL)
  983                 unp_disconnect(unp);
  984         while (!LIST_EMPTY(&unp->unp_refs)) {
  985                 struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
  986                 unp_drop(ref, ECONNRESET);
  987         }
  988         soisdisconnected(unp->unp_socket);
  989         unp->unp_socket->so_pcb = NULL;
  990         local_unp_rights = unp_rights;
  991         saved_unp_addr = unp->unp_addr;
  992         unp->unp_addr = NULL;
  993         UNP_REFCOUNT(unp)--;
  994         freeunp = (UNP_REFCOUNT(unp) == 0);
  995         UNP_UNLOCK();
  996         if (saved_unp_addr != NULL)
  997                 FREE(saved_unp_addr, M_SONAME);
  998         if (freeunp)
  999                 uma_zfree(unp_zone, unp);
 1000         if (vp) {
 1001                 int vfslocked;
 1002 
 1003                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1004                 vrele(vp);
 1005                 VFS_UNLOCK_GIANT(vfslocked);
 1006         }
 1007         if (local_unp_rights)
 1008                 taskqueue_enqueue(taskqueue_thread, &unp_gc_task);
 1009 }
 1010 
 1011 static int
 1012 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 1013 {
 1014         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 1015         struct vnode *vp;
 1016         struct socket *so2, *so3;
 1017         struct unpcb *unp, *unp2, *unp3;
 1018         int error, len;
 1019         struct nameidata nd;
 1020         char buf[SOCK_MAXADDRLEN];
 1021         struct sockaddr *sa;
 1022 
 1023         UNP_LOCK_ASSERT();
 1024         unp = sotounpcb(so);
 1025 
 1026         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 1027         if (len <= 0)
 1028                 return (EINVAL);
 1029         strlcpy(buf, soun->sun_path, len + 1);
 1030         if (unp->unp_flags & UNP_CONNECTING) {
 1031                 UNP_UNLOCK();
 1032                 return (EALREADY);
 1033         }
 1034         unp->unp_flags |= UNP_CONNECTING;
 1035         UNP_UNLOCK();
 1036         sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 1037         mtx_lock(&Giant);
 1038         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
 1039         error = namei(&nd);
 1040         if (error)
 1041                 vp = NULL;
 1042         else
 1043                 vp = nd.ni_vp;
 1044         ASSERT_VOP_LOCKED(vp, "unp_connect");
 1045         NDFREE(&nd, NDF_ONLY_PNBUF);
 1046         if (error)
 1047                 goto bad;
 1048 
 1049         if (vp->v_type != VSOCK) {
 1050                 error = ENOTSOCK;
 1051                 goto bad;
 1052         }
 1053 #ifdef MAC
 1054         error = mac_check_vnode_open(td->td_ucred, vp, VWRITE | VREAD);
 1055         if (error)
 1056                 goto bad;
 1057 #endif
 1058         error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
 1059         if (error)
 1060                 goto bad;
 1061         mtx_unlock(&Giant);
 1062         UNP_LOCK();
 1063         unp = sotounpcb(so);
 1064         if (unp == NULL) {
 1065                 error = EINVAL;
 1066                 goto bad2;
 1067         }
 1068         so2 = vp->v_socket;
 1069         if (so2 == NULL) {
 1070                 error = ECONNREFUSED;
 1071                 goto bad2;
 1072         }
 1073         if (so->so_type != so2->so_type) {
 1074                 error = EPROTOTYPE;
 1075                 goto bad2;
 1076         }
 1077         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
 1078                 if (so2->so_options & SO_ACCEPTCONN)
 1079                         so3 = sonewconn(so2, 0);
 1080                 else
 1081                         so3 = NULL;
 1082                 if (so3 == NULL) {
 1083                         error = ECONNREFUSED;
 1084                         goto bad2;
 1085                 }
 1086                 unp = sotounpcb(so);
 1087                 unp2 = sotounpcb(so2);
 1088                 unp3 = sotounpcb(so3);
 1089                 if (unp2->unp_addr != NULL) {
 1090                         bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
 1091                         unp3->unp_addr = (struct sockaddr_un *) sa;
 1092                         sa = NULL;
 1093                 }
 1094                 /*
 1095                  * unp_peercred management:
 1096                  *
 1097                  * The connecter's (client's) credentials are copied from its
 1098                  * process structure at the time of connect() (which is now).
 1099                  */
 1100                 cru2x(td->td_ucred, &unp3->unp_peercred);
 1101                 unp3->unp_flags |= UNP_HAVEPC;
 1102                 /*
 1103                  * The receiver's (server's) credentials are copied from the
 1104                  * unp_peercred member of socket on which the former called
 1105                  * listen(); unp_listen() cached that process's credentials
 1106                  * at that time so we can use them now.
 1107                  */
 1108                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
 1109                     ("unp_connect: listener without cached peercred"));
 1110                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
 1111                     sizeof(unp->unp_peercred));
 1112                 unp->unp_flags |= UNP_HAVEPC;
 1113                 if (unp2->unp_flags & UNP_WANTCRED)
 1114                         unp3->unp_flags |= UNP_WANTCRED;
 1115 #ifdef MAC
 1116                 SOCK_LOCK(so);
 1117                 mac_set_socket_peer_from_socket(so, so3);
 1118                 mac_set_socket_peer_from_socket(so3, so);
 1119                 SOCK_UNLOCK(so);
 1120 #endif
 1121 
 1122                 so2 = so3;
 1123         }
 1124         error = unp_connect2(so, so2, PRU_CONNECT);
 1125 bad2:
 1126         UNP_UNLOCK();
 1127         mtx_lock(&Giant);
 1128 bad:
 1129         mtx_assert(&Giant, MA_OWNED);
 1130         if (vp != NULL)
 1131                 vput(vp);
 1132         mtx_unlock(&Giant);
 1133         free(sa, M_SONAME);
 1134         UNP_LOCK();
 1135         unp->unp_flags &= ~UNP_CONNECTING;
 1136         return (error);
 1137 }
 1138 
 1139 static int
 1140 unp_connect2(struct socket *so, struct socket *so2, int req)
 1141 {
 1142         struct unpcb *unp = sotounpcb(so);
 1143         struct unpcb *unp2;
 1144 
 1145         UNP_LOCK_ASSERT();
 1146 
 1147         if (so2->so_type != so->so_type)
 1148                 return (EPROTOTYPE);
 1149         unp2 = sotounpcb(so2);
 1150         unp->unp_conn = unp2;
 1151         switch (so->so_type) {
 1152         case SOCK_DGRAM:
 1153                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
 1154                 soisconnected(so);
 1155                 break;
 1156 
 1157         case SOCK_STREAM:
 1158                 unp2->unp_conn = unp;
 1159                 if (req == PRU_CONNECT &&
 1160                     ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
 1161                         soisconnecting(so);
 1162                 else
 1163                         soisconnected(so);
 1164                 soisconnected(so2);
 1165                 break;
 1166 
 1167         default:
 1168                 panic("unp_connect2");
 1169         }
 1170         return (0);
 1171 }
 1172 
 1173 static void
 1174 unp_disconnect(struct unpcb *unp)
 1175 {
 1176         struct unpcb *unp2 = unp->unp_conn;
 1177         struct socket *so;
 1178 
 1179         UNP_LOCK_ASSERT();
 1180 
 1181         if (unp2 == NULL)
 1182                 return;
 1183         unp->unp_conn = NULL;
 1184         switch (unp->unp_socket->so_type) {
 1185 
 1186         case SOCK_DGRAM:
 1187                 LIST_REMOVE(unp, unp_reflink);
 1188                 so = unp->unp_socket;
 1189                 SOCK_LOCK(so);
 1190                 so->so_state &= ~SS_ISCONNECTED;
 1191                 SOCK_UNLOCK(so);
 1192                 break;
 1193 
 1194         case SOCK_STREAM:
 1195                 soisdisconnected(unp->unp_socket);
 1196                 unp2->unp_conn = NULL;
 1197                 soisdisconnected(unp2->unp_socket);
 1198                 break;
 1199         }
 1200 }
 1201 
 1202 /*
 1203  * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed by
 1204  * the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers are
 1205  * safe to reference.  It first scans the list of struct unpcb's to generate
 1206  * a pointer list, then it rescans its list one entry at a time to
 1207  * externalize and copyout.  It checks the generation number to see if a
 1208  * struct unpcb has been reused, and will skip it if so.
 1209  */
 1210 static int
 1211 unp_pcblist(SYSCTL_HANDLER_ARGS)
 1212 {
 1213         int error, i, n;
 1214         int freeunp;
 1215         struct unpcb *unp, **unp_list;
 1216         unp_gen_t gencnt;
 1217         struct xunpgen *xug;
 1218         struct unp_head *head;
 1219         struct xunpcb *xu;
 1220 
 1221         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
 1222 
 1223         /*
 1224          * The process of preparing the PCB list is too time-consuming and
 1225          * resource-intensive to repeat twice on every request.
 1226          */
 1227         if (req->oldptr == NULL) {
 1228                 n = unp_count;
 1229                 req->oldidx = 2 * (sizeof *xug)
 1230                         + (n + n/8) * sizeof(struct xunpcb);
 1231                 return (0);
 1232         }
 1233 
 1234         if (req->newptr != NULL)
 1235                 return (EPERM);
 1236 
 1237         /*
 1238          * OK, now we're committed to doing something.
 1239          */
 1240         xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
 1241         UNP_LOCK();
 1242         gencnt = unp_gencnt;
 1243         n = unp_count;
 1244         UNP_UNLOCK();
 1245 
 1246         xug->xug_len = sizeof *xug;
 1247         xug->xug_count = n;
 1248         xug->xug_gen = gencnt;
 1249         xug->xug_sogen = so_gencnt;
 1250         error = SYSCTL_OUT(req, xug, sizeof *xug);
 1251         if (error) {
 1252                 free(xug, M_TEMP);
 1253                 return (error);
 1254         }
 1255 
 1256         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 1257 
 1258         UNP_LOCK();
 1259         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 1260              unp = LIST_NEXT(unp, unp_link)) {
 1261                 if (unp->unp_gencnt <= gencnt) {
 1262                         if (cr_cansee(req->td->td_ucred,
 1263                             unp->unp_socket->so_cred))
 1264                                 continue;
 1265                         unp_list[i++] = unp;
 1266                         UNP_REFCOUNT(unp)++;
 1267                 }
 1268         }
 1269         UNP_UNLOCK();
 1270         n = i;                  /* In case we lost some during malloc. */
 1271 
 1272         error = 0;
 1273         xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 1274         for (i = 0; i < n; i++) {
 1275                 unp = unp_list[i];
 1276                 UNP_LOCK();
 1277                 UNP_REFCOUNT(unp)--;
 1278                 if (UNP_REFCOUNT(unp) != 0 && unp->unp_gencnt <= gencnt) {
 1279                         xu->xu_len = sizeof *xu;
 1280                         xu->xu_unpp = unp;
 1281                         /*
 1282                          * XXX - need more locking here to protect against
 1283                          * connect/disconnect races for SMP.
 1284                          */
 1285                         if (unp->unp_addr != NULL)
 1286                                 bcopy(unp->unp_addr, &xu->xu_addr,
 1287                                       unp->unp_addr->sun_len);
 1288                         if (unp->unp_conn != NULL &&
 1289                             unp->unp_conn->unp_addr != NULL)
 1290                                 bcopy(unp->unp_conn->unp_addr,
 1291                                       &xu->xu_caddr,
 1292                                       unp->unp_conn->unp_addr->sun_len);
 1293                         bcopy(unp, &xu->xu_unp, sizeof *unp);
 1294                         sotoxsocket(unp->unp_socket, &xu->xu_socket);
 1295                         UNP_UNLOCK();
 1296                         error = SYSCTL_OUT(req, xu, sizeof *xu);
 1297                 } else {
 1298                         freeunp = (UNP_REFCOUNT(unp) == 0);
 1299                         UNP_UNLOCK();
 1300                         if (freeunp) 
 1301                                 uma_zfree(unp_zone, unp);
 1302                 }
 1303         }
 1304         free(xu, M_TEMP);
 1305         if (!error) {
 1306                 /*
 1307                  * Give the user an updated idea of our state.  If the
 1308                  * generation differs from what we told her before, she knows
 1309                  * that something happened while we were processing this
 1310                  * request, and it might be necessary to retry.
 1311                  */
 1312                 xug->xug_gen = unp_gencnt;
 1313                 xug->xug_sogen = so_gencnt;
 1314                 xug->xug_count = unp_count;
 1315                 error = SYSCTL_OUT(req, xug, sizeof *xug);
 1316         }
 1317         free(unp_list, M_TEMP);
 1318         free(xug, M_TEMP);
 1319         return (error);
 1320 }
 1321 
 1322 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
 1323             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
 1324             "List of active local datagram sockets");
 1325 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
 1326             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
 1327             "List of active local stream sockets");
 1328 
 1329 static void
 1330 unp_shutdown(struct unpcb *unp)
 1331 {
 1332         struct socket *so;
 1333 
 1334         UNP_LOCK_ASSERT();
 1335 
 1336         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
 1337             (so = unp->unp_conn->unp_socket))
 1338                 socantrcvmore(so);
 1339 }
 1340 
 1341 static void
 1342 unp_drop(struct unpcb *unp, int errno)
 1343 {
 1344         struct socket *so = unp->unp_socket;
 1345 
 1346         UNP_LOCK_ASSERT();
 1347 
 1348         so->so_error = errno;
 1349         unp_disconnect(unp);
 1350 }
 1351 
 1352 static void
 1353 unp_freerights(struct file **rp, int fdcount)
 1354 {
 1355         int i;
 1356         struct file *fp;
 1357 
 1358         for (i = 0; i < fdcount; i++) {
 1359                 fp = *rp;
 1360                 /*
 1361                  * Zero the pointer before calling unp_discard since it may
 1362                  * end up in unp_gc()..
 1363                  */
 1364                 *rp++ = 0;
 1365                 unp_discard(fp);
 1366         }
 1367 }
 1368 
 /*
  * Convert the control messages in 'control' into the form handed to the
  * receiving process (the current thread).
  *
  * SOL_SOCKET/SCM_RIGHTS messages carry struct file pointers; for each one
  * a descriptor is allocated in the current process and the message is
  * rebuilt to carry the descriptor numbers instead.  Every other message
  * is copied unchanged.  The rebuilt messages are chained through
  * *controlp.
  *
  * If controlp is NULL, or once an error has been recorded, nothing more
  * is output: passed files are released with unp_freerights() and other
  * messages are skipped.
  *
  * Must be called without the UNP lock held (asserted).  'control' is
  * always freed before returning.
  *
  * Returns 0, or EINVAL (malformed message length), EMSGSIZE (not enough
  * free descriptors), E2BIG (no mbuf for the rebuilt rights message) or
  * ENOBUFS (no mbuf for a copied message).
  */
 1369 int
 1370 unp_externalize(struct mbuf *control, struct mbuf **controlp)
 1371 {
 1372         struct thread *td = curthread;          /* XXX */
 1373         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1374         int i;
 1375         int *fdp;
 1376         struct file **rp;
 1377         struct file *fp;
 1378         void *data;
 1379         socklen_t clen = control->m_len, datalen;
 1380         int error, newfds;
 1381         int f;
 1382         u_int newlen;
 1383 
 1384         UNP_UNLOCK_ASSERT();
 1385 
 1386         error = 0;
 1387         if (controlp != NULL) /* controlp == NULL => free control messages */
 1388                 *controlp = NULL;
 1389 
              /* Walk the messages; clen tracks the bytes not yet consumed. */
 1390         while (cm != NULL) {
 1391                 if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
 1392                         error = EINVAL;
 1393                         break;
 1394                 }
 1395 
                      /* Payload length: from the data start to the message end. */
 1396                 data = CMSG_DATA(cm);
 1397                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1398 
 1399                 if (cm->cmsg_level == SOL_SOCKET
 1400                     && cm->cmsg_type == SCM_RIGHTS) {
 1401                         newfds = datalen / sizeof(struct file *);
 1402                         rp = data;
 1403 
 1404                         /* If we're not outputting the descriptors free them. */
 1405                         if (error || controlp == NULL) {
 1406                                 unp_freerights(rp, newfds);
 1407                                 goto next;
 1408                         }
 1409                         FILEDESC_LOCK(td->td_proc->p_fd);
 1410                         /* if the new FD's will not fit free them.  */
 1411                         if (!fdavail(td, newfds)) {
 1412                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1413                                 error = EMSGSIZE;
 1414                                 unp_freerights(rp, newfds);
 1415                                 goto next;
 1416                         }
 1417                         /*
 1418                          * Now change each pointer to an fd in the global
 1419                          * table to an integer that is the index to the local
 1420                          * fd table entry that we set up to point to the
 1421                          * global one we are transferring.
 1422                          */
 1423                         newlen = newfds * sizeof(int);
 1424                         *controlp = sbcreatecontrol(NULL, newlen,
 1425                             SCM_RIGHTS, SOL_SOCKET);
 1426                         if (*controlp == NULL) {
 1427                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1428                                 error = E2BIG;
 1429                                 unp_freerights(rp, newfds);
 1430                                 goto next;
 1431                         }
 1432 
 1433                         fdp = (int *)
 1434                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
                              /*
                               * fdavail() succeeded above with the descriptor
                               * table still locked, so an fdalloc() failure
                               * here is treated as fatal.
                               */
 1435                         for (i = 0; i < newfds; i++) {
 1436                                 if (fdalloc(td, 0, &f))
 1437                                         panic("unp_externalize fdalloc failed");
 1438                                 fp = *rp++;
 1439                                 td->td_proc->p_fd->fd_ofiles[f] = fp;
 1440                                 FILE_LOCK(fp);
 1441                                 fp->f_msgcount--;
 1442                                 FILE_UNLOCK(fp);
 1443                                 unp_rights--;
 1444                                 *fdp++ = f;
 1445                         }
 1446                         FILEDESC_UNLOCK(td->td_proc->p_fd);
 1447                 } else {
 1448                         /* We can just copy anything else across. */
 1449                         if (error || controlp == NULL)
 1450                                 goto next;
 1451                         *controlp = sbcreatecontrol(NULL, datalen,
 1452                             cm->cmsg_type, cm->cmsg_level);
 1453                         if (*controlp == NULL) {
 1454                                 error = ENOBUFS;
 1455                                 goto next;
 1456                         }
 1457                         bcopy(data,
 1458                             CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 1459                             datalen);
 1460                 }
 1461 
                      /* The next output message is linked after this one. */
 1462                 controlp = &(*controlp)->m_next;
 1463 
 1464 next:
                      /* Step to the next message, or stop when none remains. */
 1465                 if (CMSG_SPACE(datalen) < clen) {
 1466                         clen -= CMSG_SPACE(datalen);
 1467                         cm = (struct cmsghdr *)
 1468                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1469                 } else {
 1470                         clen = 0;
 1471                         cm = NULL;
 1472                 }
 1473         }
 1474 
 1475         m_freem(control);
 1476 
 1477         return (error);
 1478 }
 1479 
 1480 static void
 1481 unp_zone_change(void *tag)
 1482 {
 1483 
 1484         uma_zone_set_max(unp_zone, maxsockets);
 1485 }
 1486 
 1487 void
 1488 unp_init(void)
 1489 {
 1490 
 1491         unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb_wrapper), NULL,
 1492             NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 1493         if (unp_zone == NULL)
 1494                 panic("unp_init");
 1495         uma_zone_set_max(unp_zone, maxsockets);
 1496         EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
 1497             NULL, EVENTHANDLER_PRI_ANY);
 1498         LIST_INIT(&unp_dhead);
 1499         LIST_INIT(&unp_shead);
 1500         TASK_INIT(&unp_gc_task, 0, unp_gc, NULL);
 1501         UNP_LOCK_INIT();
 1502 }
 1503 
 1504 static int
 1505 unp_internalize(struct mbuf **controlp, struct thread *td)
 1506 {
 1507         struct mbuf *control = *controlp;
 1508         struct proc *p = td->td_proc;
 1509         struct filedesc *fdescp = p->p_fd;
 1510         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1511         struct cmsgcred *cmcred;
 1512         struct file **rp;
 1513         struct file *fp;
 1514         struct timeval *tv;
 1515         int i, fd, *fdp;
 1516         void *data;
 1517         socklen_t clen = control->m_len, datalen;
 1518         int error, oldfds;
 1519         u_int newlen;
 1520 
 1521         UNP_UNLOCK_ASSERT();
 1522 
 1523         error = 0;
 1524         *controlp = NULL;
 1525 
 1526         while (cm != NULL) {
 1527                 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 1528                     || cm->cmsg_len > clen) {
 1529                         error = EINVAL;
 1530                         goto out;
 1531                 }
 1532 
 1533                 data = CMSG_DATA(cm);
 1534                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1535 
 1536                 switch (cm->cmsg_type) {
 1537                 /*
 1538                  * Fill in credential information.
 1539                  */
 1540                 case SCM_CREDS:
 1541                         *controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 1542                             SCM_CREDS, SOL_SOCKET);
 1543                         if (*controlp == NULL) {
 1544                                 error = ENOBUFS;
 1545                                 goto out;
 1546                         }
 1547 
 1548                         cmcred = (struct cmsgcred *)
 1549                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1550                         cmcred->cmcred_pid = p->p_pid;
 1551                         cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 1552                         cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 1553                         cmcred->cmcred_euid = td->td_ucred->cr_uid;
 1554                         cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 1555                                                         CMGROUP_MAX);
 1556                         for (i = 0; i < cmcred->cmcred_ngroups; i++)
 1557                                 cmcred->cmcred_groups[i] =
 1558                                     td->td_ucred->cr_groups[i];
 1559                         break;
 1560 
 1561                 case SCM_RIGHTS:
 1562                         oldfds = datalen / sizeof (int);
 1563                         /*
 1564                          * Check that all the FDs passed in refer to legal
 1565                          * files.  If not, reject the entire operation.
 1566                          */
 1567                         fdp = data;
 1568                         FILEDESC_LOCK(fdescp);
 1569                         for (i = 0; i < oldfds; i++) {
 1570                                 fd = *fdp++;
 1571                                 if ((unsigned)fd >= fdescp->fd_nfiles ||
 1572                                     fdescp->fd_ofiles[fd] == NULL) {
 1573                                         FILEDESC_UNLOCK(fdescp);
 1574                                         error = EBADF;
 1575                                         goto out;
 1576                                 }
 1577                                 fp = fdescp->fd_ofiles[fd];
 1578                                 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 1579                                         FILEDESC_UNLOCK(fdescp);
 1580                                         error = EOPNOTSUPP;
 1581                                         goto out;
 1582                                 }
 1583 
 1584                         }
 1585                         /*
 1586                          * Now replace the integer FDs with pointers to the
 1587                          * associated global file table entry..
 1588                          */
 1589                         newlen = oldfds * sizeof(struct file *);
 1590                         *controlp = sbcreatecontrol(NULL, newlen,
 1591                             SCM_RIGHTS, SOL_SOCKET);
 1592                         if (*controlp == NULL) {
 1593                                 FILEDESC_UNLOCK(fdescp);
 1594                                 error = E2BIG;
 1595                                 goto out;
 1596                         }
 1597 
 1598                         fdp = data;
 1599                         rp = (struct file **)
 1600                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1601                         for (i = 0; i < oldfds; i++) {
 1602                                 fp = fdescp->fd_ofiles[*fdp++];
 1603                                 *rp++ = fp;
 1604                                 FILE_LOCK(fp);
 1605                                 fp->f_count++;
 1606                                 fp->f_msgcount++;
 1607                                 FILE_UNLOCK(fp);
 1608                                 unp_rights++;
 1609                         }
 1610                         FILEDESC_UNLOCK(fdescp);
 1611                         break;
 1612 
 1613                 case SCM_TIMESTAMP:
 1614                         *controlp = sbcreatecontrol(NULL, sizeof(*tv),
 1615                             SCM_TIMESTAMP, SOL_SOCKET);
 1616                         if (*controlp == NULL) {
 1617                                 error = ENOBUFS;
 1618                                 goto out;
 1619                         }
 1620                         tv = (struct timeval *)
 1621                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1622                         microtime(tv);
 1623                         break;
 1624 
 1625                 default:
 1626                         error = EINVAL;
 1627                         goto out;
 1628                 }
 1629 
 1630                 controlp = &(*controlp)->m_next;
 1631 
 1632                 if (CMSG_SPACE(datalen) < clen) {
 1633                         clen -= CMSG_SPACE(datalen);
 1634                         cm = (struct cmsghdr *)
 1635                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1636                 } else {
 1637                         clen = 0;
 1638                         cm = NULL;
 1639                 }
 1640         }
 1641 
 1642 out:
 1643         m_freem(control);
 1644 
 1645         return (error);
 1646 }
 1647 
 /*
  * Build an SCM_CREDS control message (struct sockcred) from the credentials
  * of td and prepend it to the control chain.
  *
  * Every SOL_SOCKET/SCM_CREDS mbuf already on "control" is unlinked and freed
  * before the new message is placed at the head.  If the new control mbuf
  * cannot be created, "control" is returned untouched (nothing is unlinked).
  *
  * Returns the head of the resulting control chain.
  */
 1648 struct mbuf *
 1649 unp_addsockcred(struct thread *td, struct mbuf *control)
 1650 {
 1651         struct mbuf *m, *n, *n_prev;
 1652         struct sockcred *sc;
 1653         const struct cmsghdr *cm;
 1654         int ngroups;
 1655         int i;
 1656 
              /* Report at most CMGROUP_MAX groups. */
 1657         ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
 1658 
 1659         m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET);
 1660         if (m == NULL)
 1661                 return (control);
 1662 
              /*
               * sc_uid/sc_gid are filled from cr_ruid/cr_rgid, while
               * sc_euid/sc_egid are filled from cr_uid/cr_gid.
               */
 1663         sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *));
 1664         sc->sc_uid = td->td_ucred->cr_ruid;
 1665         sc->sc_euid = td->td_ucred->cr_uid;
 1666         sc->sc_gid = td->td_ucred->cr_rgid;
 1667         sc->sc_egid = td->td_ucred->cr_gid;
 1668         sc->sc_ngroups = ngroups;
 1669         for (i = 0; i < sc->sc_ngroups; i++)
 1670                 sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 1671 
 1672         /*
 1673          * Unlink SCM_CREDS control messages (struct cmsgcred), since just
 1674          * created SCM_CREDS control message (struct sockcred) has another
 1675          * format.
 1676          */
 1677         if (control != NULL)
 1678                 for (n = control, n_prev = NULL; n != NULL;) {
 1679                         cm = mtod(n, struct cmsghdr *);
 1680                         if (cm->cmsg_level == SOL_SOCKET &&
 1681                             cm->cmsg_type == SCM_CREDS) {
                                      /*
                                       * Splice n out of the chain (updating
                                       * the head if n was first), then carry
                                       * on with whatever m_free() hands back.
                                       */
 1682                                 if (n_prev == NULL)
 1683                                         control = n->m_next;
 1684                                 else
 1685                                         n_prev->m_next = n->m_next;
 1686                                 n = m_free(n);
 1687                         } else {
 1688                                 n_prev = n;
 1689                                 n = n->m_next;
 1690                         }
 1691                 }
 1692 
 1693         /* Prepend it to the head. */
 1694         m->m_next = control;
 1695 
 1696         return (m);
 1697 }
 1698 
 1699 /*
 1700  * unp_defer indicates whether additional work has been deferred for a future
 1701  * pass through unp_gc().  It is thread local and does not require explicit
 1702  * synchronization.
 1703  */
 1704 static int      unp_defer;
 1705 
      /*
       * Incremented once at the start of every unp_gc() run.  Exported with
       * CTLFLAG_RD as "taskcount" under _net_local.
       */
 1706 static int unp_taskcount;
 1707 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "");
 1708 
      /*
       * Incremented for each file whose extra reference unp_gc() drops with
       * closef().  Exported with CTLFLAG_RD as "recycled" under _net_local.
       */
 1709 static int unp_recycled;
 1710 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, "");
 1711 
 /*
  * Garbage-collect files whose only references come from SCM_RIGHTS messages
  * that are still in transit.
  *
  * Mark phase: with filelist_lock held shared, FMARK and FDEFER are cleared
  * on every file, then filehead is walked repeatedly.  An open file with
  * f_count != f_msgcount is flagged FMARK.  If it is a local-domain socket
  * whose protocol has PR_RIGHTS, its receive buffer is scanned with
  * unp_mark(), which flags (FMARK|FDEFER) every file found there and bumps
  * unp_defer.  The walk repeats for as long as unp_defer is non-zero.
  *
  * Sweep phase: every open file with f_count == f_msgcount and FMARK clear
  * gains one extra reference and is recorded in extra_ref[].  Sockets on that
  * list are flushed with sorflush(), after which each extra reference is
  * dropped with closef().
  *
  * "arg" is unused and "pending" is not referenced in the body.
  */
 1712 static void
 1713 unp_gc(__unused void *arg, int pending)
 1714 {
 1715         struct file *fp, *nextfp;
 1716         struct socket *so;
 1717         struct file **extra_ref, **fpp;
 1718         int nunref, i;
 1719         int nfiles_snap;
 1720         int nfiles_slack = 20;
 1721 
 1722         unp_taskcount++;
 1723         unp_defer = 0;
 1724         /*
 1725          * Before going through all this, set all FDs to be NOT deferred and
 1726          * NOT externally accessible
 1727          */
 1728         sx_slock(&filelist_lock);
 1729         LIST_FOREACH(fp, &filehead, f_list)
 1730                 fp->f_gcflag &= ~(FMARK|FDEFER);
 1731         do {
 1732                 LIST_FOREACH(fp, &filehead, f_list) {
 1733                         FILE_LOCK(fp);
 1734                         /*
 1735                          * If the file is not open, skip it -- could be a
 1736                          * file in the process of being opened, or in the
 1737                          * process of being closed.  If the file is
 1738                          * "closing", it may have been marked for deferred
 1739                          * consideration.  Clear the flag now if so.
 1740                          */
 1741                         if (fp->f_count == 0) {
 1742                                 if (fp->f_gcflag & FDEFER)
 1743                                         unp_defer--;
 1744                                 fp->f_gcflag &= ~(FMARK|FDEFER);
 1745                                 FILE_UNLOCK(fp);
 1746                                 continue;
 1747                         }
 1748                         /*
 1749                          * If we already marked it as 'defer' in a previous
 1750                          * pass, then try to process it this time and un-mark
 1751                          * it.
 1752                          */
 1753                         if (fp->f_gcflag & FDEFER) {
 1754                                 fp->f_gcflag &= ~FDEFER;
 1755                                 unp_defer--;
 1756                         } else {
 1757                                 /*
 1758                                  * if it's not deferred, then check if it's
 1759                                  * already marked.. if so skip it
 1760                                  */
 1761                                 if (fp->f_gcflag & FMARK) {
 1762                                         FILE_UNLOCK(fp);
 1763                                         continue;
 1764                                 }
 1765                                 /*
 1766                                  * If all references are from messages in
 1767                                  * transit, then skip it. it's not externally
 1768                                  * accessible.
 1769                                  */
 1770                                 if (fp->f_count == fp->f_msgcount) {
 1771                                         FILE_UNLOCK(fp);
 1772                                         continue;
 1773                                 }
 1774                                 /*
 1775                                  * If it got this far then it must be
 1776                                  * externally accessible.
 1777                                  */
 1778                                 fp->f_gcflag |= FMARK;
 1779                         }
 1780                         /*
 1781                          * Either it was deferred, or it is externally
 1782                          * accessible and not already marked so.  Now check
 1783                          * if it is possibly one of OUR sockets.
 1784                          */
 1785                         if (fp->f_type != DTYPE_SOCKET ||
 1786                             (so = fp->f_data) == NULL) {
 1787                                 FILE_UNLOCK(fp);
 1788                                 continue;
 1789                         }
 1790                         if (so->so_proto->pr_domain != &localdomain ||
 1791                             (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
 1792                                 FILE_UNLOCK(fp);
 1793                                 continue;
 1794                         }
 1795 
 1796                         /*
 1797                          * Tell any other threads that do a subsequent
 1798                          * fdrop() that we are scanning the message
 1799                          * buffers.
 1800                          */
 1801                         fp->f_gcflag |= FWAIT;
 1802                         FILE_UNLOCK(fp);
 1803 
 1804                         /*
 1805                          * So, Ok, it's one of our sockets and it IS
 1806                          * externally accessible (or was deferred).  Now we
 1807                          * look to see if we hold any file descriptors in its
 1808                          * message buffers. Follow those links and mark them
 1809                          * as accessible too.
 1810                          */
 1811                         SOCKBUF_LOCK(&so->so_rcv);
 1812                         unp_scan(so->so_rcv.sb_mb, unp_mark);
 1813                         SOCKBUF_UNLOCK(&so->so_rcv);
 1814 
 1815                         /*
 1816                          * Wake up any threads waiting in fdrop().
 1817                          */
 1818                         FILE_LOCK(fp);
 1819                         fp->f_gcflag &= ~FWAIT;
 1820                         wakeup(&fp->f_gcflag);
 1821                         FILE_UNLOCK(fp);
 1822                 }
 1823         } while (unp_defer);
 1824         sx_sunlock(&filelist_lock);
 1825         /*
 1826          * XXXRW: The following comments need updating for a post-SMPng and
 1827          * deferred unp_gc() world, but are still generally accurate.
 1828          *
 1829          * We grab an extra reference to each of the file table entries that
 1830          * are not otherwise accessible and then free the rights that are
 1831          * stored in messages on them.
 1832          *
 1833          * The bug in the original code is a little tricky, so I'll describe
 1834          * what's wrong with it here.
 1835          *
 1836          * It is incorrect to simply unp_discard each entry for f_msgcount
 1837          * times -- consider the case of sockets A and B that contain
 1838          * references to each other.  On a last close of some other socket,
 1839          * we trigger a gc since the number of outstanding rights (unp_rights)
 1840          * is non-zero.  If during the sweep phase the gc code unp_discards,
 1841          * we end up doing a (full) closef on the descriptor.  A closef on A
 1842          * results in the following chain.  Closef calls soo_close, which
 1843          * calls soclose.   Soclose calls first (through the switch
 1844          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
 1845          * returns because the previous instance had set unp_gcing, and we
 1846          * return all the way back to soclose, which marks the socket with
 1847          * SS_NOFDREF, and then calls sofree.  Sofree calls sorflush to free
 1848          * up the rights that are queued in messages on the socket A, i.e.,
 1849          * the reference on B.  The sorflush calls via the dom_dispose switch
 1850          * unp_dispose, which unp_scans with unp_discard.  This second
 1851          * instance of unp_discard just calls closef on B.
 1852          *
 1853          * Well, a similar chain occurs on B, resulting in a sorflush on B,
 1854          * which results in another closef on A.  Unfortunately, A is already
 1855          * being closed, and the descriptor has already been marked with
 1856          * SS_NOFDREF, and soclose panics at this point.
 1857          *
 1858          * Here, we first take an extra reference to each inaccessible
 1859          * descriptor.  Then, we call sorflush ourself, since we know it is a
 1860          * Unix domain socket anyhow.  After we destroy all the rights
 1861          * carried in messages, we do a last closef to get rid of our extra
 1862          * reference.  This is the last close, and the unp_detach etc will
 1863          * shut down the socket.
 1864          *
 1865          * 91/09/19, bsy@cs.cmu.edu
 1866          */
 1867 again:
              /*
               * Size extra_ref[] from openfiles plus some slack.  The array is
               * allocated before filelist_lock is taken; if openfiles has
               * outgrown the snapshot by the time the lock is held, free the
               * array, add 20 more entries of slack and try again.
               */
 1868         nfiles_snap = openfiles + nfiles_slack; /* some slack */
 1869         extra_ref = malloc(nfiles_snap * sizeof(struct file *), M_TEMP,
 1870             M_WAITOK);
 1871         sx_slock(&filelist_lock);
 1872         if (nfiles_snap < openfiles) {
 1873                 sx_sunlock(&filelist_lock);
 1874                 free(extra_ref, M_TEMP);
 1875                 nfiles_slack += 20;
 1876                 goto again;
 1877         }
 1878         for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
 1879             fp != NULL; fp = nextfp) {
 1880                 nextfp = LIST_NEXT(fp, f_list);
 1881                 FILE_LOCK(fp);
 1882                 /*
 1883                  * If it's not open, skip it
 1884                  */
 1885                 if (fp->f_count == 0) {
 1886                         FILE_UNLOCK(fp);
 1887                         continue;
 1888                 }
 1889                 /*
 1890                  * If all refs are from msgs, and it's not marked accessible
 1891                  * then it must be referenced from some unreachable cycle of
 1892                  * (shut-down) FDs, so include it in our list of FDs to
 1893                  * remove.
 1894                  */
 1895                 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) {
 1896                         *fpp++ = fp;
 1897                         nunref++;
 1898                         fp->f_count++;
 1899                 }
 1900                 FILE_UNLOCK(fp);
 1901         }
 1902         sx_sunlock(&filelist_lock);
 1903         /*
 1904          * For each FD on our hit list, do the following two things:
 1905          */
              /* First: flush the rights queued on every socket on the list. */
 1906         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 1907                 struct file *tfp = *fpp;
 1908                 FILE_LOCK(tfp);
 1909                 if (tfp->f_type == DTYPE_SOCKET &&
 1910                     tfp->f_data != NULL) {
 1911                         FILE_UNLOCK(tfp);
 1912                         sorflush(tfp->f_data);
 1913                 } else {
 1914                         FILE_UNLOCK(tfp);
 1915                 }
 1916         }
              /* Second: drop the extra reference taken above and count it. */
 1917         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 1918                 closef(*fpp, (struct thread *) NULL);
 1919                 unp_recycled++;
 1920         }
 1921         free(extra_ref, M_TEMP);
 1922 }
 1923 
 /*
  * Release every file reference carried by SCM_RIGHTS control messages in
  * the mbufs starting at m, by running unp_scan() with unp_discard() as the
  * per-file operation.  Does nothing when m is NULL.
  */
 1924 void
 1925 unp_dispose(struct mbuf *m)
 1926 {
 1927 
 1928         if (m)
 1929                 unp_scan(m, unp_discard);
 1930 }
 1931 
 /*
  * Move "so" into the listening state on behalf of thread td.
  *
  * With the socket lock held, solisten_proto_check() is consulted first.
  * Only when it returns 0 are td's credentials copied into
  * unp->unp_peercred (via cru2x), UNP_HAVEPCCACHED set in unp->unp_flags and
  * solisten_proto() called.
  *
  * Returns the value of solisten_proto_check().  UNP_LOCK_ASSERT() is
  * evaluated on entry -- presumably the caller must already hold the UNP
  * lock; confirm against the macro's definition.
  */
 1932 static int
 1933 unp_listen(struct socket *so, struct unpcb *unp, struct thread *td)
 1934 {
 1935         int error;
 1936 
 1937         UNP_LOCK_ASSERT();
 1938 
 1939         SOCK_LOCK(so);
 1940         error = solisten_proto_check(so);
 1941         if (error == 0) {
 1942                 cru2x(td->td_ucred, &unp->unp_peercred);
 1943                 unp->unp_flags |= UNP_HAVEPCCACHED;
 1944                 solisten_proto(so);
 1945         }
 1946         SOCK_UNLOCK(so);
 1947         return (error);
 1948 }
 1949 
 /*
  * Apply "op" to every struct file pointer stored in SCM_RIGHTS control
  * messages reachable from m0.
  *
  * m0 heads a list of records linked through m_act; each record is an mbuf
  * chain linked through m_next.  Only mbufs of type MT_CONTROL are examined,
  * and within them only cmsghdrs with level SOL_SOCKET and type SCM_RIGHTS
  * have their payload handed to op, one struct file pointer at a time.
  */
 1950 static void
 1951 unp_scan(struct mbuf *m0, void (*op)(struct file *))
 1952 {
 1953         struct mbuf *m;
 1954         struct file **rp;
 1955         struct cmsghdr *cm;
 1956         void *data;
 1957         int i;
 1958         socklen_t clen, datalen;
 1959         int qfds;
 1960 
 1961         while (m0 != NULL) {
 1962                 for (m = m0; m; m = m->m_next) {
 1963                         if (m->m_type != MT_CONTROL)
 1964                                 continue;
 1965 
 1966                         cm = mtod(m, struct cmsghdr *);
 1967                         clen = m->m_len;
 1968 
 1969                         while (cm != NULL) {
                                      /*
                                       * Stop parsing this mbuf when either a
                                       * header or the length it declares no
                                       * longer fits in the bytes remaining.
                                       */
 1970                                 if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 1971                                         break;
 1972 
                                      /* Payload length = cmsg_len minus the header part. */
 1973                                 data = CMSG_DATA(cm);
 1974                                 datalen = (caddr_t)cm + cm->cmsg_len
 1975                                     - (caddr_t)data;
 1976 
 1977                                 if (cm->cmsg_level == SOL_SOCKET &&
 1978                                     cm->cmsg_type == SCM_RIGHTS) {
 1979                                         qfds = datalen / sizeof (struct file *);
 1980                                         rp = data;
 1981                                         for (i = 0; i < qfds; i++)
 1982                                                 (*op)(*rp++);
 1983                                 }
 1984 
                                      /*
                                       * Step to the next cmsghdr if bytes
                                       * remain beyond this one's CMSG_SPACE;
                                       * otherwise end the inner loop.
                                       */
 1985                                 if (CMSG_SPACE(datalen) < clen) {
 1986                                         clen -= CMSG_SPACE(datalen);
 1987                                         cm = (struct cmsghdr *)
 1988                                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1989                                 } else {
 1990                                         clen = 0;
 1991                                         cm = NULL;
 1992                                 }
 1993                         }
 1994                 }
 1995                 m0 = m0->m_act;
 1996         }
 1997 }
 1998 
 /*
  * Per-file callback used by unp_gc()'s scan of socket receive buffers.
  * A file that already has FMARK set is left alone; otherwise unp_defer is
  * incremented and both FMARK and FDEFER are set on the file, so that the
  * unp_gc() loop runs another pass and examines it.
  */
 1999 static void
 2000 unp_mark(struct file *fp)
 2001 {
 2002         if (fp->f_gcflag & FMARK)
 2003                 return;
 2004         unp_defer++;
 2005         fp->f_gcflag |= (FMARK|FDEFER);
 2006 }
 2007 
 /*
  * Drop one in-transit reference to fp.
  *
  * f_msgcount and the global unp_rights count are decremented while both
  * the UNP lock and the file lock are held.  Both locks are released before
  * closef() is called on the file; its return value is deliberately ignored.
  */
 2008 static void
 2009 unp_discard(struct file *fp)
 2010 {
 2011         UNP_LOCK();
 2012         FILE_LOCK(fp);
 2013         fp->f_msgcount--;
 2014         unp_rights--;
 2015         FILE_UNLOCK(fp);
 2016         UNP_UNLOCK();
 2017         (void) closef(fp, (struct thread *)NULL);
 2018 }

Cache object: 4af46aaecaa4ad892b8bc126d01eb584


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.