The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    3  *      The Regents of the University of California.
    4  * Copyright (c) 2004-2007 Robert N. M. Watson
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 4. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   32  */
   33 
   34 /*
   35  * UNIX Domain (Local) Sockets
   36  *
   37  * This is an implementation of UNIX (local) domain sockets.  Each socket has
   38  * an associated struct unpcb (UNIX protocol control block).  Stream sockets
   39  * may be connected to 0 or 1 other socket.  Datagram sockets may be
   40  * connected to 0, 1, or many other sockets.  Sockets may be created and
   41  * connected in pairs (socketpair(2)), or bound/connected to using the file
   42  * system name space.  For most purposes, only the receive socket buffer is
   43  * used, as sending on one socket delivers directly to the receive socket
   44  * buffer of a second socket.  The implementation is substantially
   45  * complicated by the fact that "ancillary data", such as file descriptors or
   46  * credentials, may be passed across UNIX domain sockets.  The potential for
   47  * passing UNIX domain sockets over other UNIX domain sockets requires the
   48  * implementation of a simple garbage collector to find and tear down cycles
   49  * of disconnected sockets.
   50  */
   51 
   52 #include <sys/cdefs.h>
   53 __FBSDID("$FreeBSD$");
   54 
   55 #include "opt_mac.h"
   56 
   57 #include <sys/param.h>
   58 #include <sys/domain.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
   61 #include <sys/eventhandler.h>
   62 #include <sys/file.h>
   63 #include <sys/filedesc.h>
   64 #include <sys/jail.h>
   65 #include <sys/kernel.h>
   66 #include <sys/lock.h>
   67 #include <sys/mac.h>
   68 #include <sys/mbuf.h>
   69 #include <sys/mount.h>
   70 #include <sys/mutex.h>
   71 #include <sys/namei.h>
   72 #include <sys/proc.h>
   73 #include <sys/protosw.h>
   74 #include <sys/resourcevar.h>
   75 #include <sys/socket.h>
   76 #include <sys/socketvar.h>
   77 #include <sys/signalvar.h>
   78 #include <sys/stat.h>
   79 #include <sys/sx.h>
   80 #include <sys/sysctl.h>
   81 #include <sys/systm.h>
   82 #include <sys/taskqueue.h>
   83 #include <sys/un.h>
   84 #include <sys/unpcb.h>
   85 #include <sys/vnode.h>
   86 
   87 #include <vm/uma.h>
   88 
/*
 * We allocate wrapper objects that add the reference count to an existing
 * unpcb in 6.x to preserve the ABI layout of unpcb.
 *
 * The unpcb is the first member of the wrapper, which is what lets
 * UNP_REFCOUNT() cast a struct unpcb pointer to a wrapper pointer.
 */
struct unpcb_wrapper {
        struct  unpcb unpw_unpcb;       /* embedded pcb; must remain first */
        u_int   unpw_refcount;          /* reference count kept beside the pcb */
};

/*
 * Yield the reference count stored alongside a unpcb.  Only meaningful for a
 * unpcb that is embedded in a struct unpcb_wrapper.
 */
#define UNP_REFCOUNT(unp)       (((struct unpcb_wrapper *)(unp))->unpw_refcount)
   99 
/*
 * Subsystem-wide state.  NOTE(review): the roles noted below are inferred
 * from names and types only; confirm against the code that uses them.
 */
static uma_zone_t unp_zone;     /* UMA zone; presumably backs pcb allocation */
static  unp_gen_t unp_gencnt;   /* generation counter */
static  u_int unp_count;        /* presumably the number of live pcbs */

/* pcb list heads; presumably one for stream and one for datagram sockets. */
static  struct unp_head unp_shead, unp_dhead;
  105 
/*
 * Unix communications domain.
 *
 * TODO:
 *      SEQPACKET, RDM
 *      rethink name space problems
 *      need a proper out-of-band
 *      lock pushdown
 */

/*
 * Placeholder address (length and AF_LOCAL family only) that is handed back
 * whenever a socket, or its peer, has no bound address to report.
 */
static const struct     sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
static ino_t    unp_ino;                /* prototype for fake inode numbers */

/* NOTE(review): not static, so presumably also referenced outside this file. */
struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);
  118 
/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
 * stream sockets, although the total for sender and receiver is actually
 * only PIPSIZ.
 *
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should be
 * large enough for at least one max-size datagram plus address.
 *
 * PIPSIZ may be overridden at build time; 8192 is only the fallback.
 */
#ifndef PIPSIZ
#define PIPSIZ  8192
#endif
static u_long   unpst_sendspace = PIPSIZ;       /* stream send space */
static u_long   unpst_recvspace = PIPSIZ;       /* stream receive space */
static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
static u_long   unpdg_recvspace = 4*1024;       /* datagram receive space */

static int      unp_rights;                     /* file descriptors in flight */
  137 
/*
 * Sysctl tree for the local domain:
 *
 *      net.local.stream.sendspace      (read-write)    unpst_sendspace
 *      net.local.stream.recvspace      (read-write)    unpst_recvspace
 *      net.local.dgram.maxdgram        (read-write)    unpdg_sendspace
 *      net.local.dgram.recvspace       (read-write)    unpdg_recvspace
 *      net.local.inflight              (read-only)     unp_rights
 */
SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain");
SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0, "SOCK_STREAM");
SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM");

SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
           &unpst_sendspace, 0, "");
SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
           &unpst_recvspace, 0, "");
SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
           &unpdg_sendspace, 0, "");
SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
           &unpdg_recvspace, 0, "");
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
  151 
/*
 * Currently, UNIX domain sockets are protected by a single subsystem lock,
 * which covers global data structures and variables, the contents of each
 * per-socket unpcb structure, and the so_pcb field in sockets attached to
 * the UNIX domain.  This provides for a moderate degree of parallelism, as
 * receive operations on UNIX domain sockets do not need to acquire the
 * subsystem lock.  Finer grained locking to permit send() without acquiring
 * a global lock would be a logical next step.
 *
 * The UNIX domain socket lock precedes all socket layer locks, including the
 * socket lock and socket buffer lock, permitting UNIX domain socket code to
 * call into socket support routines without releasing its locks.
 *
 * Some caution is required in areas where the UNIX domain socket code enters
 * VFS in order to create or find rendezvous points.  This results in
 * dropping of the UNIX domain socket subsystem lock, acquisition of the
 * Giant lock, and potential sleeping.  This increases the chances of races,
 * and exposes weaknesses in the socket->protocol API by offering poor
 * failure modes.
 */
static struct mtx unp_mtx;

/* The subsystem mutex is created as a default mutex that permits recursion. */
#define UNP_LOCK_INIT() \
        mtx_init(&unp_mtx, "unp", NULL, MTX_DEF | MTX_RECURSE)
#define UNP_LOCK()              mtx_lock(&unp_mtx)
#define UNP_UNLOCK()            mtx_unlock(&unp_mtx)
/* Assert that the current thread does / does not own the subsystem lock. */
#define UNP_LOCK_ASSERT()       mtx_assert(&unp_mtx, MA_OWNED)
#define UNP_UNLOCK_ASSERT()     mtx_assert(&unp_mtx, MA_NOTOWNED)
  179 
/*
 * Garbage collection of cyclic file descriptor/socket references occurs
 * asynchronously in a taskqueue context in order to avoid recursion and
 * reentrance in the UNIX domain socket, file descriptor, and socket layer
 * code.  See unp_gc() for a full description.
 */
static struct task      unp_gc_task;

/*
 * Prototypes for file-local helpers.  Several of them are called by the
 * uipc_*() protocol entry points that follow.
 */
static int     unp_attach(struct socket *);
static void    unp_detach(struct unpcb *);
static int     unp_connect(struct socket *,struct sockaddr *, struct thread *);
static int     unp_connect2(struct socket *so, struct socket *so2, int);
static void    unp_disconnect(struct unpcb *);
static void    unp_shutdown(struct unpcb *);
static void    unp_drop(struct unpcb *, int);
static void    unp_gc(__unused void *, int);
static void    unp_scan(struct mbuf *, void (*)(struct file *));
static void    unp_mark(struct file *);
static void    unp_discard(struct file *);
static void    unp_freerights(struct file **, int);
static int     unp_internalize(struct mbuf **, struct thread *);
static int     unp_listen(struct socket *, struct unpcb *, struct thread *);
  202 
  203 static int
  204 uipc_abort(struct socket *so)
  205 {
  206         struct unpcb *unp;
  207 
  208         UNP_LOCK();
  209         unp = sotounpcb(so);
  210         if (unp == NULL) {
  211                 UNP_UNLOCK();
  212                 return (EINVAL);
  213         }
  214         unp_drop(unp, ECONNABORTED);
  215         unp_detach(unp);
  216         UNP_UNLOCK_ASSERT();
  217         ACCEPT_LOCK();
  218         SOCK_LOCK(so);
  219         sotryfree(so);
  220         return (0);
  221 }
  222 
  223 static int
  224 uipc_accept(struct socket *so, struct sockaddr **nam)
  225 {
  226         struct unpcb *unp;
  227         const struct sockaddr *sa;
  228 
  229         /*
  230          * Pass back name of connected socket, if it was bound and we are
  231          * still connected (our peer may have closed already!).
  232          */
  233         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  234         UNP_LOCK();
  235         unp = sotounpcb(so);
  236         if (unp == NULL) {
  237                 UNP_UNLOCK();
  238                 free(*nam, M_SONAME);
  239                 *nam = NULL;
  240                 return (EINVAL);
  241         }
  242         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL)
  243                 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
  244         else
  245                 sa = &sun_noname;
  246         bcopy(sa, *nam, sa->sa_len);
  247         UNP_UNLOCK();
  248         return (0);
  249 }
  250 
  251 static int
  252 uipc_attach(struct socket *so, int proto, struct thread *td)
  253 {
  254         struct unpcb *unp = sotounpcb(so);
  255 
  256         if (unp != NULL)
  257                 return (EISCONN);
  258         return (unp_attach(so));
  259 }
  260 
/*
 * Bind a socket to a name in the file system by creating a VSOCK vnode at
 * the path carried in nam.
 *
 * Returns 0 on success.  Failures visible here: EINVAL (empty path, no pcb,
 * or the socket already has a vnode), EALREADY (another bind is in
 * progress), EADDRINUSE (the path already exists), or whatever namei(),
 * vn_start_write(), the MAC check or VOP_CREATE() return.
 */
static int
uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
        struct sockaddr_un *soun = (struct sockaddr_un *)nam;
        struct vattr vattr;
        int error, namelen;
        struct nameidata nd;
        struct unpcb *unp;
        struct vnode *vp;
        struct mount *mp;
        char *buf;

        /* Length of the path portion of the supplied address. */
        namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
        if (namelen <= 0)
                return (EINVAL);

        UNP_LOCK();
        unp = sotounpcb(so);
        if (unp == NULL) {
                UNP_UNLOCK();
                return (EINVAL);
        }

        /*
         * We don't allow simultaneous bind() calls on a single UNIX domain
         * socket, so flag in-progress operations, and return an error if an
         * operation is already in progress.
         *
         * Historically, we have not allowed a socket to be rebound, so this
         * also returns an error.  Not allowing re-binding certainly
         * simplifies the implementation and avoids a great many possible
         * failure modes.
         */
        if (unp->unp_vnode != NULL) {
                UNP_UNLOCK();
                return (EINVAL);
        }
        if (unp->unp_flags & UNP_BINDING) {
                UNP_UNLOCK();
                return (EALREADY);
        }
        /*
         * UNP_BINDING stays set while the subsystem lock is dropped for the
         * allocation and VFS work below; every exit path past this point
         * clears it again.
         */
        unp->unp_flags |= UNP_BINDING;
        UNP_UNLOCK();

        /* NUL-terminated private copy of the path for the name lookup. */
        buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
        strlcpy(buf, soun->sun_path, namelen + 1);

        mtx_lock(&Giant);
restart:
        mtx_assert(&Giant, MA_OWNED);
        NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
            buf, td);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
        error = namei(&nd);
        if (error)
                goto error;
        vp = nd.ni_vp;
        /*
         * Either the name already exists (vp != NULL), or a write to the
         * parent directory's mount could not be started without waiting.
         * Both cases release what the lookup returned; the first fails the
         * bind, the second waits and then retries the lookup.
         */
        if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
                NDFREE(&nd, NDF_ONLY_PNBUF);
                if (nd.ni_dvp == vp)
                        vrele(nd.ni_dvp);
                else
                        vput(nd.ni_dvp);
                if (vp != NULL) {
                        vrele(vp);
                        error = EADDRINUSE;
                        goto error;
                }
                error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
                if (error)
                        goto error;
                goto restart;
        }
        VATTR_NULL(&vattr);
        vattr.va_type = VSOCK;
        /* Default permissions, filtered through the process file mode mask. */
        vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
#ifdef MAC
        error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
            &vattr);
#endif
        /*
         * Without MAC, error is still 0 here from the successful namei()
         * above, so the create always proceeds.
         */
        if (error == 0) {
                VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
                error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
        }
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vput(nd.ni_dvp);
        if (error) {
                vn_finished_write(mp);
                goto error;
        }
        vp = nd.ni_vp;
        ASSERT_VOP_ELOCKED(vp, "uipc_bind");
        /* Duplicate the address before retaking the subsystem lock. */
        soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
        UNP_LOCK();
        /* Link the new vnode and the socket, and record the bound address. */
        vp->v_socket = unp->unp_socket;
        unp->unp_vnode = vp;
        unp->unp_addr = soun;
        unp->unp_flags &= ~UNP_BINDING;
        UNP_UNLOCK();
        VOP_UNLOCK(vp, 0, td);
        vn_finished_write(mp);
        mtx_unlock(&Giant);
        free(buf, M_TEMP);
        return (0);
error:
        /* Common failure exit: clear the in-progress flag and clean up. */
        UNP_LOCK();
        unp->unp_flags &= ~UNP_BINDING;
        UNP_UNLOCK();
        mtx_unlock(&Giant);
        free(buf, M_TEMP);
        return (error);
}
  373 
  374 static int
  375 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  376 {
  377         struct unpcb *unp;
  378         int error;
  379 
  380         KASSERT(td == curthread, ("uipc_connect: td != curthread"));
  381 
  382         UNP_LOCK();
  383         unp = sotounpcb(so);
  384         if (unp == NULL) {
  385                 UNP_UNLOCK();
  386                 return (EINVAL);
  387         }
  388         error = unp_connect(so, nam, td);
  389         UNP_UNLOCK();
  390         return (error);
  391 }
  392 
  393 int
  394 uipc_connect2(struct socket *so1, struct socket *so2)
  395 {
  396         struct unpcb *unp;
  397         int error;
  398 
  399         UNP_LOCK();
  400         unp = sotounpcb(so1);
  401         if (unp == NULL) {
  402                 UNP_UNLOCK();
  403                 return (EINVAL);
  404         }
  405         error = unp_connect2(so1, so2, PRU_CONNECT2);
  406         UNP_UNLOCK();
  407         return (error);
  408 }
  409 
  410 /* control is EOPNOTSUPP */
  411 
  412 static int
  413 uipc_detach(struct socket *so)
  414 {
  415         struct unpcb *unp;
  416 
  417         UNP_LOCK();
  418         unp = sotounpcb(so);
  419         if (unp == NULL) {
  420                 UNP_UNLOCK();
  421                 return (EINVAL);
  422         }
  423         unp_detach(unp);
  424         UNP_UNLOCK_ASSERT();
  425         return (0);
  426 }
  427 
  428 static int
  429 uipc_disconnect(struct socket *so)
  430 {
  431         struct unpcb *unp;
  432 
  433         UNP_LOCK();
  434         unp = sotounpcb(so);
  435         if (unp == NULL) {
  436                 UNP_UNLOCK();
  437                 return (EINVAL);
  438         }
  439         unp_disconnect(unp);
  440         UNP_UNLOCK();
  441         return (0);
  442 }
  443 
  444 static int
  445 uipc_listen(struct socket *so, struct thread *td)
  446 {
  447         struct unpcb *unp;
  448         int error;
  449 
  450         UNP_LOCK();
  451         unp = sotounpcb(so);
  452         if (unp == NULL || unp->unp_vnode == NULL) {
  453                 UNP_UNLOCK();
  454                 return (EINVAL);
  455         }
  456         error = unp_listen(so, unp, td);
  457         UNP_UNLOCK();
  458         return (error);
  459 }
  460 
  461 static int
  462 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  463 {
  464         struct unpcb *unp;
  465         const struct sockaddr *sa;
  466 
  467         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  468         UNP_LOCK();
  469         unp = sotounpcb(so);
  470         if (unp == NULL) {
  471                 UNP_UNLOCK();
  472                 free(*nam, M_SONAME);
  473                 *nam = NULL;
  474                 return (EINVAL);
  475         }
  476         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL)
  477                 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
  478         else {
  479                 /*
  480                  * XXX: It seems that this test always fails even when
  481                  * connection is established.  So, this else clause is
  482                  * added as workaround to return PF_LOCAL sockaddr.
  483                  */
  484                 sa = &sun_noname;
  485         }
  486         bcopy(sa, *nam, sa->sa_len);
  487         UNP_UNLOCK();
  488         return (0);
  489 }
  490 
  491 static int
  492 uipc_rcvd(struct socket *so, int flags)
  493 {
  494         struct unpcb *unp;
  495         struct socket *so2;
  496         u_int mbcnt, sbcc;
  497         u_long newhiwat;
  498 
  499         UNP_LOCK();
  500         unp = sotounpcb(so);
  501         if (unp == NULL) {
  502                 UNP_UNLOCK();
  503                 return (EINVAL);
  504         }
  505         switch (so->so_type) {
  506         case SOCK_DGRAM:
  507                 panic("uipc_rcvd DGRAM?");
  508                 /*NOTREACHED*/
  509 
  510         case SOCK_STREAM:
  511                 /*
  512                  * Adjust backpressure on sender and wakeup any waiting to
  513                  * write.
  514                  */
  515                 SOCKBUF_LOCK(&so->so_rcv);
  516                 mbcnt = so->so_rcv.sb_mbcnt;
  517                 sbcc = so->so_rcv.sb_cc;
  518                 SOCKBUF_UNLOCK(&so->so_rcv);
  519                 if (unp->unp_conn == NULL)
  520                         break;
  521                 so2 = unp->unp_conn->unp_socket;
  522                 SOCKBUF_LOCK(&so2->so_snd);
  523                 so2->so_snd.sb_mbmax += unp->unp_mbcnt - mbcnt;
  524                 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - sbcc;
  525                 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
  526                     newhiwat, RLIM_INFINITY);
  527                 sowwakeup_locked(so2);
  528                 unp->unp_mbcnt = mbcnt;
  529                 unp->unp_cc = so->so_rcv.sb_cc;
  530                 break;
  531 
  532         default:
  533                 panic("uipc_rcvd unknown socktype");
  534         }
  535         UNP_UNLOCK();
  536         return (0);
  537 }
  538 
  539 /* pru_rcvoob is EOPNOTSUPP */
  540 
  541 static int
  542 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  543     struct mbuf *control, struct thread *td)
  544 {
  545         struct unpcb *unp, *unp2;
  546         struct socket *so2;
  547         u_int mbcnt, sbcc;
  548         u_long newhiwat;
  549         int error = 0;
  550 
  551         unp = sotounpcb(so);
  552         if (unp == NULL) {
  553                 error = EINVAL;
  554                 goto release;
  555         }
  556         if (flags & PRUS_OOB) {
  557                 error = EOPNOTSUPP;
  558                 goto release;
  559         }
  560 
  561         if (control != NULL && (error = unp_internalize(&control, td)))
  562                 goto release;
  563 
  564         UNP_LOCK();
  565         unp = sotounpcb(so);
  566         if (unp == NULL) {
  567                 UNP_UNLOCK();
  568                 error = EINVAL;
  569                 goto dispose_release;
  570         }
  571 
  572         switch (so->so_type) {
  573         case SOCK_DGRAM:
  574         {
  575                 const struct sockaddr *from;
  576 
  577                 if (nam != NULL) {
  578                         if (unp->unp_conn != NULL) {
  579                                 error = EISCONN;
  580                                 break;
  581                         }
  582                         error = unp_connect(so, nam, td);
  583                         if (error)
  584                                 break;
  585                 }
  586                 /*
  587                  * Because connect() and send() are non-atomic in a sendto()
  588                  * with a target address, it's possible that the socket will
  589                  * have disconnected before the send() can run.  In that case
  590                  * return the slightly counter-intuitive but otherwise
  591                  * correct error that the socket is not connected.
  592                  */
  593                 if (unp->unp_conn == NULL) {
  594                         error = ENOTCONN;
  595                         break;
  596                 }
  597                 unp2 = unp->unp_conn;
  598                 so2 = unp2->unp_socket;
  599                 if (unp->unp_addr != NULL)
  600                         from = (struct sockaddr *)unp->unp_addr;
  601                 else
  602                         from = &sun_noname;
  603                 if (unp2->unp_flags & UNP_WANTCRED)
  604                         control = unp_addsockcred(td, control);
  605                 SOCKBUF_LOCK(&so2->so_rcv);
  606                 if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
  607                         sorwakeup_locked(so2);
  608                         m = NULL;
  609                         control = NULL;
  610                 } else {
  611                         SOCKBUF_UNLOCK(&so2->so_rcv);
  612                         error = ENOBUFS;
  613                 }
  614                 if (nam != NULL)
  615                         unp_disconnect(unp);
  616                 break;
  617         }
  618 
  619         case SOCK_STREAM:
  620                 /*
  621                  * Connect if not connected yet.
  622                  *
  623                  * Note: A better implementation would complain if not equal
  624                  * to the peer's address.
  625                  */
  626                 if ((so->so_state & SS_ISCONNECTED) == 0) {
  627                         if (nam != NULL) {
  628                                 error = unp_connect(so, nam, td);
  629                                 if (error)
  630                                         break;  /* XXX */
  631                         } else {
  632                                 error = ENOTCONN;
  633                                 break;
  634                         }
  635                 }
  636 
  637                 /* Lockless read. */
  638                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
  639                         error = EPIPE;
  640                         break;
  641                 }
  642                 /*
  643                  * Because connect() and send() are non-atomic in a sendto()
  644                  * with a target address, it's possible that the socket will
  645                  * have disconnected before the send() can run.  In that case
  646                  * return the slightly counter-intuitive but otherwise
  647                  * correct error that the socket is not connected.
  648                  */
  649                 unp2 = unp->unp_conn;
  650                 if (unp2 == NULL) {
  651                         SOCKBUF_UNLOCK(&so->so_snd);
  652                         error = ENOTCONN;
  653                         break;
  654                 }
  655                 so2 = unp2->unp_socket;
  656                 SOCKBUF_LOCK(&so2->so_rcv);
  657                 if (unp2->unp_flags & UNP_WANTCRED) {
  658                         /*
  659                          * Credentials are passed only once on
  660                          * SOCK_STREAM.
  661                          */
  662                         unp2->unp_flags &= ~UNP_WANTCRED;
  663                         control = unp_addsockcred(td, control);
  664                 }
  665                 /*
  666                  * Send to paired receive port, and then reduce send buffer
  667                  * hiwater marks to maintain backpressure.  Wake up readers.
  668                  */
  669                 if (control != NULL) {
  670                         if (sbappendcontrol_locked(&so2->so_rcv, m, control))
  671                                 control = NULL;
  672                 } else {
  673                         sbappend_locked(&so2->so_rcv, m);
  674                 }
  675                 mbcnt = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt;
  676                 unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt;
  677                 sbcc = so2->so_rcv.sb_cc;
  678                 sorwakeup_locked(so2);
  679 
  680                 SOCKBUF_LOCK(&so->so_snd);
  681                 newhiwat = so->so_snd.sb_hiwat - (sbcc - unp2->unp_cc);
  682                 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
  683                     newhiwat, RLIM_INFINITY);
  684                 so->so_snd.sb_mbmax -= mbcnt;
  685                 SOCKBUF_UNLOCK(&so->so_snd);
  686 
  687                 unp2->unp_cc = sbcc;
  688                 m = NULL;
  689                 break;
  690 
  691         default:
  692                 panic("uipc_send unknown socktype");
  693         }
  694 
  695         /*
  696          * SEND_EOF is equivalent to a SEND followed by
  697          * a SHUTDOWN.
  698          */
  699         if (flags & PRUS_EOF) {
  700                 socantsendmore(so);
  701                 unp_shutdown(unp);
  702         }
  703         UNP_UNLOCK();
  704 
  705 dispose_release:
  706         if (control != NULL && error != 0)
  707                 unp_dispose(control);
  708 
  709 release:
  710         if (control != NULL)
  711                 m_freem(control);
  712         if (m != NULL)
  713                 m_freem(m);
  714         return (error);
  715 }
  716 
  717 static int
  718 uipc_sense(struct socket *so, struct stat *sb)
  719 {
  720         struct unpcb *unp;
  721         struct socket *so2;
  722 
  723         UNP_LOCK();
  724         unp = sotounpcb(so);
  725         if (unp == NULL) {
  726                 UNP_UNLOCK();
  727                 return (EINVAL);
  728         }
  729         sb->st_blksize = so->so_snd.sb_hiwat;
  730         if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
  731                 so2 = unp->unp_conn->unp_socket;
  732                 sb->st_blksize += so2->so_rcv.sb_cc;
  733         }
  734         sb->st_dev = NODEV;
  735         if (unp->unp_ino == 0)
  736                 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
  737         sb->st_ino = unp->unp_ino;
  738         UNP_UNLOCK();
  739         return (0);
  740 }
  741 
  742 static int
  743 uipc_shutdown(struct socket *so)
  744 {
  745         struct unpcb *unp;
  746 
  747         UNP_LOCK();
  748         unp = sotounpcb(so);
  749         if (unp == NULL) {
  750                 UNP_UNLOCK();
  751                 return (EINVAL);
  752         }
  753         socantsendmore(so);
  754         unp_shutdown(unp);
  755         UNP_UNLOCK();
  756         return (0);
  757 }
  758 
  759 static int
  760 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
  761 {
  762         struct unpcb *unp;
  763         const struct sockaddr *sa;
  764 
  765         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  766         UNP_LOCK();
  767         unp = sotounpcb(so);
  768         if (unp == NULL) {
  769                 UNP_UNLOCK();
  770                 free(*nam, M_SONAME);
  771                 *nam = NULL;
  772                 return (EINVAL);
  773         }
  774         if (unp->unp_addr != NULL)
  775                 sa = (struct sockaddr *) unp->unp_addr;
  776         else
  777                 sa = &sun_noname;
  778         bcopy(sa, *nam, sa->sa_len);
  779         UNP_UNLOCK();
  780         return (0);
  781 }
  782 
/*
 * User-request dispatch table for the local domain.  The uipc_*() handlers
 * are defined in this file; send, receive and poll use the generic
 * sosend(), soreceive() and sopoll() routines.  Members not named here are
 * left zero-initialized.
 */
struct pr_usrreqs uipc_usrreqs = {
        .pru_abort =            uipc_abort,
        .pru_accept =           uipc_accept,
        .pru_attach =           uipc_attach,
        .pru_bind =             uipc_bind,
        .pru_connect =          uipc_connect,
        .pru_connect2 =         uipc_connect2,
        .pru_detach =           uipc_detach,
        .pru_disconnect =       uipc_disconnect,
        .pru_listen =           uipc_listen,
        .pru_peeraddr =         uipc_peeraddr,
        .pru_rcvd =             uipc_rcvd,
        .pru_send =             uipc_send,
        .pru_sense =            uipc_sense,
        .pru_shutdown =         uipc_shutdown,
        .pru_sockaddr =         uipc_sockaddr,
        .pru_sosend =           sosend,
        .pru_soreceive =        soreceive,
        .pru_sopoll =           sopoll,
};
  803 
  804 int
  805 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
  806 {
  807         struct unpcb *unp;
  808         struct xucred xu;
  809         int error, optval;
  810 
  811         if (sopt->sopt_level != 0)
  812                 return (EINVAL);
  813 
  814         UNP_LOCK();
  815         unp = sotounpcb(so);
  816         if (unp == NULL) {
  817                 UNP_UNLOCK();
  818                 return (EINVAL);
  819         }
  820         error = 0;
  821 
  822         switch (sopt->sopt_dir) {
  823         case SOPT_GET:
  824                 switch (sopt->sopt_name) {
  825                 case LOCAL_PEERCRED:
  826                         if (unp->unp_flags & UNP_HAVEPC)
  827                                 xu = unp->unp_peercred;
  828                         else {
  829                                 if (so->so_type == SOCK_STREAM)
  830                                         error = ENOTCONN;
  831                                 else
  832                                         error = EINVAL;
  833                         }
  834                         if (error == 0)
  835                                 error = sooptcopyout(sopt, &xu, sizeof(xu));
  836                         break;
  837                 case LOCAL_CREDS:
  838                         optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0;
  839                         error = sooptcopyout(sopt, &optval, sizeof(optval));
  840                         break;
  841                 case LOCAL_CONNWAIT:
  842                         optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
  843                         error = sooptcopyout(sopt, &optval, sizeof(optval));
  844                         break;
  845                 default:
  846                         error = EOPNOTSUPP;
  847                         break;
  848                 }
  849                 break;
  850         case SOPT_SET:
  851                 switch (sopt->sopt_name) {
  852                 case LOCAL_CREDS:
  853                 case LOCAL_CONNWAIT:
  854                         error = sooptcopyin(sopt, &optval, sizeof(optval),
  855                                             sizeof(optval));
  856                         if (error)
  857                                 break;
  858 
  859 #define OPTSET(bit) \
  860         if (optval) \
  861                 unp->unp_flags |= bit; \
  862         else \
  863                 unp->unp_flags &= ~bit;
  864 
  865                         switch (sopt->sopt_name) {
  866                         case LOCAL_CREDS:
  867                                 OPTSET(UNP_WANTCRED);
  868                                 break;
  869                         case LOCAL_CONNWAIT:
  870                                 OPTSET(UNP_CONNWAIT);
  871                                 break;
  872                         default:
  873                                 break;
  874                         }
  875                         break;
  876 #undef  OPTSET
  877                 default:
  878                         error = ENOPROTOOPT;
  879                         break;
  880                 }
  881                 break;
  882         default:
  883                 error = EOPNOTSUPP;
  884                 break;
  885         }
  886         UNP_UNLOCK();
  887         return (error);
  888 }
  889 
  890 static int
  891 unp_attach(struct socket *so)
  892 {
  893         struct unpcb *unp;
  894         int error;
  895 
  896         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  897                 switch (so->so_type) {
  898                 case SOCK_STREAM:
  899                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
  900                         break;
  901 
  902                 case SOCK_DGRAM:
  903                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
  904                         break;
  905 
  906                 default:
  907                         panic("unp_attach");
  908                 }
  909                 if (error)
  910                         return (error);
  911         }
  912         unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
  913         if (unp == NULL)
  914                 return (ENOBUFS);
  915         LIST_INIT(&unp->unp_refs);
  916         unp->unp_socket = so;
  917         so->so_pcb = unp;
  918 
  919         UNP_REFCOUNT(unp) = 1;
  920         UNP_LOCK();
  921         unp->unp_gencnt = ++unp_gencnt;
  922         unp_count++;
  923         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead : &unp_shead,
  924             unp, unp_link);
  925         UNP_UNLOCK();
  926 
  927         return (0);
  928 }
  929 
  930 /*
  931  * Definitions of protocols supported in the LOCAL domain.
  932  */
  933 static struct domain localdomain;
  934 static struct protosw localsw[] = {
  935 {
  936         .pr_type =              SOCK_STREAM,
  937         .pr_domain =            &localdomain,
  938         .pr_flags =             PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
  939         .pr_ctloutput =         &uipc_ctloutput,
  940         .pr_usrreqs =           &uipc_usrreqs
  941 },
  942 {
  943         .pr_type =              SOCK_DGRAM,
  944         .pr_domain =            &localdomain,
  945         .pr_flags =             PR_ATOMIC|PR_ADDR|PR_RIGHTS,
  946         .pr_usrreqs =           &uipc_usrreqs
  947 },
  948 };
  949 
  950 static struct domain localdomain = {
  951         .dom_family =           AF_LOCAL,
  952         .dom_name =             "local",
  953         .dom_init =             unp_init,
  954         .dom_externalize =      unp_externalize,
  955         .dom_dispose =          unp_dispose,
  956         .dom_protosw =          localsw,
  957         .dom_protoswNPROTOSW =  &localsw[sizeof(localsw)/sizeof(localsw[0])]
  958 };
  959 DOMAIN_SET(local);
  960 
  961 static void
  962 unp_detach(struct unpcb *unp)
  963 {
  964         struct sockaddr_un *saved_unp_addr;     
  965         struct vnode *vp;
  966         int local_unp_rights;
  967         int freeunp;
  968 
  969         UNP_LOCK_ASSERT();
  970 
  971         LIST_REMOVE(unp, unp_link);
  972         unp->unp_gencnt = ++unp_gencnt;
  973         --unp_count;
  974         if ((vp = unp->unp_vnode) != NULL) {
  975                 /*
  976                  * XXXRW: should v_socket be frobbed only while holding
  977                  * Giant?
  978                  */
  979                 unp->unp_vnode->v_socket = NULL;
  980                 unp->unp_vnode = NULL;
  981         }
  982         if (unp->unp_conn != NULL)
  983                 unp_disconnect(unp);
  984         while (!LIST_EMPTY(&unp->unp_refs)) {
  985                 struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
  986                 unp_drop(ref, ECONNRESET);
  987         }
  988         soisdisconnected(unp->unp_socket);
  989         unp->unp_socket->so_pcb = NULL;
  990         local_unp_rights = unp_rights;
  991         saved_unp_addr = unp->unp_addr;
  992         unp->unp_addr = NULL;
  993         UNP_REFCOUNT(unp)--;
  994         freeunp = (UNP_REFCOUNT(unp) == 0);
  995         UNP_UNLOCK();
  996         if (saved_unp_addr != NULL)
  997                 FREE(saved_unp_addr, M_SONAME);
  998         if (freeunp)
  999                 uma_zfree(unp_zone, unp);
 1000         if (vp) {
 1001                 int vfslocked;
 1002 
 1003                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1004                 vrele(vp);
 1005                 VFS_UNLOCK_GIANT(vfslocked);
 1006         }
 1007         if (local_unp_rights)
 1008                 taskqueue_enqueue(taskqueue_thread, &unp_gc_task);
 1009 }
 1010 
 1011 static int
 1012 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 1013 {
 1014         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 1015         struct vnode *vp;
 1016         struct socket *so2, *so3;
 1017         struct unpcb *unp, *unp2, *unp3;
 1018         int error, len;
 1019         struct nameidata nd;
 1020         char buf[SOCK_MAXADDRLEN];
 1021         struct sockaddr *sa;
 1022 
 1023         UNP_LOCK_ASSERT();
 1024         unp = sotounpcb(so);
 1025 
 1026         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 1027         if (len <= 0)
 1028                 return (EINVAL);
 1029         strlcpy(buf, soun->sun_path, len + 1);
 1030         if (unp->unp_flags & UNP_CONNECTING)
 1031                 return (EALREADY);
 1032         unp->unp_flags |= UNP_CONNECTING;
 1033         UNP_UNLOCK();
 1034         sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 1035         mtx_lock(&Giant);
 1036         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
 1037         error = namei(&nd);
 1038         if (error)
 1039                 vp = NULL;
 1040         else
 1041                 vp = nd.ni_vp;
 1042         ASSERT_VOP_LOCKED(vp, "unp_connect");
 1043         NDFREE(&nd, NDF_ONLY_PNBUF);
 1044         if (error)
 1045                 goto bad;
 1046 
 1047         if (vp->v_type != VSOCK) {
 1048                 error = ENOTSOCK;
 1049                 goto bad;
 1050         }
 1051 #ifdef MAC
 1052         error = mac_check_vnode_open(td->td_ucred, vp, VWRITE | VREAD);
 1053         if (error)
 1054                 goto bad;
 1055 #endif
 1056         error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
 1057         if (error)
 1058                 goto bad;
 1059         mtx_unlock(&Giant);
 1060         UNP_LOCK();
 1061         unp = sotounpcb(so);
 1062         if (unp == NULL) {
 1063                 error = EINVAL;
 1064                 goto bad2;
 1065         }
 1066         so2 = vp->v_socket;
 1067         if (so2 == NULL) {
 1068                 error = ECONNREFUSED;
 1069                 goto bad2;
 1070         }
 1071         if (so->so_type != so2->so_type) {
 1072                 error = EPROTOTYPE;
 1073                 goto bad2;
 1074         }
 1075         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
 1076                 if (so2->so_options & SO_ACCEPTCONN)
 1077                         so3 = sonewconn(so2, 0);
 1078                 else
 1079                         so3 = NULL;
 1080                 if (so3 == NULL) {
 1081                         error = ECONNREFUSED;
 1082                         goto bad2;
 1083                 }
 1084                 unp = sotounpcb(so);
 1085                 unp2 = sotounpcb(so2);
 1086                 unp3 = sotounpcb(so3);
 1087                 if (unp2->unp_addr != NULL) {
 1088                         bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
 1089                         unp3->unp_addr = (struct sockaddr_un *) sa;
 1090                         sa = NULL;
 1091                 }
 1092                 /*
 1093                  * unp_peercred management:
 1094                  *
 1095                  * The connecter's (client's) credentials are copied from its
 1096                  * process structure at the time of connect() (which is now).
 1097                  */
 1098                 cru2x(td->td_ucred, &unp3->unp_peercred);
 1099                 unp3->unp_flags |= UNP_HAVEPC;
 1100                 /*
 1101                  * The receiver's (server's) credentials are copied from the
 1102                  * unp_peercred member of socket on which the former called
 1103                  * listen(); unp_listen() cached that process's credentials
 1104                  * at that time so we can use them now.
 1105                  */
 1106                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
 1107                     ("unp_connect: listener without cached peercred"));
 1108                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
 1109                     sizeof(unp->unp_peercred));
 1110                 unp->unp_flags |= UNP_HAVEPC;
 1111                 if (unp2->unp_flags & UNP_WANTCRED)
 1112                         unp3->unp_flags |= UNP_WANTCRED;
 1113 #ifdef MAC
 1114                 SOCK_LOCK(so);
 1115                 mac_set_socket_peer_from_socket(so, so3);
 1116                 mac_set_socket_peer_from_socket(so3, so);
 1117                 SOCK_UNLOCK(so);
 1118 #endif
 1119 
 1120                 so2 = so3;
 1121         }
 1122         error = unp_connect2(so, so2, PRU_CONNECT);
 1123 bad2:
 1124         UNP_UNLOCK();
 1125         mtx_lock(&Giant);
 1126 bad:
 1127         mtx_assert(&Giant, MA_OWNED);
 1128         if (vp != NULL)
 1129                 vput(vp);
 1130         mtx_unlock(&Giant);
 1131         free(sa, M_SONAME);
 1132         UNP_LOCK();
 1133         unp->unp_flags &= ~UNP_CONNECTING;
 1134         return (error);
 1135 }
 1136 
 1137 static int
 1138 unp_connect2(struct socket *so, struct socket *so2, int req)
 1139 {
 1140         struct unpcb *unp = sotounpcb(so);
 1141         struct unpcb *unp2;
 1142 
 1143         UNP_LOCK_ASSERT();
 1144 
 1145         if (so2->so_type != so->so_type)
 1146                 return (EPROTOTYPE);
 1147         unp2 = sotounpcb(so2);
 1148         unp->unp_conn = unp2;
 1149         switch (so->so_type) {
 1150         case SOCK_DGRAM:
 1151                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
 1152                 soisconnected(so);
 1153                 break;
 1154 
 1155         case SOCK_STREAM:
 1156                 unp2->unp_conn = unp;
 1157                 if (req == PRU_CONNECT &&
 1158                     ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
 1159                         soisconnecting(so);
 1160                 else
 1161                         soisconnected(so);
 1162                 soisconnected(so2);
 1163                 break;
 1164 
 1165         default:
 1166                 panic("unp_connect2");
 1167         }
 1168         return (0);
 1169 }
 1170 
 1171 static void
 1172 unp_disconnect(struct unpcb *unp)
 1173 {
 1174         struct unpcb *unp2 = unp->unp_conn;
 1175         struct socket *so;
 1176 
 1177         UNP_LOCK_ASSERT();
 1178 
 1179         if (unp2 == NULL)
 1180                 return;
 1181         unp->unp_conn = NULL;
 1182         switch (unp->unp_socket->so_type) {
 1183 
 1184         case SOCK_DGRAM:
 1185                 LIST_REMOVE(unp, unp_reflink);
 1186                 so = unp->unp_socket;
 1187                 SOCK_LOCK(so);
 1188                 so->so_state &= ~SS_ISCONNECTED;
 1189                 SOCK_UNLOCK(so);
 1190                 break;
 1191 
 1192         case SOCK_STREAM:
 1193                 soisdisconnected(unp->unp_socket);
 1194                 unp2->unp_conn = NULL;
 1195                 soisdisconnected(unp2->unp_socket);
 1196                 break;
 1197         }
 1198 }
 1199 
 1200 /*
 1201  * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed by
 1202  * the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers are
 1203  * safe to reference.  It first scans the list of struct unpcb's to generate
 1204  * a pointer list, then it rescans its list one entry at a time to
 1205  * externalize and copyout.  It checks the generation number to see if a
 1206  * struct unpcb has been reused, and will skip it if so.
 1207  */
 1208 static int
 1209 unp_pcblist(SYSCTL_HANDLER_ARGS)
 1210 {
 1211         int error, i, n;
 1212         int freeunp;
 1213         struct unpcb *unp, **unp_list;
 1214         unp_gen_t gencnt;
 1215         struct xunpgen *xug;
 1216         struct unp_head *head;
 1217         struct xunpcb *xu;
 1218 
 1219         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
 1220 
 1221         /*
 1222          * The process of preparing the PCB list is too time-consuming and
 1223          * resource-intensive to repeat twice on every request.
 1224          */
 1225         if (req->oldptr == NULL) {
 1226                 n = unp_count;
 1227                 req->oldidx = 2 * (sizeof *xug)
 1228                         + (n + n/8) * sizeof(struct xunpcb);
 1229                 return (0);
 1230         }
 1231 
 1232         if (req->newptr != NULL)
 1233                 return (EPERM);
 1234 
 1235         /*
 1236          * OK, now we're committed to doing something.
 1237          */
 1238         xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
 1239         UNP_LOCK();
 1240         gencnt = unp_gencnt;
 1241         n = unp_count;
 1242         UNP_UNLOCK();
 1243 
 1244         xug->xug_len = sizeof *xug;
 1245         xug->xug_count = n;
 1246         xug->xug_gen = gencnt;
 1247         xug->xug_sogen = so_gencnt;
 1248         error = SYSCTL_OUT(req, xug, sizeof *xug);
 1249         if (error) {
 1250                 free(xug, M_TEMP);
 1251                 return (error);
 1252         }
 1253 
 1254         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 1255 
 1256         UNP_LOCK();
 1257         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 1258              unp = LIST_NEXT(unp, unp_link)) {
 1259                 if (unp->unp_gencnt <= gencnt) {
 1260                         if (cr_cansee(req->td->td_ucred,
 1261                             unp->unp_socket->so_cred))
 1262                                 continue;
 1263                         unp_list[i++] = unp;
 1264                         UNP_REFCOUNT(unp)++;
 1265                 }
 1266         }
 1267         UNP_UNLOCK();
 1268         n = i;                  /* In case we lost some during malloc. */
 1269 
 1270         error = 0;
 1271         xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 1272         for (i = 0; i < n; i++) {
 1273                 unp = unp_list[i];
 1274                 UNP_LOCK();
 1275                 UNP_REFCOUNT(unp)--;
 1276                 if (UNP_REFCOUNT(unp) != 0 && unp->unp_gencnt <= gencnt) {
 1277                         xu->xu_len = sizeof *xu;
 1278                         xu->xu_unpp = unp;
 1279                         /*
 1280                          * XXX - need more locking here to protect against
 1281                          * connect/disconnect races for SMP.
 1282                          */
 1283                         if (unp->unp_addr != NULL)
 1284                                 bcopy(unp->unp_addr, &xu->xu_addr,
 1285                                       unp->unp_addr->sun_len);
 1286                         if (unp->unp_conn != NULL &&
 1287                             unp->unp_conn->unp_addr != NULL)
 1288                                 bcopy(unp->unp_conn->unp_addr,
 1289                                       &xu->xu_caddr,
 1290                                       unp->unp_conn->unp_addr->sun_len);
 1291                         bcopy(unp, &xu->xu_unp, sizeof *unp);
 1292                         sotoxsocket(unp->unp_socket, &xu->xu_socket);
 1293                         UNP_UNLOCK();
 1294                         error = SYSCTL_OUT(req, xu, sizeof *xu);
 1295                 } else {
 1296                         freeunp = (UNP_REFCOUNT(unp) == 0);
 1297                         UNP_UNLOCK();
 1298                         if (freeunp) 
 1299                                 uma_zfree(unp_zone, unp);
 1300                 }
 1301         }
 1302         free(xu, M_TEMP);
 1303         if (!error) {
 1304                 /*
 1305                  * Give the user an updated idea of our state.  If the
 1306                  * generation differs from what we told her before, she knows
 1307                  * that something happened while we were processing this
 1308                  * request, and it might be necessary to retry.
 1309                  */
 1310                 xug->xug_gen = unp_gencnt;
 1311                 xug->xug_sogen = so_gencnt;
 1312                 xug->xug_count = unp_count;
 1313                 error = SYSCTL_OUT(req, xug, sizeof *xug);
 1314         }
 1315         free(unp_list, M_TEMP);
 1316         free(xug, M_TEMP);
 1317         return (error);
 1318 }
 1319 
 1320 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
 1321             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
 1322             "List of active local datagram sockets");
 1323 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
 1324             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
 1325             "List of active local stream sockets");
 1326 
 1327 static void
 1328 unp_shutdown(struct unpcb *unp)
 1329 {
 1330         struct socket *so;
 1331 
 1332         UNP_LOCK_ASSERT();
 1333 
 1334         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
 1335             (so = unp->unp_conn->unp_socket))
 1336                 socantrcvmore(so);
 1337 }
 1338 
 1339 static void
 1340 unp_drop(struct unpcb *unp, int errno)
 1341 {
 1342         struct socket *so = unp->unp_socket;
 1343 
 1344         UNP_LOCK_ASSERT();
 1345 
 1346         so->so_error = errno;
 1347         unp_disconnect(unp);
 1348 }
 1349 
 1350 static void
 1351 unp_freerights(struct file **rp, int fdcount)
 1352 {
 1353         int i;
 1354         struct file *fp;
 1355 
 1356         for (i = 0; i < fdcount; i++) {
 1357                 fp = *rp;
 1358                 /*
 1359                  * Zero the pointer before calling unp_discard since it may
 1360                  * end up in unp_gc()..
 1361                  */
 1362                 *rp++ = 0;
 1363                 unp_discard(fp);
 1364         }
 1365 }
 1366 
 1367 int
 1368 unp_externalize(struct mbuf *control, struct mbuf **controlp)
 1369 {
 1370         struct thread *td = curthread;          /* XXX */
 1371         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1372         int i;
 1373         int *fdp;
 1374         struct file **rp;
 1375         struct file *fp;
 1376         void *data;
 1377         socklen_t clen = control->m_len, datalen;
 1378         int error, newfds;
 1379         int f;
 1380         u_int newlen;
 1381 
 1382         UNP_UNLOCK_ASSERT();
 1383 
 1384         error = 0;
 1385         if (controlp != NULL) /* controlp == NULL => free control messages */
 1386                 *controlp = NULL;
 1387 
 1388         while (cm != NULL) {
 1389                 if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
 1390                         error = EINVAL;
 1391                         break;
 1392                 }
 1393 
 1394                 data = CMSG_DATA(cm);
 1395                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1396 
 1397                 if (cm->cmsg_level == SOL_SOCKET
 1398                     && cm->cmsg_type == SCM_RIGHTS) {
 1399                         newfds = datalen / sizeof(struct file *);
 1400                         rp = data;
 1401 
 1402                         /* If we're not outputting the descriptors free them. */
 1403                         if (error || controlp == NULL) {
 1404                                 unp_freerights(rp, newfds);
 1405                                 goto next;
 1406                         }
 1407                         FILEDESC_LOCK(td->td_proc->p_fd);
 1408                         /* if the new FD's will not fit free them.  */
 1409                         if (!fdavail(td, newfds)) {
 1410                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1411                                 error = EMSGSIZE;
 1412                                 unp_freerights(rp, newfds);
 1413                                 goto next;
 1414                         }
 1415                         /*
 1416                          * Now change each pointer to an fd in the global
 1417                          * table to an integer that is the index to the local
 1418                          * fd table entry that we set up to point to the
 1419                          * global one we are transferring.
 1420                          */
 1421                         newlen = newfds * sizeof(int);
 1422                         *controlp = sbcreatecontrol(NULL, newlen,
 1423                             SCM_RIGHTS, SOL_SOCKET);
 1424                         if (*controlp == NULL) {
 1425                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1426                                 error = E2BIG;
 1427                                 unp_freerights(rp, newfds);
 1428                                 goto next;
 1429                         }
 1430 
 1431                         fdp = (int *)
 1432                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1433                         for (i = 0; i < newfds; i++) {
 1434                                 if (fdalloc(td, 0, &f))
 1435                                         panic("unp_externalize fdalloc failed");
 1436                                 fp = *rp++;
 1437                                 td->td_proc->p_fd->fd_ofiles[f] = fp;
 1438                                 FILE_LOCK(fp);
 1439                                 fp->f_msgcount--;
 1440                                 FILE_UNLOCK(fp);
 1441                                 unp_rights--;
 1442                                 *fdp++ = f;
 1443                         }
 1444                         FILEDESC_UNLOCK(td->td_proc->p_fd);
 1445                 } else {
 1446                         /* We can just copy anything else across. */
 1447                         if (error || controlp == NULL)
 1448                                 goto next;
 1449                         *controlp = sbcreatecontrol(NULL, datalen,
 1450                             cm->cmsg_type, cm->cmsg_level);
 1451                         if (*controlp == NULL) {
 1452                                 error = ENOBUFS;
 1453                                 goto next;
 1454                         }
 1455                         bcopy(data,
 1456                             CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 1457                             datalen);
 1458                 }
 1459 
 1460                 controlp = &(*controlp)->m_next;
 1461 
 1462 next:
 1463                 if (CMSG_SPACE(datalen) < clen) {
 1464                         clen -= CMSG_SPACE(datalen);
 1465                         cm = (struct cmsghdr *)
 1466                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1467                 } else {
 1468                         clen = 0;
 1469                         cm = NULL;
 1470                 }
 1471         }
 1472 
 1473         m_freem(control);
 1474 
 1475         return (error);
 1476 }
 1477 
 1478 static void
 1479 unp_zone_change(void *tag)
 1480 {
 1481 
 1482         uma_zone_set_max(unp_zone, maxsockets);
 1483 }
 1484 
 1485 void
 1486 unp_init(void)
 1487 {
 1488 
 1489         unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb_wrapper), NULL,
 1490             NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 1491         if (unp_zone == NULL)
 1492                 panic("unp_init");
 1493         uma_zone_set_max(unp_zone, maxsockets);
 1494         EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
 1495             NULL, EVENTHANDLER_PRI_ANY);
 1496         LIST_INIT(&unp_dhead);
 1497         LIST_INIT(&unp_shead);
 1498         TASK_INIT(&unp_gc_task, 0, unp_gc, NULL);
 1499         UNP_LOCK_INIT();
 1500 }
 1501 
 1502 static int
 1503 unp_internalize(struct mbuf **controlp, struct thread *td)
 1504 {
 1505         struct mbuf *control = *controlp;
 1506         struct proc *p = td->td_proc;
 1507         struct filedesc *fdescp = p->p_fd;
 1508         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1509         struct cmsgcred *cmcred;
 1510         struct file **rp;
 1511         struct file *fp;
 1512         struct timeval *tv;
 1513         int i, fd, *fdp;
 1514         void *data;
 1515         socklen_t clen = control->m_len, datalen;
 1516         int error, oldfds;
 1517         u_int newlen;
 1518 
 1519         UNP_UNLOCK_ASSERT();
 1520 
 1521         error = 0;
 1522         *controlp = NULL;
 1523 
 1524         while (cm != NULL) {
 1525                 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 1526                     || cm->cmsg_len > clen) {
 1527                         error = EINVAL;
 1528                         goto out;
 1529                 }
 1530 
 1531                 data = CMSG_DATA(cm);
 1532                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1533 
 1534                 switch (cm->cmsg_type) {
 1535                 /*
 1536                  * Fill in credential information.
 1537                  */
 1538                 case SCM_CREDS:
 1539                         *controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 1540                             SCM_CREDS, SOL_SOCKET);
 1541                         if (*controlp == NULL) {
 1542                                 error = ENOBUFS;
 1543                                 goto out;
 1544                         }
 1545 
 1546                         cmcred = (struct cmsgcred *)
 1547                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1548                         cmcred->cmcred_pid = p->p_pid;
 1549                         cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 1550                         cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 1551                         cmcred->cmcred_euid = td->td_ucred->cr_uid;
 1552                         cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 1553                                                         CMGROUP_MAX);
 1554                         for (i = 0; i < cmcred->cmcred_ngroups; i++)
 1555                                 cmcred->cmcred_groups[i] =
 1556                                     td->td_ucred->cr_groups[i];
 1557                         break;
 1558 
 1559                 case SCM_RIGHTS:
 1560                         oldfds = datalen / sizeof (int);
 1561                         /*
 1562                          * Check that all the FDs passed in refer to legal
 1563                          * files.  If not, reject the entire operation.
 1564                          */
 1565                         fdp = data;
 1566                         FILEDESC_LOCK(fdescp);
 1567                         for (i = 0; i < oldfds; i++) {
 1568                                 fd = *fdp++;
 1569                                 if ((unsigned)fd >= fdescp->fd_nfiles ||
 1570                                     fdescp->fd_ofiles[fd] == NULL) {
 1571                                         FILEDESC_UNLOCK(fdescp);
 1572                                         error = EBADF;
 1573                                         goto out;
 1574                                 }
 1575                                 fp = fdescp->fd_ofiles[fd];
 1576                                 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 1577                                         FILEDESC_UNLOCK(fdescp);
 1578                                         error = EOPNOTSUPP;
 1579                                         goto out;
 1580                                 }
 1581 
 1582                         }
 1583                         /*
 1584                          * Now replace the integer FDs with pointers to the
 1585                          * associated global file table entry..
 1586                          */
 1587                         newlen = oldfds * sizeof(struct file *);
 1588                         *controlp = sbcreatecontrol(NULL, newlen,
 1589                             SCM_RIGHTS, SOL_SOCKET);
 1590                         if (*controlp == NULL) {
 1591                                 FILEDESC_UNLOCK(fdescp);
 1592                                 error = E2BIG;
 1593                                 goto out;
 1594                         }
 1595 
 1596                         fdp = data;
 1597                         rp = (struct file **)
 1598                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1599                         for (i = 0; i < oldfds; i++) {
 1600                                 fp = fdescp->fd_ofiles[*fdp++];
 1601                                 *rp++ = fp;
 1602                                 FILE_LOCK(fp);
 1603                                 fp->f_count++;
 1604                                 fp->f_msgcount++;
 1605                                 FILE_UNLOCK(fp);
 1606                                 unp_rights++;
 1607                         }
 1608                         FILEDESC_UNLOCK(fdescp);
 1609                         break;
 1610 
 1611                 case SCM_TIMESTAMP:
 1612                         *controlp = sbcreatecontrol(NULL, sizeof(*tv),
 1613                             SCM_TIMESTAMP, SOL_SOCKET);
 1614                         if (*controlp == NULL) {
 1615                                 error = ENOBUFS;
 1616                                 goto out;
 1617                         }
 1618                         tv = (struct timeval *)
 1619                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1620                         microtime(tv);
 1621                         break;
 1622 
 1623                 default:
 1624                         error = EINVAL;
 1625                         goto out;
 1626                 }
 1627 
 1628                 controlp = &(*controlp)->m_next;
 1629 
 1630                 if (CMSG_SPACE(datalen) < clen) {
 1631                         clen -= CMSG_SPACE(datalen);
 1632                         cm = (struct cmsghdr *)
 1633                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1634                 } else {
 1635                         clen = 0;
 1636                         cm = NULL;
 1637                 }
 1638         }
 1639 
 1640 out:
 1641         m_freem(control);
 1642 
 1643         return (error);
 1644 }
 1645 
 1646 struct mbuf *
 1647 unp_addsockcred(struct thread *td, struct mbuf *control)
 1648 {
 1649         struct mbuf *m, *n, *n_prev;
 1650         struct sockcred *sc;
 1651         const struct cmsghdr *cm;
 1652         int ngroups;
 1653         int i;
 1654 
 1655         ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
 1656 
 1657         m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET);
 1658         if (m == NULL)
 1659                 return (control);
 1660 
 1661         sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *));
 1662         sc->sc_uid = td->td_ucred->cr_ruid;
 1663         sc->sc_euid = td->td_ucred->cr_uid;
 1664         sc->sc_gid = td->td_ucred->cr_rgid;
 1665         sc->sc_egid = td->td_ucred->cr_gid;
 1666         sc->sc_ngroups = ngroups;
 1667         for (i = 0; i < sc->sc_ngroups; i++)
 1668                 sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 1669 
 1670         /*
 1671          * Unlink SCM_CREDS control messages (struct cmsgcred), since just
 1672          * created SCM_CREDS control message (struct sockcred) has another
 1673          * format.
 1674          */
 1675         if (control != NULL)
 1676                 for (n = control, n_prev = NULL; n != NULL;) {
 1677                         cm = mtod(n, struct cmsghdr *);
 1678                         if (cm->cmsg_level == SOL_SOCKET &&
 1679                             cm->cmsg_type == SCM_CREDS) {
 1680                                 if (n_prev == NULL)
 1681                                         control = n->m_next;
 1682                                 else
 1683                                         n_prev->m_next = n->m_next;
 1684                                 n = m_free(n);
 1685                         } else {
 1686                                 n_prev = n;
 1687                                 n = n->m_next;
 1688                         }
 1689                 }
 1690 
 1691         /* Prepend it to the head. */
 1692         m->m_next = control;
 1693 
 1694         return (m);
 1695 }
 1696 
 1697 /*
 1698  * unp_defer indicates whether additional work has been deferred for a future
 1699  * pass through unp_gc().  It is thread local and does not require explicit
 1700  * synchronization.
 1701  */
 1702 static int      unp_defer;
 1703 
      /*
       * Incremented once at the start of every unp_gc() run.  Published through
       * SYSCTL_INT under the _net_local node as "taskcount" with CTLFLAG_RD.
       */
 1704 static int unp_taskcount;
 1705 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "");
 1706 
      /*
       * Incremented by unp_gc() for each file it closes with closef() in its
       * final loop.  Published through SYSCTL_INT under the _net_local node as
       * "recycled" with CTLFLAG_RD.
       */
 1707 static int unp_recycled;
 1708 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, "");
 1709 
 1710 static void
 1711 unp_gc(__unused void *arg, int pending)
 1712 {
              /*
               * Collect files whose only references come from rights messages
               * queued in socket buffers.  Neither "arg" nor "pending" is used
               * in the body.
               *
               * Phase 1 (mark), under a shared hold of filelist_lock: clear
               * FMARK and FDEFER on every file on filehead, then walk the list
               * repeatedly until unp_defer drops to zero.  A file with
               * f_count != f_msgcount is flagged FMARK.  For each newly marked
               * or deferred file that is a local-domain socket with PR_RIGHTS,
               * the receive buffer is scanned with unp_scan(..., unp_mark),
               * which marks the files it carries and defers them to a later
               * walk.
               *
               * Phase 2 (collect): every file with f_count == f_msgcount and
               * no FMARK gets an extra f_count reference and is recorded in
               * extra_ref.
               *
               * Phase 3 (sweep): sorflush() is called on each recorded socket,
               * then closef() drops the extra reference taken in phase 2.
               */
 1713         struct file *fp, *nextfp;
 1714         struct socket *so;
 1715         struct file **extra_ref, **fpp;
 1716         int nunref, i;
 1717         int nfiles_snap;
 1718         int nfiles_slack = 20;
 1719 
 1720         unp_taskcount++;
 1721         unp_defer = 0;
 1722         /*
 1723          * Before going through all this, set all FDs to be NOT deferred and
 1724          * NOT externally accessible
 1725          */
 1726         sx_slock(&filelist_lock);
 1727         LIST_FOREACH(fp, &filehead, f_list)
 1728                 fp->f_gcflag &= ~(FMARK|FDEFER);
              /*
               * Each walk may defer more files (unp_mark() increments
               * unp_defer); keep walking until a walk leaves none pending.
               */
 1729         do {
 1730                 LIST_FOREACH(fp, &filehead, f_list) {
 1731                         FILE_LOCK(fp);
 1732                         /*
 1733                          * If the file is not open, skip it -- could be a
 1734                          * file in the process of being opened, or in the
 1735                          * process of being closed.  If the file is
 1736                          * "closing", it may have been marked for deferred
 1737                          * consideration.  Clear the flag now if so.
 1738                          */
 1739                         if (fp->f_count == 0) {
 1740                                 if (fp->f_gcflag & FDEFER)
 1741                                         unp_defer--;
 1742                                 fp->f_gcflag &= ~(FMARK|FDEFER);
 1743                                 FILE_UNLOCK(fp);
 1744                                 continue;
 1745                         }
 1746                         /*
 1747                          * If we already marked it as 'defer' in a previous
 1748                          * pass, then try to process it this time and un-mark
 1749                          * it.
 1750                          */
 1751                         if (fp->f_gcflag & FDEFER) {
 1752                                 fp->f_gcflag &= ~FDEFER;
 1753                                 unp_defer--;
 1754                         } else {
 1755                                 /*
 1756                                  * if it's not deferred, then check if it's
 1757                                  * already marked.. if so skip it
 1758                                  */
 1759                                 if (fp->f_gcflag & FMARK) {
 1760                                         FILE_UNLOCK(fp);
 1761                                         continue;
 1762                                 }
 1763                                 /*
 1764                                  * If all references are from messages in
 1765                                  * transit, then skip it. it's not externally
 1766                                  * accessible.
 1767                                  */
 1768                                 if (fp->f_count == fp->f_msgcount) {
 1769                                         FILE_UNLOCK(fp);
 1770                                         continue;
 1771                                 }
 1772                                 /*
 1773                                  * If it got this far then it must be
 1774                                  * externally accessible.
 1775                                  */
 1776                                 fp->f_gcflag |= FMARK;
 1777                         }
 1778                         /*
 1779                          * Either it was deferred, or it is externally
 1780                          * accessible and not already marked so.  Now check
 1781                          * if it is possibly one of OUR sockets.
 1782                          */
 1783                         if (fp->f_type != DTYPE_SOCKET ||
 1784                             (so = fp->f_data) == NULL) {
 1785                                 FILE_UNLOCK(fp);
 1786                                 continue;
 1787                         }
 1788                         if (so->so_proto->pr_domain != &localdomain ||
 1789                             (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
 1790                                 FILE_UNLOCK(fp);
 1791                                 continue;
 1792                         }
 1793 
 1794                         /*
 1795                          * Tell any other threads that do a subsequent
 1796                          * fdrop() that we are scanning the message
 1797                          * buffers.
 1798                          */
 1799                         fp->f_gcflag |= FWAIT;
 1800                         FILE_UNLOCK(fp);
 1801 
 1802                         /*
 1803                          * So, Ok, it's one of our sockets and it IS
 1804                          * externally accessible (or was deferred).  Now we
 1805                          * look to see if we hold any file descriptors in its
 1806                          * message buffers. Follow those links and mark them
 1807                          * as accessible too.
 1808                          */
 1809                         SOCKBUF_LOCK(&so->so_rcv);
 1810                         unp_scan(so->so_rcv.sb_mb, unp_mark);
 1811                         SOCKBUF_UNLOCK(&so->so_rcv);
 1812 
 1813                         /*
 1814                          * Wake up any threads waiting in fdrop().
 1815                          */
 1816                         FILE_LOCK(fp);
 1817                         fp->f_gcflag &= ~FWAIT;
 1818                         wakeup(&fp->f_gcflag);
 1819                         FILE_UNLOCK(fp);
 1820                 }
 1821         } while (unp_defer);
 1822         sx_sunlock(&filelist_lock);
 1823         /*
 1824          * XXXRW: The following comments need updating for a post-SMPng and
 1825          * deferred unp_gc() world, but are still generally accurate.
 1826          *
 1827          * We grab an extra reference to each of the file table entries that
 1828          * are not otherwise accessible and then free the rights that are
 1829          * stored in messages on them.
 1830          *
 1831          * The bug in the original code is a little tricky, so I'll describe
 1832          * what's wrong with it here.
 1833          *
 1834          * It is incorrect to simply unp_discard each entry for f_msgcount
 1835          * times -- consider the case of sockets A and B that contain
 1836          * references to each other.  On a last close of some other socket,
 1837          * we trigger a gc since the number of outstanding rights (unp_rights)
 1838          * is non-zero.  If during the sweep phase the gc code unp_discards,
 1839          * we end up doing a (full) closef on the descriptor.  A closef on A
 1840          * results in the following chain.  Closef calls soo_close, which
 1841          * calls soclose.   Soclose calls first (through the switch
 1842          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
 1843          * returns because the previous instance had set unp_gcing, and we
 1844          * return all the way back to soclose, which marks the socket with
 1845          * SS_NOFDREF, and then calls sofree.  Sofree calls sorflush to free
 1846          * up the rights that are queued in messages on the socket A, i.e.,
 1847          * the reference on B.  The sorflush calls via the dom_dispose switch
 1848          * unp_dispose, which unp_scans with unp_discard.  This second
 1849          * instance of unp_discard just calls closef on B.
 1850          *
 1851          * Well, a similar chain occurs on B, resulting in a sorflush on B,
 1852          * which results in another closef on A.  Unfortunately, A is already
 1853          * being closed, and the descriptor has already been marked with
 1854          * SS_NOFDREF, and soclose panics at this point.
 1855          *
 1856          * Here, we first take an extra reference to each inaccessible
 1857          * descriptor.  Then, we call sorflush ourself, since we know it is a
 1858          * Unix domain socket anyhow.  After we destroy all the rights
 1859          * carried in messages, we do a last closef to get rid of our extra
 1860          * reference.  This is the last close, and the unp_detach etc will
 1861          * shut down the socket.
 1862          *
 1863          * 91/09/19, bsy@cs.cmu.edu
 1864          */
              /*
               * The array is sized from openfiles before filelist_lock is
               * taken, because the M_WAITOK allocation is done unlocked.  If
               * openfiles has outgrown the snapshot by the time the lock is
               * held, drop everything and retry with 20 more slots of slack.
               */
 1865 again:
 1866         nfiles_snap = openfiles + nfiles_slack; /* some slack */
 1867         extra_ref = malloc(nfiles_snap * sizeof(struct file *), M_TEMP,
 1868             M_WAITOK);
 1869         sx_slock(&filelist_lock);
 1870         if (nfiles_snap < openfiles) {
 1871                 sx_sunlock(&filelist_lock);
 1872                 free(extra_ref, M_TEMP);
 1873                 nfiles_slack += 20;
 1874                 goto again;
 1875         }
 1876         for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
 1877             fp != NULL; fp = nextfp) {
 1878                 nextfp = LIST_NEXT(fp, f_list);
 1879                 FILE_LOCK(fp);
 1880                 /*
 1881                  * If it's not open, skip it
 1882                  */
 1883                 if (fp->f_count == 0) {
 1884                         FILE_UNLOCK(fp);
 1885                         continue;
 1886                 }
 1887                 /*
 1888                  * If all refs are from msgs, and it's not marked accessible
 1889                  * then it must be referenced from some unreachable cycle of
 1890                  * (shut-down) FDs, so include it in our list of FDs to
 1891                  * remove.
 1892                  */
 1893                 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) {
 1894                         *fpp++ = fp;
 1895                         nunref++;
 1896                         fp->f_count++;
 1897                 }
 1898                 FILE_UNLOCK(fp);
 1899         }
 1900         sx_sunlock(&filelist_lock);
 1901         /*
 1902          * For each FD on our hit list, do the following two things:
 1903          */
              /* First: flush every collected socket with sorflush(). */
 1904         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 1905                 struct file *tfp = *fpp;
 1906                 FILE_LOCK(tfp);
 1907                 if (tfp->f_type == DTYPE_SOCKET &&
 1908                     tfp->f_data != NULL) {
 1909                         FILE_UNLOCK(tfp);
 1910                         sorflush(tfp->f_data);
 1911                 } else {
 1912                         FILE_UNLOCK(tfp);
 1913                 }
 1914         }
              /* Second: drop the extra reference taken while collecting. */
 1915         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 1916                 closef(*fpp, (struct thread *) NULL);
 1917                 unp_recycled++;
 1918         }
 1919         free(extra_ref, M_TEMP);
 1920 }
 1921 
 1922 void
 1923 unp_dispose(struct mbuf *m)
 1924 {
 1925 
 1926         if (m)
 1927                 unp_scan(m, unp_discard);
 1928 }
 1929 
 1930 static int
 1931 unp_listen(struct socket *so, struct unpcb *unp, struct thread *td)
 1932 {
 1933         int error;
 1934 
 1935         UNP_LOCK_ASSERT();
 1936 
 1937         SOCK_LOCK(so);
 1938         error = solisten_proto_check(so);
 1939         if (error == 0) {
 1940                 cru2x(td->td_ucred, &unp->unp_peercred);
 1941                 unp->unp_flags |= UNP_HAVEPCCACHED;
 1942                 solisten_proto(so);
 1943         }
 1944         SOCK_UNLOCK(so);
 1945         return (error);
 1946 }
 1947 
 1948 static void
 1949 unp_scan(struct mbuf *m0, void (*op)(struct file *))
 1950 {
 1951         struct mbuf *m;
 1952         struct file **rp;
 1953         struct cmsghdr *cm;
 1954         void *data;
 1955         int i;
 1956         socklen_t clen, datalen;
 1957         int qfds;
 1958 
 1959         while (m0 != NULL) {
 1960                 for (m = m0; m; m = m->m_next) {
 1961                         if (m->m_type != MT_CONTROL)
 1962                                 continue;
 1963 
 1964                         cm = mtod(m, struct cmsghdr *);
 1965                         clen = m->m_len;
 1966 
 1967                         while (cm != NULL) {
 1968                                 if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 1969                                         break;
 1970 
 1971                                 data = CMSG_DATA(cm);
 1972                                 datalen = (caddr_t)cm + cm->cmsg_len
 1973                                     - (caddr_t)data;
 1974 
 1975                                 if (cm->cmsg_level == SOL_SOCKET &&
 1976                                     cm->cmsg_type == SCM_RIGHTS) {
 1977                                         qfds = datalen / sizeof (struct file *);
 1978                                         rp = data;
 1979                                         for (i = 0; i < qfds; i++)
 1980                                                 (*op)(*rp++);
 1981                                 }
 1982 
 1983                                 if (CMSG_SPACE(datalen) < clen) {
 1984                                         clen -= CMSG_SPACE(datalen);
 1985                                         cm = (struct cmsghdr *)
 1986                                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1987                                 } else {
 1988                                         clen = 0;
 1989                                         cm = NULL;
 1990                                 }
 1991                         }
 1992                 }
 1993                 m0 = m0->m_act;
 1994         }
 1995 }
 1996 
 1997 static void
 1998 unp_mark(struct file *fp)
 1999 {
 2000         if (fp->f_gcflag & FMARK)
 2001                 return;
 2002         unp_defer++;
 2003         fp->f_gcflag |= (FMARK|FDEFER);
 2004 }
 2005 
 2006 static void
 2007 unp_discard(struct file *fp)
 2008 {
              /*
               * Release one in-message reference on "fp" (passed to unp_scan()
               * by unp_dispose() as the per-file callback).  f_msgcount and the
               * global unp_rights are decremented with both the UNP lock and
               * the file lock held; closef() is called only after both have
               * been released.
               */
 2009         UNP_LOCK();
 2010         FILE_LOCK(fp);
 2011         fp->f_msgcount--;
 2012         unp_rights--;
 2013         FILE_UNLOCK(fp);
 2014         UNP_UNLOCK();
 2015         (void) closef(fp, (struct thread *)NULL);
 2016 }

Cache object: 1f377a15f2737348431224b75fe41117


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.