The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: uipc_usrreq.c,v 1.133 2010/11/19 06:44:43 dholland Exp $       */
    2 
    3 /*-
    4  * Copyright (c) 1998, 2000, 2004, 2008, 2009 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
    9  * NASA Ames Research Center, and by Andrew Doran.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30  * POSSIBILITY OF SUCH DAMAGE.
   31  */
   32 
   33 /*
   34  * Copyright (c) 1982, 1986, 1989, 1991, 1993
   35  *      The Regents of the University of California.  All rights reserved.
   36  *
   37  * Redistribution and use in source and binary forms, with or without
   38  * modification, are permitted provided that the following conditions
   39  * are met:
   40  * 1. Redistributions of source code must retain the above copyright
   41  *    notice, this list of conditions and the following disclaimer.
   42  * 2. Redistributions in binary form must reproduce the above copyright
   43  *    notice, this list of conditions and the following disclaimer in the
   44  *    documentation and/or other materials provided with the distribution.
   45  * 3. Neither the name of the University nor the names of its contributors
   46  *    may be used to endorse or promote products derived from this software
   47  *    without specific prior written permission.
   48  *
   49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   59  * SUCH DAMAGE.
   60  *
   61  *      @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
   62  */
   63 
   64 /*
   65  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
   66  *
   67  * Redistribution and use in source and binary forms, with or without
   68  * modification, are permitted provided that the following conditions
   69  * are met:
   70  * 1. Redistributions of source code must retain the above copyright
   71  *    notice, this list of conditions and the following disclaimer.
   72  * 2. Redistributions in binary form must reproduce the above copyright
   73  *    notice, this list of conditions and the following disclaimer in the
   74  *    documentation and/or other materials provided with the distribution.
   75  * 3. All advertising materials mentioning features or use of this software
   76  *    must display the following acknowledgement:
   77  *      This product includes software developed by the University of
   78  *      California, Berkeley and its contributors.
   79  * 4. Neither the name of the University nor the names of its contributors
   80  *    may be used to endorse or promote products derived from this software
   81  *    without specific prior written permission.
   82  *
   83  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   84  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   85  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   86  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   87  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   88  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   89  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   90  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   91  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   92  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   93  * SUCH DAMAGE.
   94  *
   95  *      @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
   96  */
   97 
   98 #include <sys/cdefs.h>
   99 __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.133 2010/11/19 06:44:43 dholland Exp $");
  100 
  101 #include <sys/param.h>
  102 #include <sys/systm.h>
  103 #include <sys/proc.h>
  104 #include <sys/filedesc.h>
  105 #include <sys/domain.h>
  106 #include <sys/protosw.h>
  107 #include <sys/socket.h>
  108 #include <sys/socketvar.h>
  109 #include <sys/unpcb.h>
  110 #include <sys/un.h>
  111 #include <sys/namei.h>
  112 #include <sys/vnode.h>
  113 #include <sys/file.h>
  114 #include <sys/stat.h>
  115 #include <sys/mbuf.h>
  116 #include <sys/kauth.h>
  117 #include <sys/kmem.h>
  118 #include <sys/atomic.h>
  119 #include <sys/uidinfo.h>
  120 #include <sys/kernel.h>
  121 #include <sys/kthread.h>
  122 
  123 /*
  124  * Unix communications domain.
  125  *
  126  * TODO:
  127  *      SEQPACKET, RDM
  128  *      rethink name space problems
  129  *      need a proper out-of-band
  130  *
  131  * Notes on locking:
  132  *
  133  * The generic rules noted in uipc_socket2.c apply.  In addition:
  134  *
  135  * o We have a global lock, uipc_lock.
  136  *
  137  * o All datagram sockets are locked by uipc_lock.
  138  *
  139  * o For stream socketpairs, the two endpoints are created sharing the same
  140  *   independent lock.  Sockets presented to PRU_CONNECT2 must already have
  141  *   matching locks.
  142  *
  143  * o Stream sockets created via socket() start life with their own
  144  *   independent lock.
  145  * 
  146  * o Stream connections to a named endpoint are slightly more complicated.
  147  *   Sockets that have called listen() have their lock pointer mutated to
  148  *   the global uipc_lock.  When establishing a connection, the connecting
  149  *   socket also has its lock mutated to uipc_lock, which matches the head
  150  *   (listening socket).  We create a new socket for accept() to return, and
  151  *   that also shares the head's lock.  Until the connection is completely
  152  *   done on both ends, all three sockets are locked by uipc_lock.  Once the
  153  *   connection is complete, the association with the head's lock is broken.
  154  *   The connecting socket and the socket returned from accept() have their
  155  *   lock pointers mutated away from uipc_lock, and back to the connecting
  156  *   socket's original, independent lock.  The head continues to be locked
  157  *   by uipc_lock.
  158  *
  159  * o If uipc_lock is determined to be a significant source of contention,
  160  *   it could easily be hashed out.  It is difficult to simply make it an
  161  *   independent lock because of visibility / garbage collection issues:
  162  *   if a socket has been associated with a lock at any point, that lock
  163  *   must remain valid until the socket is no longer visible in the system.
  164  *   The lock must not be freed or otherwise destroyed until any sockets
  165  *   that had referenced it have also been destroyed.
  166  */
  167 const struct sockaddr_un sun_noname = {  /* stand-in address for sockets with no name */
  168         .sun_len = sizeof(sun_noname),
  169         .sun_family = AF_LOCAL,
  170 };
  171 ino_t   unp_ino;                        /* prototype for fake inode numbers */
  172 
      /* Forward declarations; the definitions are not in this part of the file. */
  173 struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *);
  174 static void unp_mark(file_t *);
  175 static void unp_scan(struct mbuf *, void (*)(file_t *), int);
  176 static void unp_discard_now(file_t *);
  177 static void unp_discard_later(file_t *);
  178 static void unp_thread(void *);
  179 static void unp_thread_kick(void);
  180 static kmutex_t *uipc_lock;             /* domain-wide lock, allocated in uipc_init() */
  181 
      /*
       * State associated with the "unpgc" kernel thread created in uipc_init():
       * unp_thread_cv is initialized there and unp_thread_lwp receives the
       * thread's lwp.
       * NOTE(review): unp_thread_discard and unp_defer are not referenced in the
       * code shown here; presumably they belong to the same thread's deferred
       * discard machinery -- confirm against the rest of the file.
       */
  182 static kcondvar_t unp_thread_cv;
  183 static lwp_t *unp_thread_lwp;
  184 static SLIST_HEAD(,file) unp_thread_discard;
  185 static int unp_defer;
  186 
  187 /*
  188  * Initialize Unix protocols.
       *
       * Allocates the domain-wide mutex (uipc_lock), initializes the condition
       * variable unp_thread_cv and creates the MP-safe kernel thread "unpgc"
       * running unp_thread(); the new thread's lwp is stored in unp_thread_lwp.
       * Panics if the thread cannot be created.
  189  */
  190 void
  191 uipc_init(void)
  192 {
  193         int error;
  194 
  195         uipc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
  196         cv_init(&unp_thread_cv, "unpgc");
  197 
  198         error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, unp_thread,
  199             NULL, &unp_thread_lwp, "unpgc");
  200         if (error != 0)
  201                 panic("uipc_init %d", error);
  202 }
  203 
  204 /*
  205  * A connection succeeded: disassociate both endpoints from the head's
  206  * lock, and make them share their own lock.  There is a race here: for
  207  * a very brief time one endpoint will be locked by a different lock
  208  * than the other end.  However, since the current thread holds the old
  209  * lock (the listening socket's lock, the head) access can still only be
  210  * made to one side of the connection.
       *
       * so  is the initiating (connecting) endpoint; its saved unp_streamlock
       *     becomes the lock shared by the pair.
       * so2 is the other endpoint; it must not have a unp_streamlock of its own.
       *
       * Both sockets must be locked by the same lock on entry, that lock must
       * be uipc_lock, and the caller must also own so's unp_streamlock (all of
       * this is asserted).  Nothing is changed if so2 is still attached to a
       * listening head (so2->so_head != NULL).
  211  */
  212 static void
  213 unp_setpeerlocks(struct socket *so, struct socket *so2)
  214 {
  215         struct unpcb *unp;
  216         kmutex_t *lock;
  217 
  218         KASSERT(solocked2(so, so2));
  219 
  220         /*
  221          * Bail out if either end of the socket is not yet fully
  222          * connected or accepted.  We only break the lock association
  223          * with the head when the pair of sockets stand completely
  224          * on their own.
  225          */
  226         KASSERT(so->so_head == NULL);
  227         if (so2->so_head != NULL)
  228                 return;
  229 
  230         /*
  231          * Drop references to old lock.  A third reference (from the
  232          * queue head) must be held as we still hold its lock.  Bonus:
  233          * we don't need to worry about garbage collecting the lock.
  234          */
  235         lock = so->so_lock;
  236         KASSERT(lock == uipc_lock);
              /* One reference is dropped for each of the two endpoints. */
  237         mutex_obj_free(lock);
  238         mutex_obj_free(lock);
  239 
  240         /*
  241          * Grab stream lock from the initiator and share between the two
  242          * endpoints.  Issue memory barrier to ensure all modifications
  243          * become globally visible before the lock change.  so2 is
  244          * assumed not to have a stream lock, because it was created
  245          * purely for the server side to accept this connection and
  246          * started out life using the domain-wide lock.
  247          */
  248         unp = sotounpcb(so);
  249         KASSERT(unp->unp_streamlock != NULL);
  250         KASSERT(sotounpcb(so2)->unp_streamlock == NULL);
  251         lock = unp->unp_streamlock;
  252         unp->unp_streamlock = NULL;
              /*
               * Extra reference, presumably so that each of the two sockets
               * about to point at this lock accounts for one -- confirm.
               */
  253         mutex_obj_hold(lock);
  254         membar_exit();
  255         /*
  256          * possible race if lock is not held - see comment in
  257          * uipc_usrreq(PRU_ACCEPT).
  258          */
  259         KASSERT(mutex_owned(lock));
  260         solockreset(so, lock);
  261         solockreset(so2, lock);
  262 }
  263 
  264 /*
  265  * Reset a socket's lock back to the domain-wide lock.
       *
       * The socket must be locked on entry.  If it is already locked by
       * uipc_lock nothing happens.  Otherwise the current lock is stashed in
       * unp_streamlock (which must be empty), a reference is taken on
       * uipc_lock, uipc_lock is entered and installed as the socket's lock,
       * and only then is the old lock released.  The socket is therefore
       * still locked (now by uipc_lock) on return.
  266  */
  267 static void
  268 unp_resetlock(struct socket *so)
  269 {
  270         kmutex_t *olock, *nlock;
  271         struct unpcb *unp;
  272 
  273         KASSERT(solocked(so));
  274 
  275         olock = so->so_lock;
  276         nlock = uipc_lock;
  277         if (olock == nlock)
  278                 return;
  279         unp = sotounpcb(so);
  280         KASSERT(unp->unp_streamlock == NULL);
              /* Remember the private lock; unp_setpeerlocks() can reinstate it. */
  281         unp->unp_streamlock = olock;
  282         mutex_obj_hold(nlock);
              /* Take the new lock before switching so the socket is never unlocked. */
  283         mutex_enter(nlock);
  284         solockreset(so, nlock);
  285         mutex_exit(olock);
  286 }
  287 
      /*
       * Release a protocol control block: free the bound address (if any),
       * drop the reference on the saved stream lock (if any), then free the
       * unpcb itself.
       */
  288 static void
  289 unp_free(struct unpcb *unp)
  290 {
  291 
  292         if (unp->unp_addr)
  293                 free(unp->unp_addr, M_SONAME);
  294         if (unp->unp_streamlock != NULL)
  295                 mutex_obj_free(unp->unp_streamlock);
  296         free(unp, M_PCB);
  297 }
  298 
      /*
       * Deliver a message to the peer of unp.
       *
       * Appends m, tagged with the sender's address (sun_noname if the sender
       * is unbound) and the control mbufs, to the receive buffer of the
       * connected peer's socket.  If the peer has UNP_WANTCRED set, the
       * control chain is first passed through unp_addsockcred().
       *
       * unp->unp_conn is dereferenced unconditionally, so unp must be
       * connected, and the peer's socket must be locked (asserted).
       *
       * Returns 0 after waking up readers on the peer, or ENOBUFS if the
       * append failed; in the failure case the overflow counter of the peer's
       * receive buffer is bumped and both m and control are disposed of and
       * freed here.
       */
  299 int
  300 unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp,
  301         struct lwp *l)
  302 {
  303         struct socket *so2;
  304         const struct sockaddr_un *sun;
  305 
  306         so2 = unp->unp_conn->unp_socket;
  307 
  308         KASSERT(solocked(so2));
  309 
  310         if (unp->unp_addr)
  311                 sun = unp->unp_addr;
  312         else
  313                 sun = &sun_noname;
  314         if (unp->unp_conn->unp_flags & UNP_WANTCRED)
  315                 control = unp_addsockcred(l, control);
  316         if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
  317             control) == 0) {
  318                 so2->so_rcv.sb_overflowed++;
  319                 unp_dispose(control);
  320                 m_freem(control);
  321                 m_freem(m);
  322                 return (ENOBUFS);
  323         } else {
  324                 sorwakeup(so2);
  325                 return (0);
  326         }
  327 }
  328 
      /*
       * Copy a socket address into the mbuf nam.
       *
       * With peeraddr true the address of the connected peer is returned,
       * otherwise the socket's own bound address.  When the selected address
       * does not exist, sun_noname is returned instead.
       *
       * The socket must be locked on entry and is locked on return, but the
       * lock is dropped temporarily when the address does not fit in MLEN
       * bytes and external storage (MAXPATHLEN * 2 bytes, M_WAITOK) has to be
       * attached to nam.  Because the address may have changed meanwhile, it
       * is looked up again after relocking; the allocation happens at most
       * once.
       */
  329 void
  330 unp_setaddr(struct socket *so, struct mbuf *nam, bool peeraddr)
  331 {
  332         const struct sockaddr_un *sun;
  333         struct unpcb *unp;
  334         bool ext;
  335 
  336         KASSERT(solocked(so));
  337         unp = sotounpcb(so);
  338         ext = false;
  339 
  340         for (;;) {
  341                 sun = NULL;
  342                 if (peeraddr) {
  343                         if (unp->unp_conn && unp->unp_conn->unp_addr)
  344                                 sun = unp->unp_conn->unp_addr;
  345                 } else {
  346                         if (unp->unp_addr)
  347                                 sun = unp->unp_addr;
  348                 }
  349                 if (sun == NULL)
  350                         sun = &sun_noname;
  351                 nam->m_len = sun->sun_len;
  352                 if (nam->m_len > MLEN && !ext) {
                              /* Too big for a plain mbuf: allocate unlocked, retry. */
  353                         sounlock(so);
  354                         MEXTMALLOC(nam, MAXPATHLEN * 2, M_WAITOK);
  355                         solock(so);
  356                         ext = true;
  357                 } else {
  358                         KASSERT(nam->m_len <= MAXPATHLEN * 2);
  359                         memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
  360                         break;
  361                 }
  362         }
  363 }
  364 
      /*
       * User-request handler for the Unix (local) communications domain.
       *
       * Dispatches on req (one of the PRU_* codes).  PRU_CONTROL is rejected
       * with EOPNOTSUPP.  For every request other than PRU_ATTACH the socket
       * must already have a control block (otherwise EINVAL is returned) and
       * must be locked by the caller (asserted).
       *
       * so      - socket the request applies to
       * m       - request data; for PRU_SENSE this is really a struct stat *
       * nam     - address mbuf; for PRU_CONNECT2 this is really the second
       *           struct socket *
       * control - control mbufs; only expected for PRU_SEND and PRU_SENDOOB
       * l       - calling lwp; asserted non-NULL for PRU_BIND and PRU_CONNECT
       *
       * Returns 0 on success or an errno value.
       */
  365 /*ARGSUSED*/
  366 int
  367 uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
  368         struct mbuf *control, struct lwp *l)
  369 {
  370         struct unpcb *unp = sotounpcb(so);
  371         struct socket *so2;
  372         struct proc *p;
  373         u_int newhiwat;
  374         int error = 0;
  375 
  376         if (req == PRU_CONTROL)
  377                 return (EOPNOTSUPP);
  378 
  379 #ifdef DIAGNOSTIC
  380         if (req != PRU_SEND && req != PRU_SENDOOB && control)
  381                 panic("uipc_usrreq: unexpected control mbuf");
  382 #endif
  383         p = l ? l->l_proc : NULL;
  384         if (req != PRU_ATTACH) {
  385                 if (unp == NULL) {
  386                         error = EINVAL;
  387                         goto release;
  388                 }
  389                 KASSERT(solocked(so));
  390         }
  391 
  392         switch (req) {
  393 
  394         case PRU_ATTACH:
  395                 if (unp != NULL) {
  396                         error = EISCONN;
  397                         break;
  398                 }
  399                 error = unp_attach(so);
  400                 break;
  401 
  402         case PRU_DETACH:
  403                 unp_detach(unp);
  404                 break;
  405 
  406         case PRU_BIND:
  407                 KASSERT(l != NULL);
  408                 error = unp_bind(so, nam, l);
  409                 break;
  410 
  411         case PRU_LISTEN:
  412                 /*
  413                  * If the socket can accept a connection, it must be
  414                  * locked by uipc_lock.
  415                  */
  416                 unp_resetlock(so);
                      /* Listening requires a bound socket (one with a vnode). */
  417                 if (unp->unp_vnode == NULL)
  418                         error = EINVAL;
  419                 break;
  420 
  421         case PRU_CONNECT:
  422                 KASSERT(l != NULL);
  423                 error = unp_connect(so, nam, l);
  424                 break;
  425 
  426         case PRU_CONNECT2:
  427                 error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2);
  428                 break;
  429 
  430         case PRU_DISCONNECT:
  431                 unp_disconnect(unp);
  432                 break;
  433 
  434         case PRU_ACCEPT:
  435                 KASSERT(so->so_lock == uipc_lock);
  436                 /*
  437                  * Mark the initiating STREAM socket as connected *ONLY*
  438                  * after it's been accepted.  This prevents a client from
  439                  * overrunning a server and receiving ECONNREFUSED.
  440                  */
  441                 if (unp->unp_conn == NULL)
  442                         break;
  443                 so2 = unp->unp_conn->unp_socket;
  444                 if (so2->so_state & SS_ISCONNECTING) {
  445                         KASSERT(solocked2(so, so->so_head));
  446                         KASSERT(solocked2(so2, so->so_head));
  447                         soisconnected(so2);
  448                 }
  449                 /*
  450                  * If the connection is fully established, break the
  451                  * association with uipc_lock and give the connected
  452                  * pair a separate lock to share.
  453                  * There is a race here: sotounpcb(so2)->unp_streamlock
  454                  * is not locked, so when changing so2->so_lock
  455                  * another thread can grab it while so->so_lock is still
  456                  * pointing to the (locked) uipc_lock.
  457                  * This should be harmless, except that this makes
  458                  * solocked2() and solocked() unreliable.
  459                  * Another problem is that unp_setaddr() expects the
  460                  * socket locked. Grabbing sotounpcb(so2)->unp_streamlock
  461                  * fixes both issues.
  462                  */
  463                 mutex_enter(sotounpcb(so2)->unp_streamlock);
  464                 unp_setpeerlocks(so2, so);
  465                 /*
  466                  * Only now return peer's address, as we may need to
  467                  * block in order to allocate memory.
  468                  *
  469                  * XXX Minor race: connection can be broken while
  470                  * lock is dropped in unp_setaddr().  We will return
  471                  * error == 0 and sun_noname as the peer address.
  472                  */
  473                 unp_setaddr(so, nam, true);
  474                 /* so_lock now points to unp_streamlock */
  475                 mutex_exit(so2->so_lock);
  476                 break;
  477 
  478         case PRU_SHUTDOWN:
  479                 socantsendmore(so);
  480                 unp_shutdown(unp);
  481                 break;
  482 
  483         case PRU_RCVD:
  484                 switch (so->so_type) {
  485 
  486                 case SOCK_DGRAM:
  487                         panic("uipc 1");
  488                         /*NOTREACHED*/
  489 
  490                 case SOCK_STREAM:
                              /* rcv: our receive buffer; snd: the peer's send buffer. */
  491 #define rcv (&so->so_rcv)
  492 #define snd (&so2->so_snd)
  493                         if (unp->unp_conn == 0)
  494                                 break;
  495                         so2 = unp->unp_conn->unp_socket;
  496                         KASSERT(solocked2(so, so2));
  497                         /*
  498                          * Adjust backpressure on sender
  499                          * and wakeup any waiting to write.
  500                          */
  501                         snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
  502                         unp->unp_mbcnt = rcv->sb_mbcnt;
  503                         newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc;
  504                         (void)chgsbsize(so2->so_uidinfo,
  505                             &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
  506                         unp->unp_cc = rcv->sb_cc;
  507                         sowwakeup(so2);
  508 #undef snd
  509 #undef rcv
  510                         break;
  511 
  512                 default:
  513                         panic("uipc 2");
  514                 }
  515                 break;
  516 
  517         case PRU_SEND:
  518                 /*
  519                  * Note: unp_internalize() rejects any control message
  520                  * other than SCM_RIGHTS, and only allows one.  This
  521                  * has the side-effect of preventing a caller from
  522                  * forging SCM_CREDS.
  523                  */
  524                 if (control) {
                              /* unp_internalize() is called with the socket unlocked. */
  525                         sounlock(so);
  526                         error = unp_internalize(&control);
  527                         solock(so);
  528                         if (error != 0) {
  529                                 m_freem(control);
  530                                 m_freem(m);
  531                                 break;
  532                         }
  533                 }
  534                 switch (so->so_type) {
  535 
  536                 case SOCK_DGRAM: {
  537                         KASSERT(so->so_lock == uipc_lock);
  538                         if (nam) {
  539                                 if ((so->so_state & SS_ISCONNECTED) != 0)
  540                                         error = EISCONN;
  541                                 else {
  542                                         /*
  543                                          * Note: once connected, the
  544                                          * socket's lock must not be
  545                                          * dropped until we have sent
  546                                          * the message and disconnected.
  547                                          * This is necessary to prevent
  548                                          * intervening control ops, like
  549                                          * another connection.
  550                                          */
  551                                         error = unp_connect(so, nam, l);
  552                                 }
  553                         } else {
  554                                 if ((so->so_state & SS_ISCONNECTED) == 0)
  555                                         error = ENOTCONN;
  556                         }
  557                         if (error) {
  558                                 unp_dispose(control);
  559                                 m_freem(control);
  560                                 m_freem(m);
  561                                 break;
  562                         }
  563                         KASSERT(p != NULL);
  564                         error = unp_output(m, control, unp, l);
                              /* Undo the temporary connection made for this send. */
  565                         if (nam)
  566                                 unp_disconnect(unp);
  567                         break;
  568                 }
  569 
  570                 case SOCK_STREAM:
                              /* rcv: the peer's receive buffer; snd: our send buffer. */
  571 #define rcv (&so2->so_rcv)
  572 #define snd (&so->so_snd)
  573                         if (unp->unp_conn == NULL) {
  574                                 error = ENOTCONN;
  575                                 break;
  576                         }
  577                         so2 = unp->unp_conn->unp_socket;
  578                         KASSERT(solocked2(so, so2));
  579                         if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
  580                                 /*
  581                                  * Credentials are passed only once on
  582                                  * SOCK_STREAM.
  583                                  */
  584                                 unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
  585                                 control = unp_addsockcred(l, control);
  586                         }
  587                         /*
  588                          * Send to paired receive port, and then reduce
  589                          * send buffer hiwater marks to maintain backpressure.
  590                          * Wake up readers.
  591                          */
  592                         if (control) {
                                      /*
                                       * A non-zero return clears our pointer so
                                       * that the cleanup below leaves control
                                       * alone.
                                       */
  593                                 if (sbappendcontrol(rcv, m, control) != 0)
  594                                         control = NULL;
  595                         } else
  596                                 sbappend(rcv, m);
  597                         snd->sb_mbmax -=
  598                             rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
  599                         unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
  600                         newhiwat = snd->sb_hiwat -
  601                             (rcv->sb_cc - unp->unp_conn->unp_cc);
  602                         (void)chgsbsize(so->so_uidinfo,
  603                             &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
  604                         unp->unp_conn->unp_cc = rcv->sb_cc;
  605                         sorwakeup(so2);
  606 #undef snd
  607 #undef rcv
  608                         if (control != NULL) {
  609                                 unp_dispose(control);
  610                                 m_freem(control);
  611                         }
  612                         break;
  613 
  614                 default:
  615                         panic("uipc 4");
  616                 }
  617                 break;
  618 
  619         case PRU_ABORT:
  620                 (void)unp_drop(unp, ECONNABORTED);
  621 
  622                 KASSERT(so->so_head == NULL);
  623 #ifdef DIAGNOSTIC
  624                 if (so->so_pcb == NULL)
  625                         panic("uipc 5: drop killed pcb");
  626 #endif
  627                 unp_detach(unp);
  628                 break;
  629 
  630         case PRU_SENSE:
                      /* m is a struct stat * here; fill it in and return directly. */
  631                 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
  632                 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
  633                         so2 = unp->unp_conn->unp_socket;
  634                         KASSERT(solocked2(so, so2));
  635                         ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
  636                 }
  637                 ((struct stat *) m)->st_dev = NODEV;
                      /* Hand out a fake inode number the first time it is needed. */
  638                 if (unp->unp_ino == 0)
  639                         unp->unp_ino = unp_ino++;
  640                 ((struct stat *) m)->st_atimespec =
  641                     ((struct stat *) m)->st_mtimespec =
  642                     ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
  643                 ((struct stat *) m)->st_ino = unp->unp_ino;
  644                 return (0);
  645 
  646         case PRU_RCVOOB:
  647                 error = EOPNOTSUPP;
  648                 break;
  649 
  650         case PRU_SENDOOB:
  651                 m_freem(control);
  652                 m_freem(m);
  653                 error = EOPNOTSUPP;
  654                 break;
  655 
  656         case PRU_SOCKADDR:
  657                 unp_setaddr(so, nam, false);
  658                 break;
  659 
  660         case PRU_PEERADDR:
  661                 unp_setaddr(so, nam, true);
  662                 break;
  663 
  664         default:
  665                 panic("piusrreq");
  666         }
  667 
  668 release:
  669         return (error);
  670 }
  671 
  672 /*
  673  * Unix domain socket option processing.
       *
       * Only option level 0 is handled; any other level yields ENOPROTOOPT.
       *
       * PRCO_SETOPT: LOCAL_CREDS and LOCAL_CONNWAIT take an int and set or
       * clear UNP_WANTCRED / UNP_CONNWAIT in unp_flags.
       * PRCO_GETOPT: LOCAL_PEEREID returns unp_connid when UNP_EIDSVALID is
       * set (EINVAL otherwise); LOCAL_CREDS returns 1 or 0 for UNP_WANTCRED.
       * The socket lock is dropped while the result is stored into sopt and
       * retaken before returning.
       * Unknown option names yield ENOPROTOOPT.  An op other than the two
       * above falls through the switch and returns 0.
       *
       * The socket must be locked on entry (asserted).
  674  */
  675 int
  676 uipc_ctloutput(int op, struct socket *so, struct sockopt *sopt)
  677 {
  678         struct unpcb *unp = sotounpcb(so);
  679         int optval = 0, error = 0;
  680 
  681         KASSERT(solocked(so));
  682 
  683         if (sopt->sopt_level != 0) {
  684                 error = ENOPROTOOPT;
  685         } else switch (op) {
  686 
  687         case PRCO_SETOPT:
  688                 switch (sopt->sopt_name) {
  689                 case LOCAL_CREDS:
  690                 case LOCAL_CONNWAIT:
  691                         error = sockopt_getint(sopt, &optval);
  692                         if (error)
  693                                 break;
  694                         switch (sopt->sopt_name) {
                              /* Set the flag bit for a non-zero optval, clear it for zero. */
  695 #define OPTSET(bit) \
  696         if (optval) \
  697                 unp->unp_flags |= (bit); \
  698         else \
  699                 unp->unp_flags &= ~(bit);
  700 
  701                         case LOCAL_CREDS:
  702                                 OPTSET(UNP_WANTCRED);
  703                                 break;
  704                         case LOCAL_CONNWAIT:
  705                                 OPTSET(UNP_CONNWAIT);
  706                                 break;
  707                         }
  708                         break;
  709 #undef OPTSET
  710 
  711                 default:
  712                         error = ENOPROTOOPT;
  713                         break;
  714                 }
  715                 break;
  716 
  717         case PRCO_GETOPT:
                      /* Unlocked from here until the matching solock() below. */
  718                 sounlock(so);
  719                 switch (sopt->sopt_name) {
  720                 case LOCAL_PEEREID:
  721                         if (unp->unp_flags & UNP_EIDSVALID) {
  722                                 error = sockopt_set(sopt,
  723                                     &unp->unp_connid, sizeof(unp->unp_connid));
  724                         } else {
  725                                 error = EINVAL;
  726                         }
  727                         break;
  728                 case LOCAL_CREDS:
                      /* Evaluates to 1 if the flag bit is set in unp_flags, else 0. */
  729 #define OPTBIT(bit)     (unp->unp_flags & (bit) ? 1 : 0)
  730 
  731                         optval = OPTBIT(UNP_WANTCRED);
  732                         error = sockopt_setint(sopt, optval);
  733                         break;
  734 #undef OPTBIT
  735 
  736                 default:
  737                         error = ENOPROTOOPT;
  738                         break;
  739                 }
  740                 solock(so);
  741                 break;
  742         }
  743         return (error);
  744 }
  745 
  746 /*
  747  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
  748  * for stream sockets, although the total for sender and receiver is
  749  * actually only PIPSIZ.
  750  * Datagram sockets really use the sendspace as the maximum datagram size,
  751  * and don't really want to reserve the sendspace.  Their recvspace should
  752  * be large enough for at least one max-size datagram plus address.
  753  */
  754 #define PIPSIZ  4096
  755 u_long  unpst_sendspace = PIPSIZ;
  756 u_long  unpst_recvspace = PIPSIZ;
  757 u_long  unpdg_sendspace = 2*1024;       /* really max datagram size */
  758 u_long  unpdg_recvspace = 4*1024;
  759 
  760 u_int   unp_rights;                     /* files in flight */
  761 u_int   unp_rights_ratio = 2;           /* limit, fraction of maxfiles */
  762 
  763 int
  764 unp_attach(struct socket *so)
  765 {
  766         struct unpcb *unp;
  767         int error;
  768 
  769         switch (so->so_type) {
  770         case SOCK_STREAM:
  771                 if (so->so_lock == NULL) {
  772                         /* 
  773                          * XXX Assuming that no socket locks are held,
  774                          * as this call may sleep.
  775                          */
  776                         so->so_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
  777                         solock(so);
  778                 }
  779                 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  780                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
  781                         if (error != 0)
  782                                 return (error);
  783                 }
  784                 break;
  785 
  786         case SOCK_DGRAM:
  787                 if (so->so_lock == NULL) {
  788                         mutex_obj_hold(uipc_lock);
  789                         so->so_lock = uipc_lock;
  790                         solock(so);
  791                 }
  792                 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  793                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
  794                         if (error != 0)
  795                                 return (error);
  796                 }
  797                 break;
  798 
  799         default:
  800                 panic("unp_attach");
  801         }
  802         KASSERT(solocked(so));
  803         unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
  804         if (unp == NULL)
  805                 return (ENOBUFS);
  806         memset(unp, 0, sizeof(*unp));
  807         unp->unp_socket = so;
  808         so->so_pcb = unp;
  809         nanotime(&unp->unp_ctime);
  810         return (0);
  811 }
  812 
  813 void
  814 unp_detach(struct unpcb *unp)
  815 {
  816         struct socket *so;
  817         vnode_t *vp;
  818 
  819         so = unp->unp_socket;
  820 
  821  retry:
  822         if ((vp = unp->unp_vnode) != NULL) {
  823                 sounlock(so);
  824                 /* Acquire v_interlock to protect against unp_connect(). */
  825                 /* XXXAD racy */
  826                 mutex_enter(&vp->v_interlock);
  827                 vp->v_socket = NULL;
  828                 vrelel(vp, 0);
  829                 solock(so);
  830                 unp->unp_vnode = NULL;
  831         }
  832         if (unp->unp_conn)
  833                 unp_disconnect(unp);
  834         while (unp->unp_refs) {
  835                 KASSERT(solocked2(so, unp->unp_refs->unp_socket));
  836                 if (unp_drop(unp->unp_refs, ECONNRESET)) {
  837                         solock(so);
  838                         goto retry;
  839                 }
  840         }
  841         soisdisconnected(so);
  842         so->so_pcb = NULL;
  843         if (unp_rights) {
  844                 /*
  845                  * Normally the receive buffer is flushed later, in sofree,
  846                  * but if our receive buffer holds references to files that
  847                  * are now garbage, we will enqueue those file references to
  848                  * the garbage collector and kick it into action.
  849                  */
  850                 sorflush(so);
  851                 unp_free(unp);
  852                 unp_thread_kick();
  853         } else
  854                 unp_free(unp);
  855 }
  856 
  857 int
  858 unp_bind(struct socket *so, struct mbuf *nam, struct lwp *l)
  859 {
  860         struct sockaddr_un *sun;
  861         struct unpcb *unp;
  862         vnode_t *vp;
  863         struct vattr vattr;
  864         size_t addrlen;
  865         int error;
  866         struct pathbuf *pb;
  867         struct nameidata nd;
  868         proc_t *p;
  869 
  870         unp = sotounpcb(so);
  871         if (unp->unp_vnode != NULL)
  872                 return (EINVAL);
  873         if ((unp->unp_flags & UNP_BUSY) != 0) {
  874                 /*
  875                  * EALREADY may not be strictly accurate, but since this
  876                  * is a major application error it's hardly a big deal.
  877                  */
  878                 return (EALREADY);
  879         }
  880         unp->unp_flags |= UNP_BUSY;
  881         sounlock(so);
  882 
  883         /*
  884          * Allocate the new sockaddr.  We have to allocate one
  885          * extra byte so that we can ensure that the pathname
  886          * is nul-terminated.
  887          */
  888         p = l->l_proc;
  889         addrlen = nam->m_len + 1;
  890         sun = malloc(addrlen, M_SONAME, M_WAITOK);
  891         m_copydata(nam, 0, nam->m_len, (void *)sun);
  892         *(((char *)sun) + nam->m_len) = '\0';
  893 
  894         pb = pathbuf_create(sun->sun_path);
  895         if (pb == NULL) {
  896                 error = ENOMEM;
  897                 goto bad;
  898         }
  899         NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT | TRYEMULROOT, pb);
  900 
  901 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  902         if ((error = namei(&nd)) != 0) {
  903                 pathbuf_destroy(pb);
  904                 goto bad;
  905         }
  906         vp = nd.ni_vp;
  907         if (vp != NULL) {
  908                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
  909                 if (nd.ni_dvp == vp)
  910                         vrele(nd.ni_dvp);
  911                 else
  912                         vput(nd.ni_dvp);
  913                 vrele(vp);
  914                 pathbuf_destroy(pb);
  915                 error = EADDRINUSE;
  916                 goto bad;
  917         }
  918         vattr_null(&vattr);
  919         vattr.va_type = VSOCK;
  920         vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask);
  921         error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  922         if (error) {
  923                 pathbuf_destroy(pb);
  924                 goto bad;
  925         }
  926         vp = nd.ni_vp;
  927         solock(so);
  928         vp->v_socket = unp->unp_socket;
  929         unp->unp_vnode = vp;
  930         unp->unp_addrlen = addrlen;
  931         unp->unp_addr = sun;
  932         unp->unp_connid.unp_pid = p->p_pid;
  933         unp->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred);
  934         unp->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred);
  935         unp->unp_flags |= UNP_EIDSBIND;
  936         VOP_UNLOCK(vp);
  937         unp->unp_flags &= ~UNP_BUSY;
  938         pathbuf_destroy(pb);
  939         return (0);
  940 
  941  bad:
  942         free(sun, M_SONAME);
  943         solock(so);
  944         unp->unp_flags &= ~UNP_BUSY;
  945         return (error);
  946 }
  947 
  948 int
  949 unp_connect(struct socket *so, struct mbuf *nam, struct lwp *l)
  950 {
  951         struct sockaddr_un *sun;
  952         vnode_t *vp;
  953         struct socket *so2, *so3;
  954         struct unpcb *unp, *unp2, *unp3;
  955         size_t addrlen;
  956         int error;
  957         struct pathbuf *pb;
  958         struct nameidata nd;
  959 
  960         unp = sotounpcb(so);
  961         if ((unp->unp_flags & UNP_BUSY) != 0) {
  962                 /*
  963                  * EALREADY may not be strictly accurate, but since this
  964                  * is a major application error it's hardly a big deal.
  965                  */
  966                 return (EALREADY);
  967         }
  968         unp->unp_flags |= UNP_BUSY;
  969         sounlock(so);
  970 
  971         /*
  972          * Allocate a temporary sockaddr.  We have to allocate one extra
  973          * byte so that we can ensure that the pathname is nul-terminated.
  974          * When we establish the connection, we copy the other PCB's
  975          * sockaddr to our own.
  976          */
  977         addrlen = nam->m_len + 1;
  978         sun = malloc(addrlen, M_SONAME, M_WAITOK);
  979         m_copydata(nam, 0, nam->m_len, (void *)sun);
  980         *(((char *)sun) + nam->m_len) = '\0';
  981 
  982         pb = pathbuf_create(sun->sun_path);
  983         if (pb == NULL) {
  984                 error = ENOMEM;
  985                 goto bad2;
  986         }
  987 
  988         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
  989 
  990         if ((error = namei(&nd)) != 0) {
  991                 pathbuf_destroy(pb);
  992                 goto bad2;
  993         }
  994         vp = nd.ni_vp;
  995         if (vp->v_type != VSOCK) {
  996                 error = ENOTSOCK;
  997                 goto bad;
  998         }
  999         pathbuf_destroy(pb);
 1000         if ((error = VOP_ACCESS(vp, VWRITE, l->l_cred)) != 0)
 1001                 goto bad;
 1002         /* Acquire v_interlock to protect against unp_detach(). */
 1003         mutex_enter(&vp->v_interlock);
 1004         so2 = vp->v_socket;
 1005         if (so2 == NULL) {
 1006                 mutex_exit(&vp->v_interlock);
 1007                 error = ECONNREFUSED;
 1008                 goto bad;
 1009         }
 1010         if (so->so_type != so2->so_type) {
 1011                 mutex_exit(&vp->v_interlock);
 1012                 error = EPROTOTYPE;
 1013                 goto bad;
 1014         }
 1015         solock(so);
 1016         unp_resetlock(so);
 1017         mutex_exit(&vp->v_interlock);
 1018         if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
 1019                 /*
 1020                  * This may seem somewhat fragile but is OK: if we can
 1021                  * see SO_ACCEPTCONN set on the endpoint, then it must
 1022                  * be locked by the domain-wide uipc_lock.
 1023                  */
 1024                 KASSERT((so2->so_options & SO_ACCEPTCONN) == 0 ||
 1025                     so2->so_lock == uipc_lock);
 1026                 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
 1027                     (so3 = sonewconn(so2, 0)) == NULL) {
 1028                         error = ECONNREFUSED;
 1029                         sounlock(so);
 1030                         goto bad;
 1031                 }
 1032                 unp2 = sotounpcb(so2);
 1033                 unp3 = sotounpcb(so3);
 1034                 if (unp2->unp_addr) {
 1035                         unp3->unp_addr = malloc(unp2->unp_addrlen,
 1036                             M_SONAME, M_WAITOK);
 1037                         memcpy(unp3->unp_addr, unp2->unp_addr,
 1038                             unp2->unp_addrlen);
 1039                         unp3->unp_addrlen = unp2->unp_addrlen;
 1040                 }
 1041                 unp3->unp_flags = unp2->unp_flags;
 1042                 unp3->unp_connid.unp_pid = l->l_proc->p_pid;
 1043                 unp3->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred);
 1044                 unp3->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred);
 1045                 unp3->unp_flags |= UNP_EIDSVALID;
 1046                 if (unp2->unp_flags & UNP_EIDSBIND) {
 1047                         unp->unp_connid = unp2->unp_connid;
 1048                         unp->unp_flags |= UNP_EIDSVALID;
 1049                 }
 1050                 so2 = so3;
 1051         }
 1052         error = unp_connect2(so, so2, PRU_CONNECT);
 1053         sounlock(so);
 1054  bad:
 1055         vput(vp);
 1056  bad2:
 1057         free(sun, M_SONAME);
 1058         solock(so);
 1059         unp->unp_flags &= ~UNP_BUSY;
 1060         return (error);
 1061 }
 1062 
 1063 int
 1064 unp_connect2(struct socket *so, struct socket *so2, int req)
 1065 {
 1066         struct unpcb *unp = sotounpcb(so);
 1067         struct unpcb *unp2;
 1068 
 1069         if (so2->so_type != so->so_type)
 1070                 return (EPROTOTYPE);
 1071 
 1072         /*
 1073          * All three sockets involved must be locked by same lock:
 1074          *
 1075          * local endpoint (so)
 1076          * remote endpoint (so2)
 1077          * queue head (so2->so_head, only if PR_CONNREQUIRED)
 1078          */
 1079         KASSERT(solocked2(so, so2));
 1080         KASSERT(so->so_head == NULL);
 1081         if (so2->so_head != NULL) {
 1082                 KASSERT(so2->so_lock == uipc_lock);
 1083                 KASSERT(solocked2(so2, so2->so_head));
 1084         }
 1085 
 1086         unp2 = sotounpcb(so2);
 1087         unp->unp_conn = unp2;
 1088         switch (so->so_type) {
 1089 
 1090         case SOCK_DGRAM:
 1091                 unp->unp_nextref = unp2->unp_refs;
 1092                 unp2->unp_refs = unp;
 1093                 soisconnected(so);
 1094                 break;
 1095 
 1096         case SOCK_STREAM:
 1097                 unp2->unp_conn = unp;
 1098                 if (req == PRU_CONNECT &&
 1099                     ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
 1100                         soisconnecting(so);
 1101                 else
 1102                         soisconnected(so);
 1103                 soisconnected(so2);
 1104                 /*
 1105                  * If the connection is fully established, break the
 1106                  * association with uipc_lock and give the connected
 1107                  * pair a seperate lock to share.  For CONNECT2, we
 1108                  * require that the locks already match (the sockets
 1109                  * are created that way).
 1110                  */
 1111                 if (req == PRU_CONNECT) {
 1112                         KASSERT(so2->so_head != NULL);
 1113                         unp_setpeerlocks(so, so2);
 1114                 }
 1115                 break;
 1116 
 1117         default:
 1118                 panic("unp_connect2");
 1119         }
 1120         return (0);
 1121 }
 1122 
 1123 void
 1124 unp_disconnect(struct unpcb *unp)
 1125 {
 1126         struct unpcb *unp2 = unp->unp_conn;
 1127         struct socket *so;
 1128 
 1129         if (unp2 == 0)
 1130                 return;
 1131         unp->unp_conn = 0;
 1132         so = unp->unp_socket;
 1133         switch (so->so_type) {
 1134         case SOCK_DGRAM:
 1135                 if (unp2->unp_refs == unp)
 1136                         unp2->unp_refs = unp->unp_nextref;
 1137                 else {
 1138                         unp2 = unp2->unp_refs;
 1139                         for (;;) {
 1140                                 KASSERT(solocked2(so, unp2->unp_socket));
 1141                                 if (unp2 == 0)
 1142                                         panic("unp_disconnect");
 1143                                 if (unp2->unp_nextref == unp)
 1144                                         break;
 1145                                 unp2 = unp2->unp_nextref;
 1146                         }
 1147                         unp2->unp_nextref = unp->unp_nextref;
 1148                 }
 1149                 unp->unp_nextref = 0;
 1150                 so->so_state &= ~SS_ISCONNECTED;
 1151                 break;
 1152 
 1153         case SOCK_STREAM:
 1154                 KASSERT(solocked2(so, unp2->unp_socket));
 1155                 soisdisconnected(so);
 1156                 unp2->unp_conn = 0;
 1157                 soisdisconnected(unp2->unp_socket);
 1158                 break;
 1159         }
 1160 }
 1161 
 #ifdef notdef
 /* Compiled only when `notdef' is defined: abort by detaching. */
 unp_abort(struct unpcb *unp)
 {

         unp_detach(unp);
 }
 #endif
 1168 
 1169 void
 1170 unp_shutdown(struct unpcb *unp)
 1171 {
 1172         struct socket *so;
 1173 
 1174         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
 1175             (so = unp->unp_conn->unp_socket))
 1176                 socantrcvmore(so);
 1177 }
 1178 
 1179 bool
 1180 unp_drop(struct unpcb *unp, int errno)
 1181 {
 1182         struct socket *so = unp->unp_socket;
 1183 
 1184         KASSERT(solocked(so));
 1185 
 1186         so->so_error = errno;
 1187         unp_disconnect(unp);
 1188         if (so->so_head) {
 1189                 so->so_pcb = NULL;
 1190                 /* sofree() drops the socket lock */
 1191                 sofree(so);
 1192                 unp_free(unp);
 1193                 return true;
 1194         }
 1195         return false;
 1196 }
 1197 
 #ifdef notdef
 /* Compiled only when `notdef' is defined: empty placeholder. */
 unp_drain(void)
 {
 }
 #endif
 1204 
 1205 int
 1206 unp_externalize(struct mbuf *rights, struct lwp *l)
 1207 {
 1208         struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
 1209         struct proc *p = l->l_proc;
 1210         int i, *fdp;
 1211         file_t **rp;
 1212         file_t *fp;
 1213         int nfds, error = 0;
 1214 
 1215         nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
 1216             sizeof(file_t *);
 1217         rp = (file_t **)CMSG_DATA(cm);
 1218 
 1219         fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
 1220         rw_enter(&p->p_cwdi->cwdi_lock, RW_READER);
 1221 
 1222         /* Make sure the recipient should be able to see the files.. */
 1223         if (p->p_cwdi->cwdi_rdir != NULL) {
 1224                 rp = (file_t **)CMSG_DATA(cm);
 1225                 for (i = 0; i < nfds; i++) {
 1226                         fp = *rp++;
 1227                         /*
 1228                          * If we are in a chroot'ed directory, and
 1229                          * someone wants to pass us a directory, make
 1230                          * sure it's inside the subtree we're allowed
 1231                          * to access.
 1232                          */
 1233                         if (fp->f_type == DTYPE_VNODE) {
 1234                                 vnode_t *vp = (vnode_t *)fp->f_data;
 1235                                 if ((vp->v_type == VDIR) &&
 1236                                     !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) {
 1237                                         error = EPERM;
 1238                                         break;
 1239                                 }
 1240                         }
 1241                 }
 1242         }
 1243 
 1244  restart:
 1245         rp = (file_t **)CMSG_DATA(cm);
 1246         if (error != 0) {
 1247                 for (i = 0; i < nfds; i++) {
 1248                         fp = *rp;
 1249                         *rp++ = 0;
 1250                         unp_discard_now(fp);
 1251                 }
 1252                 goto out;
 1253         }
 1254 
 1255         /*
 1256          * First loop -- allocate file descriptor table slots for the
 1257          * new files.
 1258          */
 1259         for (i = 0; i < nfds; i++) {
 1260                 fp = *rp++;
 1261                 if ((error = fd_alloc(p, 0, &fdp[i])) != 0) {
 1262                         /*
 1263                          * Back out what we've done so far.
 1264                          */
 1265                         for (--i; i >= 0; i--) {
 1266                                 fd_abort(p, NULL, fdp[i]);
 1267                         }
 1268                         if (error == ENOSPC) {
 1269                                 fd_tryexpand(p);
 1270                                 error = 0;
 1271                         } else {
 1272                                 /*
 1273                                  * This is the error that has historically
 1274                                  * been returned, and some callers may
 1275                                  * expect it.
 1276                                  */
 1277                                 error = EMSGSIZE;
 1278                         }
 1279                         goto restart;
 1280                 }
 1281         }
 1282 
 1283         /*
 1284          * Now that adding them has succeeded, update all of the
 1285          * file passing state and affix the descriptors.
 1286          */
 1287         rp = (file_t **)CMSG_DATA(cm);
 1288         for (i = 0; i < nfds; i++) {
 1289                 fp = *rp++;
 1290                 atomic_dec_uint(&unp_rights);
 1291                 fd_affix(p, fp, fdp[i]);
 1292                 mutex_enter(&fp->f_lock);
 1293                 fp->f_msgcount--;
 1294                 mutex_exit(&fp->f_lock);
 1295                 /*
 1296                  * Note that fd_affix() adds a reference to the file.
 1297                  * The file may already have been closed by another
 1298                  * LWP in the process, so we must drop the reference
 1299                  * added by unp_internalize() with closef().
 1300                  */
 1301                 closef(fp);
 1302         }
 1303 
 1304         /*
 1305          * Copy temporary array to message and adjust length, in case of
 1306          * transition from large file_t pointers to ints.
 1307          */
 1308         memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
 1309         cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
 1310         rights->m_len = CMSG_SPACE(nfds * sizeof(int));
 1311  out:
 1312         rw_exit(&p->p_cwdi->cwdi_lock);
 1313         free(fdp, M_TEMP);
 1314         return (error);
 1315 }
 1316 
 1317 int
 1318 unp_internalize(struct mbuf **controlp)
 1319 {
 1320         filedesc_t *fdescp = curlwp->l_fd;
 1321         struct mbuf *control = *controlp;
 1322         struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
 1323         file_t **rp, **files;
 1324         file_t *fp;
 1325         int i, fd, *fdp;
 1326         int nfds, error;
 1327         u_int maxmsg;
 1328 
 1329         error = 0;
 1330         newcm = NULL;
 1331 
 1332         /* Sanity check the control message header. */
 1333         if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
 1334             cm->cmsg_len > control->m_len ||
 1335             cm->cmsg_len < CMSG_ALIGN(sizeof(*cm)))
 1336                 return (EINVAL);
 1337 
 1338         /*
 1339          * Verify that the file descriptors are valid, and acquire
 1340          * a reference to each.
 1341          */
 1342         nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
 1343         fdp = (int *)CMSG_DATA(cm);
 1344         maxmsg = maxfiles / unp_rights_ratio;
 1345         for (i = 0; i < nfds; i++) {
 1346                 fd = *fdp++;
 1347                 if (atomic_inc_uint_nv(&unp_rights) > maxmsg) {
 1348                         atomic_dec_uint(&unp_rights);
 1349                         nfds = i;
 1350                         error = EAGAIN;
 1351                         goto out;
 1352                 }
 1353                 if ((fp = fd_getfile(fd)) == NULL) {
 1354                         atomic_dec_uint(&unp_rights);
 1355                         nfds = i;
 1356                         error = EBADF;
 1357                         goto out;
 1358                 }
 1359         }
 1360 
 1361         /* Allocate new space and copy header into it. */
 1362         newcm = malloc(CMSG_SPACE(nfds * sizeof(file_t *)), M_MBUF, M_WAITOK);
 1363         if (newcm == NULL) {
 1364                 error = E2BIG;
 1365                 goto out;
 1366         }
 1367         memcpy(newcm, cm, sizeof(struct cmsghdr));
 1368         files = (file_t **)CMSG_DATA(newcm);
 1369 
 1370         /*
 1371          * Transform the file descriptors into file_t pointers, in
 1372          * reverse order so that if pointers are bigger than ints, the
 1373          * int won't get until we're done.  No need to lock, as we have
 1374          * already validated the descriptors with fd_getfile().
 1375          */
 1376         fdp = (int *)CMSG_DATA(cm) + nfds;
 1377         rp = files + nfds;
 1378         for (i = 0; i < nfds; i++) {
 1379                 fp = fdescp->fd_dt->dt_ff[*--fdp]->ff_file;
 1380                 KASSERT(fp != NULL);
 1381                 mutex_enter(&fp->f_lock);
 1382                 *--rp = fp;
 1383                 fp->f_count++;
 1384                 fp->f_msgcount++;
 1385                 mutex_exit(&fp->f_lock);
 1386         }
 1387 
 1388  out:
 1389         /* Release descriptor references. */
 1390         fdp = (int *)CMSG_DATA(cm);
 1391         for (i = 0; i < nfds; i++) {
 1392                 fd_putfile(*fdp++);
 1393                 if (error != 0) {
 1394                         atomic_dec_uint(&unp_rights);
 1395                 }
 1396         }
 1397 
 1398         if (error == 0) {
 1399                 if (control->m_flags & M_EXT) {
 1400                         m_freem(control);
 1401                         *controlp = control = m_get(M_WAIT, MT_CONTROL);
 1402                 }
 1403                 MEXTADD(control, newcm, CMSG_SPACE(nfds * sizeof(file_t *)),
 1404                     M_MBUF, NULL, NULL);
 1405                 cm = newcm;
 1406                 /*
 1407                  * Adjust message & mbuf to note amount of space
 1408                  * actually used.
 1409                  */
 1410                 cm->cmsg_len = CMSG_LEN(nfds * sizeof(file_t *));
 1411                 control->m_len = CMSG_SPACE(nfds * sizeof(file_t *));
 1412         }
 1413 
 1414         return error;
 1415 }
 1416 
 1417 struct mbuf *
 1418 unp_addsockcred(struct lwp *l, struct mbuf *control)
 1419 {
 1420         struct cmsghdr *cmp;
 1421         struct sockcred *sc;
 1422         struct mbuf *m, *n;
 1423         int len, space, i;
 1424 
 1425         len = CMSG_LEN(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
 1426         space = CMSG_SPACE(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
 1427 
 1428         m = m_get(M_WAIT, MT_CONTROL);
 1429         if (space > MLEN) {
 1430                 if (space > MCLBYTES)
 1431                         MEXTMALLOC(m, space, M_WAITOK);
 1432                 else
 1433                         m_clget(m, M_WAIT);
 1434                 if ((m->m_flags & M_EXT) == 0) {
 1435                         m_free(m);
 1436                         return (control);
 1437                 }
 1438         }
 1439 
 1440         m->m_len = space;
 1441         m->m_next = NULL;
 1442         cmp = mtod(m, struct cmsghdr *);
 1443         sc = (struct sockcred *)CMSG_DATA(cmp);
 1444         cmp->cmsg_len = len;
 1445         cmp->cmsg_level = SOL_SOCKET;
 1446         cmp->cmsg_type = SCM_CREDS;
 1447         sc->sc_uid = kauth_cred_getuid(l->l_cred);
 1448         sc->sc_euid = kauth_cred_geteuid(l->l_cred);
 1449         sc->sc_gid = kauth_cred_getgid(l->l_cred);
 1450         sc->sc_egid = kauth_cred_getegid(l->l_cred);
 1451         sc->sc_ngroups = kauth_cred_ngroups(l->l_cred);
 1452         for (i = 0; i < sc->sc_ngroups; i++)
 1453                 sc->sc_groups[i] = kauth_cred_group(l->l_cred, i);
 1454 
 1455         /*
 1456          * If a control message already exists, append us to the end.
 1457          */
 1458         if (control != NULL) {
 1459                 for (n = control; n->m_next != NULL; n = n->m_next)
 1460                         ;
 1461                 n->m_next = m;
 1462         } else
 1463                 control = m;
 1464 
 1465         return (control);
 1466 }
 1467 
 1468 /*
 1469  * Do a mark-sweep GC of files in the system, to free up any which are
 1470  * caught in flight to an about-to-be-closed socket.  Additionally,
 1471  * process deferred file closures.
 1472  */
 1473 static void
 1474 unp_gc(file_t *dp)
 1475 {
 1476         extern  struct domain unixdomain;
 1477         file_t *fp, *np;
 1478         struct socket *so, *so1;
 1479         u_int i, old, new;
 1480         bool didwork;
 1481 
 1482         KASSERT(curlwp == unp_thread_lwp);
 1483         KASSERT(mutex_owned(&filelist_lock));
 1484 
 1485         /*
 1486          * First, process deferred file closures.
 1487          */
 1488         while (!SLIST_EMPTY(&unp_thread_discard)) {
 1489                 fp = SLIST_FIRST(&unp_thread_discard);
 1490                 KASSERT(fp->f_unpcount > 0);
 1491                 KASSERT(fp->f_count > 0);
 1492                 KASSERT(fp->f_msgcount > 0);
 1493                 KASSERT(fp->f_count >= fp->f_unpcount);
 1494                 KASSERT(fp->f_count >= fp->f_msgcount);
 1495                 KASSERT(fp->f_msgcount >= fp->f_unpcount);
 1496                 SLIST_REMOVE_HEAD(&unp_thread_discard, f_unplist);
 1497                 i = fp->f_unpcount;
 1498                 fp->f_unpcount = 0;
 1499                 mutex_exit(&filelist_lock);
 1500                 for (; i != 0; i--) {
 1501                         unp_discard_now(fp);
 1502                 }
 1503                 mutex_enter(&filelist_lock);
 1504         }
 1505 
 1506         /*
 1507          * Clear mark bits.  Ensure that we don't consider new files
 1508          * entering the file table during this loop (they will not have
 1509          * FSCAN set).
 1510          */
 1511         unp_defer = 0;
 1512         LIST_FOREACH(fp, &filehead, f_list) {
 1513                 for (old = fp->f_flag;; old = new) {
 1514                         new = atomic_cas_uint(&fp->f_flag, old,
 1515                             (old | FSCAN) & ~(FMARK|FDEFER));
 1516                         if (__predict_true(old == new)) {
 1517                                 break;
 1518                         }
 1519                 }
 1520         }
 1521 
 1522         /*
 1523          * Iterate over the set of sockets, marking ones believed (based on
 1524          * refcount) to be referenced from a process, and marking for rescan
 1525          * sockets which are queued on a socket.  Rescan continues descending
 1526          * and searching for sockets referenced by sockets (FDEFER), until
 1527          * there are no more socket->socket references to be discovered.
 1528          */
 1529         do {
 1530                 didwork = false;
 1531                 for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
 1532                         KASSERT(mutex_owned(&filelist_lock));
 1533                         np = LIST_NEXT(fp, f_list);
 1534                         mutex_enter(&fp->f_lock);
 1535                         if ((fp->f_flag & FDEFER) != 0) {
 1536                                 atomic_and_uint(&fp->f_flag, ~FDEFER);
 1537                                 unp_defer--;
 1538                                 KASSERT(fp->f_count != 0);
 1539                         } else {
 1540                                 if (fp->f_count == 0 ||
 1541                                     (fp->f_flag & FMARK) != 0 ||
 1542                                     fp->f_count == fp->f_msgcount ||
 1543                                     fp->f_unpcount != 0) {
 1544                                         mutex_exit(&fp->f_lock);
 1545                                         continue;
 1546                                 }
 1547                         }
 1548                         atomic_or_uint(&fp->f_flag, FMARK);
 1549 
 1550                         if (fp->f_type != DTYPE_SOCKET ||
 1551                             (so = fp->f_data) == NULL ||
 1552                             so->so_proto->pr_domain != &unixdomain ||
 1553                             (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
 1554                                 mutex_exit(&fp->f_lock);
 1555                                 continue;
 1556                         }
 1557 
 1558                         /* Gain file ref, mark our position, and unlock. */
 1559                         didwork = true;
 1560                         LIST_INSERT_AFTER(fp, dp, f_list);
 1561                         fp->f_count++;
 1562                         mutex_exit(&fp->f_lock);
 1563                         mutex_exit(&filelist_lock);
 1564 
 1565                         /*
 1566                          * Mark files referenced from sockets queued on the
 1567                          * accept queue as well.
 1568                          */
 1569                         solock(so);
 1570                         unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
 1571                         if ((so->so_options & SO_ACCEPTCONN) != 0) {
 1572                                 TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
 1573                                         unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
 1574                                 }
 1575                                 TAILQ_FOREACH(so1, &so->so_q, so_qe) {
 1576                                         unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
 1577                                 }
 1578                         }
 1579                         sounlock(so);
 1580 
 1581                         /* Re-lock and restart from where we left off. */
 1582                         closef(fp);
 1583                         mutex_enter(&filelist_lock);
 1584                         np = LIST_NEXT(dp, f_list);
 1585                         LIST_REMOVE(dp, f_list);
 1586                 }
 1587                 /*
 1588                  * Bail early if we did nothing in the loop above.  Could
 1589                  * happen because of concurrent activity causing unp_defer
 1590                  * to get out of sync.
 1591                  */
 1592         } while (unp_defer != 0 && didwork);
 1593 
 1594         /*
 1595          * Sweep pass.
 1596          *
 1597          * We grab an extra reference to each of the files that are
 1598          * not otherwise accessible and then free the rights that are
 1599          * stored in messages on them.
 1600          */
 1601         for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
 1602                 KASSERT(mutex_owned(&filelist_lock));
 1603                 np = LIST_NEXT(fp, f_list);
 1604                 mutex_enter(&fp->f_lock);
 1605 
 1606                 /*
 1607                  * Ignore non-sockets.
 1608                  * Ignore dead sockets, or sockets with pending close.
 1609                  * Ignore sockets obviously referenced elsewhere. 
 1610                  * Ignore sockets marked as referenced by our scan.
 1611                  * Ignore new sockets that did not exist during the scan.
 1612                  */
 1613                 if (fp->f_type != DTYPE_SOCKET ||
 1614                     fp->f_count == 0 || fp->f_unpcount != 0 ||
 1615                     fp->f_count != fp->f_msgcount ||
 1616                     (fp->f_flag & (FMARK | FSCAN)) != FSCAN) {
 1617                         mutex_exit(&fp->f_lock);
 1618                         continue;
 1619                 }
 1620 
 1621                 /* Gain file ref, mark our position, and unlock. */
 1622                 LIST_INSERT_AFTER(fp, dp, f_list);
 1623                 fp->f_count++;
 1624                 mutex_exit(&fp->f_lock);
 1625                 mutex_exit(&filelist_lock);
 1626 
 1627                 /*
 1628                  * Flush all data from the socket's receive buffer.
 1629                  * This will cause files referenced only by the
 1630                  * socket to be queued for close.
 1631                  */
 1632                 so = fp->f_data;
 1633                 solock(so);
 1634                 sorflush(so);
 1635                 sounlock(so);
 1636 
 1637                 /* Re-lock and restart from where we left off. */
 1638                 closef(fp);
 1639                 mutex_enter(&filelist_lock);
 1640                 np = LIST_NEXT(dp, f_list);
 1641                 LIST_REMOVE(dp, f_list);
 1642         }
 1643 }
 1644 
 1645 /*
 1646  * Garbage collector thread.  While SCM_RIGHTS messages are in transit,
 1647  * wake once per second to garbage collect.  Run continually while we
 1648  * have deferred closes to process.
 1649  */
 1650 static void
 1651 unp_thread(void *cookie)
 1652 {
 1653         file_t *dp;
 1654 
 1655         /* Allocate a dummy file for our scans. */
 1656         if ((dp = fgetdummy()) == NULL) {
 1657                 panic("unp_thread");
 1658         }
 1659 
 1660         mutex_enter(&filelist_lock);
 1661         for (;;) {
 1662                 KASSERT(mutex_owned(&filelist_lock));
 1663                 if (SLIST_EMPTY(&unp_thread_discard)) {
 1664                         if (unp_rights != 0) {
 1665                                 (void)cv_timedwait(&unp_thread_cv,
 1666                                     &filelist_lock, hz);
 1667                         } else {
 1668                                 cv_wait(&unp_thread_cv, &filelist_lock);
 1669                         }
 1670                 }
 1671                 unp_gc(dp);
 1672         }
 1673         /* NOTREACHED */
 1674 }
 1675 
 1676 /*
 1677  * Kick the garbage collector into action if there is something for
 1678  * it to process.
 1679  */
 1680 static void
 1681 unp_thread_kick(void)
 1682 {
 1683 
 1684         if (!SLIST_EMPTY(&unp_thread_discard) || unp_rights != 0) {
 1685                 mutex_enter(&filelist_lock);
 1686                 cv_signal(&unp_thread_cv);
 1687                 mutex_exit(&filelist_lock);
 1688         }
 1689 }
 1690 
 1691 void
 1692 unp_dispose(struct mbuf *m)
 1693 {
 1694 
 1695         if (m)
 1696                 unp_scan(m, unp_discard_later, 1);
 1697 }
 1698 
 1699 void
 1700 unp_scan(struct mbuf *m0, void (*op)(file_t *), int discard)
 1701 {
 1702         struct mbuf *m;
 1703         file_t **rp, *fp;
 1704         struct cmsghdr *cm;
 1705         int i, qfds;
 1706 
 1707         while (m0) {
 1708                 for (m = m0; m; m = m->m_next) {
 1709                         if (m->m_type != MT_CONTROL ||
 1710                             m->m_len < sizeof(*cm)) {
 1711                                 continue;
 1712                         }
 1713                         cm = mtod(m, struct cmsghdr *);
 1714                         if (cm->cmsg_level != SOL_SOCKET ||
 1715                             cm->cmsg_type != SCM_RIGHTS)
 1716                                 continue;
 1717                         qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
 1718                             / sizeof(file_t *);
 1719                         rp = (file_t **)CMSG_DATA(cm);
 1720                         for (i = 0; i < qfds; i++) {
 1721                                 fp = *rp;
 1722                                 if (discard) {
 1723                                         *rp = 0;
 1724                                 }
 1725                                 (*op)(fp);
 1726                                 rp++;
 1727                         }
 1728                 }
 1729                 m0 = m0->m_nextpkt;
 1730         }
 1731 }
 1732 
 1733 void
 1734 unp_mark(file_t *fp)
 1735 {
 1736 
 1737         if (fp == NULL)
 1738                 return;
 1739 
 1740         /* If we're already deferred, don't screw up the defer count */
 1741         mutex_enter(&fp->f_lock);
 1742         if (fp->f_flag & (FMARK | FDEFER)) {
 1743                 mutex_exit(&fp->f_lock);
 1744                 return;
 1745         }
 1746 
 1747         /*
 1748          * Minimize the number of deferrals...  Sockets are the only type of
 1749          * file which can hold references to another file, so just mark
 1750          * other files, and defer unmarked sockets for the next pass.
 1751          */
 1752         if (fp->f_type == DTYPE_SOCKET) {
 1753                 unp_defer++;
 1754                 KASSERT(fp->f_count != 0);
 1755                 atomic_or_uint(&fp->f_flag, FDEFER);
 1756         } else {
 1757                 atomic_or_uint(&fp->f_flag, FMARK);
 1758         }
 1759         mutex_exit(&fp->f_lock);
 1760 }
 1761 
 1762 static void
 1763 unp_discard_now(file_t *fp)
 1764 {
 1765 
 1766         if (fp == NULL)
 1767                 return;
 1768 
 1769         KASSERT(fp->f_count > 0);
 1770         KASSERT(fp->f_msgcount > 0);
 1771 
 1772         mutex_enter(&fp->f_lock);
 1773         fp->f_msgcount--;
 1774         mutex_exit(&fp->f_lock);
 1775         atomic_dec_uint(&unp_rights);
 1776         (void)closef(fp);
 1777 }
 1778 
 1779 static void
 1780 unp_discard_later(file_t *fp)
 1781 {
 1782 
 1783         if (fp == NULL)
 1784                 return;
 1785 
 1786         KASSERT(fp->f_count > 0);
 1787         KASSERT(fp->f_msgcount > 0);
 1788 
 1789         mutex_enter(&filelist_lock);
 1790         if (fp->f_unpcount++ == 0) {
 1791                 SLIST_INSERT_HEAD(&unp_thread_discard, fp, f_unplist);
 1792         }
 1793         mutex_exit(&filelist_lock);
 1794 }

Cache object: 4827218202d2f85b5c10ef0c62e056c7


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.