The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    3  *      The Regents of the University of California.
    4  * Copyright 2004-2005 Robert N. M. Watson
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 4. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.138.2.15 2005/09/28 06:52:35 rwatson Exp $");
   36 
   37 #include "opt_mac.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/domain.h>
   41 #include <sys/fcntl.h>
   42 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
   43 #include <sys/file.h>
   44 #include <sys/filedesc.h>
   45 #include <sys/jail.h>
   46 #include <sys/kernel.h>
   47 #include <sys/lock.h>
   48 #include <sys/mac.h>
   49 #include <sys/mbuf.h>
   50 #include <sys/mutex.h>
   51 #include <sys/namei.h>
   52 #include <sys/proc.h>
   53 #include <sys/protosw.h>
   54 #include <sys/resourcevar.h>
   55 #include <sys/socket.h>
   56 #include <sys/socketvar.h>
   57 #include <sys/signalvar.h>
   58 #include <sys/stat.h>
   59 #include <sys/sx.h>
   60 #include <sys/sysctl.h>
   61 #include <sys/systm.h>
   62 #include <sys/un.h>
   63 #include <sys/unpcb.h>
   64 #include <sys/vnode.h>
   65 
   66 #include <vm/uma.h>
   67 
   68 static uma_zone_t unp_zone;          /* zone unpcbs are allocated from (unp_attach) and freed to (unp_detach) */
   69 static  unp_gen_t unp_gencnt;        /* incremented on every attach and detach; copied into unp_gencnt of the pcb */
   70 static  u_int unp_count;             /* incremented in unp_attach, decremented in unp_detach */
   71 
   72 static  struct unp_head unp_shead, unp_dhead;   /* pcb lists: unp_dhead for SOCK_DGRAM, unp_shead otherwise */
   73 
   74 /*
   75  * Unix communications domain.
   76  *
   77  * TODO:
   78  *      SEQPACKET, RDM
   79  *      rethink name space problems
   80  *      need a proper out-of-band
   81  *      lock pushdown
   82  */
   83 static const struct     sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };     /* address reported when no bound address is available */
   84 static ino_t    unp_ino;                /* prototype for fake inode numbers */
   85 
   86 /*
   87  * Currently, UNIX domain sockets are protected by a single subsystem lock,
   88  * which covers global data structures and variables, the contents of each
   89  * per-socket unpcb structure, and the so_pcb field in sockets attached to
   90  * the UNIX domain.  This provides for a moderate degree of parallelism, as
   91  * receive operations on UNIX domain sockets do not need to acquire the
   92  * subsystem lock.  Finer grained locking to permit send() without acquiring
   93  * a global lock would be a logical next step.
   94  *
   95  * The UNIX domain socket lock precedes all socket layer locks, including the
   96  * socket lock and socket buffer lock, permitting UNIX domain socket code to
   97  * call into socket support routines without releasing its locks.
   98  *
   99  * Some caution is required in areas where the UNIX domain socket code enters
  100  * VFS in order to create or find rendezvous points.  This results in
  101  * dropping of the UNIX domain socket subsystem lock, acquisition of the
  102  * Giant lock, and potential sleeping.  This increases the chances of races,
  103  * and exposes weaknesses in the socket->protocol API by offering poor
  104  * failure modes.
  105  */
  106 static struct mtx unp_mtx;      /* the single UNIX domain subsystem lock */
      /* Thin wrappers so callers never name unp_mtx directly. */
  107 #define UNP_LOCK_INIT() \
  108         mtx_init(&unp_mtx, "unp", NULL, MTX_DEF)
  109 #define UNP_LOCK()              mtx_lock(&unp_mtx)
  110 #define UNP_UNLOCK()            mtx_unlock(&unp_mtx)
      /* Assertions: the current thread does / does not own the lock. */
  111 #define UNP_LOCK_ASSERT()       mtx_assert(&unp_mtx, MA_OWNED)
  112 #define UNP_UNLOCK_ASSERT()     mtx_assert(&unp_mtx, MA_NOTOWNED)
  113 
  114 static int     unp_attach(struct socket *);
  115 static void    unp_detach(struct unpcb *);     /* entered with the UNIX domain lock held, returns with it released */
  116 static int     unp_bind(struct unpcb *,struct sockaddr *, struct thread *);    /* lock held on entry and on return */
  117 static int     unp_connect(struct socket *,struct sockaddr *, struct thread *);
  118 static int     unp_connect2(struct socket *so, struct socket *so2);
  119 static void    unp_disconnect(struct unpcb *);
  120 static void    unp_shutdown(struct unpcb *);
  121 static void    unp_drop(struct unpcb *, int);
  122 static void    unp_gc(void);                   /* releases the UNIX domain lock (see its call in unp_detach) */
  123 static void    unp_scan(struct mbuf *, void (*)(struct file *));
  124 static void    unp_mark(struct file *);
  125 static void    unp_discard(struct file *);
  126 static void    unp_freerights(struct file **, int);
  127 static int     unp_internalize(struct mbuf **, struct thread *);
  128 static int     unp_listen(struct socket *, struct unpcb *, struct thread *);
  129 
  130 static int
  131 uipc_abort(struct socket *so)
  132 {
  133         struct unpcb *unp;
  134 
  135         UNP_LOCK();
  136         unp = sotounpcb(so);
  137         if (unp == NULL) {
  138                 UNP_UNLOCK();
  139                 return (EINVAL);
  140         }
  141         unp_drop(unp, ECONNABORTED);
  142         unp_detach(unp);
  143         UNP_UNLOCK_ASSERT();
  144         ACCEPT_LOCK();
  145         SOCK_LOCK(so);
  146         sotryfree(so);
  147         return (0);
  148 }
  149 
  150 static int
  151 uipc_accept(struct socket *so, struct sockaddr **nam)
  152 {
  153         struct unpcb *unp;
  154         const struct sockaddr *sa;
  155 
  156         /*
  157          * Pass back name of connected socket,
  158          * if it was bound and we are still connected
  159          * (our peer may have closed already!).
  160          */
  161         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  162         UNP_LOCK();
  163         unp = sotounpcb(so);
  164         if (unp == NULL) {
  165                 UNP_UNLOCK();
  166                 free(*nam, M_SONAME);
  167                 *nam = NULL;
  168                 return (EINVAL);
  169         }
  170         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL)
  171                 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
  172         else
  173                 sa = &sun_noname;
  174         bcopy(sa, *nam, sa->sa_len);
  175         UNP_UNLOCK();
  176         return (0);
  177 }
  178 
  179 static int
  180 uipc_attach(struct socket *so, int proto, struct thread *td)
  181 {
  182         struct unpcb *unp = sotounpcb(so);
  183 
  184         if (unp != NULL)
  185                 return (EISCONN);
  186         return (unp_attach(so));
  187 }
  188 
  189 static int
  190 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  191 {
  192         struct unpcb *unp;
  193         int error;
  194 
  195         UNP_LOCK();
  196         unp = sotounpcb(so);
  197         if (unp == NULL) {
  198                 UNP_UNLOCK();
  199                 return (EINVAL);
  200         }
  201         error = unp_bind(unp, nam, td);
  202         UNP_UNLOCK();
  203         return (error);
  204 }
  205 
  206 static int
  207 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  208 {
  209         struct unpcb *unp;
  210         int error;
  211 
  212         KASSERT(td == curthread, ("uipc_connect: td != curthread"));
  213 
  214         UNP_LOCK();
  215         unp = sotounpcb(so);
  216         if (unp == NULL) {
  217                 UNP_UNLOCK();
  218                 return (EINVAL);
  219         }
  220         error = unp_connect(so, nam, td);
  221         UNP_UNLOCK();
  222         return (error);
  223 }
  224 
  225 int
  226 uipc_connect2(struct socket *so1, struct socket *so2)
  227 {
  228         struct unpcb *unp;
  229         int error;
  230 
  231         UNP_LOCK();
  232         unp = sotounpcb(so1);
  233         if (unp == NULL) {
  234                 UNP_UNLOCK();
  235                 return (EINVAL);
  236         }
  237         error = unp_connect2(so1, so2);
  238         UNP_UNLOCK();
  239         return (error);
  240 }
  241 
  242 /* control is EOPNOTSUPP */
  243 
  244 static int
  245 uipc_detach(struct socket *so)
  246 {
  247         struct unpcb *unp;
  248 
  249         UNP_LOCK();
  250         unp = sotounpcb(so);
  251         if (unp == NULL) {
  252                 UNP_UNLOCK();
  253                 return (EINVAL);
  254         }
  255         unp_detach(unp);
  256         UNP_UNLOCK_ASSERT();
  257         return (0);
  258 }
  259 
  260 static int
  261 uipc_disconnect(struct socket *so)
  262 {
  263         struct unpcb *unp;
  264 
  265         UNP_LOCK();
  266         unp = sotounpcb(so);
  267         if (unp == NULL) {
  268                 UNP_UNLOCK();
  269                 return (EINVAL);
  270         }
  271         unp_disconnect(unp);
  272         UNP_UNLOCK();
  273         return (0);
  274 }
  275 
  276 static int
  277 uipc_listen(struct socket *so, struct thread *td)
  278 {
  279         struct unpcb *unp;
  280         int error;
  281 
  282         UNP_LOCK();
  283         unp = sotounpcb(so);
  284         if (unp == NULL || unp->unp_vnode == NULL) {
  285                 UNP_UNLOCK();
  286                 return (EINVAL);
  287         }
  288         error = unp_listen(so, unp, td);
  289         UNP_UNLOCK();
  290         return (error);
  291 }
  292 
  293 static int
  294 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  295 {
  296         struct unpcb *unp;
  297         const struct sockaddr *sa;
  298 
  299         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  300         UNP_LOCK();
  301         unp = sotounpcb(so);
  302         if (unp == NULL) {
  303                 UNP_UNLOCK();
  304                 free(*nam, M_SONAME);
  305                 *nam = NULL;
  306                 return (EINVAL);
  307         }
  308         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL)
  309                 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
  310         else {
  311                 /*
  312                  * XXX: It seems that this test always fails even when
  313                  * connection is established.  So, this else clause is
  314                  * added as workaround to return PF_LOCAL sockaddr.
  315                  */
  316                 sa = &sun_noname;
  317         }
  318         bcopy(sa, *nam, sa->sa_len);
  319         UNP_UNLOCK();
  320         return (0);
  321 }
  322 
  323 static int
  324 uipc_rcvd(struct socket *so, int flags)
  325 {
  326         struct unpcb *unp;
  327         struct socket *so2;
  328         u_long newhiwat;
  329 
  330         UNP_LOCK();
  331         unp = sotounpcb(so);
  332         if (unp == NULL) {
  333                 UNP_UNLOCK();
  334                 return (EINVAL);
  335         }
  336         switch (so->so_type) {
  337         case SOCK_DGRAM:
  338                 panic("uipc_rcvd DGRAM?");
  339                 /*NOTREACHED*/
  340 
  341         case SOCK_STREAM:
  342                 if (unp->unp_conn == NULL)
  343                         break;
  344                 so2 = unp->unp_conn->unp_socket;
  345                 SOCKBUF_LOCK(&so2->so_snd);
  346                 SOCKBUF_LOCK(&so->so_rcv);
  347                 /*
  348                  * Adjust backpressure on sender
  349                  * and wakeup any waiting to write.
  350                  */
  351                 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt;
  352                 unp->unp_mbcnt = so->so_rcv.sb_mbcnt;
  353                 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc -
  354                     so->so_rcv.sb_cc;
  355                 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
  356                     newhiwat, RLIM_INFINITY);
  357                 unp->unp_cc = so->so_rcv.sb_cc;
  358                 SOCKBUF_UNLOCK(&so->so_rcv);
  359                 sowwakeup_locked(so2);
  360                 break;
  361 
  362         default:
  363                 panic("uipc_rcvd unknown socktype");
  364         }
  365         UNP_UNLOCK();
  366         return (0);
  367 }
  368 
  369 /* pru_rcvoob is EOPNOTSUPP */
  370 
  /*
   * pru_send handler: place the data chain m, and the optional control chain,
   * directly into the receive buffer of the peer socket.
   *
   * Whatever is still referenced by m or control at the "release" label is
   * freed there with m_freem(); chains handed to the peer's buffer have their
   * local pointer cleared first.
   *
   * Returns 0 on success; EINVAL if the socket has no unpcb; EOPNOTSUPP for
   * PRUS_OOB; EISCONN, ENOTCONN, ENOBUFS or EPIPE as set below; or the error
   * returned by unp_internalize() or unp_connect().
   */
  371 static int
  372 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  373     struct mbuf *control, struct thread *td)
  374 {
  375         int error = 0;
  376         struct unpcb *unp;
  377         struct socket *so2;
  378         u_long newhiwat;
  379 
              /* Unlocked check; the pcb is looked up again once the lock is held. */
  380         unp = sotounpcb(so);
  381         if (unp == NULL) {
  382                 error = EINVAL;
  383                 goto release;
  384         }
  385         if (flags & PRUS_OOB) {
  386                 error = EOPNOTSUPP;
  387                 goto release;
  388         }
  389 
              /* unp_internalize() runs before the UNIX domain lock is taken. */
  390         if (control != NULL && (error = unp_internalize(&control, td)))
  391                 goto release;
  392 
  393         UNP_LOCK();
  394         unp = sotounpcb(so);
  395         if (unp == NULL) {
  396                 UNP_UNLOCK();
  397                 error = EINVAL;
  398                 goto dispose_release;
  399         }
  400 
  401         switch (so->so_type) {
  402         case SOCK_DGRAM:
  403         {
  404                 const struct sockaddr *from;
  405 
                      /*
                       * With a destination address the socket must be
                       * unconnected; it is connected for this one send and
                       * disconnected again below.  Without one it must
                       * already be connected.
                       */
  406                 if (nam != NULL) {
  407                         if (unp->unp_conn != NULL) {
  408                                 error = EISCONN;
  409                                 break;
  410                         }
  411                         error = unp_connect(so, nam, td);
  412                         if (error)
  413                                 break;
  414                 } else {
  415                         if (unp->unp_conn == NULL) {
  416                                 error = ENOTCONN;
  417                                 break;
  418                         }
  419                 }
  420                 so2 = unp->unp_conn->unp_socket;
                      /* Source address: our bound name, or sun_noname if unbound. */
  421                 if (unp->unp_addr != NULL)
  422                         from = (struct sockaddr *)unp->unp_addr;
  423                 else
  424                         from = &sun_noname;
  425                 SOCKBUF_LOCK(&so2->so_rcv);
                      /*
                       * On success the peer's buffer owns both chains.  The
                       * buffer lock is not released explicitly on that path;
                       * presumably sorwakeup_locked() drops it -- TODO confirm.
                       */
  426                 if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
  427                         sorwakeup_locked(so2);
  428                         m = NULL;
  429                         control = NULL;
  430                 } else {
  431                         SOCKBUF_UNLOCK(&so2->so_rcv);
  432                         error = ENOBUFS;
  433                 }
                      /* Undo the connection made above for this single send. */
  434                 if (nam != NULL)
  435                         unp_disconnect(unp);
  436                 break;
  437         }
  438 
  439         case SOCK_STREAM:
  440                 /* Connect if not connected yet. */
  441                 /*
  442                  * Note: A better implementation would complain
  443                  * if not equal to the peer's address.
  444                  */
  445                 if ((so->so_state & SS_ISCONNECTED) == 0) {
  446                         if (nam != NULL) {
  447                                 error = unp_connect(so, nam, td);
  448                                 if (error)
  449                                         break;  /* XXX */
  450                         } else {
  451                                 error = ENOTCONN;
  452                                 break;
  453                         }
  454                 }
  455 
                      /* Our send buffer is locked first, then the peer's receive buffer. */
  456                 SOCKBUF_LOCK(&so->so_snd);
  457                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
  458                         SOCKBUF_UNLOCK(&so->so_snd);
  459                         error = EPIPE;
  460                         break;
  461                 }
  462                 if (unp->unp_conn == NULL)
  463                         panic("uipc_send connected but no connection?");
  464                 so2 = unp->unp_conn->unp_socket;
  465                 SOCKBUF_LOCK(&so2->so_rcv);
  466                 /*
  467                  * Send to paired receive port, and then reduce
  468                  * send buffer hiwater marks to maintain backpressure.
  469                  * Wake up readers.
  470                  */
  471                 if (control != NULL) {
  472                         if (sbappendcontrol_locked(&so2->so_rcv, m, control))
  473                                 control = NULL;
  474                 } else {
  475                         sbappend_locked(&so2->so_rcv, m);
  476                 }
                      /*
                       * Shrink our send limits by the amount the peer's receive
                       * buffer grew since the counts were last recorded in the
                       * peer's pcb, then record the new counts.
                       */
  477                 so->so_snd.sb_mbmax -=
  478                         so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
  479                 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt;
  480                 newhiwat = so->so_snd.sb_hiwat -
  481                     (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc);
  482                 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
  483                     newhiwat, RLIM_INFINITY);
  484                 SOCKBUF_UNLOCK(&so->so_snd);
  485                 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
  486                 sorwakeup_locked(so2);
                      /*
                       * NOTE(review): m is cleared unconditionally, including
                       * when sbappendcontrol_locked() returned 0 above --
                       * confirm m has been consumed in that case.
                       */
  487                 m = NULL;
  488                 break;
  489 
  490         default:
  491                 panic("uipc_send unknown socktype");
  492         }
  493 
  494         /*
  495          * SEND_EOF is equivalent to a SEND followed by
  496          * a SHUTDOWN.
  497          */
              /* Also reached when a case above left the switch with an error set. */
  498         if (flags & PRUS_EOF) {
  499                 socantsendmore(so);
  500                 unp_shutdown(unp);
  501         }
  502         UNP_UNLOCK();
  503 
  504 dispose_release:
              /* Only on failure is a leftover control chain passed to unp_dispose(). */
  505         if (control != NULL && error != 0)
  506                 unp_dispose(control);
  507 
  508 release:
  509         if (control != NULL)
  510                 m_freem(control);
  511         if (m != NULL)
  512                 m_freem(m);
  513         return (error);
  514 }
  515 
  516 static int
  517 uipc_sense(struct socket *so, struct stat *sb)
  518 {
  519         struct unpcb *unp;
  520         struct socket *so2;
  521 
  522         UNP_LOCK();
  523         unp = sotounpcb(so);
  524         if (unp == NULL) {
  525                 UNP_UNLOCK();
  526                 return (EINVAL);
  527         }
  528         sb->st_blksize = so->so_snd.sb_hiwat;
  529         if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
  530                 so2 = unp->unp_conn->unp_socket;
  531                 sb->st_blksize += so2->so_rcv.sb_cc;
  532         }
  533         sb->st_dev = NODEV;
  534         if (unp->unp_ino == 0)
  535                 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
  536         sb->st_ino = unp->unp_ino;
  537         UNP_UNLOCK();
  538         return (0);
  539 }
  540 
  541 static int
  542 uipc_shutdown(struct socket *so)
  543 {
  544         struct unpcb *unp;
  545 
  546         UNP_LOCK();
  547         unp = sotounpcb(so);
  548         if (unp == NULL) {
  549                 UNP_UNLOCK();
  550                 return (EINVAL);
  551         }
  552         socantsendmore(so);
  553         unp_shutdown(unp);
  554         UNP_UNLOCK();
  555         return (0);
  556 }
  557 
  558 static int
  559 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
  560 {
  561         struct unpcb *unp;
  562         const struct sockaddr *sa;
  563 
  564         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  565         UNP_LOCK();
  566         unp = sotounpcb(so);
  567         if (unp == NULL) {
  568                 UNP_UNLOCK();
  569                 free(*nam, M_SONAME);
  570                 *nam = NULL;
  571                 return (EINVAL);
  572         }
  573         if (unp->unp_addr != NULL)
  574                 sa = (struct sockaddr *) unp->unp_addr;
  575         else
  576                 sa = &sun_noname;
  577         bcopy(sa, *nam, sa->sa_len);
  578         UNP_UNLOCK();
  579         return (0);
  580 }
  581 
  /*
   * User-request switch for UNIX domain sockets.  The initializer is
   * positional, so the order of the entries must follow the member order of
   * struct pr_usrreqs.  The control and rcvoob slots use the *_notsupp
   * handlers (the comments earlier in this file describe both as EOPNOTSUPP).
   */
  582 struct pr_usrreqs uipc_usrreqs = {
  583         uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
  584         uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
  585         uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
  586         uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
  587         sosend, soreceive, sopoll, pru_sosetlabel_null
  588 };
  589 
  590 int
  591 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
  592 {
  593         struct unpcb *unp;
  594         struct xucred xu;
  595         int error;
  596 
  597         switch (sopt->sopt_dir) {
  598         case SOPT_GET:
  599                 switch (sopt->sopt_name) {
  600                 case LOCAL_PEERCRED:
  601                         error = 0;
  602                         UNP_LOCK();
  603                         unp = sotounpcb(so);
  604                         if (unp == NULL) {
  605                                 UNP_UNLOCK();
  606                                 error = EINVAL;
  607                                 break;
  608                         }
  609                         if (unp->unp_flags & UNP_HAVEPC)
  610                                 xu = unp->unp_peercred;
  611                         else {
  612                                 if (so->so_type == SOCK_STREAM)
  613                                         error = ENOTCONN;
  614                                 else
  615                                         error = EINVAL;
  616                         }
  617                         UNP_UNLOCK();
  618                         if (error == 0)
  619                                 error = sooptcopyout(sopt, &xu, sizeof(xu));
  620                         break;
  621                 default:
  622                         error = EOPNOTSUPP;
  623                         break;
  624                 }
  625                 break;
  626         case SOPT_SET:
  627         default:
  628                 error = EOPNOTSUPP;
  629                 break;
  630         }
  631         return (error);
  632 }
  633 
  634 /*
  635  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
  636  * for stream sockets, although the total for sender and receiver is
  637  * actually only PIPSIZ.
  638  * Datagram sockets really use the sendspace as the maximum datagram size,
  639  * and don't really want to reserve the sendspace.  Their recvspace should
  640  * be large enough for at least one max-size datagram plus address.
  641  */
  642 #ifndef PIPSIZ
  643 #define PIPSIZ  8192
  644 #endif
  645 static u_long   unpst_sendspace = PIPSIZ;      /* stream send reservation passed to soreserve() in unp_attach */
  646 static u_long   unpst_recvspace = PIPSIZ;      /* stream receive reservation passed to soreserve() in unp_attach */
  647 static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
  648 static u_long   unpdg_recvspace = 4*1024;
  649 
  650 static int      unp_rights;                     /* file descriptors in flight */
  651 
      /*
       * NOTE(review): the four buffer-space variables above are declared u_long
       * but are exported below with SYSCTL_INT -- confirm that the width
       * mismatch is intended on platforms where long is wider than int.
       */
  652 SYSCTL_DECL(_net_local_stream);
  653 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
  654            &unpst_sendspace, 0, "");
  655 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
  656            &unpst_recvspace, 0, "");
  657 SYSCTL_DECL(_net_local_dgram);
  658 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
  659            &unpdg_sendspace, 0, "");
  660 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
  661            &unpdg_recvspace, 0, "");
  662 SYSCTL_DECL(_net_local);
      /* Read-only view of the in-flight descriptor count. */
  663 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
  664 
  665 static int
  666 unp_attach(struct socket *so)
  667 {
  668         struct unpcb *unp;
  669         int error;
  670 
  671         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  672                 switch (so->so_type) {
  673 
  674                 case SOCK_STREAM:
  675                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
  676                         break;
  677 
  678                 case SOCK_DGRAM:
  679                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
  680                         break;
  681 
  682                 default:
  683                         panic("unp_attach");
  684                 }
  685                 if (error)
  686                         return (error);
  687         }
  688         unp = uma_zalloc(unp_zone, M_WAITOK | M_ZERO);
  689         if (unp == NULL)
  690                 return (ENOBUFS);
  691         LIST_INIT(&unp->unp_refs);
  692         unp->unp_socket = so;
  693         so->so_pcb = unp;
  694 
  695         UNP_LOCK();
  696         unp->unp_gencnt = ++unp_gencnt;
  697         unp_count++;
  698         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
  699                          : &unp_shead, unp, unp_link);
  700         UNP_UNLOCK();
  701 
  702         return (0);
  703 }
  704 
  705 static void
  706 unp_detach(struct unpcb *unp)
  707 {
  708         struct vnode *vp;
  709 
  710         UNP_LOCK_ASSERT();
  711 
  712         LIST_REMOVE(unp, unp_link);
  713         unp->unp_gencnt = ++unp_gencnt;
  714         --unp_count;
  715         if ((vp = unp->unp_vnode) != NULL) {
  716                 /*
  717                  * XXXRW: should v_socket be frobbed only while holding
  718                  * Giant?
  719                  */
  720                 unp->unp_vnode->v_socket = NULL;
  721                 unp->unp_vnode = NULL;
  722         }
  723         if (unp->unp_conn != NULL)
  724                 unp_disconnect(unp);
  725         while (!LIST_EMPTY(&unp->unp_refs)) {
  726                 struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
  727                 unp_drop(ref, ECONNRESET);
  728         }
  729         soisdisconnected(unp->unp_socket);
  730         unp->unp_socket->so_pcb = NULL;
  731         if (unp_rights) {
  732                 /*
  733                  * Normally the receive buffer is flushed later,
  734                  * in sofree, but if our receive buffer holds references
  735                  * to descriptors that are now garbage, we will dispose
  736                  * of those descriptor references after the garbage collector
  737                  * gets them (resulting in a "panic: closef: count < 0").
  738                  */
  739                 sorflush(unp->unp_socket);
  740                 unp_gc();       /* Will unlock UNP. */
  741         } else
  742                 UNP_UNLOCK();
  743         UNP_UNLOCK_ASSERT();
  744         if (unp->unp_addr != NULL)
  745                 FREE(unp->unp_addr, M_SONAME);
  746         uma_zfree(unp_zone, unp);
  747         if (vp) {
  748                 mtx_lock(&Giant);
  749                 vrele(vp);
  750                 mtx_unlock(&Giant);
  751         }
  752 }
  753 
  754 static int
  755 unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td)
  756 {
  757         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  758         struct vnode *vp;
  759         struct mount *mp;
  760         struct vattr vattr;
  761         int error, namelen;
  762         struct nameidata nd;
  763         char *buf;
  764 
  765         UNP_LOCK_ASSERT();
  766 
  767         /*
  768          * XXXRW: This test-and-set of unp_vnode is non-atomic; the
  769          * unlocked read here is fine, but the value of unp_vnode needs
  770          * to be tested again after we do all the lookups to see if the
  771          * pcb is still unbound?
  772          */
  773         if (unp->unp_vnode != NULL)
  774                 return (EINVAL);
  775 
  776         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  777         if (namelen <= 0)
  778                 return (EINVAL);
  779 
  780         UNP_UNLOCK();
  781 
  782         buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
  783         strlcpy(buf, soun->sun_path, namelen + 1);
  784 
  785         mtx_lock(&Giant);
  786 restart:
  787         mtx_assert(&Giant, MA_OWNED);
  788         NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
  789             buf, td);
  790 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  791         error = namei(&nd);
  792         if (error)
  793                 goto done;
  794         vp = nd.ni_vp;
  795         if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
  796                 NDFREE(&nd, NDF_ONLY_PNBUF);
  797                 if (nd.ni_dvp == vp)
  798                         vrele(nd.ni_dvp);
  799                 else
  800                         vput(nd.ni_dvp);
  801                 if (vp != NULL) {
  802                         vrele(vp);
  803                         error = EADDRINUSE;
  804                         goto done;
  805                 }
  806                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
  807                 if (error)
  808                         goto done;
  809                 goto restart;
  810         }
  811         VATTR_NULL(&vattr);
  812         vattr.va_type = VSOCK;
  813         vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
  814 #ifdef MAC
  815         error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  816             &vattr);
  817 #endif
  818         if (error == 0) {
  819                 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
  820                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  821         }
  822         NDFREE(&nd, NDF_ONLY_PNBUF);
  823         vput(nd.ni_dvp);
  824         if (error) {
  825                 vn_finished_write(mp);
  826                 goto done;
  827         }
  828         vp = nd.ni_vp;
  829         ASSERT_VOP_LOCKED(vp, "unp_bind");
  830         soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
  831         UNP_LOCK();
  832         vp->v_socket = unp->unp_socket;
  833         unp->unp_vnode = vp;
  834         unp->unp_addr = soun;
  835         UNP_UNLOCK();
  836         VOP_UNLOCK(vp, 0, td);
  837         vn_finished_write(mp);
  838 done:
  839         mtx_unlock(&Giant);
  840         free(buf, M_TEMP);
  841         UNP_LOCK();
  842         return (error);
  843 }
  844 
  845 static int
  846 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  847 {
  848         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  849         struct vnode *vp;
  850         struct socket *so2, *so3;
  851         struct unpcb *unp, *unp2, *unp3;
  852         int error, len;
  853         struct nameidata nd;
  854         char buf[SOCK_MAXADDRLEN];
  855         struct sockaddr *sa;
  856 
  857         UNP_LOCK_ASSERT();
  858         unp = sotounpcb(so);
  859 
  860         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
  861         if (len <= 0)
  862                 return (EINVAL);
  863         strlcpy(buf, soun->sun_path, len + 1);
  864         UNP_UNLOCK();
  865         sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  866         mtx_lock(&Giant);
  867         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
  868         error = namei(&nd);
  869         if (error)
  870                 vp = NULL;
  871         else
  872                 vp = nd.ni_vp;
  873         ASSERT_VOP_LOCKED(vp, "unp_connect");
  874         NDFREE(&nd, NDF_ONLY_PNBUF);
  875         if (error)
  876                 goto bad;
  877 
  878         if (vp->v_type != VSOCK) {
  879                 error = ENOTSOCK;
  880                 goto bad;
  881         }
  882         error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
  883         if (error)
  884                 goto bad;
  885         mtx_unlock(&Giant);
  886         UNP_LOCK();
  887         unp = sotounpcb(so);
  888         if (unp == NULL) {
  889                 error = EINVAL;
  890                 goto bad2;
  891         }
  892         so2 = vp->v_socket;
  893         if (so2 == NULL) {
  894                 error = ECONNREFUSED;
  895                 goto bad2;
  896         }
  897         if (so->so_type != so2->so_type) {
  898                 error = EPROTOTYPE;
  899                 goto bad2;
  900         }
  901         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
  902                 if (so2->so_options & SO_ACCEPTCONN) {
  903                         /*
  904                          * NB: drop locks here so unp_attach is entered
  905                          *     w/o locks; this avoids a recursive lock
  906                          *     of the head and holding sleep locks across
  907                          *     a (potentially) blocking malloc.
  908                          */
  909                         UNP_UNLOCK();
  910                         so3 = sonewconn(so2, 0);
  911                         UNP_LOCK();
  912                 } else
  913                         so3 = NULL;
  914                 if (so3 == NULL) {
  915                         error = ECONNREFUSED;
  916                         goto bad2;
  917                 }
  918                 unp = sotounpcb(so);
  919                 unp2 = sotounpcb(so2);
  920                 unp3 = sotounpcb(so3);
  921                 if (unp2->unp_addr != NULL) {
  922                         bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
  923                         unp3->unp_addr = (struct sockaddr_un *) sa;
  924                         sa = NULL;
  925                 }
  926                 /*
  927                  * unp_peercred management:
  928                  *
  929                  * The connecter's (client's) credentials are copied
  930                  * from its process structure at the time of connect()
  931                  * (which is now).
  932                  */
  933                 cru2x(td->td_ucred, &unp3->unp_peercred);
  934                 unp3->unp_flags |= UNP_HAVEPC;
  935                 /*
  936                  * The receiver's (server's) credentials are copied
  937                  * from the unp_peercred member of socket on which the
  938                  * former called listen(); unp_listen() cached that
  939                  * process's credentials at that time so we can use
  940                  * them now.
  941                  */
  942                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
  943                     ("unp_connect: listener without cached peercred"));
  944                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
  945                     sizeof(unp->unp_peercred));
  946                 unp->unp_flags |= UNP_HAVEPC;
  947 #ifdef MAC
  948                 SOCK_LOCK(so);
  949                 mac_set_socket_peer_from_socket(so, so3);
  950                 mac_set_socket_peer_from_socket(so3, so);
  951                 SOCK_UNLOCK(so);
  952 #endif
  953 
  954                 so2 = so3;
  955         }
  956         error = unp_connect2(so, so2);
  957 bad2:
  958         UNP_UNLOCK();
  959         mtx_lock(&Giant);
  960 bad:
  961         mtx_assert(&Giant, MA_OWNED);
  962         if (vp != NULL)
  963                 vput(vp);
  964         mtx_unlock(&Giant);
  965         free(sa, M_SONAME);
  966         UNP_LOCK();
  967         return (error);
  968 }
  969 
  970 static int
  971 unp_connect2(struct socket *so, struct socket *so2)
  972 {
  973         struct unpcb *unp = sotounpcb(so);
  974         struct unpcb *unp2;
  975 
  976         UNP_LOCK_ASSERT();
  977 
  978         if (so2->so_type != so->so_type)
  979                 return (EPROTOTYPE);
  980         unp2 = sotounpcb(so2);
  981         unp->unp_conn = unp2;
  982         switch (so->so_type) {
  983 
  984         case SOCK_DGRAM:
  985                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
  986                 soisconnected(so);
  987                 break;
  988 
  989         case SOCK_STREAM:
  990                 unp2->unp_conn = unp;
  991                 soisconnected(so);
  992                 soisconnected(so2);
  993                 break;
  994 
  995         default:
  996                 panic("unp_connect2");
  997         }
  998         return (0);
  999 }
 1000 
 1001 static void
 1002 unp_disconnect(struct unpcb *unp)
 1003 {
 1004         struct unpcb *unp2 = unp->unp_conn;
 1005         struct socket *so;
 1006 
 1007         UNP_LOCK_ASSERT();
 1008 
 1009         if (unp2 == NULL)
 1010                 return;
 1011         unp->unp_conn = NULL;
 1012         switch (unp->unp_socket->so_type) {
 1013 
 1014         case SOCK_DGRAM:
 1015                 LIST_REMOVE(unp, unp_reflink);
 1016                 so = unp->unp_socket;
 1017                 SOCK_LOCK(so);
 1018                 so->so_state &= ~SS_ISCONNECTED;
 1019                 SOCK_UNLOCK(so);
 1020                 break;
 1021 
 1022         case SOCK_STREAM:
 1023                 soisdisconnected(unp->unp_socket);
 1024                 unp2->unp_conn = NULL;
 1025                 soisdisconnected(unp2->unp_socket);
 1026                 break;
 1027         }
 1028 }
 1029 
 1030 #ifdef notdef
      /*
       * Disabled code: only compiled when "notdef" is defined.  Detaches the
       * pcb via unp_detach() and then asserts that the UNP lock is not held.
       */
 1031 void
 1032 unp_abort(struct unpcb *unp)
 1033 {
 1034 
 1035         unp_detach(unp);
 1036         UNP_UNLOCK_ASSERT();
 1037 }
 1038 #endif
 1039 
 1040 /*
 1041  * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed
 1042  * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers
 1043  * are safe to reference.  It first scans the list of struct unpcb's to
 1044  * generate a pointer list, then it rescans its list one entry at a time to
 1045  * externalize and copyout.  It checks the generation number to see if a
 1046  * struct unpcb has been reused, and will skip it if so.
 1047  */
 1048 static int
 1049 unp_pcblist(SYSCTL_HANDLER_ARGS)
 1050 {
 1051         int error, i, n;
 1052         struct unpcb *unp, **unp_list;
 1053         unp_gen_t gencnt;
 1054         struct xunpgen *xug;
 1055         struct unp_head *head;
 1056         struct xunpcb *xu;
 1057 
 1058         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
 1059 
 1060         /*
 1061          * The process of preparing the PCB list is too time-consuming and
 1062          * resource-intensive to repeat twice on every request.
 1063          */
 1064         if (req->oldptr == NULL) {
 1065                 n = unp_count;
 1066                 req->oldidx = 2 * (sizeof *xug)
 1067                         + (n + n/8) * sizeof(struct xunpcb);
 1068                 return (0);
 1069         }
 1070 
 1071         if (req->newptr != NULL)
 1072                 return (EPERM);
 1073 
 1074         /*
 1075          * OK, now we're committed to doing something.
 1076          */
 1077         xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
 1078         UNP_LOCK();
 1079         gencnt = unp_gencnt;
 1080         n = unp_count;
 1081         UNP_UNLOCK();
 1082 
 1083         xug->xug_len = sizeof *xug;
 1084         xug->xug_count = n;
 1085         xug->xug_gen = gencnt;
 1086         xug->xug_sogen = so_gencnt;
 1087         error = SYSCTL_OUT(req, xug, sizeof *xug);
 1088         if (error) {
 1089                 free(xug, M_TEMP);
 1090                 return (error);
 1091         }
 1092 
 1093         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 1094 
 1095         UNP_LOCK();
 1096         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 1097              unp = LIST_NEXT(unp, unp_link)) {
 1098                 if (unp->unp_gencnt <= gencnt) {
 1099                         if (cr_cansee(req->td->td_ucred,
 1100                             unp->unp_socket->so_cred))
 1101                                 continue;
 1102                         unp_list[i++] = unp;
 1103                 }
 1104         }
 1105         UNP_UNLOCK();
 1106         n = i;                  /* in case we lost some during malloc */
 1107 
 1108         error = 0;
 1109         xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 1110         for (i = 0; i < n; i++) {
 1111                 unp = unp_list[i];
 1112                 if (unp->unp_gencnt <= gencnt) {
 1113                         xu->xu_len = sizeof *xu;
 1114                         xu->xu_unpp = unp;
 1115                         /*
 1116                          * XXX - need more locking here to protect against
 1117                          * connect/disconnect races for SMP.
 1118                          */
 1119                         if (unp->unp_addr != NULL)
 1120                                 bcopy(unp->unp_addr, &xu->xu_addr,
 1121                                       unp->unp_addr->sun_len);
 1122                         if (unp->unp_conn != NULL &&
 1123                             unp->unp_conn->unp_addr != NULL)
 1124                                 bcopy(unp->unp_conn->unp_addr,
 1125                                       &xu->xu_caddr,
 1126                                       unp->unp_conn->unp_addr->sun_len);
 1127                         bcopy(unp, &xu->xu_unp, sizeof *unp);
 1128                         sotoxsocket(unp->unp_socket, &xu->xu_socket);
 1129                         error = SYSCTL_OUT(req, xu, sizeof *xu);
 1130                 }
 1131         }
 1132         free(xu, M_TEMP);
 1133         if (!error) {
 1134                 /*
 1135                  * Give the user an updated idea of our state.
 1136                  * If the generation differs from what we told
 1137                  * her before, she knows that something happened
 1138                  * while we were processing this request, and it
 1139                  * might be necessary to retry.
 1140                  */
 1141                 xug->xug_gen = unp_gencnt;
 1142                 xug->xug_sogen = so_gencnt;
 1143                 xug->xug_count = unp_count;
 1144                 error = SYSCTL_OUT(req, xug, sizeof *xug);
 1145         }
 1146         free(unp_list, M_TEMP);
 1147         free(xug, M_TEMP);
 1148         return (error);
 1149 }
 1150 
      /*
       * Read-only (CTLFLAG_RD) "pcblist" sysctl nodes, one under the
       * _net_local_dgram parent and one under _net_local_stream.  Both are
       * served by unp_pcblist(); the socket type passed as arg1 selects which
       * pcb list that handler walks.
       */
 1151 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
 1152             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
 1153             "List of active local datagram sockets");
 1154 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
 1155             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
 1156             "List of active local stream sockets");
 1157 
 1158 static void
 1159 unp_shutdown(struct unpcb *unp)
 1160 {
 1161         struct socket *so;
 1162 
 1163         UNP_LOCK_ASSERT();
 1164 
 1165         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
 1166             (so = unp->unp_conn->unp_socket))
 1167                 socantrcvmore(so);
 1168 }
 1169 
 1170 static void
 1171 unp_drop(struct unpcb *unp, int errno)
 1172 {
 1173         struct socket *so = unp->unp_socket;
 1174 
 1175         UNP_LOCK_ASSERT();
 1176 
 1177         so->so_error = errno;
 1178         unp_disconnect(unp);
 1179 }
 1180 
 1181 #ifdef notdef
      /*
       * Disabled code: only compiled when "notdef" is defined.  Empty
       * placeholder; it performs no work.
       */
 1182 void
 1183 unp_drain(void)
 1184 {
 1185 
 1186 }
 1187 #endif
 1188 
 1189 static void
 1190 unp_freerights(struct file **rp, int fdcount)
 1191 {
 1192         int i;
 1193         struct file *fp;
 1194 
 1195         for (i = 0; i < fdcount; i++) {
 1196                 fp = *rp;
 1197                 /*
 1198                  * zero the pointer before calling
 1199                  * unp_discard since it may end up
 1200                  * in unp_gc()..
 1201                  */
 1202                 *rp++ = 0;
 1203                 unp_discard(fp);
 1204         }
 1205 }
 1206 
 1207 int
 1208 unp_externalize(struct mbuf *control, struct mbuf **controlp)
 1209 {
 1210         struct thread *td = curthread;          /* XXX */
 1211         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1212         int i;
 1213         int *fdp;
 1214         struct file **rp;
 1215         struct file *fp;
 1216         void *data;
 1217         socklen_t clen = control->m_len, datalen;
 1218         int error, newfds;
 1219         int f;
 1220         u_int newlen;
 1221 
 1222         UNP_UNLOCK_ASSERT();
 1223 
 1224         error = 0;
 1225         if (controlp != NULL) /* controlp == NULL => free control messages */
 1226                 *controlp = NULL;
 1227 
 1228         while (cm != NULL) {
 1229                 if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
 1230                         error = EINVAL;
 1231                         break;
 1232                 }
 1233 
 1234                 data = CMSG_DATA(cm);
 1235                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1236 
 1237                 if (cm->cmsg_level == SOL_SOCKET
 1238                     && cm->cmsg_type == SCM_RIGHTS) {
 1239                         newfds = datalen / sizeof(struct file *);
 1240                         rp = data;
 1241 
 1242                         /* If we're not outputting the descriptors free them. */
 1243                         if (error || controlp == NULL) {
 1244                                 unp_freerights(rp, newfds);
 1245                                 goto next;
 1246                         }
 1247                         FILEDESC_LOCK(td->td_proc->p_fd);
 1248                         /* if the new FD's will not fit free them.  */
 1249                         if (!fdavail(td, newfds)) {
 1250                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1251                                 error = EMSGSIZE;
 1252                                 unp_freerights(rp, newfds);
 1253                                 goto next;
 1254                         }
 1255                         /*
 1256                          * now change each pointer to an fd in the global
 1257                          * table to an integer that is the index to the
 1258                          * local fd table entry that we set up to point
 1259                          * to the global one we are transferring.
 1260                          */
 1261                         newlen = newfds * sizeof(int);
 1262                         *controlp = sbcreatecontrol(NULL, newlen,
 1263                             SCM_RIGHTS, SOL_SOCKET);
 1264                         if (*controlp == NULL) {
 1265                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1266                                 error = E2BIG;
 1267                                 unp_freerights(rp, newfds);
 1268                                 goto next;
 1269                         }
 1270 
 1271                         fdp = (int *)
 1272                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1273                         for (i = 0; i < newfds; i++) {
 1274                                 if (fdalloc(td, 0, &f))
 1275                                         panic("unp_externalize fdalloc failed");
 1276                                 fp = *rp++;
 1277                                 td->td_proc->p_fd->fd_ofiles[f] = fp;
 1278                                 FILE_LOCK(fp);
 1279                                 fp->f_msgcount--;
 1280                                 FILE_UNLOCK(fp);
 1281                                 unp_rights--;
 1282                                 *fdp++ = f;
 1283                         }
 1284                         FILEDESC_UNLOCK(td->td_proc->p_fd);
 1285                 } else { /* We can just copy anything else across */
 1286                         if (error || controlp == NULL)
 1287                                 goto next;
 1288                         *controlp = sbcreatecontrol(NULL, datalen,
 1289                             cm->cmsg_type, cm->cmsg_level);
 1290                         if (*controlp == NULL) {
 1291                                 error = ENOBUFS;
 1292                                 goto next;
 1293                         }
 1294                         bcopy(data,
 1295                             CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 1296                             datalen);
 1297                 }
 1298 
 1299                 controlp = &(*controlp)->m_next;
 1300 
 1301 next:
 1302                 if (CMSG_SPACE(datalen) < clen) {
 1303                         clen -= CMSG_SPACE(datalen);
 1304                         cm = (struct cmsghdr *)
 1305                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1306                 } else {
 1307                         clen = 0;
 1308                         cm = NULL;
 1309                 }
 1310         }
 1311 
 1312         m_freem(control);
 1313 
 1314         return (error);
 1315 }
 1316 
 1317 void
 1318 unp_init(void)
 1319 {
 1320         unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
 1321             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 1322         if (unp_zone == NULL)
 1323                 panic("unp_init");
 1324         uma_zone_set_max(unp_zone, nmbclusters);
 1325         LIST_INIT(&unp_dhead);
 1326         LIST_INIT(&unp_shead);
 1327 
 1328         UNP_LOCK_INIT();
 1329 }
 1330 
 1331 static int
 1332 unp_internalize(struct mbuf **controlp, struct thread *td)
 1333 {
 1334         struct mbuf *control = *controlp;
 1335         struct proc *p = td->td_proc;
 1336         struct filedesc *fdescp = p->p_fd;
 1337         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1338         struct cmsgcred *cmcred;
 1339         struct file **rp;
 1340         struct file *fp;
 1341         struct timeval *tv;
 1342         int i, fd, *fdp;
 1343         void *data;
 1344         socklen_t clen = control->m_len, datalen;
 1345         int error, oldfds;
 1346         u_int newlen;
 1347 
 1348         UNP_UNLOCK_ASSERT();
 1349 
 1350         error = 0;
 1351         *controlp = NULL;
 1352 
 1353         while (cm != NULL) {
 1354                 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 1355                     || cm->cmsg_len > clen) {
 1356                         error = EINVAL;
 1357                         goto out;
 1358                 }
 1359 
 1360                 data = CMSG_DATA(cm);
 1361                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1362 
 1363                 switch (cm->cmsg_type) {
 1364                 /*
 1365                  * Fill in credential information.
 1366                  */
 1367                 case SCM_CREDS:
 1368                         *controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 1369                             SCM_CREDS, SOL_SOCKET);
 1370                         if (*controlp == NULL) {
 1371                                 error = ENOBUFS;
 1372                                 goto out;
 1373                         }
 1374 
 1375                         cmcred = (struct cmsgcred *)
 1376                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1377                         cmcred->cmcred_pid = p->p_pid;
 1378                         cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 1379                         cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 1380                         cmcred->cmcred_euid = td->td_ucred->cr_uid;
 1381                         cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 1382                                                         CMGROUP_MAX);
 1383                         for (i = 0; i < cmcred->cmcred_ngroups; i++)
 1384                                 cmcred->cmcred_groups[i] =
 1385                                     td->td_ucred->cr_groups[i];
 1386                         break;
 1387 
 1388                 case SCM_RIGHTS:
 1389                         oldfds = datalen / sizeof (int);
 1390                         /*
 1391                          * check that all the FDs passed in refer to legal files
 1392                          * If not, reject the entire operation.
 1393                          */
 1394                         fdp = data;
 1395                         FILEDESC_LOCK(fdescp);
 1396                         for (i = 0; i < oldfds; i++) {
 1397                                 fd = *fdp++;
 1398                                 if ((unsigned)fd >= fdescp->fd_nfiles ||
 1399                                     fdescp->fd_ofiles[fd] == NULL) {
 1400                                         FILEDESC_UNLOCK(fdescp);
 1401                                         error = EBADF;
 1402                                         goto out;
 1403                                 }
 1404                                 fp = fdescp->fd_ofiles[fd];
 1405                                 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 1406                                         FILEDESC_UNLOCK(fdescp);
 1407                                         error = EOPNOTSUPP;
 1408                                         goto out;
 1409                                 }
 1410 
 1411                         }
 1412                         /*
 1413                          * Now replace the integer FDs with pointers to
 1414                          * the associated global file table entry..
 1415                          */
 1416                         newlen = oldfds * sizeof(struct file *);
 1417                         *controlp = sbcreatecontrol(NULL, newlen,
 1418                             SCM_RIGHTS, SOL_SOCKET);
 1419                         if (*controlp == NULL) {
 1420                                 FILEDESC_UNLOCK(fdescp);
 1421                                 error = E2BIG;
 1422                                 goto out;
 1423                         }
 1424 
 1425                         fdp = data;
 1426                         rp = (struct file **)
 1427                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1428                         for (i = 0; i < oldfds; i++) {
 1429                                 fp = fdescp->fd_ofiles[*fdp++];
 1430                                 *rp++ = fp;
 1431                                 FILE_LOCK(fp);
 1432                                 fp->f_count++;
 1433                                 fp->f_msgcount++;
 1434                                 FILE_UNLOCK(fp);
 1435                                 unp_rights++;
 1436                         }
 1437                         FILEDESC_UNLOCK(fdescp);
 1438                         break;
 1439 
 1440                 case SCM_TIMESTAMP:
 1441                         *controlp = sbcreatecontrol(NULL, sizeof(*tv),
 1442                             SCM_TIMESTAMP, SOL_SOCKET);
 1443                         if (*controlp == NULL) {
 1444                                 error = ENOBUFS;
 1445                                 goto out;
 1446                         }
 1447                         tv = (struct timeval *)
 1448                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1449                         microtime(tv);
 1450                         break;
 1451 
 1452                 default:
 1453                         error = EINVAL;
 1454                         goto out;
 1455                 }
 1456 
 1457                 controlp = &(*controlp)->m_next;
 1458 
 1459                 if (CMSG_SPACE(datalen) < clen) {
 1460                         clen -= CMSG_SPACE(datalen);
 1461                         cm = (struct cmsghdr *)
 1462                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1463                 } else {
 1464                         clen = 0;
 1465                         cm = NULL;
 1466                 }
 1467         }
 1468 
 1469 out:
 1470         m_freem(control);
 1471 
 1472         return (error);
 1473 }
 1474 
 1475 /*
 1476  * unp_defer is thread-local during garbage collection, and does not require
 1477  * explicit synchronization.  unp_gcing prevents other threads from entering
 1478  * garbage collection, and perhaps should be an sx lock instead.
       *
       * unp_gc() tests and sets unp_gcing, and resets unp_defer to zero, while
       * it still holds the UNP lock on entry; its marking loop repeats for as
       * long as unp_defer is non-zero.
 1479  */
 1480 static int      unp_defer, unp_gcing;
 1481 
 1482 static void
 1483 unp_gc(void)
 1484 {
 1485         struct file *fp, *nextfp;
 1486         struct socket *so;
 1487         struct file **extra_ref, **fpp;
 1488         int nunref, i;
 1489         int nfiles_snap;
 1490         int nfiles_slack = 20;
 1491 
 1492         UNP_LOCK_ASSERT();
 1493 
 1494         if (unp_gcing) {
 1495                 UNP_UNLOCK();
 1496                 return;
 1497         }
 1498         unp_gcing = 1;
 1499         unp_defer = 0;
 1500         UNP_UNLOCK();
 1501         /*
 1502          * before going through all this, set all FDs to
 1503          * be NOT defered and NOT externally accessible
 1504          */
 1505         sx_slock(&filelist_lock);
 1506         LIST_FOREACH(fp, &filehead, f_list)
 1507                 fp->f_gcflag &= ~(FMARK|FDEFER);
 1508         do {
 1509                 LIST_FOREACH(fp, &filehead, f_list) {
 1510                         FILE_LOCK(fp);
 1511                         /*
 1512                          * If the file is not open, skip it
 1513                          */
 1514                         if (fp->f_count == 0) {
 1515                                 FILE_UNLOCK(fp);
 1516                                 continue;
 1517                         }
 1518                         /*
 1519                          * If we already marked it as 'defer'  in a
 1520                          * previous pass, then try process it this time
 1521                          * and un-mark it
 1522                          */
 1523                         if (fp->f_gcflag & FDEFER) {
 1524                                 fp->f_gcflag &= ~FDEFER;
 1525                                 unp_defer--;
 1526                         } else {
 1527                                 /*
 1528                                  * if it's not defered, then check if it's
 1529                                  * already marked.. if so skip it
 1530                                  */
 1531                                 if (fp->f_gcflag & FMARK) {
 1532                                         FILE_UNLOCK(fp);
 1533                                         continue;
 1534                                 }
 1535                                 /*
 1536                                  * If all references are from messages
 1537                                  * in transit, then skip it. it's not
 1538                                  * externally accessible.
 1539                                  */
 1540                                 if (fp->f_count == fp->f_msgcount) {
 1541                                         FILE_UNLOCK(fp);
 1542                                         continue;
 1543                                 }
 1544                                 /*
 1545                                  * If it got this far then it must be
 1546                                  * externally accessible.
 1547                                  */
 1548                                 fp->f_gcflag |= FMARK;
 1549                         }
 1550                         /*
 1551                          * either it was defered, or it is externally
 1552                          * accessible and not already marked so.
 1553                          * Now check if it is possibly one of OUR sockets.
 1554                          */
 1555                         if (fp->f_type != DTYPE_SOCKET ||
 1556                             (so = fp->f_data) == NULL) {
 1557                                 FILE_UNLOCK(fp);
 1558                                 continue;
 1559                         }
 1560                         FILE_UNLOCK(fp);
 1561                         if (so->so_proto->pr_domain != &localdomain ||
 1562                             (so->so_proto->pr_flags&PR_RIGHTS) == 0)
 1563                                 continue;
 1564 #ifdef notdef
 1565                         if (so->so_rcv.sb_flags & SB_LOCK) {
 1566                                 /*
 1567                                  * This is problematical; it's not clear
 1568                                  * we need to wait for the sockbuf to be
 1569                                  * unlocked (on a uniprocessor, at least),
 1570                                  * and it's also not clear what to do
 1571                                  * if sbwait returns an error due to receipt
 1572                                  * of a signal.  If sbwait does return
 1573                                  * an error, we'll go into an infinite
 1574                                  * loop.  Delete all of this for now.
 1575                                  */
 1576                                 (void) sbwait(&so->so_rcv);
 1577                                 goto restart;
 1578                         }
 1579 #endif
 1580                         /*
 1581                          * So, Ok, it's one of our sockets and it IS externally
 1582                          * accessible (or was defered). Now we look
 1583                          * to see if we hold any file descriptors in its
 1584                          * message buffers. Follow those links and mark them
 1585                          * as accessible too.
 1586                          */
 1587                         SOCKBUF_LOCK(&so->so_rcv);
 1588                         unp_scan(so->so_rcv.sb_mb, unp_mark);
 1589                         SOCKBUF_UNLOCK(&so->so_rcv);
 1590                 }
 1591         } while (unp_defer);
 1592         sx_sunlock(&filelist_lock);
 1593         /*
 1594          * We grab an extra reference to each of the file table entries
 1595          * that are not otherwise accessible and then free the rights
 1596          * that are stored in messages on them.
 1597          *
 1598          * The bug in the original code is a little tricky, so I'll describe
 1599          * what's wrong with it here.
 1600          *
 1601          * It is incorrect to simply unp_discard each entry for f_msgcount
 1602          * times -- consider the case of sockets A and B that contain
 1603          * references to each other.  On a last close of some other socket,
 1604          * we trigger a gc since the number of outstanding rights (unp_rights)
 1605          * is non-zero.  If during the sweep phase the gc code unp_discards,
 1606          * we end up doing a (full) closef on the descriptor.  A closef on A
 1607          * results in the following chain.  Closef calls soo_close, which
 1608          * calls soclose.   Soclose calls first (through the switch
 1609          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
 1610          * returns because the previous instance had set unp_gcing, and
 1611          * we return all the way back to soclose, which marks the socket
 1612          * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
 1613          * to free up the rights that are queued in messages on the socket A,
 1614          * i.e., the reference on B.  The sorflush calls via the dom_dispose
 1615          * switch unp_dispose, which unp_scans with unp_discard.  This second
 1616          * instance of unp_discard just calls closef on B.
 1617          *
 1618          * Well, a similar chain occurs on B, resulting in a sorflush on B,
 1619          * which results in another closef on A.  Unfortunately, A is already
 1620          * being closed, and the descriptor has already been marked with
 1621          * SS_NOFDREF, and soclose panics at this point.
 1622          *
 1623          * Here, we first take an extra reference to each inaccessible
 1624          * descriptor.  Then, we call sorflush ourself, since we know
 1625          * it is a Unix domain socket anyhow.  After we destroy all the
 1626          * rights carried in messages, we do a last closef to get rid
 1627          * of our extra reference.  This is the last close, and the
 1628          * unp_detach etc will shut down the socket.
 1629          *
 1630          * 91/09/19, bsy@cs.cmu.edu
 1631          */
 1632 again:
 1633         nfiles_snap = nfiles + nfiles_slack;    /* some slack */
 1634         extra_ref = malloc(nfiles_snap * sizeof(struct file *), M_TEMP,
 1635             M_WAITOK);
 1636         sx_slock(&filelist_lock);
 1637         if (nfiles_snap < nfiles) {
 1638                 sx_sunlock(&filelist_lock);
 1639                 free(extra_ref, M_TEMP);
 1640                 nfiles_slack += 20;
 1641                 goto again;
 1642         }
 1643         for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
 1644             fp != NULL; fp = nextfp) {
 1645                 nextfp = LIST_NEXT(fp, f_list);
 1646                 FILE_LOCK(fp);
 1647                 /*
 1648                  * If it's not open, skip it
 1649                  */
 1650                 if (fp->f_count == 0) {
 1651                         FILE_UNLOCK(fp);
 1652                         continue;
 1653                 }
 1654                 /*
 1655                  * If all refs are from msgs, and it's not marked accessible
 1656                  * then it must be referenced from some unreachable cycle
 1657                  * of (shut-down) FDs, so include it in our
 1658                  * list of FDs to remove
 1659                  */
 1660                 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) {
 1661                         *fpp++ = fp;
 1662                         nunref++;
 1663                         fp->f_count++;
 1664                 }
 1665                 FILE_UNLOCK(fp);
 1666         }
 1667         sx_sunlock(&filelist_lock);
 1668         /*
 1669          * for each FD on our hit list, do the following two things
 1670          */
 1671         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 1672                 struct file *tfp = *fpp;
 1673                 FILE_LOCK(tfp);
 1674                 if (tfp->f_type == DTYPE_SOCKET &&
 1675                     tfp->f_data != NULL) {
 1676                         FILE_UNLOCK(tfp);
 1677                         sorflush(tfp->f_data);
 1678                 } else {
 1679                         FILE_UNLOCK(tfp);
 1680                 }
 1681         }
 1682         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
 1683                 closef(*fpp, (struct thread *) NULL);
 1684         free(extra_ref, M_TEMP);
 1685         unp_gcing = 0;
 1686 
 1687         UNP_UNLOCK_ASSERT();
 1688 }
 1689 
 1690 void
 1691 unp_dispose(struct mbuf *m)
 1692 {
 1693 
 1694         if (m)
 1695                 unp_scan(m, unp_discard);
 1696 }
 1697 
 1698 static int
 1699 unp_listen(struct socket *so, struct unpcb *unp, struct thread *td)
 1700 {
 1701         int error;
 1702 
 1703         UNP_LOCK_ASSERT();
 1704 
 1705         SOCK_LOCK(so);
 1706         error = solisten_proto_check(so);
 1707         if (error == 0) {
 1708                 cru2x(td->td_ucred, &unp->unp_peercred);
 1709                 unp->unp_flags |= UNP_HAVEPCCACHED;
 1710                 solisten_proto(so);
 1711         }
 1712         SOCK_UNLOCK(so);
 1713         return (error);
 1714 }
 1715 
 1716 static void
 1717 unp_scan(struct mbuf *m0, void (*op)(struct file *))
 1718 {
 1719         struct mbuf *m;
 1720         struct file **rp;
 1721         struct cmsghdr *cm;
 1722         void *data;
 1723         int i;
 1724         socklen_t clen, datalen;
 1725         int qfds;
 1726 
 1727         while (m0 != NULL) {
 1728                 for (m = m0; m; m = m->m_next) {
 1729                         if (m->m_type != MT_CONTROL)
 1730                                 continue;
 1731 
 1732                         cm = mtod(m, struct cmsghdr *);
 1733                         clen = m->m_len;
 1734 
 1735                         while (cm != NULL) {
 1736                                 if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 1737                                         break;
 1738 
 1739                                 data = CMSG_DATA(cm);
 1740                                 datalen = (caddr_t)cm + cm->cmsg_len
 1741                                     - (caddr_t)data;
 1742 
 1743                                 if (cm->cmsg_level == SOL_SOCKET &&
 1744                                     cm->cmsg_type == SCM_RIGHTS) {
 1745                                         qfds = datalen / sizeof (struct file *);
 1746                                         rp = data;
 1747                                         for (i = 0; i < qfds; i++)
 1748                                                 (*op)(*rp++);
 1749                                 }
 1750 
 1751                                 if (CMSG_SPACE(datalen) < clen) {
 1752                                         clen -= CMSG_SPACE(datalen);
 1753                                         cm = (struct cmsghdr *)
 1754                                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1755                                 } else {
 1756                                         clen = 0;
 1757                                         cm = NULL;
 1758                                 }
 1759                         }
 1760                 }
 1761                 m0 = m0->m_act;
 1762         }
 1763 }
 1764 
 1765 static void
 1766 unp_mark(struct file *fp)
 1767 {
 1768         if (fp->f_gcflag & FMARK)
 1769                 return;
 1770         unp_defer++;
 1771         fp->f_gcflag |= (FMARK|FDEFER);
 1772 }
 1773 
 1774 static void
 1775 unp_discard(struct file *fp)
 1776 {
 1777         FILE_LOCK(fp);
 1778         fp->f_msgcount--;
 1779         unp_rights--;
 1780         FILE_UNLOCK(fp);
 1781         (void) closef(fp, (struct thread *)NULL);
 1782 }

Cache object: 0d6eb1513d8851e6716e6327d83f0ced


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.