The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright 2004-2005 Robert N. M. Watson
    3  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    4  *      The Regents of the University of California.  All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  * 4. Neither the name of the University nor the names of its contributors
   15  *    may be used to endorse or promote products derived from this software
   16  *    without specific prior written permission.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   28  * SUCH DAMAGE.
   29  *
   30  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   31  */
   32 
   33 #include <sys/cdefs.h>
   34 __FBSDID("$FreeBSD: releng/5.4/sys/kern/uipc_usrreq.c 145979 2005-05-07 03:58:25Z cperciva $");
   35 
   36 #include "opt_mac.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/domain.h>
   40 #include <sys/fcntl.h>
   41 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
   42 #include <sys/file.h>
   43 #include <sys/filedesc.h>
   44 #include <sys/jail.h>
   45 #include <sys/kernel.h>
   46 #include <sys/lock.h>
   47 #include <sys/mac.h>
   48 #include <sys/mbuf.h>
   49 #include <sys/mutex.h>
   50 #include <sys/namei.h>
   51 #include <sys/proc.h>
   52 #include <sys/protosw.h>
   53 #include <sys/resourcevar.h>
   54 #include <sys/socket.h>
   55 #include <sys/socketvar.h>
   56 #include <sys/signalvar.h>
   57 #include <sys/stat.h>
   58 #include <sys/sx.h>
   59 #include <sys/sysctl.h>
   60 #include <sys/systm.h>
   61 #include <sys/un.h>
   62 #include <sys/unpcb.h>
   63 #include <sys/vnode.h>
   64 
   65 #include <vm/uma.h>
   66 
      /* UMA zone from which unp_attach() allocates (and unp_detach() frees) unpcbs. */
   67 static uma_zone_t unp_zone;
      /* Generation counter; incremented on every attach and every detach. */
   68 static  unp_gen_t unp_gencnt;
      /* Number of currently attached unpcbs. */
   69 static  u_int unp_count;
   70 
      /* Lists of all stream (unp_shead) and datagram (unp_dhead) unpcbs. */
   71 static  struct unp_head unp_shead, unp_dhead;
   72 
   73 /*
   74  * Unix communications domain.
   75  *
   76  * TODO:
   77  *      SEQPACKET, RDM
   78  *      rethink name space problems
   79  *      need a proper out-of-band
   80  *      lock pushdown
   81  */
      /*
       * Placeholder AF_LOCAL address copied out (or used as the datagram
       * source) whenever the relevant unpcb has no bound address.
       */
   82 static const struct     sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
   83 static ino_t    unp_ino;                /* prototype for fake inode numbers */
   84 
   85 /*
   86  * Currently, UNIX domain sockets are protected by a single subsystem lock,
   87  * which covers global data structures and variables, the contents of each
   88  * per-socket unpcb structure, and the so_pcb field in sockets attached to
   89  * the UNIX domain.  This provides for a moderate degree of parallelism, as
   90  * receive operations on UNIX domain sockets do not need to acquire the
   91  * subsystem lock.  Finer grained locking to permit send() without acquiring
   92  * a global lock would be a logical next step.
   93  *
   94  * The UNIX domain socket lock precedes all socket layer locks, including the
   95  * socket lock and socket buffer lock, permitting UNIX domain socket code to
   96  * call into socket support routines without releasing its locks.
   97  *
   98  * Some caution is required in areas where the UNIX domain socket code enters
   99  * VFS in order to create or find rendezvous points.  This results in
  100  * dropping of the UNIX domain socket subsystem lock, acquisition of the
  101  * Giant lock, and potential sleeping.  This increases the chances of races,
  102  * and exposes weaknesses in the socket->protocol API by offering poor
  103  * failure modes.
  104  */
      /*
       * The single UNIX-domain subsystem mutex and its wrapper macros.
       * UNP_LOCK_ASSERT() asserts the mutex is owned by the caller;
       * UNP_UNLOCK_ASSERT() asserts that it is not.
       */
  105 static struct mtx unp_mtx;
  106 #define UNP_LOCK_INIT() \
  107         mtx_init(&unp_mtx, "unp", NULL, MTX_DEF)
  108 #define UNP_LOCK()              mtx_lock(&unp_mtx)
  109 #define UNP_UNLOCK()            mtx_unlock(&unp_mtx)
  110 #define UNP_LOCK_ASSERT()       mtx_assert(&unp_mtx, MA_OWNED)
  111 #define UNP_UNLOCK_ASSERT()     mtx_assert(&unp_mtx, MA_NOTOWNED)
  112 
      /* Forward declarations of the file-local unp_* helper routines. */
  113 static int     unp_attach(struct socket *);
  114 static void    unp_detach(struct unpcb *);
  115 static int     unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
  116 static int     unp_connect(struct socket *,struct sockaddr *, struct thread *);
  117 static int     unp_connect2(struct socket *so, struct socket *so2);
  118 static void    unp_disconnect(struct unpcb *);
  119 static void    unp_shutdown(struct unpcb *);
  120 static void    unp_drop(struct unpcb *, int);
  121 static void    unp_gc(void);
  122 static void    unp_scan(struct mbuf *, void (*)(struct file *));
  123 static void    unp_mark(struct file *);
  124 static void    unp_discard(struct file *);
  125 static void    unp_freerights(struct file **, int);
  126 static int     unp_internalize(struct mbuf **, struct thread *);
  127 static int     unp_listen(struct socket *, struct unpcb *, struct thread *);
  128 
  129 static int
  130 uipc_abort(struct socket *so)
  131 {
  132         struct unpcb *unp;
  133 
  134         UNP_LOCK();
  135         unp = sotounpcb(so);
  136         if (unp == NULL) {
  137                 UNP_UNLOCK();
  138                 return (EINVAL);
  139         }
  140         unp_drop(unp, ECONNABORTED);
  141         unp_detach(unp);
  142         UNP_UNLOCK_ASSERT();
  143         ACCEPT_LOCK();
  144         SOCK_LOCK(so);
  145         sotryfree(so);
  146         return (0);
  147 }
  148 
  149 static int
  150 uipc_accept(struct socket *so, struct sockaddr **nam)
  151 {
  152         struct unpcb *unp;
  153         const struct sockaddr *sa;
  154 
  155         /*
  156          * Pass back name of connected socket,
  157          * if it was bound and we are still connected
  158          * (our peer may have closed already!).
  159          */
  160         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  161         UNP_LOCK();
  162         unp = sotounpcb(so);
  163         if (unp == NULL) {
  164                 UNP_UNLOCK();
  165                 free(*nam, M_SONAME);
  166                 *nam = NULL;
  167                 return (EINVAL);
  168         }
  169         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL)
  170                 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
  171         else
  172                 sa = &sun_noname;
  173         bcopy(sa, *nam, sa->sa_len);
  174         UNP_UNLOCK();
  175         return (0);
  176 }
  177 
  178 static int
  179 uipc_attach(struct socket *so, int proto, struct thread *td)
  180 {
  181         struct unpcb *unp = sotounpcb(so);
  182 
  183         if (unp != NULL)
  184                 return (EISCONN);
  185         return (unp_attach(so));
  186 }
  187 
  188 static int
  189 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  190 {
  191         struct unpcb *unp;
  192         int error;
  193 
  194         UNP_LOCK();
  195         unp = sotounpcb(so);
  196         if (unp == NULL) {
  197                 UNP_UNLOCK();
  198                 return (EINVAL);
  199         }
  200         error = unp_bind(unp, nam, td);
  201         UNP_UNLOCK();
  202         return (error);
  203 }
  204 
  205 static int
  206 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  207 {
  208         struct unpcb *unp;
  209         int error;
  210 
  211         KASSERT(td == curthread, ("uipc_connect: td != curthread"));
  212 
  213         UNP_LOCK();
  214         unp = sotounpcb(so);
  215         if (unp == NULL) {
  216                 UNP_UNLOCK();
  217                 return (EINVAL);
  218         }
  219         error = unp_connect(so, nam, td);
  220         UNP_UNLOCK();
  221         return (error);
  222 }
  223 
  224 int
  225 uipc_connect2(struct socket *so1, struct socket *so2)
  226 {
  227         struct unpcb *unp;
  228         int error;
  229 
  230         UNP_LOCK();
  231         unp = sotounpcb(so1);
  232         if (unp == NULL) {
  233                 UNP_UNLOCK();
  234                 return (EINVAL);
  235         }
  236         error = unp_connect2(so1, so2);
  237         UNP_UNLOCK();
  238         return (error);
  239 }
  240 
  241 /* control is EOPNOTSUPP */
  242 
  243 static int
  244 uipc_detach(struct socket *so)
  245 {
  246         struct unpcb *unp;
  247 
  248         UNP_LOCK();
  249         unp = sotounpcb(so);
  250         if (unp == NULL) {
  251                 UNP_UNLOCK();
  252                 return (EINVAL);
  253         }
  254         unp_detach(unp);
  255         UNP_UNLOCK_ASSERT();
  256         return (0);
  257 }
  258 
  259 static int
  260 uipc_disconnect(struct socket *so)
  261 {
  262         struct unpcb *unp;
  263 
  264         UNP_LOCK();
  265         unp = sotounpcb(so);
  266         if (unp == NULL) {
  267                 UNP_UNLOCK();
  268                 return (EINVAL);
  269         }
  270         unp_disconnect(unp);
  271         UNP_UNLOCK();
  272         return (0);
  273 }
  274 
  275 static int
  276 uipc_listen(struct socket *so, struct thread *td)
  277 {
  278         struct unpcb *unp;
  279         int error;
  280 
  281         UNP_LOCK();
  282         unp = sotounpcb(so);
  283         if (unp == NULL || unp->unp_vnode == NULL) {
  284                 UNP_UNLOCK();
  285                 return (EINVAL);
  286         }
  287         error = unp_listen(so, unp, td);
  288         UNP_UNLOCK();
  289         return (error);
  290 }
  291 
  292 static int
  293 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  294 {
  295         struct unpcb *unp;
  296         const struct sockaddr *sa;
  297 
  298         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  299         UNP_LOCK();
  300         unp = sotounpcb(so);
  301         if (unp == NULL) {
  302                 UNP_UNLOCK();
  303                 free(*nam, M_SONAME);
  304                 *nam = NULL;
  305                 return (EINVAL);
  306         }
  307         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL)
  308                 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
  309         else {
  310                 /*
  311                  * XXX: It seems that this test always fails even when
  312                  * connection is established.  So, this else clause is
  313                  * added as workaround to return PF_LOCAL sockaddr.
  314                  */
  315                 sa = &sun_noname;
  316         }
  317         bcopy(sa, *nam, sa->sa_len);
  318         UNP_UNLOCK();
  319         return (0);
  320 }
  321 
  322 static int
  323 uipc_rcvd(struct socket *so, int flags)
  324 {
  325         struct unpcb *unp;
  326         struct socket *so2;
  327         u_long newhiwat;
  328 
  329         UNP_LOCK();
  330         unp = sotounpcb(so);
  331         if (unp == NULL) {
  332                 UNP_UNLOCK();
  333                 return (EINVAL);
  334         }
  335         switch (so->so_type) {
  336         case SOCK_DGRAM:
  337                 panic("uipc_rcvd DGRAM?");
  338                 /*NOTREACHED*/
  339 
  340         case SOCK_STREAM:
  341                 if (unp->unp_conn == NULL)
  342                         break;
  343                 so2 = unp->unp_conn->unp_socket;
  344                 SOCKBUF_LOCK(&so2->so_snd);
  345                 SOCKBUF_LOCK(&so->so_rcv);
  346                 /*
  347                  * Adjust backpressure on sender
  348                  * and wakeup any waiting to write.
  349                  */
  350                 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt;
  351                 unp->unp_mbcnt = so->so_rcv.sb_mbcnt;
  352                 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc -
  353                     so->so_rcv.sb_cc;
  354                 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
  355                     newhiwat, RLIM_INFINITY);
  356                 unp->unp_cc = so->so_rcv.sb_cc;
  357                 SOCKBUF_UNLOCK(&so->so_rcv);
  358                 sowwakeup_locked(so2);
  359                 break;
  360 
  361         default:
  362                 panic("uipc_rcvd unknown socktype");
  363         }
  364         UNP_UNLOCK();
  365         return (0);
  366 }
  367 
  368 /* pru_rcvoob is EOPNOTSUPP */
  369 
      /*
       * Send data (m) and optional control (control) on a UNIX-domain socket.
       *
       * - PRUS_OOB is refused with EOPNOTSUPP.
       * - Control data is passed through unp_internalize() before the UNP
       *   lock is taken.
       * - SOCK_DGRAM: with nam, the socket must be unconnected (else EISCONN);
       *   it is connected for this one send and disconnected again afterwards.
       *   Without nam it must already be connected (else ENOTCONN).  The
       *   datagram is appended to the peer's receive buffer together with our
       *   bound address (or sun_noname); ENOBUFS if the append fails.
       * - SOCK_STREAM: connects first when not yet connected and nam is given
       *   (ENOTCONN without nam), fails with EPIPE once the send side is shut,
       *   appends to the peer's receive buffer and lowers our own send-buffer
       *   limits by the amount the peer's buffer grew.
       * - PRUS_EOF additionally shuts down the send side; note this runs even
       *   when the switch above was left with an error.
       *
       * Whatever of m and control was not handed to the peer is freed before
       * returning; control is first passed to unp_dispose() when an error
       * occurred after it was internalized.
       */
  370 static int
  371 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  372     struct mbuf *control, struct thread *td)
  373 {
  374         int error = 0;
  375         struct unpcb *unp;
  376         struct socket *so2;
  377         u_long newhiwat;
  378 
              /* Unlocked early check; the pcb is looked up again under the lock. */
  379         unp = sotounpcb(so);
  380         if (unp == NULL) {
  381                 error = EINVAL;
  382                 goto release;
  383         }
  384         if (flags & PRUS_OOB) {
  385                 error = EOPNOTSUPP;
  386                 goto release;
  387         }
  388 
  389         if (control != NULL && (error = unp_internalize(&control, td)))
  390                 goto release;
  391 
  392         UNP_LOCK();
  393         unp = sotounpcb(so);
  394         if (unp == NULL) {
  395                 UNP_UNLOCK();
  396                 error = EINVAL;
  397                 goto dispose_release;
  398         }
  399 
  400         switch (so->so_type) {
  401         case SOCK_DGRAM:
  402         {
  403                 const struct sockaddr *from;
  404 
  405                 if (nam != NULL) {
  406                         if (unp->unp_conn != NULL) {
  407                                 error = EISCONN;
  408                                 break;
  409                         }
  410                         error = unp_connect(so, nam, td);
  411                         if (error)
  412                                 break;
  413                 } else {
  414                         if (unp->unp_conn == NULL) {
  415                                 error = ENOTCONN;
  416                                 break;
  417                         }
  418                 }
  419                 so2 = unp->unp_conn->unp_socket;
  420                 if (unp->unp_addr != NULL)
  421                         from = (struct sockaddr *)unp->unp_addr;
  422                 else
  423                         from = &sun_noname;
  424                 SOCKBUF_LOCK(&so2->so_rcv);
                      /* On success m and control now belong to the peer's buffer. */
  425                 if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
  426                         sorwakeup_locked(so2);
  427                         m = NULL;
  428                         control = NULL;
  429                 } else {
  430                         SOCKBUF_UNLOCK(&so2->so_rcv);
  431                         error = ENOBUFS;
  432                 }
                      /* Undo the temporary connection made for this datagram. */
  433                 if (nam != NULL)
  434                         unp_disconnect(unp);
  435                 break;
  436         }
  437 
  438         case SOCK_STREAM:
  439                 /* Connect if not connected yet. */
  440                 /*
  441                  * Note: A better implementation would complain
  442                  * if not equal to the peer's address.
  443                  */
  444                 if ((so->so_state & SS_ISCONNECTED) == 0) {
  445                         if (nam != NULL) {
  446                                 error = unp_connect(so, nam, td);
  447                                 if (error)
  448                                         break;  /* XXX */
  449                         } else {
  450                                 error = ENOTCONN;
  451                                 break;
  452                         }
  453                 }
  454 
  455                 SOCKBUF_LOCK(&so->so_snd);
  456                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
  457                         SOCKBUF_UNLOCK(&so->so_snd);
  458                         error = EPIPE;
  459                         break;
  460                 }
  461                 if (unp->unp_conn == NULL)
  462                         panic("uipc_send connected but no connection?");
  463                 so2 = unp->unp_conn->unp_socket;
  464                 SOCKBUF_LOCK(&so2->so_rcv);
  465                 /*
  466                  * Send to paired receive port, and then reduce
  467                  * send buffer hiwater marks to maintain backpressure.
  468                  * Wake up readers.
  469                  */
                      /*
                       * NOTE(review): when sbappendcontrol_locked() returns 0,
                       * control is kept for cleanup but m is still set to NULL
                       * below with error == 0 -- confirm whether the callee
                       * consumes m on failure, otherwise m is lost here.
                       */
  470                 if (control != NULL) {
  471                         if (sbappendcontrol_locked(&so2->so_rcv, m, control))
  472                                 control = NULL;
  473                 } else {
  474                         sbappend_locked(&so2->so_rcv, m);
  475                 }
  476                 so->so_snd.sb_mbmax -=
  477                         so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
  478                 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt;
  479                 newhiwat = so->so_snd.sb_hiwat -
  480                     (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc);
  481                 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
  482                     newhiwat, RLIM_INFINITY);
  483                 SOCKBUF_UNLOCK(&so->so_snd);
  484                 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
  485                 sorwakeup_locked(so2);
  486                 m = NULL;
  487                 break;
  488 
  489         default:
  490                 panic("uipc_send unknown socktype");
  491         }
  492 
  493         /*
  494          * SEND_EOF is equivalent to a SEND followed by
  495          * a SHUTDOWN.
  496          */
  497         if (flags & PRUS_EOF) {
  498                 socantsendmore(so);
  499                 unp_shutdown(unp);
  500         }
  501         UNP_UNLOCK();
  502 
  503 dispose_release:
              /* Reached only after control was internalized. */
  504         if (control != NULL && error != 0)
  505                 unp_dispose(control);
  506 
  507 release:
  508         if (control != NULL)
  509                 m_freem(control);
  510         if (m != NULL)
  511                 m_freem(m);
  512         return (error);
  513 }
  514 
  515 static int
  516 uipc_sense(struct socket *so, struct stat *sb)
  517 {
  518         struct unpcb *unp;
  519         struct socket *so2;
  520 
  521         UNP_LOCK();
  522         unp = sotounpcb(so);
  523         if (unp == NULL) {
  524                 UNP_UNLOCK();
  525                 return (EINVAL);
  526         }
  527         sb->st_blksize = so->so_snd.sb_hiwat;
  528         if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
  529                 so2 = unp->unp_conn->unp_socket;
  530                 sb->st_blksize += so2->so_rcv.sb_cc;
  531         }
  532         sb->st_dev = NODEV;
  533         if (unp->unp_ino == 0)
  534                 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
  535         sb->st_ino = unp->unp_ino;
  536         UNP_UNLOCK();
  537         return (0);
  538 }
  539 
  540 static int
  541 uipc_shutdown(struct socket *so)
  542 {
  543         struct unpcb *unp;
  544 
  545         UNP_LOCK();
  546         unp = sotounpcb(so);
  547         if (unp == NULL) {
  548                 UNP_UNLOCK();
  549                 return (EINVAL);
  550         }
  551         socantsendmore(so);
  552         unp_shutdown(unp);
  553         UNP_UNLOCK();
  554         return (0);
  555 }
  556 
  557 static int
  558 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
  559 {
  560         struct unpcb *unp;
  561         const struct sockaddr *sa;
  562 
  563         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  564         UNP_LOCK();
  565         unp = sotounpcb(so);
  566         if (unp == NULL) {
  567                 UNP_UNLOCK();
  568                 free(*nam, M_SONAME);
  569                 *nam = NULL;
  570                 return (EINVAL);
  571         }
  572         if (unp->unp_addr != NULL)
  573                 sa = (struct sockaddr *) unp->unp_addr;
  574         else
  575                 sa = &sun_noname;
  576         bcopy(sa, *nam, sa->sa_len);
  577         UNP_UNLOCK();
  578         return (0);
  579 }
  580 
  581 struct pr_usrreqs uipc_usrreqs = {
  582         uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
  583         uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
  584         uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
  585         uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
  586         sosend, soreceive, sopoll, pru_sosetlabel_null
  587 };
  588 
  589 int
  590 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
  591 {
  592         struct unpcb *unp;
  593         struct xucred xu;
  594         int error;
  595 
  596         switch (sopt->sopt_dir) {
  597         case SOPT_GET:
  598                 switch (sopt->sopt_name) {
  599                 case LOCAL_PEERCRED:
  600                         error = 0;
  601                         UNP_LOCK();
  602                         unp = sotounpcb(so);
  603                         if (unp == NULL) {
  604                                 UNP_UNLOCK();
  605                                 error = EINVAL;
  606                                 break;
  607                         }
  608                         if (unp->unp_flags & UNP_HAVEPC)
  609                                 xu = unp->unp_peercred;
  610                         else {
  611                                 if (so->so_type == SOCK_STREAM)
  612                                         error = ENOTCONN;
  613                                 else
  614                                         error = EINVAL;
  615                         }
  616                         UNP_UNLOCK();
  617                         if (error == 0)
  618                                 error = sooptcopyout(sopt, &xu, sizeof(xu));
  619                         break;
  620                 default:
  621                         error = EOPNOTSUPP;
  622                         break;
  623                 }
  624                 break;
  625         case SOPT_SET:
  626         default:
  627                 error = EOPNOTSUPP;
  628                 break;
  629         }
  630         return (error);
  631 }
  632 
  633 /*
  634  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
  635  * for stream sockets, although the total for sender and receiver is
  636  * actually only PIPSIZ.
  637  * Datagram sockets really use the sendspace as the maximum datagram size,
  638  * and don't really want to reserve the sendspace.  Their recvspace should
  639  * be large enough for at least one max-size datagram plus address.
  640  */
  641 #ifndef PIPSIZ
  642 #define PIPSIZ  8192
  643 #endif
  644 static u_long   unpst_sendspace = PIPSIZ;
  645 static u_long   unpst_recvspace = PIPSIZ;
  646 static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
  647 static u_long   unpdg_recvspace = 4*1024;
  648 
  649 static int      unp_rights;                     /* file descriptors in flight */
  650 
  651 SYSCTL_DECL(_net_local_stream);
  652 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
  653            &unpst_sendspace, 0, "");
  654 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
  655            &unpst_recvspace, 0, "");
  656 SYSCTL_DECL(_net_local_dgram);
  657 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
  658            &unpdg_sendspace, 0, "");
  659 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
  660            &unpdg_recvspace, 0, "");
  661 SYSCTL_DECL(_net_local);
  662 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
  663 
  664 static int
  665 unp_attach(struct socket *so)
  666 {
  667         struct unpcb *unp;
  668         int error;
  669 
  670         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  671                 switch (so->so_type) {
  672 
  673                 case SOCK_STREAM:
  674                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
  675                         break;
  676 
  677                 case SOCK_DGRAM:
  678                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
  679                         break;
  680 
  681                 default:
  682                         panic("unp_attach");
  683                 }
  684                 if (error)
  685                         return (error);
  686         }
  687         unp = uma_zalloc(unp_zone, M_WAITOK | M_ZERO);
  688         if (unp == NULL)
  689                 return (ENOBUFS);
  690         LIST_INIT(&unp->unp_refs);
  691         unp->unp_socket = so;
  692         so->so_pcb = unp;
  693 
  694         UNP_LOCK();
  695         unp->unp_gencnt = ++unp_gencnt;
  696         unp_count++;
  697         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
  698                          : &unp_shead, unp, unp_link);
  699         UNP_UNLOCK();
  700 
  701         return (0);
  702 }
  703 
  704 static void
  705 unp_detach(struct unpcb *unp)
  706 {
  707         struct vnode *vp;
  708 
  709         UNP_LOCK_ASSERT();
  710 
  711         LIST_REMOVE(unp, unp_link);
  712         unp->unp_gencnt = ++unp_gencnt;
  713         --unp_count;
  714         if ((vp = unp->unp_vnode) != NULL) {
  715                 /*
  716                  * XXXRW: should v_socket be frobbed only while holding
  717                  * Giant?
  718                  */
  719                 unp->unp_vnode->v_socket = NULL;
  720                 unp->unp_vnode = NULL;
  721         }
  722         if (unp->unp_conn != NULL)
  723                 unp_disconnect(unp);
  724         while (!LIST_EMPTY(&unp->unp_refs)) {
  725                 struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
  726                 unp_drop(ref, ECONNRESET);
  727         }
  728         soisdisconnected(unp->unp_socket);
  729         unp->unp_socket->so_pcb = NULL;
  730         if (unp_rights) {
  731                 /*
  732                  * Normally the receive buffer is flushed later,
  733                  * in sofree, but if our receive buffer holds references
  734                  * to descriptors that are now garbage, we will dispose
  735                  * of those descriptor references after the garbage collector
  736                  * gets them (resulting in a "panic: closef: count < 0").
  737                  */
  738                 sorflush(unp->unp_socket);
  739                 unp_gc();       /* Will unlock UNP. */
  740         } else
  741                 UNP_UNLOCK();
  742         UNP_UNLOCK_ASSERT();
  743         if (unp->unp_addr != NULL)
  744                 FREE(unp->unp_addr, M_SONAME);
  745         uma_zfree(unp_zone, unp);
  746         if (vp) {
  747                 mtx_lock(&Giant);
  748                 vrele(vp);
  749                 mtx_unlock(&Giant);
  750         }
  751 }
  752 
  753 static int
  754 unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td)
  755 {
  756         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  757         struct vnode *vp;
  758         struct mount *mp;
  759         struct vattr vattr;
  760         int error, namelen;
  761         struct nameidata nd;
  762         char *buf;
  763 
  764         UNP_LOCK_ASSERT();
  765 
  766         /*
  767          * XXXRW: This test-and-set of unp_vnode is non-atomic; the
  768          * unlocked read here is fine, but the value of unp_vnode needs
  769          * to be tested again after we do all the lookups to see if the
  770          * pcb is still unbound?
  771          */
  772         if (unp->unp_vnode != NULL)
  773                 return (EINVAL);
  774 
  775         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  776         if (namelen <= 0)
  777                 return (EINVAL);
  778 
  779         UNP_UNLOCK();
  780 
  781         buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
  782         strlcpy(buf, soun->sun_path, namelen + 1);
  783 
  784         mtx_lock(&Giant);
  785 restart:
  786         mtx_assert(&Giant, MA_OWNED);
  787         NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
  788             buf, td);
  789 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  790         error = namei(&nd);
  791         if (error)
  792                 goto done;
  793         vp = nd.ni_vp;
  794         if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
  795                 NDFREE(&nd, NDF_ONLY_PNBUF);
  796                 if (nd.ni_dvp == vp)
  797                         vrele(nd.ni_dvp);
  798                 else
  799                         vput(nd.ni_dvp);
  800                 if (vp != NULL) {
  801                         vrele(vp);
  802                         error = EADDRINUSE;
  803                         goto done;
  804                 }
  805                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
  806                 if (error)
  807                         goto done;
  808                 goto restart;
  809         }
  810         VATTR_NULL(&vattr);
  811         vattr.va_type = VSOCK;
  812         vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
  813 #ifdef MAC
  814         error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  815             &vattr);
  816 #endif
  817         if (error == 0) {
  818                 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
  819                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  820         }
  821         NDFREE(&nd, NDF_ONLY_PNBUF);
  822         vput(nd.ni_dvp);
  823         if (error) {
  824                 vn_finished_write(mp);
  825                 goto done;
  826         }
  827         vp = nd.ni_vp;
  828         ASSERT_VOP_LOCKED(vp, "unp_bind");
  829         soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
  830         UNP_LOCK();
  831         vp->v_socket = unp->unp_socket;
  832         unp->unp_vnode = vp;
  833         unp->unp_addr = soun;
  834         UNP_UNLOCK();
  835         VOP_UNLOCK(vp, 0, td);
  836         vn_finished_write(mp);
  837 done:
  838         mtx_unlock(&Giant);
  839         free(buf, M_TEMP);
  840         UNP_LOCK();
  841         return (error);
  842 }
  843
      /*
       * Connect the local-domain socket `so' to the socket bound at the
       * filesystem path carried in `nam'.
       *
       * The UNP lock is asserted on entry.  It is dropped around the memory
       * allocation and the name lookup (which run under Giant) and is taken
       * again before every return that follows the drop.
       *
       * For protocols with PR_CONNREQUIRED set, a new socket is spawned from
       * the listening socket with sonewconn(); it inherits the listener's
       * bound address, and peer credentials are recorded on both ends before
       * the two sockets are joined by unp_connect2().
       *
       * Returns 0 on success, otherwise: EINVAL (no path in `nam', or `so' has
       * no pcb once the lock is retaken), the error from namei() or
       * VOP_ACCESS(), ENOTSOCK (the vnode is not a VSOCK), ECONNREFUSED (no
       * socket behind the vnode, peer not accepting, or sonewconn() failed),
       * EPROTOTYPE (socket types differ), or the result of unp_connect2().
       */
  844 static int
  845 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  846 {
  847         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  848         struct vnode *vp;
  849         struct socket *so2, *so3;
  850         struct unpcb *unp, *unp2, *unp3;
  851         int error, len;
  852         struct nameidata nd;
  853         char buf[SOCK_MAXADDRLEN];
  854         struct sockaddr *sa;
  855
  856         UNP_LOCK_ASSERT();
  857         unp = sotounpcb(so);
  858
              /* Length of the path portion of the supplied address. */
  859         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
  860         if (len <= 0)
  861                 return (EINVAL);
  862         strlcpy(buf, soun->sun_path, len + 1);
  863         UNP_UNLOCK();
              /*
               * Pre-allocate the address for a possible new connection while
               * no locks are held.  Size it with SOCK_MAXADDRLEN rather than
               * sizeof(struct sockaddr_un): the bcopy() below copies
               * unp2->unp_addr->sun_len bytes and nothing in this function
               * bounds that length by the size of the structure.
               * NOTE(review): assumes SOCK_MAXADDRLEN is the upper limit of
               * sun_len -- confirm against sys/socket.h and sys/un.h.
               */
  864         sa = malloc(SOCK_MAXADDRLEN, M_SONAME, M_WAITOK);
  865         mtx_lock(&Giant);
  866         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
  867         error = namei(&nd);
  868         if (error)
  869                 vp = NULL;
  870         else
  871                 vp = nd.ni_vp;
  872         ASSERT_VOP_LOCKED(vp, "unp_connect");
  873         NDFREE(&nd, NDF_ONLY_PNBUF);
  874         if (error)
  875                 goto bad;
  876
  877         if (vp->v_type != VSOCK) {
  878                 error = ENOTSOCK;
  879                 goto bad;
  880         }
  881         error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
  882         if (error)
  883                 goto bad;
  884         mtx_unlock(&Giant);
  885         UNP_LOCK();
              /* The pcb is looked up again because the lock was dropped. */
  886         unp = sotounpcb(so);
  887         if (unp == NULL) {
  888                 error = EINVAL;
  889                 goto bad2;
  890         }
  891         so2 = vp->v_socket;
  892         if (so2 == NULL) {
  893                 error = ECONNREFUSED;
  894                 goto bad2;
  895         }
  896         if (so->so_type != so2->so_type) {
  897                 error = EPROTOTYPE;
  898                 goto bad2;
  899         }
  900         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
  901                 if (so2->so_options & SO_ACCEPTCONN) {
  902                         /*
  903                          * NB: drop locks here so unp_attach is entered
  904                          *     w/o locks; this avoids a recursive lock
  905                          *     of the head and holding sleep locks across
  906                          *     a (potentially) blocking malloc.
  907                          */
  908                         UNP_UNLOCK();
  909                         so3 = sonewconn(so2, 0);
  910                         UNP_LOCK();
  911                 } else
  912                         so3 = NULL;
  913                 if (so3 == NULL) {
  914                         error = ECONNREFUSED;
  915                         goto bad2;
  916                 }
  917                 unp = sotounpcb(so);
  918                 unp2 = sotounpcb(so2);
  919                 unp3 = sotounpcb(so3);
  920                 if (unp2->unp_addr != NULL) {
                              /*
                               * Hand the pre-allocated buffer to the new
                               * socket; clearing `sa' keeps the free() at
                               * the end from releasing it.
                               */
  921                         bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
  922                         unp3->unp_addr = (struct sockaddr_un *) sa;
  923                         sa = NULL;
  924                 }
  925                 /*
  926                  * unp_peercred management:
  927                  *
  928                  * The connecter's (client's) credentials are copied
  929                  * from its process structure at the time of connect()
  930                  * (which is now).
  931                  */
  932                 cru2x(td->td_ucred, &unp3->unp_peercred);
  933                 unp3->unp_flags |= UNP_HAVEPC;
  934                 /*
  935                  * The receiver's (server's) credentials are copied
  936                  * from the unp_peercred member of socket on which the
  937                  * former called listen(); unp_listen() cached that
  938                  * process's credentials at that time so we can use
  939                  * them now.
  940                  */
  941                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
  942                     ("unp_connect: listener without cached peercred"));
  943                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
  944                     sizeof(unp->unp_peercred));
  945                 unp->unp_flags |= UNP_HAVEPC;
  946 #ifdef MAC
  947                 SOCK_LOCK(so);
  948                 mac_set_socket_peer_from_socket(so, so3);
  949                 mac_set_socket_peer_from_socket(so3, so);
  950                 SOCK_UNLOCK(so);
  951 #endif
  952
                      /* From here on connect to the new socket, not the listener. */
  953                 so2 = so3;
  954         }
  955         error = unp_connect2(so, so2);
  956 bad2:
              /* Swap the UNP lock for Giant before releasing the vnode. */
  957         UNP_UNLOCK();
  958         mtx_lock(&Giant);
  959 bad:
  960         mtx_assert(&Giant, MA_OWNED);
  961         if (vp != NULL)
  962                 vput(vp);
  963         mtx_unlock(&Giant);
  964         free(sa, M_SONAME);
  965         UNP_LOCK();
  966         return (error);
  967 }
  968
      /*
       * Link the protocol control blocks of `so' and `so2'.
       *
       * Returns EPROTOTYPE if the two sockets are of different types and 0
       * otherwise.  so's pcb is always pointed at so2's pcb.  For SOCK_DGRAM
       * the connecting pcb is additionally put on the peer's unp_refs list and
       * only `so' is marked connected; for SOCK_STREAM the peer is pointed
       * back at `so' and both sockets are marked connected.  Any other socket
       * type panics.
       *
       * UNP_LOCK_ASSERT() is checked on entry; unp_connect() calls this
       * function between UNP_LOCK() and UNP_UNLOCK().
       */
  969 static int
  970 unp_connect2(struct socket *so, struct socket *so2)
  971 {
  972         struct unpcb *unp = sotounpcb(so);
  973         struct unpcb *unp2;
  974
  975         UNP_LOCK_ASSERT();
  976
  977         if (so2->so_type != so->so_type)
  978                 return (EPROTOTYPE);
  979         unp2 = sotounpcb(so2);
  980         unp->unp_conn = unp2;
  981         switch (so->so_type) {
  982
  983         case SOCK_DGRAM:
                      /* One-way link: the peer only records who refers to it. */
  984                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
  985                 soisconnected(so);
  986                 break;
  987
  988         case SOCK_STREAM:
                      /* Two-way link: each pcb points at the other. */
  989                 unp2->unp_conn = unp;
  990                 soisconnected(so);
  991                 soisconnected(so2);
  992                 break;
  993
  994         default:
  995                 panic("unp_connect2");
  996         }
  997         return (0);
  998 }
  999
      /*
       * Undo the association set up by unp_connect2().
       *
       * Does nothing when `unp' has no peer.  Otherwise unp->unp_conn is
       * cleared and, depending on the type of unp's socket:
       *  - SOCK_DGRAM: `unp' is taken off the list it was linked on through
       *    unp_reflink and SS_ISCONNECTED is cleared on its socket under the
       *    socket lock; the peer is otherwise left alone.
       *  - SOCK_STREAM: the peer's unp_conn is cleared as well and both
       *    sockets are passed to soisdisconnected().
       * Other socket types get no further processing (the switch has no
       * default case).
       */
 1000 static void
 1001 unp_disconnect(struct unpcb *unp)
 1002 {
 1003         struct unpcb *unp2 = unp->unp_conn;
 1004         struct socket *so;
 1005
 1006         UNP_LOCK_ASSERT();
 1007
 1008         if (unp2 == NULL)
 1009                 return;
 1010         unp->unp_conn = NULL;
 1011         switch (unp->unp_socket->so_type) {
 1012
 1013         case SOCK_DGRAM:
 1014                 LIST_REMOVE(unp, unp_reflink);
 1015                 so = unp->unp_socket;
 1016                 SOCK_LOCK(so);
 1017                 so->so_state &= ~SS_ISCONNECTED;
 1018                 SOCK_UNLOCK(so);
 1019                 break;
 1020
 1021         case SOCK_STREAM:
 1022                 soisdisconnected(unp->unp_socket);
 1023                 unp2->unp_conn = NULL;
 1024                 soisdisconnected(unp2->unp_socket);
 1025                 break;
 1026         }
 1027 }
 1028 
 1029 #ifdef notdef
      /*
       * Only compiled when `notdef' is defined.  Hands the pcb to
       * unp_detach() and then checks UNP_UNLOCK_ASSERT().
       * NOTE(review): this presumably relies on unp_detach() releasing the
       * UNP lock -- confirm against unp_detach().
       */
 1030 void
 1031 unp_abort(struct unpcb *unp)
 1032 {
 1033
 1034         unp_detach(unp);
 1035         UNP_UNLOCK_ASSERT();
 1036 }
 1037 #endif
 1038 
 1039 /*
 1040  * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed
 1041  * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers
 1042  * are safe to reference.  It first scans the list of struct unpcb's to
 1043  * generate a pointer list, then it rescans its list one entry at a time to
 1044  * externalize and copyout.  It checks the generation number to see if a
 1045  * struct unpcb has been reused, and will skip it if so.
       *
       * arg1 selects the list: SOCK_DGRAM walks unp_dhead, anything else walks
       * unp_shead.  The output is a struct xunpgen header, one struct xunpcb
       * per exported pcb, and a trailing struct xunpgen holding the counters
       * as they stand at the end of the request.
       *
       * When the caller passes no output buffer only a size estimate is
       * stored in req->oldidx.  An attempt to write the node fails with
       * EPERM.  Otherwise the return value is 0 or the first error reported
       * by SYSCTL_OUT().
 1046  */
 1047 static int
 1048 unp_pcblist(SYSCTL_HANDLER_ARGS)
 1049 {
 1050         int error, i, n;
 1051         struct unpcb *unp, **unp_list;
 1052         unp_gen_t gencnt;
 1053         struct xunpgen *xug;
 1054         struct unp_head *head;
 1055         struct xunpcb *xu;
 1056
 1057         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
 1058
 1059         /*
 1060          * The process of preparing the PCB list is too time-consuming and
 1061          * resource-intensive to repeat twice on every request.
 1062          */
 1063         if (req->oldptr == NULL) {
                      /* Size probe: two headers plus the pcbs and 1/8 slack. */
 1064                 n = unp_count;
 1065                 req->oldidx = 2 * (sizeof *xug)
 1066                         + (n + n/8) * sizeof(struct xunpcb);
 1067                 return (0);
 1068         }
 1069
 1070         if (req->newptr != NULL)
 1071                 return (EPERM);
 1072
 1073         /*
 1074          * OK, now we're committed to doing something.
 1075          */
 1076         xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
 1077         UNP_LOCK();
 1078         gencnt = unp_gencnt;
 1079         n = unp_count;
 1080         UNP_UNLOCK();
 1081
 1082         xug->xug_len = sizeof *xug;
 1083         xug->xug_count = n;
 1084         xug->xug_gen = gencnt;
 1085         xug->xug_sogen = so_gencnt;
 1086         error = SYSCTL_OUT(req, xug, sizeof *xug);
 1087         if (error) {
 1088                 free(xug, M_TEMP);
 1089                 return (error);
 1090         }
 1091
 1092         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 1093
              /*
               * Pass 1: under the UNP lock, collect at most n pcbs that are no
               * newer than the snapshot generation and that the requesting
               * credential passes cr_cansee() for.
               */
 1094         UNP_LOCK();
 1095         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 1096              unp = LIST_NEXT(unp, unp_link)) {
 1097                 if (unp->unp_gencnt <= gencnt) {
 1098                         if (cr_cansee(req->td->td_ucred,
 1099                             unp->unp_socket->so_cred))
 1100                                 continue;
 1101                         unp_list[i++] = unp;
 1102                 }
 1103         }
 1104         UNP_UNLOCK();
 1105         n = i;                  /* in case we lost some during malloc */
 1106
              /*
               * Pass 2: externalize and copy out each collected pcb, without
               * the UNP lock.  Stop at the first SYSCTL_OUT() failure so that
               * a later successful copy cannot hide the error.
               */
 1107         error = 0;
 1108         xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 1109         for (i = 0; i < n && error == 0; i++) {
 1110                 unp = unp_list[i];
 1111                 if (unp->unp_gencnt <= gencnt) {
                              /*
                               * xu is reused for every entry; clear it so
                               * that an entry without a bound or peer
                               * address does not export the bytes copied
                               * for a previous entry.
                               */
                              bzero(xu, sizeof(*xu));
 1112                         xu->xu_len = sizeof *xu;
 1113                         xu->xu_unpp = unp;
 1114                         /*
 1115                          * XXX - need more locking here to protect against
 1116                          * connect/disconnect races for SMP.
 1117                          */
 1118                         if (unp->unp_addr != NULL)
 1119                                 bcopy(unp->unp_addr, &xu->xu_addr,
 1120                                       unp->unp_addr->sun_len);
 1121                         if (unp->unp_conn != NULL &&
 1122                             unp->unp_conn->unp_addr != NULL)
 1123                                 bcopy(unp->unp_conn->unp_addr,
 1124                                       &xu->xu_caddr,
 1125                                       unp->unp_conn->unp_addr->sun_len);
 1126                         bcopy(unp, &xu->xu_unp, sizeof *unp);
 1127                         sotoxsocket(unp->unp_socket, &xu->xu_socket);
 1128                         error = SYSCTL_OUT(req, xu, sizeof *xu);
 1129                 }
 1130         }
 1131         free(xu, M_TEMP);
 1132         if (!error) {
 1133                 /*
 1134                  * Give the user an updated idea of our state.
 1135                  * If the generation differs from what we told
 1136                  * her before, she knows that something happened
 1137                  * while we were processing this request, and it
 1138                  * might be necessary to retry.
 1139                  */
 1140                 xug->xug_gen = unp_gencnt;
 1141                 xug->xug_sogen = so_gencnt;
 1142                 xug->xug_count = unp_count;
 1143                 error = SYSCTL_OUT(req, xug, sizeof *xug);
 1144         }
 1145         free(unp_list, M_TEMP);
 1146         free(xug, M_TEMP);
 1147         return (error);
 1148 }
 1149
      /*
       * Read-only (CTLFLAG_RD) "pcblist" nodes under _net_local_dgram and
       * _net_local_stream.  Both are served by unp_pcblist(); the socket type
       * is passed as the handler's first argument, which unp_pcblist() uses to
       * pick the pcb list to walk.
       */
 1150 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
 1151             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
 1152             "List of active local datagram sockets");
 1153 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
 1154             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
 1155             "List of active local stream sockets");
 1156
      /*
       * Propagate a shutdown to the peer: if unp's socket is a SOCK_STREAM
       * socket, it has a connected pcb, and that pcb still has a socket, call
       * socantrcvmore() on the peer's socket.  In every other case nothing is
       * done.
       */
 1157 static void
 1158 unp_shutdown(struct unpcb *unp)
 1159 {
 1160         struct socket *so;
 1161
 1162         UNP_LOCK_ASSERT();
 1163
              /* The assignment to `so' inside the condition is intentional. */
 1164         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
 1165             (so = unp->unp_conn->unp_socket))
 1166                 socantrcvmore(so);
 1167 }
 1168
      /*
       * Record `errno' as the pending error (so_error) on unp's socket and
       * then disconnect the pcb from its peer with unp_disconnect().
       */
 1169 static void
 1170 unp_drop(struct unpcb *unp, int errno)
 1171 {
 1172         struct socket *so = unp->unp_socket;
 1173
 1174         UNP_LOCK_ASSERT();
 1175
 1176         so->so_error = errno;
 1177         unp_disconnect(unp);
 1178 }
 1179 
 1180 #ifdef notdef
      /*
       * Only compiled when `notdef' is defined.  Empty placeholder: the body
       * contains no statements.
       */
 1181 void
 1182 unp_drain(void)
 1183 {
 1184
 1185 }
 1186 #endif
 1187
      /*
       * Release `fdcount' file references stored as an array of struct file
       * pointers starting at `rp'.  Each slot is cleared and the file that was
       * in it is handed to unp_discard().
       */
 1188 static void
 1189 unp_freerights(struct file **rp, int fdcount)
 1190 {
 1191         int i;
 1192         struct file *fp;
 1193
 1194         for (i = 0; i < fdcount; i++) {
 1195                 fp = *rp;
 1196                 /*
 1197                  * zero the pointer before calling
 1198                  * unp_discard since it may end up
 1199                  * in unp_gc()..
 1200                  */
 1201                 *rp++ = 0;
 1202                 unp_discard(fp);
 1203         }
 1204 }
 1205
      /*
       * Convert the control messages in `control' from their in-kernel form
       * into the form handed to the receiving thread (curthread).
       *
       * SOL_SOCKET/SCM_RIGHTS messages carry struct file pointers.  Each one is
       * installed in a freshly allocated descriptor of the receiving process
       * and replaced in the output by that descriptor number; the file's
       * f_msgcount and the global unp_rights are decremented as it leaves the
       * "in flight" state.  All other messages are copied unchanged.
       *
       * control   input mbuf; always freed before returning.
       * controlp  where to store the chain of converted messages.  When NULL
       *           nothing is output and any rights in `control' are released
       *           with unp_freerights().
       *
       * Returns 0 or EINVAL (malformed header), EMSGSIZE (descriptors do not
       * fit in the receiver's table), E2BIG or ENOBUFS (sbcreatecontrol()
       * failed).  Once an error is set, rights in the remaining messages are
       * released instead of delivered.
       */
 1206 int
 1207 unp_externalize(struct mbuf *control, struct mbuf **controlp)
 1208 {
 1209         struct thread *td = curthread;          /* XXX */
 1210         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1211         int i;
 1212         int *fdp;
 1213         struct file **rp;
 1214         struct file *fp;
 1215         void *data;
 1216         socklen_t clen = control->m_len, datalen;
 1217         int error, newfds;
 1218         int f;
 1219         u_int newlen;
 1220
 1221         UNP_UNLOCK_ASSERT();
 1222
 1223         error = 0;
 1224         if (controlp != NULL) /* controlp == NULL => free control messages */
 1225                 *controlp = NULL;
 1226
 1227         while (cm != NULL) {
 1228                 if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
 1229                         error = EINVAL;
 1230                         break;
 1231                 }
 1232
 1233                 data = CMSG_DATA(cm);
 1234                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1235
 1236                 if (cm->cmsg_level == SOL_SOCKET
 1237                     && cm->cmsg_type == SCM_RIGHTS) {
 1238                         newfds = datalen / sizeof(struct file *);
 1239                         rp = data;
 1240
 1241                         /* If we're not outputting the descriptors free them. */
 1242                         if (error || controlp == NULL) {
 1243                                 unp_freerights(rp, newfds);
 1244                                 goto next;
 1245                         }
 1246                         FILEDESC_LOCK(td->td_proc->p_fd);
 1247                         /* if the new FD's will not fit free them.  */
 1248                         if (!fdavail(td, newfds)) {
 1249                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1250                                 error = EMSGSIZE;
 1251                                 unp_freerights(rp, newfds);
 1252                                 goto next;
 1253                         }
 1254                         /*
 1255                          * now change each pointer to an fd in the global
 1256                          * table to an integer that is the index to the
 1257                          * local fd table entry that we set up to point
 1258                          * to the global one we are transferring.
 1259                          */
 1260                         newlen = newfds * sizeof(int);
 1261                         *controlp = sbcreatecontrol(NULL, newlen,
 1262                             SCM_RIGHTS, SOL_SOCKET);
 1263                         if (*controlp == NULL) {
 1264                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1265                                 error = E2BIG;
 1266                                 unp_freerights(rp, newfds);
 1267                                 goto next;
 1268                         }
 1269
 1270                         fdp = (int *)
 1271                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1272                         for (i = 0; i < newfds; i++) {
                                      /*
                                       * fdavail() succeeded above with the
                                       * descriptor table still locked, so a
                                       * failure here is treated as fatal.
                                       */
 1273                                 if (fdalloc(td, 0, &f))
 1274                                         panic("unp_externalize fdalloc failed");
 1275                                 fp = *rp++;
 1276                                 td->td_proc->p_fd->fd_ofiles[f] = fp;
 1277                                 FILE_LOCK(fp);
 1278                                 fp->f_msgcount--;
 1279                                 FILE_UNLOCK(fp);
 1280                                 unp_rights--;
 1281                                 *fdp++ = f;
 1282                         }
 1283                         FILEDESC_UNLOCK(td->td_proc->p_fd);
 1284                 } else { /* We can just copy anything else across */
 1285                         if (error || controlp == NULL)
 1286                                 goto next;
 1287                         *controlp = sbcreatecontrol(NULL, datalen,
 1288                             cm->cmsg_type, cm->cmsg_level);
 1289                         if (*controlp == NULL) {
 1290                                 error = ENOBUFS;
 1291                                 goto next;
 1292                         }
 1293                         bcopy(data,
 1294                             CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 1295                             datalen);
 1296                 }
 1297
                      /* A message was emitted; the next one is chained after it. */
 1298                 controlp = &(*controlp)->m_next;
 1299
 1300 next:
                      /* Step to the following header, or stop at the end. */
 1301                 if (CMSG_SPACE(datalen) < clen) {
 1302                         clen -= CMSG_SPACE(datalen);
 1303                         cm = (struct cmsghdr *)
 1304                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1305                 } else {
 1306                         clen = 0;
 1307                         cm = NULL;
 1308                 }
 1309         }
 1310
 1311         m_freem(control);
 1312
 1313         return (error);
 1314 }
 1315
      /*
       * One-time setup: create the "unpcb" UMA zone (pointer aligned, flagged
       * UMA_ZONE_NOFREE) and panic if that fails, cap the zone at nmbclusters
       * items, initialize the datagram and stream pcb lists, and initialize
       * the UNP lock.
       */
 1316 void
 1317 unp_init(void)
 1318 {
 1319         unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
 1320             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 1321         if (unp_zone == NULL)
 1322                 panic("unp_init");
 1323         uma_zone_set_max(unp_zone, nmbclusters);
 1324         LIST_INIT(&unp_dhead);
 1325         LIST_INIT(&unp_shead);
 1326
 1327         UNP_LOCK_INIT();
 1328 }
 1329
      /*
       * Convert the control messages supplied by a sender into the form that
       * is queued on the receiving socket.
       *
       * Only SOL_SOCKET messages are accepted:
       *  - SCM_CREDS: the payload is replaced by a struct cmsgcred filled in
       *    from the sending process and thread credential.
       *  - SCM_RIGHTS: every descriptor number is validated and replaced by
       *    the corresponding struct file pointer; the file's f_count and
       *    f_msgcount and the global unp_rights are incremented.
       *  - SCM_TIMESTAMP: the payload is replaced by the current microtime().
       *
       * controlp  in: the sender's control mbuf, which is always freed;
       *           out: the chain of converted messages (NULL if none).
       * td        the sending thread.
       *
       * Returns 0 or EINVAL (malformed header, wrong level, unknown type),
       * ENOBUFS or E2BIG (sbcreatecontrol() failed), EBADF (descriptor not
       * open), EOPNOTSUPP (file type is not DFLAG_PASSABLE).
       *
       * NOTE(review): on an error return, messages converted before the
       * failing one stay linked from the caller's pointer, and rights among
       * them keep the references taken here -- confirm that callers dispose
       * of them.
       */
 1330 static int
 1331 unp_internalize(struct mbuf **controlp, struct thread *td)
 1332 {
 1333         struct mbuf *control = *controlp;
 1334         struct proc *p = td->td_proc;
 1335         struct filedesc *fdescp = p->p_fd;
 1336         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1337         struct cmsgcred *cmcred;
 1338         struct file **rp;
 1339         struct file *fp;
 1340         struct timeval *tv;
 1341         int i, fd, *fdp;
 1342         void *data;
 1343         socklen_t clen = control->m_len, datalen;
 1344         int error, oldfds;
 1345         u_int newlen;
 1346
 1347         UNP_UNLOCK_ASSERT();
 1348
 1349         error = 0;
 1350         *controlp = NULL;
 1351
 1352         while (cm != NULL) {
                      /*
                       * cmsg_len is supplied by the sender.  It must cover at
                       * least the (aligned) header: datalen below is unsigned
                       * and would otherwise wrap to a huge value, which the
                       * SCM_RIGHTS case would turn into a descriptor count.
                       */
 1353                 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 1354                     || cm->cmsg_len > clen || cm->cmsg_len < CMSG_LEN(0)) {
 1355                         error = EINVAL;
 1356                         goto out;
 1357                 }
 1358
 1359                 data = CMSG_DATA(cm);
 1360                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1361
 1362                 switch (cm->cmsg_type) {
 1363                 /*
 1364                  * Fill in credential information.
 1365                  */
 1366                 case SCM_CREDS:
 1367                         *controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 1368                             SCM_CREDS, SOL_SOCKET);
 1369                         if (*controlp == NULL) {
 1370                                 error = ENOBUFS;
 1371                                 goto out;
 1372                         }
 1373
 1374                         cmcred = (struct cmsgcred *)
 1375                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1376                         cmcred->cmcred_pid = p->p_pid;
 1377                         cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 1378                         cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 1379                         cmcred->cmcred_euid = td->td_ucred->cr_uid;
 1380                         cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 1381                                                         CMGROUP_MAX);
 1382                         for (i = 0; i < cmcred->cmcred_ngroups; i++)
 1383                                 cmcred->cmcred_groups[i] =
 1384                                     td->td_ucred->cr_groups[i];
 1385                         break;
 1386
 1387                 case SCM_RIGHTS:
 1388                         oldfds = datalen / sizeof (int);
 1389                         /*
 1390                          * check that all the FDs passed in refer to legal files
 1391                          * If not, reject the entire operation.
 1392                          */
 1393                         fdp = data;
 1394                         FILEDESC_LOCK(fdescp);
 1395                         for (i = 0; i < oldfds; i++) {
 1396                                 fd = *fdp++;
 1397                                 if ((unsigned)fd >= fdescp->fd_nfiles ||
 1398                                     fdescp->fd_ofiles[fd] == NULL) {
 1399                                         FILEDESC_UNLOCK(fdescp);
 1400                                         error = EBADF;
 1401                                         goto out;
 1402                                 }
 1403                                 fp = fdescp->fd_ofiles[fd];
 1404                                 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 1405                                         FILEDESC_UNLOCK(fdescp);
 1406                                         error = EOPNOTSUPP;
 1407                                         goto out;
 1408                                 }
 1409
 1410                         }
 1411                         /*
 1412                          * Now replace the integer FDs with pointers to
 1413                          * the associated global file table entry..
 1414                          */
 1415                         newlen = oldfds * sizeof(struct file *);
 1416                         *controlp = sbcreatecontrol(NULL, newlen,
 1417                             SCM_RIGHTS, SOL_SOCKET);
 1418                         if (*controlp == NULL) {
 1419                                 FILEDESC_UNLOCK(fdescp);
 1420                                 error = E2BIG;
 1421                                 goto out;
 1422                         }
 1423
                              /*
                               * The descriptor table has stayed locked since
                               * the validation loop, so the entries checked
                               * above are the ones referenced here.
                               */
 1424                         fdp = data;
 1425                         rp = (struct file **)
 1426                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1427                         for (i = 0; i < oldfds; i++) {
 1428                                 fp = fdescp->fd_ofiles[*fdp++];
 1429                                 *rp++ = fp;
 1430                                 FILE_LOCK(fp);
 1431                                 fp->f_count++;
 1432                                 fp->f_msgcount++;
 1433                                 FILE_UNLOCK(fp);
 1434                                 unp_rights++;
 1435                         }
 1436                         FILEDESC_UNLOCK(fdescp);
 1437                         break;
 1438
 1439                 case SCM_TIMESTAMP:
 1440                         *controlp = sbcreatecontrol(NULL, sizeof(*tv),
 1441                             SCM_TIMESTAMP, SOL_SOCKET);
 1442                         if (*controlp == NULL) {
 1443                                 error = ENOBUFS;
 1444                                 goto out;
 1445                         }
 1446                         tv = (struct timeval *)
 1447                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1448                         microtime(tv);
 1449                         break;
 1450
 1451                 default:
 1452                         error = EINVAL;
 1453                         goto out;
 1454                 }
 1455
                      /* The next converted message is chained after this one. */
 1456                 controlp = &(*controlp)->m_next;
 1457
                      /* Step to the following header, or stop at the end. */
 1458                 if (CMSG_SPACE(datalen) < clen) {
 1459                         clen -= CMSG_SPACE(datalen);
 1460                         cm = (struct cmsghdr *)
 1461                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1462                 } else {
 1463                         clen = 0;
 1464                         cm = NULL;
 1465                 }
 1466         }
 1467
 1468 out:
 1469         m_freem(control);
 1470
 1471         return (error);
 1472 }
 1473 
 1474 /*
 1475  * unp_defer is thread-local during garbage collection, and does not require
 1476  * explicit synchronization.  unp_gcing prevents other threads from entering
 1477  * garbage collection, and perhaps should be an sx lock instead.
       *
       * unp_gc() returns at once when unp_gcing is already set; otherwise it
       * sets unp_gcing, resets unp_defer to zero, and repeats its marking pass
       * for as long as unp_defer is non-zero.
 1478  */
 1479 static int      unp_defer, unp_gcing;
 1480 
 1481 static void
 1482 unp_gc(void)
 1483 {
 1484         struct file *fp, *nextfp;
 1485         struct socket *so;
 1486         struct file **extra_ref, **fpp;
 1487         int nunref, i;
 1488         int nfiles_snap;
 1489         int nfiles_slack = 20;
 1490 
 1491         UNP_LOCK_ASSERT();
 1492 
 1493         if (unp_gcing) {
 1494                 UNP_UNLOCK();
 1495                 return;
 1496         }
 1497         unp_gcing = 1;
 1498         unp_defer = 0;
 1499         UNP_UNLOCK();
 1500         /*
 1501          * before going through all this, set all FDs to
 1502          * be NOT defered and NOT externally accessible
 1503          */
 1504         sx_slock(&filelist_lock);
 1505         LIST_FOREACH(fp, &filehead, f_list)
 1506                 fp->f_gcflag &= ~(FMARK|FDEFER);
 1507         do {
 1508                 LIST_FOREACH(fp, &filehead, f_list) {
 1509                         FILE_LOCK(fp);
 1510                         /*
 1511                          * If the file is not open, skip it
 1512                          */
 1513                         if (fp->f_count == 0) {
 1514                                 FILE_UNLOCK(fp);
 1515                                 continue;
 1516                         }
 1517                         /*
 1518                          * If we already marked it as 'defer'  in a
 1519                          * previous pass, then try process it this time
 1520                          * and un-mark it
 1521                          */
 1522                         if (fp->f_gcflag & FDEFER) {
 1523                                 fp->f_gcflag &= ~FDEFER;
 1524                                 unp_defer--;
 1525                         } else {
 1526                                 /*
 1527                                  * if it's not defered, then check if it's
 1528                                  * already marked.. if so skip it
 1529                                  */
 1530                                 if (fp->f_gcflag & FMARK) {
 1531                                         FILE_UNLOCK(fp);
 1532                                         continue;
 1533                                 }
 1534                                 /*
 1535                                  * If all references are from messages
 1536                                  * in transit, then skip it. it's not
 1537                                  * externally accessible.
 1538                                  */
 1539                                 if (fp->f_count == fp->f_msgcount) {
 1540                                         FILE_UNLOCK(fp);
 1541                                         continue;
 1542                                 }
 1543                                 /*
 1544                                  * If it got this far then it must be
 1545                                  * externally accessible.
 1546                                  */
 1547                                 fp->f_gcflag |= FMARK;
 1548                         }
 1549                         /*
 1550                          * either it was defered, or it is externally
 1551                          * accessible and not already marked so.
 1552                          * Now check if it is possibly one of OUR sockets.
 1553                          */
 1554                         if (fp->f_type != DTYPE_SOCKET ||
 1555                             (so = fp->f_data) == NULL) {
 1556                                 FILE_UNLOCK(fp);
 1557                                 continue;
 1558                         }
 1559                         FILE_UNLOCK(fp);
 1560                         if (so->so_proto->pr_domain != &localdomain ||
 1561                             (so->so_proto->pr_flags&PR_RIGHTS) == 0)
 1562                                 continue;
 1563 #ifdef notdef
 1564                         if (so->so_rcv.sb_flags & SB_LOCK) {
 1565                                 /*
 1566                                  * This is problematical; it's not clear
 1567                                  * we need to wait for the sockbuf to be
 1568                                  * unlocked (on a uniprocessor, at least),
 1569                                  * and it's also not clear what to do
 1570                                  * if sbwait returns an error due to receipt
 1571                                  * of a signal.  If sbwait does return
 1572                                  * an error, we'll go into an infinite
 1573                                  * loop.  Delete all of this for now.
 1574                                  */
 1575                                 (void) sbwait(&so->so_rcv);
 1576                                 goto restart;
 1577                         }
 1578 #endif
 1579                         /*
 1580                          * So, Ok, it's one of our sockets and it IS externally
 1581                          * accessible (or was defered). Now we look
 1582                          * to see if we hold any file descriptors in its
 1583                          * message buffers. Follow those links and mark them
 1584                          * as accessible too.
 1585                          */
 1586                         SOCKBUF_LOCK(&so->so_rcv);
 1587                         unp_scan(so->so_rcv.sb_mb, unp_mark);
 1588                         SOCKBUF_UNLOCK(&so->so_rcv);
 1589                 }
 1590         } while (unp_defer);
 1591         sx_sunlock(&filelist_lock);
 1592         /*
 1593          * We grab an extra reference to each of the file table entries
 1594          * that are not otherwise accessible and then free the rights
 1595          * that are stored in messages on them.
 1596          *
 1597          * The bug in the original code is a little tricky, so I'll describe
 1598          * what's wrong with it here.
 1599          *
 1600          * It is incorrect to simply unp_discard each entry for f_msgcount
 1601          * times -- consider the case of sockets A and B that contain
 1602          * references to each other.  On a last close of some other socket,
 1603          * we trigger a gc since the number of outstanding rights (unp_rights)
 1604          * is non-zero.  If during the sweep phase the gc code unp_discards,
 1605          * we end up doing a (full) closef on the descriptor.  A closef on A
 1606          * results in the following chain.  Closef calls soo_close, which
 1607          * calls soclose.   Soclose calls first (through the switch
 1608          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
 1609          * returns because the previous instance had set unp_gcing, and
 1610          * we return all the way back to soclose, which marks the socket
 1611          * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
 1612          * to free up the rights that are queued in messages on the socket A,
 1613          * i.e., the reference on B.  The sorflush calls via the dom_dispose
 1614          * switch unp_dispose, which unp_scans with unp_discard.  This second
 1615          * instance of unp_discard just calls closef on B.
 1616          *
 1617          * Well, a similar chain occurs on B, resulting in a sorflush on B,
 1618          * which results in another closef on A.  Unfortunately, A is already
 1619          * being closed, and the descriptor has already been marked with
 1620          * SS_NOFDREF, and soclose panics at this point.
 1621          *
 1622          * Here, we first take an extra reference to each inaccessible
 1623          * descriptor.  Then, we call sorflush ourself, since we know
 1624          * it is a Unix domain socket anyhow.  After we destroy all the
 1625          * rights carried in messages, we do a last closef to get rid
 1626          * of our extra reference.  This is the last close, and the
 1627          * unp_detach etc will shut down the socket.
 1628          *
 1629          * 91/09/19, bsy@cs.cmu.edu
 1630          */
 1631 again:
 1632         nfiles_snap = nfiles + nfiles_slack;    /* some slack */
 1633         extra_ref = malloc(nfiles_snap * sizeof(struct file *), M_TEMP,
 1634             M_WAITOK);
 1635         sx_slock(&filelist_lock);
 1636         if (nfiles_snap < nfiles) {
 1637                 sx_sunlock(&filelist_lock);
 1638                 free(extra_ref, M_TEMP);
 1639                 nfiles_slack += 20;
 1640                 goto again;
 1641         }
 1642         for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
 1643             fp != NULL; fp = nextfp) {
 1644                 nextfp = LIST_NEXT(fp, f_list);
 1645                 FILE_LOCK(fp);
 1646                 /*
 1647                  * If it's not open, skip it
 1648                  */
 1649                 if (fp->f_count == 0) {
 1650                         FILE_UNLOCK(fp);
 1651                         continue;
 1652                 }
 1653                 /*
 1654                  * If all refs are from msgs, and it's not marked accessible
 1655                  * then it must be referenced from some unreachable cycle
 1656                  * of (shut-down) FDs, so include it in our
 1657                  * list of FDs to remove
 1658                  */
 1659                 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) {
 1660                         *fpp++ = fp;
 1661                         nunref++;
 1662                         fp->f_count++;
 1663                 }
 1664                 FILE_UNLOCK(fp);
 1665         }
 1666         sx_sunlock(&filelist_lock);
 1667         /*
 1668          * for each FD on our hit list, do the following two things
 1669          */
 1670         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 1671                 struct file *tfp = *fpp;
 1672                 FILE_LOCK(tfp);
 1673                 if (tfp->f_type == DTYPE_SOCKET &&
 1674                     tfp->f_data != NULL) {
 1675                         FILE_UNLOCK(tfp);
 1676                         sorflush(tfp->f_data);
 1677                 } else {
 1678                         FILE_UNLOCK(tfp);
 1679                 }
 1680         }
 1681         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
 1682                 closef(*fpp, (struct thread *) NULL);
 1683         free(extra_ref, M_TEMP);
 1684         unp_gcing = 0;
 1685 
 1686         UNP_UNLOCK_ASSERT();
 1687 }
 1688 
 1689 void
 1690 unp_dispose(struct mbuf *m)
 1691 {
 1692 
 1693         if (m)
 1694                 unp_scan(m, unp_discard);
 1695 }
 1696 
 1697 static int
 1698 unp_listen(struct socket *so, struct unpcb *unp, struct thread *td)
 1699 {
 1700         int error;
 1701 
 1702         UNP_LOCK_ASSERT();
 1703 
 1704         SOCK_LOCK(so);
 1705         error = solisten_proto_check(so);
 1706         if (error == 0) {
 1707                 cru2x(td->td_ucred, &unp->unp_peercred);
 1708                 unp->unp_flags |= UNP_HAVEPCCACHED;
 1709                 solisten_proto(so);
 1710         }
 1711         SOCK_UNLOCK(so);
 1712         return (error);
 1713 }
 1714 
 1715 static void
 1716 unp_scan(struct mbuf *m0, void (*op)(struct file *))
 1717 {
 1718         struct mbuf *m;
 1719         struct file **rp;
 1720         struct cmsghdr *cm;
 1721         void *data;
 1722         int i;
 1723         socklen_t clen, datalen;
 1724         int qfds;
 1725 
 1726         while (m0 != NULL) {
 1727                 for (m = m0; m; m = m->m_next) {
 1728                         if (m->m_type != MT_CONTROL)
 1729                                 continue;
 1730 
 1731                         cm = mtod(m, struct cmsghdr *);
 1732                         clen = m->m_len;
 1733 
 1734                         while (cm != NULL) {
 1735                                 if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 1736                                         break;
 1737 
 1738                                 data = CMSG_DATA(cm);
 1739                                 datalen = (caddr_t)cm + cm->cmsg_len
 1740                                     - (caddr_t)data;
 1741 
 1742                                 if (cm->cmsg_level == SOL_SOCKET &&
 1743                                     cm->cmsg_type == SCM_RIGHTS) {
 1744                                         qfds = datalen / sizeof (struct file *);
 1745                                         rp = data;
 1746                                         for (i = 0; i < qfds; i++)
 1747                                                 (*op)(*rp++);
 1748                                 }
 1749 
 1750                                 if (CMSG_SPACE(datalen) < clen) {
 1751                                         clen -= CMSG_SPACE(datalen);
 1752                                         cm = (struct cmsghdr *)
 1753                                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1754                                 } else {
 1755                                         clen = 0;
 1756                                         cm = NULL;
 1757                                 }
 1758                         }
 1759                 }
 1760                 m0 = m0->m_act;
 1761         }
 1762 }
 1763 
 1764 static void
 1765 unp_mark(struct file *fp)
 1766 {
 1767         if (fp->f_gcflag & FMARK)
 1768                 return;
 1769         unp_defer++;
 1770         fp->f_gcflag |= (FMARK|FDEFER);
 1771 }
 1772 
 1773 static void
 1774 unp_discard(struct file *fp)
 1775 {
 1776         FILE_LOCK(fp);
 1777         fp->f_msgcount--;
 1778         unp_rights--;
 1779         FILE_UNLOCK(fp);
 1780         (void) closef(fp, (struct thread *)NULL);
 1781 }

Cache object: fb50c37a5d7ad627be10aaa0285ab474


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.