The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright 2004 Robert N. M. Watson
    3  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    4  *      The Regents of the University of California.  All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  * 4. Neither the name of the University nor the names of its contributors
   15  *    may be used to endorse or promote products derived from this software
   16  *    without specific prior written permission.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   28  * SUCH DAMAGE.
   29  *
   30  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   31  */
   32 
   33 #include <sys/cdefs.h>
   34 __FBSDID("$FreeBSD: releng/5.3/sys/kern/uipc_usrreq.c 146003 2005-05-08 10:23:51Z cperciva $");
   35 
   36 #include "opt_mac.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/domain.h>
   40 #include <sys/fcntl.h>
   41 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
   42 #include <sys/file.h>
   43 #include <sys/filedesc.h>
   44 #include <sys/jail.h>
   45 #include <sys/kernel.h>
   46 #include <sys/lock.h>
   47 #include <sys/mac.h>
   48 #include <sys/mbuf.h>
   49 #include <sys/mutex.h>
   50 #include <sys/namei.h>
   51 #include <sys/proc.h>
   52 #include <sys/protosw.h>
   53 #include <sys/resourcevar.h>
   54 #include <sys/socket.h>
   55 #include <sys/socketvar.h>
   56 #include <sys/signalvar.h>
   57 #include <sys/stat.h>
   58 #include <sys/sx.h>
   59 #include <sys/sysctl.h>
   60 #include <sys/systm.h>
   61 #include <sys/un.h>
   62 #include <sys/unpcb.h>
   63 #include <sys/vnode.h>
   64 
   65 #include <vm/uma.h>
   66 
   67 static uma_zone_t unp_zone;
   68 static  unp_gen_t unp_gencnt;
   69 static  u_int unp_count;
   70 
   71 static  struct unp_head unp_shead, unp_dhead;
   72 
   73 /*
   74  * Unix communications domain.
   75  *
   76  * TODO:
   77  *      SEQPACKET, RDM
   78  *      rethink name space problems
   79  *      need a proper out-of-band
   80  *      lock pushdown
   81  */
   82 static const struct     sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
   83 static ino_t    unp_ino;                /* prototype for fake inode numbers */
   84 
   85 /*
   86  * Currently, UNIX domain sockets are protected by a single subsystem lock,
   87  * which covers global data structures and variables, the contents of each
   88  * per-socket unpcb structure, and the so_pcb field in sockets attached to
   89  * the UNIX domain.  This provides for a moderate degree of parallelism, as
   90  * receive operations on UNIX domain sockets do not need to acquire the
   91  * subsystem lock.  Finer grained locking to permit send() without acquiring
   92  * a global lock would be a logical next step.
   93  *
   94  * The UNIX domain socket lock precedes all socket layer locks, including the
   95  * socket lock and socket buffer lock, permitting UNIX domain socket code to
   96  * call into socket support routines without releasing its locks.
   97  *
   98  * Some caution is required in areas where the UNIX domain socket code enters
   99  * VFS in order to create or find rendezvous points.  This results in
  100  * dropping of the UNIX domain socket subsystem lock, acquisition of the
  101  * Giant lock, and potential sleeping.  This increases the chances of races,
  102  * and exposes weaknesses in the socket->protocol API by offering poor
  103  * failure modes.
  104  */
  105 static struct mtx unp_mtx;
  106 #define UNP_LOCK_INIT() \
  107         mtx_init(&unp_mtx, "unp", NULL, MTX_DEF)
  108 #define UNP_LOCK()              mtx_lock(&unp_mtx)
  109 #define UNP_UNLOCK()            mtx_unlock(&unp_mtx)
  110 #define UNP_LOCK_ASSERT()       mtx_assert(&unp_mtx, MA_OWNED)
  111 #define UNP_UNLOCK_ASSERT()     mtx_assert(&unp_mtx, MA_NOTOWNED)
  112 
  113 static int     unp_attach(struct socket *);
  114 static void    unp_detach(struct unpcb *);
  115 static int     unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
  116 static int     unp_connect(struct socket *,struct sockaddr *, struct thread *);
  117 static int     unp_connect2(struct socket *so, struct socket *so2);
  118 static void    unp_disconnect(struct unpcb *);
  119 static void    unp_shutdown(struct unpcb *);
  120 static void    unp_drop(struct unpcb *, int);
  121 static void    unp_gc(void);
  122 static void    unp_scan(struct mbuf *, void (*)(struct file *));
  123 static void    unp_mark(struct file *);
  124 static void    unp_discard(struct file *);
  125 static void    unp_freerights(struct file **, int);
  126 static int     unp_internalize(struct mbuf **, struct thread *);
  127 static int     unp_listen(struct unpcb *, struct thread *);
  128 
  129 static int
  130 uipc_abort(struct socket *so)
  131 {
  132         struct unpcb *unp;
  133 
  134         UNP_LOCK();
  135         unp = sotounpcb(so);
  136         if (unp == NULL) {
  137                 UNP_UNLOCK();
  138                 return (EINVAL);
  139         }
  140         unp_drop(unp, ECONNABORTED);
  141         unp_detach(unp);
  142         UNP_UNLOCK_ASSERT();
  143         ACCEPT_LOCK();
  144         SOCK_LOCK(so);
  145         sotryfree(so);
  146         return (0);
  147 }
  148 
  149 static int
  150 uipc_accept(struct socket *so, struct sockaddr **nam)
  151 {
  152         struct unpcb *unp;
  153         const struct sockaddr *sa;
  154 
  155         /*
  156          * Pass back name of connected socket,
  157          * if it was bound and we are still connected
  158          * (our peer may have closed already!).
  159          */
  160         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  161         UNP_LOCK();
  162         unp = sotounpcb(so);
  163         if (unp == NULL) {
  164                 UNP_UNLOCK();
  165                 free(*nam, M_SONAME);
  166                 *nam = NULL;
  167                 return (EINVAL);
  168         }
  169         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL)
  170                 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
  171         else
  172                 sa = &sun_noname;
  173         bcopy(sa, *nam, sa->sa_len);
  174         UNP_UNLOCK();
  175         return (0);
  176 }
  177 
  178 static int
  179 uipc_attach(struct socket *so, int proto, struct thread *td)
  180 {
  181         struct unpcb *unp = sotounpcb(so);
  182 
  183         if (unp != NULL)
  184                 return (EISCONN);
  185         return (unp_attach(so));
  186 }
  187 
  188 static int
  189 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  190 {
  191         struct unpcb *unp;
  192         int error;
  193 
  194         UNP_LOCK();
  195         unp = sotounpcb(so);
  196         if (unp == NULL) {
  197                 UNP_UNLOCK();
  198                 return (EINVAL);
  199         }
  200         error = unp_bind(unp, nam, td);
  201         UNP_UNLOCK();
  202         return (error);
  203 }
  204 
  205 static int
  206 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  207 {
  208         struct unpcb *unp;
  209         int error;
  210 
  211         KASSERT(td == curthread, ("uipc_connect: td != curthread"));
  212 
  213         UNP_LOCK();
  214         unp = sotounpcb(so);
  215         if (unp == NULL) {
  216                 UNP_UNLOCK();
  217                 return (EINVAL);
  218         }
  219         error = unp_connect(so, nam, td);
  220         UNP_UNLOCK();
  221         return (error);
  222 }
  223 
  224 int
  225 uipc_connect2(struct socket *so1, struct socket *so2)
  226 {
  227         struct unpcb *unp;
  228         int error;
  229 
  230         UNP_LOCK();
  231         unp = sotounpcb(so1);
  232         if (unp == NULL) {
  233                 UNP_UNLOCK();
  234                 return (EINVAL);
  235         }
  236         error = unp_connect2(so1, so2);
  237         UNP_UNLOCK();
  238         return (error);
  239 }
  240 
  241 /* control is EOPNOTSUPP */
  242 
  243 static int
  244 uipc_detach(struct socket *so)
  245 {
  246         struct unpcb *unp;
  247 
  248         UNP_LOCK();
  249         unp = sotounpcb(so);
  250         if (unp == NULL) {
  251                 UNP_UNLOCK();
  252                 return (EINVAL);
  253         }
  254         unp_detach(unp);
  255         UNP_UNLOCK_ASSERT();
  256         return (0);
  257 }
  258 
  259 static int
  260 uipc_disconnect(struct socket *so)
  261 {
  262         struct unpcb *unp;
  263 
  264         UNP_LOCK();
  265         unp = sotounpcb(so);
  266         if (unp == NULL) {
  267                 UNP_UNLOCK();
  268                 return (EINVAL);
  269         }
  270         unp_disconnect(unp);
  271         UNP_UNLOCK();
  272         return (0);
  273 }
  274 
  275 static int
  276 uipc_listen(struct socket *so, struct thread *td)
  277 {
  278         struct unpcb *unp;
  279         int error;
  280 
  281         UNP_LOCK();
  282         unp = sotounpcb(so);
  283         if (unp == NULL || unp->unp_vnode == NULL) {
  284                 UNP_UNLOCK();
  285                 return (EINVAL);
  286         }
  287         error = unp_listen(unp, td);
  288         UNP_UNLOCK();
  289         return (error);
  290 }
  291 
  292 static int
  293 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  294 {
  295         struct unpcb *unp;
  296         const struct sockaddr *sa;
  297 
  298         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  299         UNP_LOCK();
  300         unp = sotounpcb(so);
  301         if (unp == NULL) {
  302                 UNP_UNLOCK();
  303                 free(*nam, M_SONAME);
  304                 *nam = NULL;
  305                 return (EINVAL);
  306         }
  307         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL)
  308                 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
  309         else {
  310                 /*
  311                  * XXX: It seems that this test always fails even when
  312                  * connection is established.  So, this else clause is
  313                  * added as workaround to return PF_LOCAL sockaddr.
  314                  */
  315                 sa = &sun_noname;
  316         }
  317         bcopy(sa, *nam, sa->sa_len);
  318         UNP_UNLOCK();
  319         return (0);
  320 }
  321 
  322 static int
  323 uipc_rcvd(struct socket *so, int flags)
  324 {
  325         struct unpcb *unp;
  326         struct socket *so2;
  327         u_long newhiwat;
  328 
  329         UNP_LOCK();
  330         unp = sotounpcb(so);
  331         if (unp == NULL) {
  332                 UNP_UNLOCK();
  333                 return (EINVAL);
  334         }
  335         switch (so->so_type) {
  336         case SOCK_DGRAM:
  337                 panic("uipc_rcvd DGRAM?");
  338                 /*NOTREACHED*/
  339 
  340         case SOCK_STREAM:
  341                 if (unp->unp_conn == NULL)
  342                         break;
  343                 so2 = unp->unp_conn->unp_socket;
  344                 SOCKBUF_LOCK(&so2->so_snd);
  345                 SOCKBUF_LOCK(&so->so_rcv);
  346                 /*
  347                  * Adjust backpressure on sender
  348                  * and wakeup any waiting to write.
  349                  */
  350                 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt;
  351                 unp->unp_mbcnt = so->so_rcv.sb_mbcnt;
  352                 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc -
  353                     so->so_rcv.sb_cc;
  354                 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
  355                     newhiwat, RLIM_INFINITY);
  356                 unp->unp_cc = so->so_rcv.sb_cc;
  357                 SOCKBUF_UNLOCK(&so->so_rcv);
  358                 sowwakeup_locked(so2);
  359                 break;
  360 
  361         default:
  362                 panic("uipc_rcvd unknown socktype");
  363         }
  364         UNP_UNLOCK();
  365         return (0);
  366 }
  367 
  368 /* pru_rcvoob is EOPNOTSUPP */
  369 
  370 static int
  371 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  372           struct mbuf *control, struct thread *td)
  373 {
  374         int error = 0;
  375         struct unpcb *unp;
  376         struct socket *so2;
  377         u_long newhiwat;
  378 
  379         unp = sotounpcb(so);
  380         if (unp == NULL) {
  381                 error = EINVAL;
  382                 goto release;
  383         }
  384         if (flags & PRUS_OOB) {
  385                 error = EOPNOTSUPP;
  386                 goto release;
  387         }
  388 
  389         if (control != NULL && (error = unp_internalize(&control, td)))
  390                 goto release;
  391 
  392         UNP_LOCK();
  393         unp = sotounpcb(so);
  394         if (unp == NULL) {
  395                 UNP_UNLOCK();
  396                 error = EINVAL;
  397                 goto dispose_release;
  398         }
  399 
  400         switch (so->so_type) {
  401         case SOCK_DGRAM:
  402         {
  403                 const struct sockaddr *from;
  404 
  405                 if (nam != NULL) {
  406                         if (unp->unp_conn != NULL) {
  407                                 error = EISCONN;
  408                                 break;
  409                         }
  410                         error = unp_connect(so, nam, td);
  411                         if (error)
  412                                 break;
  413                 } else {
  414                         if (unp->unp_conn == NULL) {
  415                                 error = ENOTCONN;
  416                                 break;
  417                         }
  418                 }
  419                 so2 = unp->unp_conn->unp_socket;
  420                 if (unp->unp_addr != NULL)
  421                         from = (struct sockaddr *)unp->unp_addr;
  422                 else
  423                         from = &sun_noname;
  424                 SOCKBUF_LOCK(&so2->so_rcv);
  425                 if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
  426                         sorwakeup_locked(so2);
  427                         m = NULL;
  428                         control = NULL;
  429                 } else {
  430                         SOCKBUF_UNLOCK(&so2->so_rcv);
  431                         error = ENOBUFS;
  432                 }
  433                 if (nam != NULL)
  434                         unp_disconnect(unp);
  435                 break;
  436         }
  437 
  438         case SOCK_STREAM:
  439                 /* Connect if not connected yet. */
  440                 /*
  441                  * Note: A better implementation would complain
  442                  * if not equal to the peer's address.
  443                  */
  444                 if ((so->so_state & SS_ISCONNECTED) == 0) {
  445                         if (nam != NULL) {
  446                                 error = unp_connect(so, nam, td);
  447                                 if (error)
  448                                         break;  /* XXX */
  449                         } else {
  450                                 error = ENOTCONN;
  451                                 break;
  452                         }
  453                 }
  454 
  455                 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
  456                         error = EPIPE;
  457                         break;
  458                 }
  459                 if (unp->unp_conn == NULL)
  460                         panic("uipc_send connected but no connection?");
  461                 so2 = unp->unp_conn->unp_socket;
  462                 SOCKBUF_LOCK(&so2->so_rcv);
  463                 /*
  464                  * Send to paired receive port, and then reduce
  465                  * send buffer hiwater marks to maintain backpressure.
  466                  * Wake up readers.
  467                  */
  468                 if (control != NULL) {
  469                         if (sbappendcontrol_locked(&so2->so_rcv, m, control))
  470                                 control = NULL;
  471                 } else {
  472                         sbappend_locked(&so2->so_rcv, m);
  473                 }
  474                 so->so_snd.sb_mbmax -=
  475                         so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
  476                 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt;
  477                 newhiwat = so->so_snd.sb_hiwat -
  478                     (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc);
  479                 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
  480                     newhiwat, RLIM_INFINITY);
  481                 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
  482                 sorwakeup_locked(so2);
  483                 m = NULL;
  484                 break;
  485 
  486         default:
  487                 panic("uipc_send unknown socktype");
  488         }
  489 
  490         /*
  491          * SEND_EOF is equivalent to a SEND followed by
  492          * a SHUTDOWN.
  493          */
  494         if (flags & PRUS_EOF) {
  495                 socantsendmore(so);
  496                 unp_shutdown(unp);
  497         }
  498         UNP_UNLOCK();
  499 
  500 dispose_release:
  501         if (control != NULL && error != 0)
  502                 unp_dispose(control);
  503 
  504 release:
  505         if (control != NULL)
  506                 m_freem(control);
  507         if (m != NULL)
  508                 m_freem(m);
  509         return (error);
  510 }
  511 
  512 static int
  513 uipc_sense(struct socket *so, struct stat *sb)
  514 {
  515         struct unpcb *unp;
  516         struct socket *so2;
  517 
  518         UNP_LOCK();
  519         unp = sotounpcb(so);
  520         if (unp == NULL) {
  521                 UNP_UNLOCK();
  522                 return (EINVAL);
  523         }
  524         sb->st_blksize = so->so_snd.sb_hiwat;
  525         if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
  526                 so2 = unp->unp_conn->unp_socket;
  527                 sb->st_blksize += so2->so_rcv.sb_cc;
  528         }
  529         sb->st_dev = NODEV;
  530         if (unp->unp_ino == 0)
  531                 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
  532         sb->st_ino = unp->unp_ino;
  533         UNP_UNLOCK();
  534         return (0);
  535 }
  536 
  537 static int
  538 uipc_shutdown(struct socket *so)
  539 {
  540         struct unpcb *unp;
  541 
  542         UNP_LOCK();
  543         unp = sotounpcb(so);
  544         if (unp == NULL) {
  545                 UNP_UNLOCK();
  546                 return (EINVAL);
  547         }
  548         socantsendmore(so);
  549         unp_shutdown(unp);
  550         UNP_UNLOCK();
  551         return (0);
  552 }
  553 
  554 static int
  555 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
  556 {
  557         struct unpcb *unp;
  558         const struct sockaddr *sa;
  559 
  560         *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  561         UNP_LOCK();
  562         unp = sotounpcb(so);
  563         if (unp == NULL) {
  564                 UNP_UNLOCK();
  565                 free(*nam, M_SONAME);
  566                 *nam = NULL;
  567                 return (EINVAL);
  568         }
  569         if (unp->unp_addr != NULL)
  570                 sa = (struct sockaddr *) unp->unp_addr;
  571         else
  572                 sa = &sun_noname;
  573         bcopy(sa, *nam, sa->sa_len);
  574         UNP_UNLOCK();
  575         return (0);
  576 }
  577 
  578 struct pr_usrreqs uipc_usrreqs = {
  579         uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
  580         uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
  581         uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
  582         uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
  583         sosend, soreceive, sopoll, pru_sosetlabel_null
  584 };
  585 
  586 int
  587 uipc_ctloutput(so, sopt)
  588         struct socket *so;
  589         struct sockopt *sopt;
  590 {
  591         struct unpcb *unp;
  592         struct xucred xu;
  593         int error;
  594 
  595         switch (sopt->sopt_dir) {
  596         case SOPT_GET:
  597                 switch (sopt->sopt_name) {
  598                 case LOCAL_PEERCRED:
  599                         error = 0;
  600                         UNP_LOCK();
  601                         unp = sotounpcb(so);
  602                         if (unp == NULL) {
  603                                 UNP_UNLOCK();
  604                                 error = EINVAL;
  605                                 break;
  606                         }
  607                         if (unp->unp_flags & UNP_HAVEPC)
  608                                 xu = unp->unp_peercred;
  609                         else {
  610                                 if (so->so_type == SOCK_STREAM)
  611                                         error = ENOTCONN;
  612                                 else
  613                                         error = EINVAL;
  614                         }
  615                         UNP_UNLOCK();
  616                         if (error == 0)
  617                                 error = sooptcopyout(sopt, &xu, sizeof(xu));
  618                         break;
  619                 default:
  620                         error = EOPNOTSUPP;
  621                         break;
  622                 }
  623                 break;
  624         case SOPT_SET:
  625         default:
  626                 error = EOPNOTSUPP;
  627                 break;
  628         }
  629         return (error);
  630 }
  631 
  632 /*
  633  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
  634  * for stream sockets, although the total for sender and receiver is
  635  * actually only PIPSIZ.
  636  * Datagram sockets really use the sendspace as the maximum datagram size,
  637  * and don't really want to reserve the sendspace.  Their recvspace should
  638  * be large enough for at least one max-size datagram plus address.
  639  */
  640 #ifndef PIPSIZ
  641 #define PIPSIZ  8192
  642 #endif
  643 static u_long   unpst_sendspace = PIPSIZ;
  644 static u_long   unpst_recvspace = PIPSIZ;
  645 static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
  646 static u_long   unpdg_recvspace = 4*1024;
  647 
  648 static int      unp_rights;                     /* file descriptors in flight */
  649 
  650 SYSCTL_DECL(_net_local_stream);
  651 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
  652            &unpst_sendspace, 0, "");
  653 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
  654            &unpst_recvspace, 0, "");
  655 SYSCTL_DECL(_net_local_dgram);
  656 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
  657            &unpdg_sendspace, 0, "");
  658 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
  659            &unpdg_recvspace, 0, "");
  660 SYSCTL_DECL(_net_local);
  661 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
  662 
  663 static int
  664 unp_attach(so)
  665         struct socket *so;
  666 {
  667         register struct unpcb *unp;
  668         int error;
  669 
  670         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  671                 switch (so->so_type) {
  672 
  673                 case SOCK_STREAM:
  674                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
  675                         break;
  676 
  677                 case SOCK_DGRAM:
  678                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
  679                         break;
  680 
  681                 default:
  682                         panic("unp_attach");
  683                 }
  684                 if (error)
  685                         return (error);
  686         }
  687         unp = uma_zalloc(unp_zone, M_WAITOK);
  688         if (unp == NULL)
  689                 return (ENOBUFS);
  690         bzero(unp, sizeof *unp);
  691         LIST_INIT(&unp->unp_refs);
  692         unp->unp_socket = so;
  693 
  694         UNP_LOCK();
  695         unp->unp_gencnt = ++unp_gencnt;
  696         unp_count++;
  697         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
  698                          : &unp_shead, unp, unp_link);
  699         so->so_pcb = unp;
  700         UNP_UNLOCK();
  701 
  702         return (0);
  703 }
  704 
  705 static void
  706 unp_detach(unp)
  707         register struct unpcb *unp;
  708 {
  709         struct vnode *vp;
  710 
  711         UNP_LOCK_ASSERT();
  712 
  713         LIST_REMOVE(unp, unp_link);
  714         unp->unp_gencnt = ++unp_gencnt;
  715         --unp_count;
  716         if ((vp = unp->unp_vnode) != NULL) {
  717                 /*
  718                  * XXXRW: should v_socket be frobbed only while holding
  719                  * Giant?
  720                  */
  721                 unp->unp_vnode->v_socket = NULL;
  722                 unp->unp_vnode = NULL;
  723         }
  724         if (unp->unp_conn != NULL)
  725                 unp_disconnect(unp);
  726         while (!LIST_EMPTY(&unp->unp_refs)) {
  727                 struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
  728                 unp_drop(ref, ECONNRESET);
  729         }
  730         soisdisconnected(unp->unp_socket);
  731         unp->unp_socket->so_pcb = NULL;
  732         if (unp_rights) {
  733                 /*
  734                  * Normally the receive buffer is flushed later,
  735                  * in sofree, but if our receive buffer holds references
  736                  * to descriptors that are now garbage, we will dispose
  737                  * of those descriptor references after the garbage collector
  738                  * gets them (resulting in a "panic: closef: count < 0").
  739                  */
  740                 sorflush(unp->unp_socket);
  741                 unp_gc();       /* Will unlock UNP. */
  742         } else
  743                 UNP_UNLOCK();
  744         UNP_UNLOCK_ASSERT();
  745         if (unp->unp_addr != NULL)
  746                 FREE(unp->unp_addr, M_SONAME);
  747         uma_zfree(unp_zone, unp);
  748         if (vp) {
  749                 mtx_lock(&Giant);
  750                 vrele(vp);
  751                 mtx_unlock(&Giant);
  752         }
  753 }
  754 
  755 static int
  756 unp_bind(unp, nam, td)
  757         struct unpcb *unp;
  758         struct sockaddr *nam;
  759         struct thread *td;
  760 {
  761         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  762         struct vnode *vp;
  763         struct mount *mp;
  764         struct vattr vattr;
  765         int error, namelen;
  766         struct nameidata nd;
  767         char *buf;
  768 
  769         UNP_LOCK_ASSERT();
  770 
  771         /*
  772          * XXXRW: This test-and-set of unp_vnode is non-atomic; the
  773          * unlocked read here is fine, but the value of unp_vnode needs
  774          * to be tested again after we do all the lookups to see if the
  775          * pcb is still unbound?
  776          */
  777         if (unp->unp_vnode != NULL)
  778                 return (EINVAL);
  779 
  780         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  781         if (namelen <= 0)
  782                 return (EINVAL);
  783 
  784         UNP_UNLOCK();
  785 
  786         buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
  787         strlcpy(buf, soun->sun_path, namelen + 1);
  788 
  789         mtx_lock(&Giant);
  790 restart:
  791         mtx_assert(&Giant, MA_OWNED);
  792         NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
  793             buf, td);
  794 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  795         error = namei(&nd);
  796         if (error)
  797                 goto done;
  798         vp = nd.ni_vp;
  799         if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
  800                 NDFREE(&nd, NDF_ONLY_PNBUF);
  801                 if (nd.ni_dvp == vp)
  802                         vrele(nd.ni_dvp);
  803                 else
  804                         vput(nd.ni_dvp);
  805                 if (vp != NULL) {
  806                         vrele(vp);
  807                         error = EADDRINUSE;
  808                         goto done;
  809                 }
  810                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
  811                 if (error)
  812                         goto done;
  813                 goto restart;
  814         }
  815         VATTR_NULL(&vattr);
  816         vattr.va_type = VSOCK;
  817         vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
  818 #ifdef MAC
  819         error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  820             &vattr);
  821 #endif
  822         if (error == 0) {
  823                 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
  824                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  825         }
  826         NDFREE(&nd, NDF_ONLY_PNBUF);
  827         vput(nd.ni_dvp);
  828         if (error)
  829                 goto done;
  830         vp = nd.ni_vp;
  831         ASSERT_VOP_LOCKED(vp, "unp_bind");
  832         soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
  833         UNP_LOCK();
  834         vp->v_socket = unp->unp_socket;
  835         unp->unp_vnode = vp;
  836         unp->unp_addr = soun;
  837         UNP_UNLOCK();
  838         VOP_UNLOCK(vp, 0, td);
  839         vn_finished_write(mp);
  840 done:
  841         mtx_unlock(&Giant);
  842         free(buf, M_TEMP);
  843         UNP_LOCK();
  844         return (error);
  845 }
  846 
  847 static int
  848 unp_connect(so, nam, td)
  849         struct socket *so;
  850         struct sockaddr *nam;
  851         struct thread *td;
  852 {
  853         register struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  854         register struct vnode *vp;
  855         register struct socket *so2, *so3;
  856         struct unpcb *unp, *unp2, *unp3;
  857         int error, len;
  858         struct nameidata nd;
  859         char buf[SOCK_MAXADDRLEN];
  860         struct sockaddr *sa;
  861 
  862         UNP_LOCK_ASSERT();
  863         unp = sotounpcb(so);
  864 
  865         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
  866         if (len <= 0)
  867                 return (EINVAL);
  868         strlcpy(buf, soun->sun_path, len + 1);
  869         UNP_UNLOCK();
  870         sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
  871         mtx_lock(&Giant);
  872         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
  873         error = namei(&nd);
  874         if (error)
  875                 vp = NULL;
  876         else
  877                 vp = nd.ni_vp;
  878         ASSERT_VOP_LOCKED(vp, "unp_connect");
  879         NDFREE(&nd, NDF_ONLY_PNBUF);
  880         if (error)
  881                 goto bad;
  882 
  883         if (vp->v_type != VSOCK) {
  884                 error = ENOTSOCK;
  885                 goto bad;
  886         }
  887         error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
  888         if (error)
  889                 goto bad;
  890         mtx_unlock(&Giant);
  891         UNP_LOCK();
  892         unp = sotounpcb(so);
  893         if (unp == NULL) {
  894                 /*
  895                  * XXXRW: Temporary debugging printf.
  896                  */
  897                 printf("unp_connect(): lost race to another thread\n");
  898                 error = EINVAL;
  899                 goto bad2;
  900         }
  901         so2 = vp->v_socket;
  902         if (so2 == NULL) {
  903                 error = ECONNREFUSED;
  904                 goto bad2;
  905         }
  906         if (so->so_type != so2->so_type) {
  907                 error = EPROTOTYPE;
  908                 goto bad2;
  909         }
  910         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
  911                 if (so2->so_options & SO_ACCEPTCONN) {
  912                         /*
  913                          * NB: drop locks here so unp_attach is entered
  914                          *     w/o locks; this avoids a recursive lock
  915                          *     of the head and holding sleep locks across
  916                          *     a (potentially) blocking malloc.
  917                          */
  918                         UNP_UNLOCK();
  919                         so3 = sonewconn(so2, 0);
  920                         UNP_LOCK();
  921                 } else
  922                         so3 = NULL;
  923                 if (so3 == NULL) {
  924                         error = ECONNREFUSED;
  925                         goto bad2;
  926                 }
  927                 unp = sotounpcb(so);
  928                 unp2 = sotounpcb(so2);
  929                 unp3 = sotounpcb(so3);
  930                 if (unp2->unp_addr != NULL) {
  931                         bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
  932                         unp3->unp_addr = (struct sockaddr_un *) sa;
  933                         sa = NULL;
  934                 }
  935                 /*
  936                  * unp_peercred management:
  937                  *
  938                  * The connecter's (client's) credentials are copied
  939                  * from its process structure at the time of connect()
  940                  * (which is now).
  941                  */
  942                 cru2x(td->td_ucred, &unp3->unp_peercred);
  943                 unp3->unp_flags |= UNP_HAVEPC;
  944                 /*
  945                  * The receiver's (server's) credentials are copied
  946                  * from the unp_peercred member of socket on which the
  947                  * former called listen(); unp_listen() cached that
  948                  * process's credentials at that time so we can use
  949                  * them now.
  950                  */
  951                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
  952                     ("unp_connect: listener without cached peercred"));
  953                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
  954                     sizeof(unp->unp_peercred));
  955                 unp->unp_flags |= UNP_HAVEPC;
  956 #ifdef MAC
  957                 SOCK_LOCK(so);
  958                 mac_set_socket_peer_from_socket(so, so3);
  959                 mac_set_socket_peer_from_socket(so3, so);
  960                 SOCK_UNLOCK(so);
  961 #endif
  962 
  963                 so2 = so3;
  964         }
  965         error = unp_connect2(so, so2);
  966 bad2:
  967         UNP_UNLOCK();
  968         mtx_lock(&Giant);
  969 bad:
  970         mtx_assert(&Giant, MA_OWNED);
  971         if (vp != NULL)
  972                 vput(vp);
  973         mtx_unlock(&Giant);
  974         free(sa, M_SONAME);
  975         UNP_LOCK();
  976         return (error);
  977 }
  978 
  979 static int
  980 unp_connect2(so, so2)
  981         register struct socket *so;
  982         register struct socket *so2;
  983 {
  984         register struct unpcb *unp = sotounpcb(so);
  985         register struct unpcb *unp2;
  986 
  987         UNP_LOCK_ASSERT();
  988 
  989         if (so2->so_type != so->so_type)
  990                 return (EPROTOTYPE);
  991         unp2 = sotounpcb(so2);
  992         unp->unp_conn = unp2;
  993         switch (so->so_type) {
  994 
  995         case SOCK_DGRAM:
  996                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
  997                 soisconnected(so);
  998                 break;
  999 
 1000         case SOCK_STREAM:
 1001                 unp2->unp_conn = unp;
 1002                 soisconnected(so);
 1003                 soisconnected(so2);
 1004                 break;
 1005 
 1006         default:
 1007                 panic("unp_connect2");
 1008         }
 1009         return (0);
 1010 }
 1011 
 1012 static void
 1013 unp_disconnect(unp)
 1014         struct unpcb *unp;
 1015 {
 1016         register struct unpcb *unp2 = unp->unp_conn;
 1017         struct socket *so;
 1018 
 1019         UNP_LOCK_ASSERT();
 1020 
 1021         if (unp2 == NULL)
 1022                 return;
 1023         unp->unp_conn = NULL;
 1024         switch (unp->unp_socket->so_type) {
 1025 
 1026         case SOCK_DGRAM:
 1027                 LIST_REMOVE(unp, unp_reflink);
 1028                 so = unp->unp_socket;
 1029                 SOCK_LOCK(so);
 1030                 so->so_state &= ~SS_ISCONNECTED;
 1031                 SOCK_UNLOCK(so);
 1032                 break;
 1033 
 1034         case SOCK_STREAM:
 1035                 soisdisconnected(unp->unp_socket);
 1036                 unp2->unp_conn = NULL;
 1037                 soisdisconnected(unp2->unp_socket);
 1038                 break;
 1039         }
 1040 }
 1041 
 1042 #ifdef notdef
 1043 void
 1044 unp_abort(unp)
 1045         struct unpcb *unp;
 1046 {
 1047 
 1048         unp_detach(unp);
 1049         UNP_UNLOCK_ASSERT();
 1050 }
 1051 #endif
 1052 
 1053 /*
 1054  * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed
 1055  * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers
 1056  * are safe to reference.  It first scans the list of struct unpcb's to
 1057  * generate a pointer list, then it rescans its list one entry at a time to
 1058  * externalize and copyout.  It checks the generation number to see if a
 1059  * struct unpcb has been reused, and will skip it if so.
 1060  */
 1061 static int
 1062 unp_pcblist(SYSCTL_HANDLER_ARGS)
 1063 {
 1064         int error, i, n;
 1065         struct unpcb *unp, **unp_list;
 1066         unp_gen_t gencnt;
 1067         struct xunpgen *xug;
 1068         struct unp_head *head;
 1069         struct xunpcb *xu;
 1070 
 1071         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
 1072 
 1073         /*
 1074          * The process of preparing the PCB list is too time-consuming and
 1075          * resource-intensive to repeat twice on every request.
 1076          */
 1077         if (req->oldptr == NULL) {
 1078                 n = unp_count;
 1079                 req->oldidx = 2 * (sizeof *xug)
 1080                         + (n + n/8) * sizeof(struct xunpcb);
 1081                 return (0);
 1082         }
 1083 
 1084         if (req->newptr != NULL)
 1085                 return (EPERM);
 1086 
 1087         /*
 1088          * OK, now we're committed to doing something.
 1089          */
 1090         xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
 1091         UNP_LOCK();
 1092         gencnt = unp_gencnt;
 1093         n = unp_count;
 1094         UNP_UNLOCK();
 1095 
 1096         xug->xug_len = sizeof *xug;
 1097         xug->xug_count = n;
 1098         xug->xug_gen = gencnt;
 1099         xug->xug_sogen = so_gencnt;
 1100         error = SYSCTL_OUT(req, xug, sizeof *xug);
 1101         if (error) {
 1102                 free(xug, M_TEMP);
 1103                 return (error);
 1104         }
 1105 
 1106         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 1107 
 1108         UNP_LOCK();
 1109         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 1110              unp = LIST_NEXT(unp, unp_link)) {
 1111                 if (unp->unp_gencnt <= gencnt) {
 1112                         if (cr_cansee(req->td->td_ucred,
 1113                             unp->unp_socket->so_cred))
 1114                                 continue;
 1115                         unp_list[i++] = unp;
 1116                 }
 1117         }
 1118         UNP_UNLOCK();
 1119         n = i;                  /* in case we lost some during malloc */
 1120 
 1121         error = 0;
 1122         xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 1123         for (i = 0; i < n; i++) {
 1124                 unp = unp_list[i];
 1125                 if (unp->unp_gencnt <= gencnt) {
 1126                         xu->xu_len = sizeof *xu;
 1127                         xu->xu_unpp = unp;
 1128                         /*
 1129                          * XXX - need more locking here to protect against
 1130                          * connect/disconnect races for SMP.
 1131                          */
 1132                         if (unp->unp_addr != NULL)
 1133                                 bcopy(unp->unp_addr, &xu->xu_addr,
 1134                                       unp->unp_addr->sun_len);
 1135                         if (unp->unp_conn != NULL &&
 1136                             unp->unp_conn->unp_addr != NULL)
 1137                                 bcopy(unp->unp_conn->unp_addr,
 1138                                       &xu->xu_caddr,
 1139                                       unp->unp_conn->unp_addr->sun_len);
 1140                         bcopy(unp, &xu->xu_unp, sizeof *unp);
 1141                         sotoxsocket(unp->unp_socket, &xu->xu_socket);
 1142                         error = SYSCTL_OUT(req, xu, sizeof *xu);
 1143                 }
 1144         }
 1145         free(xu, M_TEMP);
 1146         if (!error) {
 1147                 /*
 1148                  * Give the user an updated idea of our state.
 1149                  * If the generation differs from what we told
 1150                  * her before, she knows that something happened
 1151                  * while we were processing this request, and it
 1152                  * might be necessary to retry.
 1153                  */
 1154                 xug->xug_gen = unp_gencnt;
 1155                 xug->xug_sogen = so_gencnt;
 1156                 xug->xug_count = unp_count;
 1157                 error = SYSCTL_OUT(req, xug, sizeof *xug);
 1158         }
 1159         free(unp_list, M_TEMP);
 1160         free(xug, M_TEMP);
 1161         return (error);
 1162 }
 1163 
 1164 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
 1165             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
 1166             "List of active local datagram sockets");
 1167 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
 1168             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
 1169             "List of active local stream sockets");
 1170 
 1171 static void
 1172 unp_shutdown(unp)
 1173         struct unpcb *unp;
 1174 {
 1175         struct socket *so;
 1176 
 1177         UNP_LOCK_ASSERT();
 1178 
 1179         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
 1180             (so = unp->unp_conn->unp_socket))
 1181                 socantrcvmore(so);
 1182 }
 1183 
 1184 static void
 1185 unp_drop(unp, errno)
 1186         struct unpcb *unp;
 1187         int errno;
 1188 {
 1189         struct socket *so = unp->unp_socket;
 1190 
 1191         UNP_LOCK_ASSERT();
 1192 
 1193         so->so_error = errno;
 1194         unp_disconnect(unp);
 1195 }
 1196 
 1197 #ifdef notdef
 1198 void
 1199 unp_drain()
 1200 {
 1201 
 1202 }
 1203 #endif
 1204 
 1205 static void
 1206 unp_freerights(rp, fdcount)
 1207         struct file **rp;
 1208         int fdcount;
 1209 {
 1210         int i;
 1211         struct file *fp;
 1212 
 1213         for (i = 0; i < fdcount; i++) {
 1214                 fp = *rp;
 1215                 /*
 1216                  * zero the pointer before calling
 1217                  * unp_discard since it may end up
 1218                  * in unp_gc()..
 1219                  */
 1220                 *rp++ = 0;
 1221                 unp_discard(fp);
 1222         }
 1223 }
 1224 
 1225 int
 1226 unp_externalize(control, controlp)
 1227         struct mbuf *control, **controlp;
 1228 {
 1229         struct thread *td = curthread;          /* XXX */
 1230         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1231         int i;
 1232         int *fdp;
 1233         struct file **rp;
 1234         struct file *fp;
 1235         void *data;
 1236         socklen_t clen = control->m_len, datalen;
 1237         int error, newfds;
 1238         int f;
 1239         u_int newlen;
 1240 
 1241         UNP_UNLOCK_ASSERT();
 1242 
 1243         error = 0;
 1244         if (controlp != NULL) /* controlp == NULL => free control messages */
 1245                 *controlp = NULL;
 1246 
 1247         while (cm != NULL) {
 1248                 if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
 1249                         error = EINVAL;
 1250                         break;
 1251                 }
 1252 
 1253                 data = CMSG_DATA(cm);
 1254                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1255 
 1256                 if (cm->cmsg_level == SOL_SOCKET
 1257                     && cm->cmsg_type == SCM_RIGHTS) {
 1258                         newfds = datalen / sizeof(struct file *);
 1259                         rp = data;
 1260 
 1261                         /* If we're not outputting the descriptors free them. */
 1262                         if (error || controlp == NULL) {
 1263                                 unp_freerights(rp, newfds);
 1264                                 goto next;
 1265                         }
 1266                         FILEDESC_LOCK(td->td_proc->p_fd);
 1267                         /* if the new FD's will not fit free them.  */
 1268                         if (!fdavail(td, newfds)) {
 1269                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1270                                 error = EMSGSIZE;
 1271                                 unp_freerights(rp, newfds);
 1272                                 goto next;
 1273                         }
 1274                         /*
 1275                          * now change each pointer to an fd in the global
 1276                          * table to an integer that is the index to the
 1277                          * local fd table entry that we set up to point
 1278                          * to the global one we are transferring.
 1279                          */
 1280                         newlen = newfds * sizeof(int);
 1281                         *controlp = sbcreatecontrol(NULL, newlen,
 1282                             SCM_RIGHTS, SOL_SOCKET);
 1283                         if (*controlp == NULL) {
 1284                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1285                                 error = E2BIG;
 1286                                 unp_freerights(rp, newfds);
 1287                                 goto next;
 1288                         }
 1289 
 1290                         fdp = (int *)
 1291                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1292                         for (i = 0; i < newfds; i++) {
 1293                                 if (fdalloc(td, 0, &f))
 1294                                         panic("unp_externalize fdalloc failed");
 1295                                 fp = *rp++;
 1296                                 td->td_proc->p_fd->fd_ofiles[f] = fp;
 1297                                 FILE_LOCK(fp);
 1298                                 fp->f_msgcount--;
 1299                                 FILE_UNLOCK(fp);
 1300                                 unp_rights--;
 1301                                 *fdp++ = f;
 1302                         }
 1303                         FILEDESC_UNLOCK(td->td_proc->p_fd);
 1304                 } else { /* We can just copy anything else across */
 1305                         if (error || controlp == NULL)
 1306                                 goto next;
 1307                         *controlp = sbcreatecontrol(NULL, datalen,
 1308                             cm->cmsg_type, cm->cmsg_level);
 1309                         if (*controlp == NULL) {
 1310                                 error = ENOBUFS;
 1311                                 goto next;
 1312                         }
 1313                         bcopy(data,
 1314                             CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 1315                             datalen);
 1316                 }
 1317 
 1318                 controlp = &(*controlp)->m_next;
 1319 
 1320 next:
 1321                 if (CMSG_SPACE(datalen) < clen) {
 1322                         clen -= CMSG_SPACE(datalen);
 1323                         cm = (struct cmsghdr *)
 1324                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1325                 } else {
 1326                         clen = 0;
 1327                         cm = NULL;
 1328                 }
 1329         }
 1330 
 1331         m_freem(control);
 1332 
 1333         return (error);
 1334 }
 1335 
 1336 void
 1337 unp_init(void)
 1338 {
 1339         unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
 1340             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 1341         if (unp_zone == NULL)
 1342                 panic("unp_init");
 1343         uma_zone_set_max(unp_zone, nmbclusters);
 1344         LIST_INIT(&unp_dhead);
 1345         LIST_INIT(&unp_shead);
 1346 
 1347         UNP_LOCK_INIT();
 1348 }
 1349 
 1350 static int
 1351 unp_internalize(controlp, td)
 1352         struct mbuf **controlp;
 1353         struct thread *td;
 1354 {
 1355         struct mbuf *control = *controlp;
 1356         struct proc *p = td->td_proc;
 1357         struct filedesc *fdescp = p->p_fd;
 1358         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1359         struct cmsgcred *cmcred;
 1360         struct file **rp;
 1361         struct file *fp;
 1362         struct timeval *tv;
 1363         int i, fd, *fdp;
 1364         void *data;
 1365         socklen_t clen = control->m_len, datalen;
 1366         int error, oldfds;
 1367         u_int newlen;
 1368 
 1369         UNP_UNLOCK_ASSERT();
 1370 
 1371         error = 0;
 1372         *controlp = NULL;
 1373 
 1374         while (cm != NULL) {
 1375                 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 1376                     || cm->cmsg_len > clen) {
 1377                         error = EINVAL;
 1378                         goto out;
 1379                 }
 1380 
 1381                 data = CMSG_DATA(cm);
 1382                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1383 
 1384                 switch (cm->cmsg_type) {
 1385                 /*
 1386                  * Fill in credential information.
 1387                  */
 1388                 case SCM_CREDS:
 1389                         *controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 1390                             SCM_CREDS, SOL_SOCKET);
 1391                         if (*controlp == NULL) {
 1392                                 error = ENOBUFS;
 1393                                 goto out;
 1394                         }
 1395 
 1396                         cmcred = (struct cmsgcred *)
 1397                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1398                         cmcred->cmcred_pid = p->p_pid;
 1399                         cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 1400                         cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 1401                         cmcred->cmcred_euid = td->td_ucred->cr_uid;
 1402                         cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 1403                                                         CMGROUP_MAX);
 1404                         for (i = 0; i < cmcred->cmcred_ngroups; i++)
 1405                                 cmcred->cmcred_groups[i] =
 1406                                     td->td_ucred->cr_groups[i];
 1407                         break;
 1408 
 1409                 case SCM_RIGHTS:
 1410                         oldfds = datalen / sizeof (int);
 1411                         /*
 1412                          * check that all the FDs passed in refer to legal files
 1413                          * If not, reject the entire operation.
 1414                          */
 1415                         fdp = data;
 1416                         FILEDESC_LOCK(fdescp);
 1417                         for (i = 0; i < oldfds; i++) {
 1418                                 fd = *fdp++;
 1419                                 if ((unsigned)fd >= fdescp->fd_nfiles ||
 1420                                     fdescp->fd_ofiles[fd] == NULL) {
 1421                                         FILEDESC_UNLOCK(fdescp);
 1422                                         error = EBADF;
 1423                                         goto out;
 1424                                 }
 1425                                 fp = fdescp->fd_ofiles[fd];
 1426                                 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 1427                                         FILEDESC_UNLOCK(fdescp);
 1428                                         error = EOPNOTSUPP;
 1429                                         goto out;
 1430                                 }
 1431 
 1432                         }
 1433                         /*
 1434                          * Now replace the integer FDs with pointers to
 1435                          * the associated global file table entry..
 1436                          */
 1437                         newlen = oldfds * sizeof(struct file *);
 1438                         *controlp = sbcreatecontrol(NULL, newlen,
 1439                             SCM_RIGHTS, SOL_SOCKET);
 1440                         if (*controlp == NULL) {
 1441                                 FILEDESC_UNLOCK(fdescp);
 1442                                 error = E2BIG;
 1443                                 goto out;
 1444                         }
 1445 
 1446                         fdp = data;
 1447                         rp = (struct file **)
 1448                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1449                         for (i = 0; i < oldfds; i++) {
 1450                                 fp = fdescp->fd_ofiles[*fdp++];
 1451                                 *rp++ = fp;
 1452                                 FILE_LOCK(fp);
 1453                                 fp->f_count++;
 1454                                 fp->f_msgcount++;
 1455                                 FILE_UNLOCK(fp);
 1456                                 unp_rights++;
 1457                         }
 1458                         FILEDESC_UNLOCK(fdescp);
 1459                         break;
 1460 
 1461                 case SCM_TIMESTAMP:
 1462                         *controlp = sbcreatecontrol(NULL, sizeof(*tv),
 1463                             SCM_TIMESTAMP, SOL_SOCKET);
 1464                         if (*controlp == NULL) {
 1465                                 error = ENOBUFS;
 1466                                 goto out;
 1467                         }
 1468                         tv = (struct timeval *)
 1469                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1470                         microtime(tv);
 1471                         break;
 1472 
 1473                 default:
 1474                         error = EINVAL;
 1475                         goto out;
 1476                 }
 1477 
 1478                 controlp = &(*controlp)->m_next;
 1479 
 1480                 if (CMSG_SPACE(datalen) < clen) {
 1481                         clen -= CMSG_SPACE(datalen);
 1482                         cm = (struct cmsghdr *)
 1483                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1484                 } else {
 1485                         clen = 0;
 1486                         cm = NULL;
 1487                 }
 1488         }
 1489 
 1490 out:
 1491         m_freem(control);
 1492 
 1493         return (error);
 1494 }
 1495 
 1496 /*
 1497  * unp_defer is thread-local during garbage collection, and does not require
 1498  * explicit synchronization.  unp_gcing prevents other threads from entering
 1499  * garbage collection, and perhaps should be an sx lock instead.
 1500  */
 1501 static int      unp_defer, unp_gcing;
 1502 
 1503 static void
 1504 unp_gc()
 1505 {
 1506         register struct file *fp, *nextfp;
 1507         register struct socket *so;
 1508         struct file **extra_ref, **fpp;
 1509         int nunref, i;
 1510         int nfiles_snap;
 1511         int nfiles_slack = 20;
 1512 
 1513         UNP_LOCK_ASSERT();
 1514 
 1515         if (unp_gcing) {
 1516                 UNP_UNLOCK();
 1517                 return;
 1518         }
 1519         unp_gcing = 1;
 1520         unp_defer = 0;
 1521         UNP_UNLOCK();
 1522         /*
 1523          * before going through all this, set all FDs to
 1524          * be NOT defered and NOT externally accessible
 1525          */
 1526         sx_slock(&filelist_lock);
 1527         LIST_FOREACH(fp, &filehead, f_list)
 1528                 fp->f_gcflag &= ~(FMARK|FDEFER);
 1529         do {
 1530                 LIST_FOREACH(fp, &filehead, f_list) {
 1531                         FILE_LOCK(fp);
 1532                         /*
 1533                          * If the file is not open, skip it
 1534                          */
 1535                         if (fp->f_count == 0) {
 1536                                 FILE_UNLOCK(fp);
 1537                                 continue;
 1538                         }
 1539                         /*
 1540                          * If we already marked it as 'defer'  in a
 1541                          * previous pass, then try process it this time
 1542                          * and un-mark it
 1543                          */
 1544                         if (fp->f_gcflag & FDEFER) {
 1545                                 fp->f_gcflag &= ~FDEFER;
 1546                                 unp_defer--;
 1547                         } else {
 1548                                 /*
 1549                                  * if it's not defered, then check if it's
 1550                                  * already marked.. if so skip it
 1551                                  */
 1552                                 if (fp->f_gcflag & FMARK) {
 1553                                         FILE_UNLOCK(fp);
 1554                                         continue;
 1555                                 }
 1556                                 /*
 1557                                  * If all references are from messages
 1558                                  * in transit, then skip it. it's not
 1559                                  * externally accessible.
 1560                                  */
 1561                                 if (fp->f_count == fp->f_msgcount) {
 1562                                         FILE_UNLOCK(fp);
 1563                                         continue;
 1564                                 }
 1565                                 /*
 1566                                  * If it got this far then it must be
 1567                                  * externally accessible.
 1568                                  */
 1569                                 fp->f_gcflag |= FMARK;
 1570                         }
 1571                         /*
 1572                          * either it was defered, or it is externally
 1573                          * accessible and not already marked so.
 1574                          * Now check if it is possibly one of OUR sockets.
 1575                          */
 1576                         if (fp->f_type != DTYPE_SOCKET ||
 1577                             (so = fp->f_data) == NULL) {
 1578                                 FILE_UNLOCK(fp);
 1579                                 continue;
 1580                         }
 1581                         FILE_UNLOCK(fp);
 1582                         if (so->so_proto->pr_domain != &localdomain ||
 1583                             (so->so_proto->pr_flags&PR_RIGHTS) == 0)
 1584                                 continue;
 1585 #ifdef notdef
 1586                         if (so->so_rcv.sb_flags & SB_LOCK) {
 1587                                 /*
 1588                                  * This is problematical; it's not clear
 1589                                  * we need to wait for the sockbuf to be
 1590                                  * unlocked (on a uniprocessor, at least),
 1591                                  * and it's also not clear what to do
 1592                                  * if sbwait returns an error due to receipt
 1593                                  * of a signal.  If sbwait does return
 1594                                  * an error, we'll go into an infinite
 1595                                  * loop.  Delete all of this for now.
 1596                                  */
 1597                                 (void) sbwait(&so->so_rcv);
 1598                                 goto restart;
 1599                         }
 1600 #endif
 1601                         /*
 1602                          * So, Ok, it's one of our sockets and it IS externally
 1603                          * accessible (or was defered). Now we look
 1604                          * to see if we hold any file descriptors in its
 1605                          * message buffers. Follow those links and mark them
 1606                          * as accessible too.
 1607                          */
 1608                         SOCKBUF_LOCK(&so->so_rcv);
 1609                         unp_scan(so->so_rcv.sb_mb, unp_mark);
 1610                         SOCKBUF_UNLOCK(&so->so_rcv);
 1611                 }
 1612         } while (unp_defer);
 1613         sx_sunlock(&filelist_lock);
 1614         /*
 1615          * We grab an extra reference to each of the file table entries
 1616          * that are not otherwise accessible and then free the rights
 1617          * that are stored in messages on them.
 1618          *
 1619          * The bug in the original code is a little tricky, so I'll describe
 1620          * what's wrong with it here.
 1621          *
 1622          * It is incorrect to simply unp_discard each entry for f_msgcount
 1623          * times -- consider the case of sockets A and B that contain
 1624          * references to each other.  On a last close of some other socket,
 1625          * we trigger a gc since the number of outstanding rights (unp_rights)
 1626          * is non-zero.  If during the sweep phase the gc code unp_discards,
 1627          * we end up doing a (full) closef on the descriptor.  A closef on A
 1628          * results in the following chain.  Closef calls soo_close, which
 1629          * calls soclose.   Soclose calls first (through the switch
 1630          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
 1631          * returns because the previous instance had set unp_gcing, and
 1632          * we return all the way back to soclose, which marks the socket
 1633          * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
 1634          * to free up the rights that are queued in messages on the socket A,
 1635          * i.e., the reference on B.  The sorflush calls via the dom_dispose
 1636          * switch unp_dispose, which unp_scans with unp_discard.  This second
 1637          * instance of unp_discard just calls closef on B.
 1638          *
 1639          * Well, a similar chain occurs on B, resulting in a sorflush on B,
 1640          * which results in another closef on A.  Unfortunately, A is already
 1641          * being closed, and the descriptor has already been marked with
 1642          * SS_NOFDREF, and soclose panics at this point.
 1643          *
 1644          * Here, we first take an extra reference to each inaccessible
 1645          * descriptor.  Then, we call sorflush ourself, since we know
 1646          * it is a Unix domain socket anyhow.  After we destroy all the
 1647          * rights carried in messages, we do a last closef to get rid
 1648          * of our extra reference.  This is the last close, and the
 1649          * unp_detach etc will shut down the socket.
 1650          *
 1651          * 91/09/19, bsy@cs.cmu.edu
 1652          */
 1653 again:
 1654         nfiles_snap = nfiles + nfiles_slack;    /* some slack */
 1655         extra_ref = malloc(nfiles_snap * sizeof(struct file *), M_TEMP,
 1656             M_WAITOK);
 1657         sx_slock(&filelist_lock);
 1658         if (nfiles_snap < nfiles) {
 1659                 sx_sunlock(&filelist_lock);
 1660                 free(extra_ref, M_TEMP);
 1661                 nfiles_slack += 20;
 1662                 goto again;
 1663         }
 1664         for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
 1665             fp != NULL; fp = nextfp) {
 1666                 nextfp = LIST_NEXT(fp, f_list);
 1667                 FILE_LOCK(fp);
 1668                 /*
 1669                  * If it's not open, skip it
 1670                  */
 1671                 if (fp->f_count == 0) {
 1672                         FILE_UNLOCK(fp);
 1673                         continue;
 1674                 }
 1675                 /*
 1676                  * If all refs are from msgs, and it's not marked accessible
 1677                  * then it must be referenced from some unreachable cycle
 1678                  * of (shut-down) FDs, so include it in our
 1679                  * list of FDs to remove
 1680                  */
 1681                 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) {
 1682                         *fpp++ = fp;
 1683                         nunref++;
 1684                         fp->f_count++;
 1685                 }
 1686                 FILE_UNLOCK(fp);
 1687         }
 1688         sx_sunlock(&filelist_lock);
 1689         /*
 1690          * for each FD on our hit list, do the following two things
 1691          */
 1692         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 1693                 struct file *tfp = *fpp;
 1694                 FILE_LOCK(tfp);
 1695                 if (tfp->f_type == DTYPE_SOCKET &&
 1696                     tfp->f_data != NULL) {
 1697                         FILE_UNLOCK(tfp);
 1698                         sorflush(tfp->f_data);
 1699                 } else {
 1700                         FILE_UNLOCK(tfp);
 1701                 }
 1702         }
 1703         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
 1704                 closef(*fpp, (struct thread *) NULL);
 1705         free(extra_ref, M_TEMP);
 1706         unp_gcing = 0;
 1707 
 1708         UNP_UNLOCK_ASSERT();
 1709 }
 1710 
 1711 void
 1712 unp_dispose(m)
 1713         struct mbuf *m;
 1714 {
 1715 
 1716         if (m)
 1717                 unp_scan(m, unp_discard);
 1718 }
 1719 
 1720 static int
 1721 unp_listen(unp, td)
 1722         struct unpcb *unp;
 1723         struct thread *td;
 1724 {
 1725         UNP_LOCK_ASSERT();
 1726 
 1727         /*
 1728          * XXXRW: Why populate the local peer cred with our own credential?
 1729          */
 1730         cru2x(td->td_ucred, &unp->unp_peercred);
 1731         unp->unp_flags |= UNP_HAVEPCCACHED;
 1732         return (0);
 1733 }
 1734 
 1735 static void
 1736 unp_scan(m0, op)
 1737         register struct mbuf *m0;
 1738         void (*op)(struct file *);
 1739 {
 1740         struct mbuf *m;
 1741         struct file **rp;
 1742         struct cmsghdr *cm;
 1743         void *data;
 1744         int i;
 1745         socklen_t clen, datalen;
 1746         int qfds;
 1747 
 1748         while (m0 != NULL) {
 1749                 for (m = m0; m; m = m->m_next) {
 1750                         if (m->m_type != MT_CONTROL)
 1751                                 continue;
 1752 
 1753                         cm = mtod(m, struct cmsghdr *);
 1754                         clen = m->m_len;
 1755 
 1756                         while (cm != NULL) {
 1757                                 if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 1758                                         break;
 1759 
 1760                                 data = CMSG_DATA(cm);
 1761                                 datalen = (caddr_t)cm + cm->cmsg_len
 1762                                     - (caddr_t)data;
 1763 
 1764                                 if (cm->cmsg_level == SOL_SOCKET &&
 1765                                     cm->cmsg_type == SCM_RIGHTS) {
 1766                                         qfds = datalen / sizeof (struct file *);
 1767                                         rp = data;
 1768                                         for (i = 0; i < qfds; i++)
 1769                                                 (*op)(*rp++);
 1770                                 }
 1771 
 1772                                 if (CMSG_SPACE(datalen) < clen) {
 1773                                         clen -= CMSG_SPACE(datalen);
 1774                                         cm = (struct cmsghdr *)
 1775                                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1776                                 } else {
 1777                                         clen = 0;
 1778                                         cm = NULL;
 1779                                 }
 1780                         }
 1781                 }
 1782                 m0 = m0->m_act;
 1783         }
 1784 }
 1785 
 1786 static void
 1787 unp_mark(fp)
 1788         struct file *fp;
 1789 {
 1790         if (fp->f_gcflag & FMARK)
 1791                 return;
 1792         unp_defer++;
 1793         fp->f_gcflag |= (FMARK|FDEFER);
 1794 }
 1795 
 1796 static void
 1797 unp_discard(fp)
 1798         struct file *fp;
 1799 {
 1800         FILE_LOCK(fp);
 1801         fp->f_msgcount--;
 1802         unp_rights--;
 1803         FILE_UNLOCK(fp);
 1804         (void) closef(fp, (struct thread *)NULL);
 1805 }

Cache object: 0b0b3b54af566f355d856ec73be605de


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.