FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c


    1 /*
    2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   30  * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $
   31  */
   32 
   33 #include <sys/param.h>
   34 #include <sys/systm.h>
   35 #include <sys/kernel.h>
   36 #include <sys/domain.h>
   37 #include <sys/fcntl.h>
   38 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
   39 #include <sys/proc.h>
   40 #include <sys/file.h>
   41 #include <sys/filedesc.h>
   42 #include <sys/mbuf.h>
   43 #include <sys/nlookup.h>
   44 #include <sys/protosw.h>
   45 #include <sys/socket.h>
   46 #include <sys/socketvar.h>
   47 #include <sys/resourcevar.h>
   48 #include <sys/stat.h>
   49 #include <sys/mount.h>
   50 #include <sys/sysctl.h>
   51 #include <sys/un.h>
   52 #include <sys/unpcb.h>
   53 #include <sys/vnode.h>
   54 
   55 #include <sys/file2.h>
   56 #include <sys/spinlock2.h>
   57 #include <sys/socketvar2.h>
   58 #include <sys/msgport2.h>
   59 
   60 typedef struct unp_defdiscard {
   61         struct unp_defdiscard *next;
   62         struct file *fp;
   63 } *unp_defdiscard_t;
   64 
   65 static  MALLOC_DEFINE(M_UNPCB, "unpcb", "unpcb struct");
   66 static  unp_gen_t unp_gencnt;
   67 static  u_int unp_count;
   68 
   69 static  struct unp_head unp_shead, unp_dhead;
   70 
   71 static struct lwkt_token unp_token = LWKT_TOKEN_INITIALIZER(unp_token);
   72 static int unp_defdiscard_nest;
   73 static unp_defdiscard_t unp_defdiscard_base;
   74 
   75 /*
   76  * Unix communications domain.
   77  *
   78  * TODO:
   79  *      RDM
   80  *      rethink name space problems
   81  *      need a proper out-of-band
   82  *      lock pushdown
   83  */
   84 static struct   sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
   85 static ino_t    unp_ino = 1;            /* prototype for fake inode numbers */
   86 static struct spinlock unp_ino_spin = SPINLOCK_INITIALIZER(&unp_ino_spin);
   87 
   88 static int     unp_attach (struct socket *, struct pru_attach_info *);
   89 static void    unp_detach (struct unpcb *);
   90 static int     unp_bind (struct unpcb *,struct sockaddr *, struct thread *);
   91 static int     unp_connect (struct socket *,struct sockaddr *,
   92                                 struct thread *);
   93 static void    unp_disconnect (struct unpcb *);
   94 static void    unp_shutdown (struct unpcb *);
   95 static void    unp_drop (struct unpcb *, int);
   96 static void    unp_gc (void);
   97 static int     unp_gc_clearmarks(struct file *, void *);
   98 static int     unp_gc_checkmarks(struct file *, void *);
   99 static int     unp_gc_checkrefs(struct file *, void *);
  100 static int     unp_revoke_gc_check(struct file *, void *);
  101 static void    unp_scan (struct mbuf *, void (*)(struct file *, void *),
  102                                 void *data);
  103 static void    unp_mark (struct file *, void *data);
  104 static void    unp_discard (struct file *, void *);
  105 static int     unp_internalize (struct mbuf *, struct thread *);
  106 static int     unp_listen (struct unpcb *, struct thread *);
  107 static void    unp_fp_externalize(struct lwp *lp, struct file *fp, int fd);
  108 
  109 /*
  110  * SMP Considerations:
  111  *
   112  *      Since unp_token will be automatically released upon execution of
   113  *      blocking code, we need to reference unp_conn before any possible
   114  *      blocking code to prevent it from being ripped out from under us.
  115  *
  116  *      Any adjustment to unp->unp_conn requires both the global unp_token
  117  *      AND the per-unp token (lwkt_token_pool_lookup(unp)) to be held.
  118  *
  119  *      Any access to so_pcb to obtain unp requires the pool token for
  120  *      unp to be held.
  121  */
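/*
 * Illustrative sketch (not part of the original source): the rules above
 * imply the "re-check so_pcb after acquiring the pool token" pattern that
 * the uipc_*() handlers below open-code.  A hypothetical helper capturing
 * that pattern might look like this:
 *
 *      static __inline struct unpcb *
 *      unp_pcb_get_token(struct socket *so)
 *      {
 *              struct unpcb *unp;
 *
 *              while ((unp = so->so_pcb) != NULL) {
 *                      lwkt_getpooltoken(unp);
 *                      if (unp == so->so_pcb)
 *                              break;          /. still valid, token held ./
 *                      lwkt_relpooltoken(unp); /. lost a race, retry ./
 *              }
 *              return (unp);   /. NULL, or unp with its pool token held ./
 *      }
 */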
  122 
  123 /* NOTE: unp_token MUST be held */
  124 static __inline void
  125 unp_reference(struct unpcb *unp)
  126 {
  127         atomic_add_int(&unp->unp_refcnt, 1);
  128 }
  129 
  130 /* NOTE: unp_token MUST be held */
  131 static __inline void
  132 unp_free(struct unpcb *unp)
  133 {
  134         KKASSERT(unp->unp_refcnt > 0);
  135         if (atomic_fetchadd_int(&unp->unp_refcnt, -1) == 1)
  136                 unp_detach(unp);
  137 }
  138 
  139 /*
  140  * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
  141  *       will sofree() it when we return.
  142  */
  143 static void
  144 uipc_abort(netmsg_t msg)
  145 {
  146         struct unpcb *unp;
  147         int error;
  148 
  149         lwkt_gettoken(&unp_token);
  150         unp = msg->base.nm_so->so_pcb;
  151         if (unp) {
  152                 unp_drop(unp, ECONNABORTED);
  153                 unp_free(unp);
  154                 error = 0;
  155         } else {
  156                 error = EINVAL;
  157         }
  158         lwkt_reltoken(&unp_token);
  159 
  160         lwkt_replymsg(&msg->lmsg, error);
  161 }
  162 
  163 static void
  164 uipc_accept(netmsg_t msg)
  165 {
  166         struct unpcb *unp;
  167         int error;
  168 
  169         lwkt_gettoken(&unp_token);
  170         unp = msg->base.nm_so->so_pcb;
  171         if (unp == NULL) {
  172                 error = EINVAL;
  173         } else {
  174                 struct unpcb *unp2 = unp->unp_conn;
  175 
  176                 /*
  177                  * Pass back name of connected socket,
  178                  * if it was bound and we are still connected
  179                  * (our peer may have closed already!).
  180                  */
  181                 if (unp2 && unp2->unp_addr) {
  182                         unp_reference(unp2);
  183                         *msg->accept.nm_nam = dup_sockaddr(
  184                                 (struct sockaddr *)unp2->unp_addr);
  185                         unp_free(unp2);
  186                 } else {
  187                         *msg->accept.nm_nam = dup_sockaddr(&sun_noname);
  188                 }
  189                 error = 0;
  190         }
  191         lwkt_reltoken(&unp_token);
  192         lwkt_replymsg(&msg->lmsg, error);
  193 }
  194 
  195 static void
  196 uipc_attach(netmsg_t msg)
  197 {
  198         struct unpcb *unp;
  199         int error;
  200 
  201         lwkt_gettoken(&unp_token);
  202         unp = msg->base.nm_so->so_pcb;
  203         if (unp)
  204                 error = EISCONN;
  205         else
  206                 error = unp_attach(msg->base.nm_so, msg->attach.nm_ai);
  207         lwkt_reltoken(&unp_token);
  208         lwkt_replymsg(&msg->lmsg, error);
  209 }
  210 
  211 static void
  212 uipc_bind(netmsg_t msg)
  213 {
  214         struct unpcb *unp;
  215         int error;
  216 
  217         lwkt_gettoken(&unp_token);
  218         unp = msg->base.nm_so->so_pcb;
  219         if (unp)
  220                 error = unp_bind(unp, msg->bind.nm_nam, msg->bind.nm_td);
  221         else
  222                 error = EINVAL;
  223         lwkt_reltoken(&unp_token);
  224         lwkt_replymsg(&msg->lmsg, error);
  225 }
  226 
  227 static void
  228 uipc_connect(netmsg_t msg)
  229 {
  230         struct unpcb *unp;
  231         int error;
  232 
  233         unp = msg->base.nm_so->so_pcb;
  234         if (unp) {
  235                 error = unp_connect(msg->base.nm_so,
  236                                     msg->connect.nm_nam,
  237                                     msg->connect.nm_td);
  238         } else {
  239                 error = EINVAL;
  240         }
  241         lwkt_replymsg(&msg->lmsg, error);
  242 }
  243 
  244 static void
  245 uipc_connect2(netmsg_t msg)
  246 {
  247         struct unpcb *unp;
  248         int error;
  249 
  250         unp = msg->connect2.nm_so1->so_pcb;
  251         if (unp) {
  252                 error = unp_connect2(msg->connect2.nm_so1,
  253                                      msg->connect2.nm_so2);
  254         } else {
  255                 error = EINVAL;
  256         }
  257         lwkt_replymsg(&msg->lmsg, error);
  258 }
  259 
  260 /* control is EOPNOTSUPP */
  261 
  262 static void
  263 uipc_detach(netmsg_t msg)
  264 {
  265         struct unpcb *unp;
  266         int error;
  267 
  268         lwkt_gettoken(&unp_token);
  269         unp = msg->base.nm_so->so_pcb;
  270         if (unp) {
  271                 unp_free(unp);
  272                 error = 0;
  273         } else {
  274                 error = EINVAL;
  275         }
  276         lwkt_reltoken(&unp_token);
  277         lwkt_replymsg(&msg->lmsg, error);
  278 }
  279 
  280 static void
  281 uipc_disconnect(netmsg_t msg)
  282 {
  283         struct unpcb *unp;
  284         int error;
  285 
  286         lwkt_gettoken(&unp_token);
  287         unp = msg->base.nm_so->so_pcb;
  288         if (unp) {
  289                 unp_disconnect(unp);
  290                 error = 0;
  291         } else {
  292                 error = EINVAL;
  293         }
  294         lwkt_reltoken(&unp_token);
  295         lwkt_replymsg(&msg->lmsg, error);
  296 }
  297 
  298 static void
  299 uipc_listen(netmsg_t msg)
  300 {
  301         struct unpcb *unp;
  302         int error;
  303 
  304         lwkt_gettoken(&unp_token);
  305         unp = msg->base.nm_so->so_pcb;
  306         if (unp == NULL || unp->unp_vnode == NULL)
  307                 error = EINVAL;
  308         else
  309                 error = unp_listen(unp, msg->listen.nm_td);
  310         lwkt_reltoken(&unp_token);
  311         lwkt_replymsg(&msg->lmsg, error);
  312 }
  313 
  314 static void
  315 uipc_peeraddr(netmsg_t msg)
  316 {
  317         struct unpcb *unp;
  318         int error;
  319 
  320         lwkt_gettoken(&unp_token);
  321         unp = msg->base.nm_so->so_pcb;
  322         if (unp == NULL) {
  323                 error = EINVAL;
  324         } else if (unp->unp_conn && unp->unp_conn->unp_addr) {
  325                 struct unpcb *unp2 = unp->unp_conn;
  326 
  327                 unp_reference(unp2);
  328                 *msg->peeraddr.nm_nam = dup_sockaddr(
  329                                 (struct sockaddr *)unp2->unp_addr);
  330                 unp_free(unp2);
  331                 error = 0;
  332         } else {
  333                 /*
   334                  * XXX: It seems that this test always fails even when a
   335                  * connection is established.  So, this else clause is
   336                  * added as a workaround to return a PF_LOCAL sockaddr.
  337                  */
  338                 *msg->peeraddr.nm_nam = dup_sockaddr(&sun_noname);
  339                 error = 0;
  340         }
  341         lwkt_reltoken(&unp_token);
  342         lwkt_replymsg(&msg->lmsg, error);
  343 }
  344 
  345 static void
  346 uipc_rcvd(netmsg_t msg)
  347 {
  348         struct unpcb *unp, *unp2;
  349         struct socket *so;
  350         struct socket *so2;
  351         int error;
  352 
  353         /*
  354          * so_pcb is only modified with both the global and the unp
  355          * pool token held.  The unp pointer is invalid until we verify
  356          * that it is good by re-checking so_pcb AFTER obtaining the token.
  357          */
  358         so = msg->base.nm_so;
  359         while ((unp = so->so_pcb) != NULL) {
  360                 lwkt_getpooltoken(unp);
  361                 if (unp == so->so_pcb)
  362                         break;
  363                 lwkt_relpooltoken(unp);
  364         }
  365         if (unp == NULL) {
  366                 error = EINVAL;
  367                 goto done;
  368         }
  369         /* pool token held */
  370 
  371         switch (so->so_type) {
  372         case SOCK_DGRAM:
  373                 panic("uipc_rcvd DGRAM?");
  374                 /*NOTREACHED*/
  375         case SOCK_STREAM:
  376         case SOCK_SEQPACKET:
  377                 if (unp->unp_conn == NULL)
  378                         break;
  379                 unp2 = unp->unp_conn;   /* protected by pool token */
  380 
  381                 /*
   382                  * Because we are transferring mbufs directly to the
   383                  * peer socket we have to use SSB_STOP on the sender
   384                  * to prevent it from queueing an unbounded number of mbufs.
   385                  *
   386                  * As in several places in this module we have to ref unp2
  387                  * to ensure that it does not get ripped out from under us
  388                  * if we block on the so2 token or in sowwakeup().
  389                  */
  390                 so2 = unp2->unp_socket;
  391                 unp_reference(unp2);
  392                 lwkt_gettoken(&so2->so_rcv.ssb_token);
  393                 if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat &&
  394                     so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax
  395                 ) {
  396                         atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP);
  397 
  398                         sowwakeup(so2);
  399                 }
  400                 lwkt_reltoken(&so2->so_rcv.ssb_token);
  401                 unp_free(unp2);
  402                 break;
  403         default:
  404                 panic("uipc_rcvd unknown socktype");
  405                 /*NOTREACHED*/
  406         }
  407         error = 0;
  408         lwkt_relpooltoken(unp);
  409 done:
  410         lwkt_replymsg(&msg->lmsg, error);
  411 }
  412 
  413 /* pru_rcvoob is EOPNOTSUPP */
  414 
  415 static void
  416 uipc_send(netmsg_t msg)
  417 {
  418         struct unpcb *unp, *unp2;
  419         struct socket *so;
  420         struct socket *so2;
  421         struct mbuf *control;
  422         struct mbuf *m;
  423         int error = 0;
  424 
  425         so = msg->base.nm_so;
  426         control = msg->send.nm_control;
  427         m = msg->send.nm_m;
  428 
  429         /*
  430          * so_pcb is only modified with both the global and the unp
  431          * pool token held.  The unp pointer is invalid until we verify
  432          * that it is good by re-checking so_pcb AFTER obtaining the token.
  433          */
  434         so = msg->base.nm_so;
  435         while ((unp = so->so_pcb) != NULL) {
  436                 lwkt_getpooltoken(unp);
  437                 if (unp == so->so_pcb)
  438                         break;
  439                 lwkt_relpooltoken(unp);
  440         }
  441         if (unp == NULL) {
  442                 error = EINVAL;
  443                 goto done;
  444         }
  445         /* pool token held */
  446 
  447         if (msg->send.nm_flags & PRUS_OOB) {
  448                 error = EOPNOTSUPP;
  449                 goto release;
  450         }
  451 
  452         wakeup_start_delayed();
  453 
  454         if (control && (error = unp_internalize(control, msg->send.nm_td)))
  455                 goto release;
  456 
  457         switch (so->so_type) {
  458         case SOCK_DGRAM: 
  459         {
  460                 struct sockaddr *from;
  461 
  462                 if (msg->send.nm_addr) {
  463                         if (unp->unp_conn) {
  464                                 error = EISCONN;
  465                                 break;
  466                         }
  467                         error = unp_connect(so,
  468                                             msg->send.nm_addr,
  469                                             msg->send.nm_td);
  470                         if (error)
  471                                 break;
  472                 } else {
  473                         if (unp->unp_conn == NULL) {
  474                                 error = ENOTCONN;
  475                                 break;
  476                         }
  477                 }
  478                 unp2 = unp->unp_conn;
  479                 so2 = unp2->unp_socket;
  480                 if (unp->unp_addr)
  481                         from = (struct sockaddr *)unp->unp_addr;
  482                 else
  483                         from = &sun_noname;
  484 
  485                 unp_reference(unp2);
  486 
  487                 lwkt_gettoken(&so2->so_rcv.ssb_token);
  488                 if (ssb_appendaddr(&so2->so_rcv, from, m, control)) {
  489                         sorwakeup(so2);
  490                         m = NULL;
  491                         control = NULL;
  492                 } else {
  493                         error = ENOBUFS;
  494                 }
  495                 if (msg->send.nm_addr)
  496                         unp_disconnect(unp);
  497                 lwkt_reltoken(&so2->so_rcv.ssb_token);
  498 
  499                 unp_free(unp2);
  500                 break;
  501         }
  502 
  503         case SOCK_STREAM:
  504         case SOCK_SEQPACKET:
  505                 /* Connect if not connected yet. */
  506                 /*
  507                  * Note: A better implementation would complain
  508                  * if not equal to the peer's address.
  509                  */
  510                 if (!(so->so_state & SS_ISCONNECTED)) {
  511                         if (msg->send.nm_addr) {
  512                                 error = unp_connect(so,
  513                                                     msg->send.nm_addr,
  514                                                     msg->send.nm_td);
  515                                 if (error)
  516                                         break;  /* XXX */
  517                         } else {
  518                                 error = ENOTCONN;
  519                                 break;
  520                         }
  521                 }
  522 
  523                 if (so->so_state & SS_CANTSENDMORE) {
  524                         error = EPIPE;
  525                         break;
  526                 }
  527                 if (unp->unp_conn == NULL)
  528                         panic("uipc_send connected but no connection?");
  529                 unp2 = unp->unp_conn;
  530                 so2 = unp2->unp_socket;
  531 
  532                 unp_reference(unp2);
  533 
  534                 /*
  535                  * Send to paired receive port, and then reduce
  536                  * send buffer hiwater marks to maintain backpressure.
  537                  * Wake up readers.
  538                  */
  539                 lwkt_gettoken(&so2->so_rcv.ssb_token);
  540                 if (control) {
  541                         if (ssb_appendcontrol(&so2->so_rcv, m, control)) {
  542                                 control = NULL;
  543                                 m = NULL;
  544                         }
  545                 } else if (so->so_type == SOCK_SEQPACKET) {
  546                         sbappendrecord(&so2->so_rcv.sb, m);
  547                         m = NULL;
  548                 } else {
  549                         sbappend(&so2->so_rcv.sb, m);
  550                         m = NULL;
  551                 }
  552 
  553                 /*
   554                  * Because we are transferring mbufs directly to the
   555                  * peer socket we have to use SSB_STOP on the sender
   556                  * to prevent it from queueing an unbounded number of mbufs.
  557                  */
  558                 if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat ||
  559                     so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax
  560                 ) {
  561                         atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP);
  562                 }
  563                 lwkt_reltoken(&so2->so_rcv.ssb_token);
  564                 sorwakeup(so2);
  565 
  566                 unp_free(unp2);
  567                 break;
  568 
  569         default:
  570                 panic("uipc_send unknown socktype");
  571         }
  572 
  573         /*
  574          * SEND_EOF is equivalent to a SEND followed by a SHUTDOWN.
  575          */
  576         if (msg->send.nm_flags & PRUS_EOF) {
  577                 socantsendmore(so);
  578                 unp_shutdown(unp);
  579         }
  580 
  581         if (control && error != 0)
  582                 unp_dispose(control);
  583 release:
  584         lwkt_relpooltoken(unp);
  585         wakeup_end_delayed();
  586 done:
  587 
  588         if (control)
  589                 m_freem(control);
  590         if (m)
  591                 m_freem(m);
  592         lwkt_replymsg(&msg->lmsg, error);
  593 }
  594 
  595 /*
  596  * MPSAFE
  597  */
  598 static void
  599 uipc_sense(netmsg_t msg)
  600 {
  601         struct unpcb *unp;
  602         struct socket *so;
  603         struct stat *sb;
  604         int error;
  605 
  606         so = msg->base.nm_so;
  607         sb = msg->sense.nm_stat;
  608 
  609         /*
  610          * so_pcb is only modified with both the global and the unp
  611          * pool token held.  The unp pointer is invalid until we verify
  612          * that it is good by re-checking so_pcb AFTER obtaining the token.
  613          */
  614         while ((unp = so->so_pcb) != NULL) {
  615                 lwkt_getpooltoken(unp);
  616                 if (unp == so->so_pcb)
  617                         break;
  618                 lwkt_relpooltoken(unp);
  619         }
  620         if (unp == NULL) {
  621                 error = EINVAL;
  622                 goto done;
  623         }
  624         /* pool token held */
  625 
  626         sb->st_blksize = so->so_snd.ssb_hiwat;
  627         sb->st_dev = NOUDEV;
  628         if (unp->unp_ino == 0) {        /* make up a non-zero inode number */
  629                 spin_lock(&unp_ino_spin);
  630                 unp->unp_ino = unp_ino++;
  631                 spin_unlock(&unp_ino_spin);
  632         }
  633         sb->st_ino = unp->unp_ino;
  634         error = 0;
  635         lwkt_relpooltoken(unp);
  636 done:
  637         lwkt_replymsg(&msg->lmsg, error);
  638 }
  639 
  640 static void
  641 uipc_shutdown(netmsg_t msg)
  642 {
  643         struct socket *so;
  644         struct unpcb *unp;
  645         int error;
  646 
  647         /*
  648          * so_pcb is only modified with both the global and the unp
  649          * pool token held.  The unp pointer is invalid until we verify
  650          * that it is good by re-checking so_pcb AFTER obtaining the token.
  651          */
  652         so = msg->base.nm_so;
  653         while ((unp = so->so_pcb) != NULL) {
  654                 lwkt_getpooltoken(unp);
  655                 if (unp == so->so_pcb)
  656                         break;
  657                 lwkt_relpooltoken(unp);
  658         }
  659         if (unp) {
  660                 /* pool token held */
  661                 socantsendmore(so);
  662                 unp_shutdown(unp);
  663                 lwkt_relpooltoken(unp);
  664                 error = 0;
  665         } else {
  666                 error = EINVAL;
  667         }
  668         lwkt_replymsg(&msg->lmsg, error);
  669 }
  670 
  671 static void
  672 uipc_sockaddr(netmsg_t msg)
  673 {
  674         struct socket *so;
  675         struct unpcb *unp;
  676         int error;
  677 
  678         /*
  679          * so_pcb is only modified with both the global and the unp
  680          * pool token held.  The unp pointer is invalid until we verify
  681          * that it is good by re-checking so_pcb AFTER obtaining the token.
  682          */
  683         so = msg->base.nm_so;
  684         while ((unp = so->so_pcb) != NULL) {
  685                 lwkt_getpooltoken(unp);
  686                 if (unp == so->so_pcb)
  687                         break;
  688                 lwkt_relpooltoken(unp);
  689         }
  690         if (unp) {
  691                 /* pool token held */
  692                 if (unp->unp_addr) {
  693                         *msg->sockaddr.nm_nam =
  694                                 dup_sockaddr((struct sockaddr *)unp->unp_addr);
  695                 }
  696                 lwkt_relpooltoken(unp);
  697                 error = 0;
  698         } else {
  699                 error = EINVAL;
  700         }
  701         lwkt_replymsg(&msg->lmsg, error);
  702 }
  703 
  704 struct pr_usrreqs uipc_usrreqs = {
  705         .pru_abort = uipc_abort,
  706         .pru_accept = uipc_accept,
  707         .pru_attach = uipc_attach,
  708         .pru_bind = uipc_bind,
  709         .pru_connect = uipc_connect,
  710         .pru_connect2 = uipc_connect2,
  711         .pru_control = pr_generic_notsupp,
  712         .pru_detach = uipc_detach,
  713         .pru_disconnect = uipc_disconnect,
  714         .pru_listen = uipc_listen,
  715         .pru_peeraddr = uipc_peeraddr,
  716         .pru_rcvd = uipc_rcvd,
  717         .pru_rcvoob = pr_generic_notsupp,
  718         .pru_send = uipc_send,
  719         .pru_sense = uipc_sense,
  720         .pru_shutdown = uipc_shutdown,
  721         .pru_sockaddr = uipc_sockaddr,
  722         .pru_sosend = sosend,
  723         .pru_soreceive = soreceive
  724 };
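/*
 * Illustrative note (not part of the original source): the handlers above
 * are what the generic socket layer dispatches to for AF_LOCAL sockets.
 * A minimal user-space sketch, assuming a hypothetical path
 * "/tmp/example.sock", showing roughly which handler each call reaches:
 *
 *      int s = socket(AF_LOCAL, SOCK_STREAM, 0);       // uipc_attach
 *      struct sockaddr_un sun;
 *      memset(&sun, 0, sizeof(sun));
 *      sun.sun_family = AF_LOCAL;
 *      strncpy(sun.sun_path, "/tmp/example.sock", sizeof(sun.sun_path) - 1);
 *      bind(s, (struct sockaddr *)&sun, sizeof(sun));  // uipc_bind -> unp_bind
 *      listen(s, 5);                                   // uipc_listen -> unp_listen
 *      int c = accept(s, NULL, NULL);                  // uipc_accept (peer name)
 *
 * A client's connect(2) on such a path reaches uipc_connect -> unp_connect,
 * and socketpair(2) reaches uipc_connect2 -> unp_connect2.
 */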
  725 
  726 void
  727 uipc_ctloutput(netmsg_t msg)
  728 {
  729         struct socket *so;
  730         struct sockopt *sopt;
  731         struct unpcb *unp;
  732         int error = 0;
  733 
  734         lwkt_gettoken(&unp_token);
  735         so = msg->base.nm_so;
  736         sopt = msg->ctloutput.nm_sopt;
  737         unp = so->so_pcb;
  738 
  739         switch (sopt->sopt_dir) {
  740         case SOPT_GET:
  741                 switch (sopt->sopt_name) {
  742                 case LOCAL_PEERCRED:
  743                         if (unp->unp_flags & UNP_HAVEPC)
  744                                 soopt_from_kbuf(sopt, &unp->unp_peercred,
  745                                                 sizeof(unp->unp_peercred));
  746                         else {
  747                                 if (so->so_type == SOCK_STREAM)
  748                                         error = ENOTCONN;
  749                                 else if (so->so_type == SOCK_SEQPACKET)
  750                                         error = ENOTCONN;
  751                                 else
  752                                         error = EINVAL;
  753                         }
  754                         break;
  755                 default:
  756                         error = EOPNOTSUPP;
  757                         break;
  758                 }
  759                 break;
  760         case SOPT_SET:
  761         default:
  762                 error = EOPNOTSUPP;
  763                 break;
  764         }
  765         lwkt_reltoken(&unp_token);
  766         lwkt_replymsg(&msg->lmsg, error);
  767 }
  768         
  769 /*
  770  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
  771  * for stream sockets, although the total for sender and receiver is
  772  * actually only PIPSIZ.
  773  *
  774  * Datagram sockets really use the sendspace as the maximum datagram size,
  775  * and don't really want to reserve the sendspace.  Their recvspace should
  776  * be large enough for at least one max-size datagram plus address.
  777  *
   778  * We want the local send/recv space to be significantly larger than lo0's
   779  * MTU of 16384.
  780  */
  781 #ifndef PIPSIZ
  782 #define PIPSIZ  57344
  783 #endif
  784 static u_long   unpst_sendspace = PIPSIZ;
  785 static u_long   unpst_recvspace = PIPSIZ;
  786 static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
  787 static u_long   unpdg_recvspace = 4*1024;
  788 
  789 static int      unp_rights;                     /* file descriptors in flight */
  790 static struct spinlock unp_spin = SPINLOCK_INITIALIZER(&unp_spin);
  791 
  792 SYSCTL_DECL(_net_local_seqpacket);
  793 SYSCTL_DECL(_net_local_stream);
  794 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 
  795     &unpst_sendspace, 0, "Size of stream socket send buffer");
  796 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
  797     &unpst_recvspace, 0, "Size of stream socket receive buffer");
  798 
  799 SYSCTL_DECL(_net_local_dgram);
  800 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
  801     &unpdg_sendspace, 0, "Max datagram socket size");
  802 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
  803     &unpdg_recvspace, 0, "Size of datagram socket receive buffer");
  804 
  805 SYSCTL_DECL(_net_local);
  806 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
  807    "File descriptors in flight");
  808 
  809 static int
  810 unp_attach(struct socket *so, struct pru_attach_info *ai)
  811 {
  812         struct unpcb *unp;
  813         int error;
  814 
  815         lwkt_gettoken(&unp_token);
  816 
  817         if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) {
  818                 switch (so->so_type) {
  819 
  820                 case SOCK_STREAM:
  821                 case SOCK_SEQPACKET:
  822                         error = soreserve(so, unpst_sendspace, unpst_recvspace,
  823                                           ai->sb_rlimit);
  824                         break;
  825 
  826                 case SOCK_DGRAM:
  827                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace,
  828                                           ai->sb_rlimit);
  829                         break;
  830 
  831                 default:
  832                         panic("unp_attach");
  833                 }
  834                 if (error)
  835                         goto failed;
  836         }
  837         unp = kmalloc(sizeof(*unp), M_UNPCB, M_WAITOK | M_ZERO | M_NULLOK);
  838         if (unp == NULL) {
  839                 error = ENOBUFS;
  840                 goto failed;
  841         }
  842         unp->unp_refcnt = 1;
  843         unp->unp_gencnt = ++unp_gencnt;
  844         unp_count++;
  845         LIST_INIT(&unp->unp_refs);
  846         unp->unp_socket = so;
  847         unp->unp_rvnode = ai->fd_rdir;          /* jail cruft XXX JH */
  848         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
  849                          : &unp_shead, unp, unp_link);
  850         so->so_pcb = (caddr_t)unp;
  851         soreference(so);
  852         error = 0;
  853 failed:
  854         lwkt_reltoken(&unp_token);
  855         return error;
  856 }
  857 
  858 static void
  859 unp_detach(struct unpcb *unp)
  860 {
  861         struct socket *so;
  862 
  863         lwkt_gettoken(&unp_token);
  864         lwkt_getpooltoken(unp);
  865 
  866         LIST_REMOVE(unp, unp_link);     /* both tokens required */
  867         unp->unp_gencnt = ++unp_gencnt;
  868         --unp_count;
  869         if (unp->unp_vnode) {
  870                 unp->unp_vnode->v_socket = NULL;
  871                 vrele(unp->unp_vnode);
  872                 unp->unp_vnode = NULL;
  873         }
  874         if (unp->unp_conn)
  875                 unp_disconnect(unp);
  876         while (!LIST_EMPTY(&unp->unp_refs))
  877                 unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET);
  878         soisdisconnected(unp->unp_socket);
  879         so = unp->unp_socket;
  880         soreference(so);                /* for delayed sorflush */
  881         KKASSERT(so->so_pcb == unp);
  882         so->so_pcb = NULL;              /* both tokens required */
  883         unp->unp_socket = NULL;
  884         sofree(so);             /* remove pcb ref */
  885 
  886         if (unp_rights) {
  887                 /*
  888                  * Normally the receive buffer is flushed later,
  889                  * in sofree, but if our receive buffer holds references
  890                  * to descriptors that are now garbage, we will dispose
  891                  * of those descriptor references after the garbage collector
  892                  * gets them (resulting in a "panic: closef: count < 0").
  893                  */
  894                 sorflush(so);
  895                 unp_gc();
  896         }
  897         sofree(so);
  898         lwkt_relpooltoken(unp);
  899         lwkt_reltoken(&unp_token);
  900 
  901         if (unp->unp_addr)
  902                 kfree(unp->unp_addr, M_SONAME);
  903         kfree(unp, M_UNPCB);
  904 }
  905 
  906 static int
  907 unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td)
  908 {
  909         struct proc *p = td->td_proc;
  910         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  911         struct vnode *vp;
  912         struct vattr vattr;
  913         int error, namelen;
  914         struct nlookupdata nd;
  915         char buf[SOCK_MAXADDRLEN];
  916 
  917         lwkt_gettoken(&unp_token);
  918         if (unp->unp_vnode != NULL) {
  919                 error = EINVAL;
  920                 goto failed;
  921         }
  922         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  923         if (namelen <= 0) {
  924                 error = EINVAL;
  925                 goto failed;
  926         }
  927         strncpy(buf, soun->sun_path, namelen);
  928         buf[namelen] = 0;       /* null-terminate the string */
  929         error = nlookup_init(&nd, buf, UIO_SYSSPACE,
  930                              NLC_LOCKVP | NLC_CREATE | NLC_REFDVP);
  931         if (error == 0)
  932                 error = nlookup(&nd);
  933         if (error == 0 && nd.nl_nch.ncp->nc_vp != NULL)
  934                 error = EADDRINUSE;
  935         if (error)
  936                 goto done;
  937 
  938         VATTR_NULL(&vattr);
  939         vattr.va_type = VSOCK;
  940         vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask);
  941         error = VOP_NCREATE(&nd.nl_nch, nd.nl_dvp, &vp, nd.nl_cred, &vattr);
  942         if (error == 0) {
  943                 if (unp->unp_vnode == NULL) {
  944                         vp->v_socket = unp->unp_socket;
  945                         unp->unp_vnode = vp;
  946                         unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam);
  947                         vn_unlock(vp);
  948                 } else {
  949                         vput(vp);               /* late race */
  950                         error = EINVAL;
  951                 }
  952         }
  953 done:
  954         nlookup_done(&nd);
  955 failed:
  956         lwkt_reltoken(&unp_token);
  957         return (error);
  958 }
  959 
  960 static int
  961 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  962 {
  963         struct proc *p = td->td_proc;
  964         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  965         struct vnode *vp;
  966         struct socket *so2, *so3;
  967         struct unpcb *unp, *unp2, *unp3;
  968         int error, len;
  969         struct nlookupdata nd;
  970         char buf[SOCK_MAXADDRLEN];
  971 
  972         lwkt_gettoken(&unp_token);
  973 
  974         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
  975         if (len <= 0) {
  976                 error = EINVAL;
  977                 goto failed;
  978         }
  979         strncpy(buf, soun->sun_path, len);
  980         buf[len] = 0;
  981 
  982         vp = NULL;
  983         error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_FOLLOW);
  984         if (error == 0)
  985                 error = nlookup(&nd);
  986         if (error == 0)
  987                 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
  988         nlookup_done(&nd);
  989         if (error)
  990                 goto failed;
  991 
  992         if (vp->v_type != VSOCK) {
  993                 error = ENOTSOCK;
  994                 goto bad;
  995         }
  996         error = VOP_EACCESS(vp, VWRITE, p->p_ucred);
  997         if (error)
  998                 goto bad;
  999         so2 = vp->v_socket;
 1000         if (so2 == NULL) {
 1001                 error = ECONNREFUSED;
 1002                 goto bad;
 1003         }
 1004         if (so->so_type != so2->so_type) {
 1005                 error = EPROTOTYPE;
 1006                 goto bad;
 1007         }
 1008         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
 1009                 if (!(so2->so_options & SO_ACCEPTCONN) ||
 1010                     (so3 = sonewconn(so2, 0)) == NULL) {
 1011                         error = ECONNREFUSED;
 1012                         goto bad;
 1013                 }
 1014                 unp = so->so_pcb;
 1015                 if (unp->unp_conn) {    /* race, already connected! */
 1016                         error = EISCONN;
 1017                         sofree(so3);
 1018                         goto bad;
 1019                 }
 1020                 unp2 = so2->so_pcb;
 1021                 unp3 = so3->so_pcb;
 1022                 if (unp2->unp_addr)
 1023                         unp3->unp_addr = (struct sockaddr_un *)
 1024                                 dup_sockaddr((struct sockaddr *)unp2->unp_addr);
 1025 
 1026                 /*
 1027                  * unp_peercred management:
 1028                  *
 1029                  * The connecter's (client's) credentials are copied
 1030                  * from its process structure at the time of connect()
 1031                  * (which is now).
 1032                  */
 1033                 cru2x(p->p_ucred, &unp3->unp_peercred);
 1034                 unp3->unp_flags |= UNP_HAVEPC;
 1035                 /*
 1036                  * The receiver's (server's) credentials are copied
 1037                  * from the unp_peercred member of socket on which the
 1038                  * former called listen(); unp_listen() cached that
 1039                  * process's credentials at that time so we can use
 1040                  * them now.
 1041                  */
 1042                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
 1043                     ("unp_connect: listener without cached peercred"));
 1044                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
 1045                     sizeof(unp->unp_peercred));
 1046                 unp->unp_flags |= UNP_HAVEPC;
 1047 
 1048                 so2 = so3;
 1049         }
 1050         error = unp_connect2(so, so2);
 1051 bad:
 1052         vput(vp);
 1053 failed:
 1054         lwkt_reltoken(&unp_token);
 1055         return (error);
 1056 }
 1057 
 1058 /*
 1059  * Connect two unix domain sockets together.
 1060  *
 1061  * NOTE: Semantics for any change to unp_conn requires that the per-unp
 1062  *       pool token also be held.
 1063  */
 1064 int
 1065 unp_connect2(struct socket *so, struct socket *so2)
 1066 {
 1067         struct unpcb *unp;
 1068         struct unpcb *unp2;
 1069 
 1070         lwkt_gettoken(&unp_token);
 1071         unp = so->so_pcb;
 1072         if (so2->so_type != so->so_type) {
 1073                 lwkt_reltoken(&unp_token);
 1074                 return (EPROTOTYPE);
 1075         }
 1076         unp2 = so2->so_pcb;
 1077         lwkt_getpooltoken(unp);
 1078         lwkt_getpooltoken(unp2);
 1079 
 1080         unp->unp_conn = unp2;
 1081 
 1082         switch (so->so_type) {
 1083         case SOCK_DGRAM:
 1084                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
 1085                 soisconnected(so);
 1086                 break;
 1087 
 1088         case SOCK_STREAM:
 1089         case SOCK_SEQPACKET:
 1090                 unp2->unp_conn = unp;
 1091                 soisconnected(so);
 1092                 soisconnected(so2);
 1093                 break;
 1094 
 1095         default:
 1096                 panic("unp_connect2");
 1097         }
 1098         lwkt_relpooltoken(unp2);
 1099         lwkt_relpooltoken(unp);
 1100         lwkt_reltoken(&unp_token);
 1101         return (0);
 1102 }
 1103 
 1104 /*
 1105  * Disconnect a unix domain socket pair.
 1106  *
 1107  * NOTE: Semantics for any change to unp_conn requires that the per-unp
 1108  *       pool token also be held.
 1109  */
 1110 static void
 1111 unp_disconnect(struct unpcb *unp)
 1112 {
 1113         struct unpcb *unp2;
 1114 
 1115         lwkt_gettoken(&unp_token);
 1116         lwkt_getpooltoken(unp);
 1117 
 1118         while ((unp2 = unp->unp_conn) != NULL) {
 1119                 lwkt_getpooltoken(unp2);
 1120                 if (unp2 == unp->unp_conn)
 1121                         break;
 1122                 lwkt_relpooltoken(unp2);
 1123         }
 1124         if (unp2 == NULL)
 1125                 goto done;
 1126 
 1127         unp->unp_conn = NULL;
 1128 
 1129         switch (unp->unp_socket->so_type) {
 1130         case SOCK_DGRAM:
 1131                 LIST_REMOVE(unp, unp_reflink);
 1132                 soclrstate(unp->unp_socket, SS_ISCONNECTED);
 1133                 break;
 1134 
 1135         case SOCK_STREAM:
 1136         case SOCK_SEQPACKET:
 1137                 unp_reference(unp2);
 1138                 unp2->unp_conn = NULL;
 1139 
 1140                 soisdisconnected(unp->unp_socket);
 1141                 soisdisconnected(unp2->unp_socket);
 1142 
 1143                 unp_free(unp2);
 1144                 break;
 1145         }
 1146         lwkt_relpooltoken(unp2);
 1147 done:
 1148         lwkt_relpooltoken(unp);
 1149         lwkt_reltoken(&unp_token);
 1150 }
 1151 
 1152 #ifdef notdef
 1153 void
 1154 unp_abort(struct unpcb *unp)
 1155 {
 1156         lwkt_gettoken(&unp_token);
 1157         unp_free(unp);
 1158         lwkt_reltoken(&unp_token);
 1159 }
 1160 #endif
 1161 
 1162 static int
 1163 prison_unpcb(struct thread *td, struct unpcb *unp)
 1164 {
 1165         struct proc *p;
 1166 
 1167         if (td == NULL)
 1168                 return (0);
 1169         if ((p = td->td_proc) == NULL)
 1170                 return (0);
 1171         if (!p->p_ucred->cr_prison)
 1172                 return (0);
 1173         if (p->p_fd->fd_rdir == unp->unp_rvnode)
 1174                 return (0);
 1175         return (1);
 1176 }
 1177 
 1178 static int
 1179 unp_pcblist(SYSCTL_HANDLER_ARGS)
 1180 {
 1181         int error, i, n;
 1182         struct unpcb *unp, **unp_list;
 1183         unp_gen_t gencnt;
 1184         struct unp_head *head;
 1185 
 1186         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
 1187 
 1188         KKASSERT(curproc != NULL);
 1189 
 1190         /*
 1191          * The process of preparing the PCB list is too time-consuming and
 1192          * resource-intensive to repeat twice on every request.
 1193          */
 1194         if (req->oldptr == NULL) {
 1195                 n = unp_count;
 1196                 req->oldidx = (n + n/8) * sizeof(struct xunpcb);
 1197                 return 0;
 1198         }
 1199 
 1200         if (req->newptr != NULL)
 1201                 return EPERM;
 1202 
 1203         lwkt_gettoken(&unp_token);
 1204 
 1205         /*
 1206          * OK, now we're committed to doing something.
 1207          */
 1208         gencnt = unp_gencnt;
 1209         n = unp_count;
 1210 
 1211         unp_list = kmalloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 1212         
 1213         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 1214              unp = LIST_NEXT(unp, unp_link)) {
 1215                 if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->td, unp))
 1216                         unp_list[i++] = unp;
 1217         }
 1218         n = i;                  /* in case we lost some during malloc */
 1219 
 1220         error = 0;
 1221         for (i = 0; i < n; i++) {
 1222                 unp = unp_list[i];
 1223                 if (unp->unp_gencnt <= gencnt) {
 1224                         struct xunpcb xu;
 1225                         xu.xu_len = sizeof xu;
 1226                         xu.xu_unpp = unp;
 1227                         /*
 1228                          * XXX - need more locking here to protect against
 1229                          * connect/disconnect races for SMP.
 1230                          */
 1231                         if (unp->unp_addr)
 1232                                 bcopy(unp->unp_addr, &xu.xu_addr, 
 1233                                       unp->unp_addr->sun_len);
 1234                         if (unp->unp_conn && unp->unp_conn->unp_addr)
 1235                                 bcopy(unp->unp_conn->unp_addr,
 1236                                       &xu.xu_caddr,
 1237                                       unp->unp_conn->unp_addr->sun_len);
 1238                         bcopy(unp, &xu.xu_unp, sizeof *unp);
 1239                         sotoxsocket(unp->unp_socket, &xu.xu_socket);
 1240                         error = SYSCTL_OUT(req, &xu, sizeof xu);
 1241                 }
 1242         }
 1243         lwkt_reltoken(&unp_token);
 1244         kfree(unp_list, M_TEMP);
 1245 
 1246         return error;
 1247 }
 1248 
 1249 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 
 1250             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
 1251             "List of active local datagram sockets");
 1252 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 
 1253             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
 1254             "List of active local stream sockets");
 1255 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLFLAG_RD, 
 1256             (caddr_t)(long)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
 1257             "List of active local seqpacket stream sockets");
 1258 
 1259 static void
 1260 unp_shutdown(struct unpcb *unp)
 1261 {
 1262         struct socket *so;
 1263 
 1264         if ((unp->unp_socket->so_type == SOCK_STREAM ||
 1265              unp->unp_socket->so_type == SOCK_SEQPACKET) &&
 1266             unp->unp_conn != NULL && (so = unp->unp_conn->unp_socket)) {
 1267                 socantrcvmore(so);
 1268         }
 1269 }
 1270 
 1271 static void
 1272 unp_drop(struct unpcb *unp, int err)
 1273 {
 1274         struct socket *so = unp->unp_socket;
 1275 
 1276         so->so_error = err;
 1277         unp_disconnect(unp);
 1278 }
 1279 
 1280 #ifdef notdef
 1281 void
 1282 unp_drain(void)
 1283 {
 1284         lwkt_gettoken(&unp_token);
 1285         lwkt_reltoken(&unp_token);
 1286 }
 1287 #endif
 1288 
 1289 int
 1290 unp_externalize(struct mbuf *rights)
 1291 {
 1292         struct thread *td = curthread;
 1293         struct proc *p = td->td_proc;           /* XXX */
 1294         struct lwp *lp = td->td_lwp;
 1295         struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
 1296         int *fdp;
 1297         int i;
 1298         struct file **rp;
 1299         struct file *fp;
 1300         int newfds = (cm->cmsg_len - (CMSG_DATA(cm) - (u_char *)cm))
 1301                 / sizeof (struct file *);
 1302         int f;
 1303 
 1304         lwkt_gettoken(&unp_token);
 1305 
 1306         /*
 1307          * if the new FD's will not fit, then we free them all
 1308          */
 1309         if (!fdavail(p, newfds)) {
 1310                 rp = (struct file **)CMSG_DATA(cm);
 1311                 for (i = 0; i < newfds; i++) {
 1312                         fp = *rp;
 1313                         /*
 1314                          * zero the pointer before calling unp_discard,
 1315                          * since it may end up in unp_gc()..
 1316                          */
 1317                         *rp++ = NULL;
 1318                         unp_discard(fp, NULL);
 1319                 }
 1320                 lwkt_reltoken(&unp_token);
 1321                 return (EMSGSIZE);
 1322         }
 1323 
 1324         /*
 1325          * now change each pointer to an fd in the global table to 
 1326          * an integer that is the index to the local fd table entry
 1327          * that we set up to point to the global one we are transferring.
 1328          * If sizeof (struct file *) is bigger than or equal to sizeof int,
 1329          * then do it in forward order. In that case, an integer will
 1330          * always come in the same place or before its corresponding
 1331          * struct file pointer.
 1332          * If sizeof (struct file *) is smaller than sizeof int, then
 1333          * do it in reverse order.
 1334          */
 1335         if (sizeof (struct file *) >= sizeof (int)) {
 1336                 fdp = (int *)CMSG_DATA(cm);
 1337                 rp = (struct file **)CMSG_DATA(cm);
 1338                 for (i = 0; i < newfds; i++) {
 1339                         if (fdalloc(p, 0, &f))
 1340                                 panic("unp_externalize");
 1341                         fp = *rp++;
 1342                         unp_fp_externalize(lp, fp, f);
 1343                         *fdp++ = f;
 1344                 }
 1345         } else {
 1346                 fdp = (int *)CMSG_DATA(cm) + newfds - 1;
 1347                 rp = (struct file **)CMSG_DATA(cm) + newfds - 1;
 1348                 for (i = 0; i < newfds; i++) {
 1349                         if (fdalloc(p, 0, &f))
 1350                                 panic("unp_externalize");
 1351                         fp = *rp--;
 1352                         unp_fp_externalize(lp, fp, f);
 1353                         *fdp-- = f;
 1354                 }
 1355         }
 1356 
 1357         /*
 1358          * Adjust length, in case sizeof(struct file *) and sizeof(int)
 1359          * differs.
 1360          */
 1361         cm->cmsg_len = CMSG_LEN(newfds * sizeof(int));
 1362         rights->m_len = cm->cmsg_len;
 1363 
 1364         lwkt_reltoken(&unp_token);
 1365         return (0);
 1366 }
 1367 
 1368 static void
 1369 unp_fp_externalize(struct lwp *lp, struct file *fp, int fd)
 1370 {
 1371         struct file *fx;
 1372         int error;
 1373 
 1374         lwkt_gettoken(&unp_token);
 1375 
 1376         if (lp) {
 1377                 KKASSERT(fd >= 0);
 1378                 if (fp->f_flag & FREVOKED) {
 1379                         kprintf("Warning: revoked fp exiting unix socket\n");
 1380                         fx = NULL;
 1381                         error = falloc(lp, &fx, NULL);
 1382                         if (error == 0)
 1383                                 fsetfd(lp->lwp_proc->p_fd, fx, fd);
 1384                         else
 1385                                 fsetfd(lp->lwp_proc->p_fd, NULL, fd);
 1386                         fdrop(fx);
 1387                 } else {
 1388                         fsetfd(lp->lwp_proc->p_fd, fp, fd);
 1389                 }
 1390         }
 1391         spin_lock(&unp_spin);
 1392         fp->f_msgcount--;
 1393         unp_rights--;
 1394         spin_unlock(&unp_spin);
 1395         fdrop(fp);
 1396 
 1397         lwkt_reltoken(&unp_token);
 1398 }
 1399 
 1400 
 1401 void
 1402 unp_init(void)
 1403 {
 1404         LIST_INIT(&unp_dhead);
 1405         LIST_INIT(&unp_shead);
 1406         spin_init(&unp_spin);
 1407 }
 1408 
 1409 static int
 1410 unp_internalize(struct mbuf *control, struct thread *td)
 1411 {
 1412         struct proc *p = td->td_proc;
 1413         struct filedesc *fdescp;
 1414         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1415         struct file **rp;
 1416         struct file *fp;
 1417         int i, fd, *fdp;
 1418         struct cmsgcred *cmcred;
 1419         int oldfds;
 1420         u_int newlen;
 1421         int error;
 1422 
 1423         KKASSERT(p);
 1424         lwkt_gettoken(&unp_token);
 1425 
 1426         fdescp = p->p_fd;
 1427         if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) ||
 1428             cm->cmsg_level != SOL_SOCKET ||
 1429             CMSG_ALIGN(cm->cmsg_len) != control->m_len) {
 1430                 error = EINVAL;
 1431                 goto done;
 1432         }
 1433 
 1434         /*
 1435          * Fill in credential information.
 1436          */
 1437         if (cm->cmsg_type == SCM_CREDS) {
 1438                 cmcred = (struct cmsgcred *)CMSG_DATA(cm);
 1439                 cmcred->cmcred_pid = p->p_pid;
 1440                 cmcred->cmcred_uid = p->p_ucred->cr_ruid;
 1441                 cmcred->cmcred_gid = p->p_ucred->cr_rgid;
 1442                 cmcred->cmcred_euid = p->p_ucred->cr_uid;
 1443                 cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups,
 1444                                                         CMGROUP_MAX);
 1445                 for (i = 0; i < cmcred->cmcred_ngroups; i++)
 1446                         cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i];
 1447                 error = 0;
 1448                 goto done;
 1449         }
 1450 
 1451         /*
 1452          * cmsghdr may not be aligned, do not allow calculation(s) to
 1453          * go negative.
 1454          */
 1455         if (cm->cmsg_len < CMSG_LEN(0)) {
 1456                 error = EINVAL;
 1457                 goto done;
 1458         }
 1459 
 1460         oldfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof (int);
 1461 
 1462         /*
  1463          * Check that all the FDs passed in refer to legal OPEN files.
 1464          * If not, reject the entire operation.
 1465          */
 1466         fdp = (int *)CMSG_DATA(cm);
 1467         for (i = 0; i < oldfds; i++) {
 1468                 fd = *fdp++;
 1469                 if ((unsigned)fd >= fdescp->fd_nfiles ||
 1470                     fdescp->fd_files[fd].fp == NULL) {
 1471                         error = EBADF;
 1472                         goto done;
 1473                 }
 1474                 if (fdescp->fd_files[fd].fp->f_type == DTYPE_KQUEUE) {
 1475                         error = EOPNOTSUPP;
 1476                         goto done;
 1477                 }
 1478         }
 1479         /*
 1480          * Now replace the integer FDs with pointers to
  1481          * the associated global file table entries.
  1482          * Allocate a bigger buffer as necessary.  But if a cluster is not
 1483          * enough, return E2BIG.
 1484          */
 1485         newlen = CMSG_LEN(oldfds * sizeof(struct file *));
 1486         if (newlen > MCLBYTES) {
 1487                 error = E2BIG;
 1488                 goto done;
 1489         }
 1490         if (newlen - control->m_len > M_TRAILINGSPACE(control)) {
 1491                 if (control->m_flags & M_EXT) {
 1492                         error = E2BIG;
 1493                         goto done;
 1494                 }
 1495                 MCLGET(control, MB_WAIT);
 1496                 if (!(control->m_flags & M_EXT)) {
 1497                         error = ENOBUFS;
 1498                         goto done;
 1499                 }
 1500 
 1501                 /* copy the data to the cluster */
 1502                 memcpy(mtod(control, char *), cm, cm->cmsg_len);
 1503                 cm = mtod(control, struct cmsghdr *);
 1504         }
 1505 
 1506         /*
 1507          * Adjust length, in case sizeof(struct file *) and sizeof(int)
 1508          * differ.
 1509          */
 1510         cm->cmsg_len = newlen;
 1511         control->m_len = CMSG_ALIGN(newlen);
 1512 
 1513         /*
 1514          * Transform the file descriptors into struct file pointers.
 1515          * If sizeof (struct file *) is bigger than or equal to sizeof int,
 1516          * then do it in reverse order so that the ints won't get overwritten
 1517          * before we're done.
 1518          * If sizeof (struct file *) is smaller than sizeof int, then
 1519          * do it in forward order.
 1520          */
 1521         if (sizeof (struct file *) >= sizeof (int)) {
 1522                 fdp = (int *)CMSG_DATA(cm) + oldfds - 1;
 1523                 rp = (struct file **)CMSG_DATA(cm) + oldfds - 1;
 1524                 for (i = 0; i < oldfds; i++) {
 1525                         fp = fdescp->fd_files[*fdp--].fp;
 1526                         *rp-- = fp;
 1527                         fhold(fp);
 1528                         spin_lock(&unp_spin);
 1529                         fp->f_msgcount++;
 1530                         unp_rights++;
 1531                         spin_unlock(&unp_spin);
 1532                 }
 1533         } else {
 1534                 fdp = (int *)CMSG_DATA(cm);
 1535                 rp = (struct file **)CMSG_DATA(cm);
 1536                 for (i = 0; i < oldfds; i++) {
 1537                         fp = fdescp->fd_files[*fdp++].fp;
 1538                         *rp++ = fp;
 1539                         fhold(fp);
 1540                         spin_lock(&unp_spin);
 1541                         fp->f_msgcount++;
 1542                         unp_rights++;
 1543                         spin_unlock(&unp_spin);
 1544                 }
 1545         }
 1546         error = 0;
 1547 done:
 1548         lwkt_reltoken(&unp_token);
 1549         return error;
 1550 }
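
For orientation, unp_internalize() above is the kernel half of descriptor passing: the sender's sendmsg(2) supplies integer descriptors in an SCM_RIGHTS control message, and this routine converts them in place into held struct file pointers. A minimal userland sketch of the sending side is shown below; the helper name send_fd() is hypothetical and error handling is abbreviated.

/*
 * Illustrative userland sketch: pass one open descriptor over an
 * AF_LOCAL stream socket with SCM_RIGHTS.  The kernel's
 * unp_internalize() is what turns the int stored at CMSG_DATA()
 * into a held struct file pointer on the sending side.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

int
send_fd(int sock, int fd_to_pass)
{
        struct msghdr msg;
        struct iovec iov;
        union {
                struct cmsghdr hdr;
                char buf[CMSG_SPACE(sizeof(int))];
        } cmsgbuf;
        struct cmsghdr *cm;
        char dummy = 0;

        memset(&msg, 0, sizeof(msg));
        iov.iov_base = &dummy;          /* must send at least one byte */
        iov.iov_len = 1;
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cmsgbuf.buf;
        msg.msg_controllen = sizeof(cmsgbuf.buf);

        cm = CMSG_FIRSTHDR(&msg);
        cm->cmsg_level = SOL_SOCKET;
        cm->cmsg_type = SCM_RIGHTS;
        cm->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cm), &fd_to_pass, sizeof(int));

        return (sendmsg(sock, &msg, 0) < 0 ? -1 : 0);
}
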
 1551 
 1552 /*
 1553  * Garbage collect in-transit file descriptors that get lost due to
 1554  * loops (e.g. when a socket is sent to another process over itself,
 1555  * and more complex situations).
 1556  *
 1557  * NOT MPSAFE - TODO socket flush code and maybe closef.  Rest is MPSAFE.
 1558  */
 1559 
 1560 struct unp_gc_info {
 1561         struct file **extra_ref;
 1562         struct file *locked_fp;
 1563         int defer;
 1564         int index;
 1565         int maxindex;
 1566 };
 1567 
 1568 static void
 1569 unp_gc(void)
 1570 {
 1571         struct unp_gc_info info;
 1572         static boolean_t unp_gcing;
 1573         struct file **fpp;
 1574         int i;
 1575 
 1576         /*
 1577          * Only one gc can be in-progress at any given moment
 1578          */
 1579         spin_lock(&unp_spin);
 1580         if (unp_gcing) {
 1581                 spin_unlock(&unp_spin);
 1582                 return;
 1583         }
 1584         unp_gcing = TRUE;
 1585         spin_unlock(&unp_spin);
 1586 
 1587         lwkt_gettoken(&unp_token);
 1588 
 1589         /* 
 1590          * Before going through all this, set all FDs to be NOT deferred
 1591          * and NOT externally accessible (not marked).  During the scan
 1592          * a fd can be marked externally accessible but we may or may not
 1593          * be able to immediately process it (controlled by FDEFER).
 1594          *
 1595          * If we loop, sleep a bit.  The complexity of the topology can cause
 1596          * multiple loops.  Also failure to acquire the socket's so_rcv
 1597          * token can cause us to loop.
 1598          */
 1599         allfiles_scan_exclusive(unp_gc_clearmarks, NULL);
 1600         do {
 1601                 info.defer = 0;
 1602                 allfiles_scan_exclusive(unp_gc_checkmarks, &info);
 1603                 if (info.defer)
 1604                         tsleep(&info, 0, "gcagain", 1);
 1605         } while (info.defer);
 1606 
 1607         /*
 1608          * We grab an extra reference to each of the file table entries
 1609          * that are not otherwise accessible and then free the rights
 1610          * that are stored in messages on them.
 1611          *
 1612          * The bug in the original code is a little tricky, so I'll describe
 1613          * what's wrong with it here.
 1614          *
 1615          * It is incorrect to simply unp_discard each entry for f_msgcount
 1616          * times -- consider the case of sockets A and B that contain
 1617          * references to each other.  On a last close of some other socket,
 1618          * we trigger a gc since the number of outstanding rights (unp_rights)
 1619          * is non-zero.  If during the sweep phase the gc code unp_discards,
 1620          * we end up doing a (full) closef on the descriptor.  A closef on A
 1621          * results in the following chain.  Closef calls soo_close, which
 1622          * calls soclose.   Soclose calls first (through the switch
 1623          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
 1624          * returns because the previous instance had set unp_gcing, and
 1625          * we return all the way back to soclose, which marks the socket
 1626          * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
 1627          * to free up the rights that are queued in messages on the socket A,
 1628          * i.e., the reference on B.  The sorflush calls via the dom_dispose
 1629          * switch unp_dispose, which unp_scans with unp_discard.  This second
 1630          * instance of unp_discard just calls closef on B.
 1631          *
 1632          * Well, a similar chain occurs on B, resulting in a sorflush on B,
 1633          * which results in another closef on A.  Unfortunately, A is already
 1634          * being closed, and the descriptor has already been marked with
 1635          * SS_NOFDREF, and soclose panics at this point.
 1636          *
 1637          * Here, we first take an extra reference to each inaccessible
 1638          * descriptor.  Then, we call sorflush ourself, since we know
 1639          * it is a Unix domain socket anyhow.  After we destroy all the
 1640          * rights carried in messages, we do a last closef to get rid
 1641          * of our extra reference.  This is the last close, and the
 1642          * unp_detach etc will shut down the socket.
 1643          *
 1644          * 91/09/19, bsy@cs.cmu.edu
 1645          */
 1646         info.extra_ref = kmalloc(256 * sizeof(struct file *), M_FILE, M_WAITOK);
 1647         info.maxindex = 256;
 1648 
 1649         do {
 1650                 /*
 1651                  * Look for matches
 1652                  */
 1653                 info.index = 0;
 1654                 allfiles_scan_exclusive(unp_gc_checkrefs, &info);
 1655 
 1656                 /* 
 1657                  * For each FD on our hit list, do the following two things
 1658                  */
 1659                 for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) {
 1660                         struct file *tfp = *fpp;
 1661                         if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
 1662                                 sorflush((struct socket *)(tfp->f_data));
 1663                 }
 1664                 for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp)
 1665                         closef(*fpp, NULL);
 1666         } while (info.index == info.maxindex);
 1667 
 1668         lwkt_reltoken(&unp_token);
 1669 
 1670         kfree((caddr_t)info.extra_ref, M_FILE);
 1671         unp_gcing = FALSE;
 1672 }
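
The cycle described in the long comment above can be constructed entirely from userland: each half of a socketpair is queued, in flight, on the other half, and then both local descriptors are closed. Nothing in the process's file table reaches either socket any more, yet unp_rights stays non-zero until the mark-and-sweep pass reclaims them. A hedged sketch, reusing the hypothetical send_fd() helper from the earlier example:

/*
 * Illustrative sketch of the fd cycle that motivates unp_gc(): each
 * socket of a socketpair is sent, in flight, over the other, then
 * both local descriptors are closed.  The two struct files now
 * reference each other only through queued SCM_RIGHTS messages and
 * must be reclaimed by the mark-and-sweep pass above.
 */
#include <sys/socket.h>
#include <unistd.h>

int send_fd(int sock, int fd_to_pass);  /* hypothetical helper from the earlier sketch */

void
make_unreachable_cycle(void)
{
        int sv[2];

        if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv) < 0)
                return;
        send_fd(sv[0], sv[1]);  /* sv[1] now queued in-flight on sv[0] */
        send_fd(sv[1], sv[0]);  /* sv[0] now queued in-flight on sv[1] */
        close(sv[0]);
        close(sv[1]);
        /*
         * No process can reach either socket any more, but unp_rights
         * is still non-zero, so a later gc pass sweeps them up.
         */
}
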
 1673 
 1674 /*
 1675  * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 1676  */
 1677 static int
 1678 unp_gc_checkrefs(struct file *fp, void *data)
 1679 {
 1680         struct unp_gc_info *info = data;
 1681 
 1682         if (fp->f_count == 0)
 1683                 return(0);
 1684         if (info->index == info->maxindex)
 1685                 return(-1);
 1686 
 1687         /* 
 1688          * If all refs are from msgs, and it's not marked accessible
 1689          * then it must be referenced from some unreachable cycle
 1690          * of (shut-down) FDs, so include it in our
 1691          * list of FDs to remove
 1692          */
 1693         if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
 1694                 info->extra_ref[info->index++] = fp;
 1695                 fhold(fp);
 1696         }
 1697         return(0);
 1698 }
 1699 
 1700 /*
 1701  * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 1702  */
 1703 static int
 1704 unp_gc_clearmarks(struct file *fp, void *data __unused)
 1705 {
 1706         atomic_clear_int(&fp->f_flag, FMARK | FDEFER);
 1707         return(0);
 1708 }
 1709 
 1710 /*
 1711  * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 1712  */
 1713 static int
 1714 unp_gc_checkmarks(struct file *fp, void *data)
 1715 {
 1716         struct unp_gc_info *info = data;
 1717         struct socket *so;
 1718 
 1719         /*
 1720          * If the file is not open, skip it.  Make sure it isn't marked
 1721          * deferred or we could loop forever, in case we somehow race
 1722          * something.
 1723          */
 1724         if (fp->f_count == 0) {
 1725                 if (fp->f_flag & FDEFER)
 1726                         atomic_clear_int(&fp->f_flag, FDEFER);
 1727                 return(0);
 1728         }
 1729         /*
 1730          * If we already marked it as 'defer' in a
 1731          * previous pass, then try to process it this time
 1732          * and un-mark it.
 1733          */
 1734         if (fp->f_flag & FDEFER) {
 1735                 atomic_clear_int(&fp->f_flag, FDEFER);
 1736         } else {
 1737                 /*
 1738                  * If it's not deferred, check whether it's
 1739                  * already marked; if so, skip it.
 1740                  */
 1741                 if (fp->f_flag & FMARK)
 1742                         return(0);
 1743                 /* 
 1744                  * If all references are from messages
 1745                  * in transit, then skip it; it's not
 1746                  * externally accessible.
 1747                  */ 
 1748                 if (fp->f_count == fp->f_msgcount)
 1749                         return(0);
 1750                 /* 
 1751                  * If it got this far then it must be
 1752                  * externally accessible.
 1753                  */
 1754                 atomic_set_int(&fp->f_flag, FMARK);
 1755         }
 1756 
 1757         /*
 1758          * Either it was deferred, or it is externally
 1759          * accessible and not already marked as such.
 1760          * Now check whether it is possibly one of OUR sockets.
 1761          */ 
 1762         if (fp->f_type != DTYPE_SOCKET ||
 1763             (so = (struct socket *)fp->f_data) == NULL) {
 1764                 return(0);
 1765         }
 1766         if (so->so_proto->pr_domain != &localdomain ||
 1767             !(so->so_proto->pr_flags & PR_RIGHTS)) {
 1768                 return(0);
 1769         }
 1770 
 1771         /*
 1772          * So, Ok, it's one of our sockets and it IS externally accessible
 1773          * (or was deferred).  Now we look to see if we hold any file
 1774          * descriptors in its message buffers.  Follow those links and mark
 1775          * them as accessible too.
 1776          *
 1777          * We are holding multiple spinlocks here; if we cannot get the
 1778          * token without blocking, defer until the next loop.
 1779          */
 1780         info->locked_fp = fp;
 1781         if (lwkt_trytoken(&so->so_rcv.ssb_token)) {
 1782                 unp_scan(so->so_rcv.ssb_mb, unp_mark, info);
 1783                 lwkt_reltoken(&so->so_rcv.ssb_token);
 1784         } else {
 1785                 atomic_set_int(&fp->f_flag, FDEFER);
 1786                 ++info->defer;
 1787         }
 1788         return (0);
 1789 }
 1790 
 1791 /*
 1792  * Scan all unix domain sockets and replace any revoked file pointers
 1793  * found with the dummy file pointer fx.  We don't worry about races
 1794  * against file pointers being read out as those are handled in the
 1795  * externalize code.
 1796  */
 1797 
 1798 #define REVOKE_GC_MAXFILES      32
 1799 
 1800 struct unp_revoke_gc_info {
 1801         struct file     *fx;
 1802         struct file     *fary[REVOKE_GC_MAXFILES];
 1803         int             fcount;
 1804 };
 1805 
 1806 void
 1807 unp_revoke_gc(struct file *fx)
 1808 {
 1809         struct unp_revoke_gc_info info;
 1810         int i;
 1811 
 1812         lwkt_gettoken(&unp_token);
 1813         info.fx = fx;
 1814         do {
 1815                 info.fcount = 0;
 1816                 allfiles_scan_exclusive(unp_revoke_gc_check, &info);
 1817                 for (i = 0; i < info.fcount; ++i)
 1818                         unp_fp_externalize(NULL, info.fary[i], -1);
 1819         } while (info.fcount == REVOKE_GC_MAXFILES);
 1820         lwkt_reltoken(&unp_token);
 1821 }
 1822 
 1823 /*
 1824  * Check for and replace revoked descriptors.
 1825  *
 1826  * WARNING:  This routine is not allowed to block.
 1827  */
 1828 static int
 1829 unp_revoke_gc_check(struct file *fps, void *vinfo)
 1830 {
 1831         struct unp_revoke_gc_info *info = vinfo;
 1832         struct file *fp;
 1833         struct socket *so;
 1834         struct mbuf *m0;
 1835         struct mbuf *m;
 1836         struct file **rp;
 1837         struct cmsghdr *cm;
 1838         int i;
 1839         int qfds;
 1840 
 1841         /*
 1842          * Is this a unix domain socket with rights-passing abilities?
 1843          */
 1844         if (fps->f_type != DTYPE_SOCKET)
 1845                 return (0);
 1846         if ((so = (struct socket *)fps->f_data) == NULL)
 1847                 return(0);
 1848         if (so->so_proto->pr_domain != &localdomain)
 1849                 return(0);
 1850         if ((so->so_proto->pr_flags & PR_RIGHTS) == 0)
 1851                 return(0);
 1852 
 1853         /*
 1854          * Scan the mbufs for control messages and replace any revoked
 1855          * descriptors we find.
 1856          */
 1857         lwkt_gettoken(&so->so_rcv.ssb_token);
 1858         m0 = so->so_rcv.ssb_mb;
 1859         while (m0) {
 1860                 for (m = m0; m; m = m->m_next) {
 1861                         if (m->m_type != MT_CONTROL)
 1862                                 continue;
 1863                         if (m->m_len < sizeof(*cm))
 1864                                 continue;
 1865                         cm = mtod(m, struct cmsghdr *);
 1866                         if (cm->cmsg_level != SOL_SOCKET ||
 1867                             cm->cmsg_type != SCM_RIGHTS) {
 1868                                 continue;
 1869                         }
 1870                         qfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(void *);
 1871                         rp = (struct file **)CMSG_DATA(cm);
 1872                         for (i = 0; i < qfds; i++) {
 1873                                 fp = rp[i];
 1874                                 if (fp->f_flag & FREVOKED) {
 1875                                         kprintf("Warning: Removing revoked fp from unix domain socket queue\n");
 1876                                         fhold(info->fx);
 1877                                         info->fx->f_msgcount++;
 1878                                         unp_rights++;
 1879                                         rp[i] = info->fx;
 1880                                         info->fary[info->fcount++] = fp;
 1881                                 }
 1882                                 if (info->fcount == REVOKE_GC_MAXFILES)
 1883                                         break;
 1884                         }
 1885                         if (info->fcount == REVOKE_GC_MAXFILES)
 1886                                 break;
 1887                 }
 1888                 m0 = m0->m_nextpkt;
 1889                 if (info->fcount == REVOKE_GC_MAXFILES)
 1890                         break;
 1891         }
 1892         lwkt_reltoken(&so->so_rcv.ssb_token);
 1893 
 1894         /*
 1895          * Stop the scan if we filled up our array.
 1896          */
 1897         if (info->fcount == REVOKE_GC_MAXFILES)
 1898                 return(-1);
 1899         return(0);
 1900 }
 1901 
 1902 /*
 1903  * Dispose of the fp's stored in an mbuf.
 1904  *
 1905  * The dds loop can cause additional fps to be entered onto the
 1906  * list while it is running, flattening out the operation and avoiding
 1907  * a deep kernel stack recursion.
 1908  */
 1909 void
 1910 unp_dispose(struct mbuf *m)
 1911 {
 1912         unp_defdiscard_t dds;
 1913 
 1914         lwkt_gettoken(&unp_token);
 1915         ++unp_defdiscard_nest;
 1916         if (m) {
 1917                 unp_scan(m, unp_discard, NULL);
 1918         }
 1919         if (unp_defdiscard_nest == 1) {
 1920                 while ((dds = unp_defdiscard_base) != NULL) {
 1921                         unp_defdiscard_base = dds->next;
 1922                         closef(dds->fp, NULL);
 1923                         kfree(dds, M_UNPCB);
 1924                 }
 1925         }
 1926         --unp_defdiscard_nest;
 1927         lwkt_reltoken(&unp_token);
 1928 }
 1929 
 1930 static int
 1931 unp_listen(struct unpcb *unp, struct thread *td)
 1932 {
 1933         struct proc *p = td->td_proc;
 1934 
 1935         KKASSERT(p);
 1936         lwkt_gettoken(&unp_token);
 1937         cru2x(p->p_ucred, &unp->unp_peercred);
 1938         unp->unp_flags |= UNP_HAVEPCCACHED;
 1939         lwkt_reltoken(&unp_token);
 1940         return (0);
 1941 }
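
unp_listen() caches the listener's credentials in unp_peercred so that a later connect can hand a copy to the peer; on the BSDs this cache is what credential queries such as getpeereid(3) ultimately report (that specific consumer is an assumption here, not stated by this file). A minimal illustrative check on the client side; the helper name check_peer() is hypothetical:

/*
 * Illustrative sketch: query the effective uid/gid of the peer of a
 * connected AF_LOCAL socket.  The credentials cached by unp_listen()
 * above are, by assumption, what backs this query.
 */
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>

int
check_peer(int sock)
{
        uid_t euid;
        gid_t egid;

        if (getpeereid(sock, &euid, &egid) < 0)
                return (-1);
        printf("peer euid %lu, egid %lu\n",
            (unsigned long)euid, (unsigned long)egid);
        return (0);
}
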
 1942 
 1943 static void
 1944 unp_scan(struct mbuf *m0, void (*op)(struct file *, void *), void *data)
 1945 {
 1946         struct mbuf *m;
 1947         struct file **rp;
 1948         struct cmsghdr *cm;
 1949         int i;
 1950         int qfds;
 1951 
 1952         while (m0) {
 1953                 for (m = m0; m; m = m->m_next) {
 1954                         if (m->m_type == MT_CONTROL &&
 1955                             m->m_len >= sizeof(*cm)) {
 1956                                 cm = mtod(m, struct cmsghdr *);
 1957                                 if (cm->cmsg_level != SOL_SOCKET ||
 1958                                     cm->cmsg_type != SCM_RIGHTS)
 1959                                         continue;
 1960                                 qfds = (cm->cmsg_len - CMSG_LEN(0)) /
 1961                                         sizeof(void *);
 1962                                 rp = (struct file **)CMSG_DATA(cm);
 1963                                 for (i = 0; i < qfds; i++)
 1964                                         (*op)(*rp++, data);
 1965                                 break;          /* XXX, but saves time */
 1966                         }
 1967                 }
 1968                 m0 = m0->m_nextpkt;
 1969         }
 1970 }
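
unp_scan() above is the kernel-side analogue of the control-message walk a receiver performs after recvmsg(2): find each SCM_RIGHTS cmsghdr and act on the descriptors in its data. A minimal illustrative receive-side sketch; the helper name recv_fd() is hypothetical, and it only keeps the last descriptor it sees:

/*
 * Illustrative userland counterpart of unp_scan(): walk the control
 * messages returned by recvmsg(2) and pick out a descriptor passed
 * via SCM_RIGHTS.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

int
recv_fd(int sock)
{
        struct msghdr msg;
        struct iovec iov;
        union {
                struct cmsghdr hdr;
                char buf[CMSG_SPACE(sizeof(int))];
        } cmsgbuf;
        struct cmsghdr *cm;
        char dummy;
        int fd = -1;

        memset(&msg, 0, sizeof(msg));
        iov.iov_base = &dummy;
        iov.iov_len = 1;
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cmsgbuf.buf;
        msg.msg_controllen = sizeof(cmsgbuf.buf);

        if (recvmsg(sock, &msg, 0) < 0)
                return (-1);
        for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
             cm = CMSG_NXTHDR(&msg, cm)) {
                if (cm->cmsg_level == SOL_SOCKET &&
                    cm->cmsg_type == SCM_RIGHTS)
                        memcpy(&fd, CMSG_DATA(cm), sizeof(int));
        }
        return (fd);
}
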
 1971 
 1972 /*
 1973  * Mark visibility.  info->defer is recalculated on every pass.
 1974  */
 1975 static void
 1976 unp_mark(struct file *fp, void *data)
 1977 {
 1978         struct unp_gc_info *info = data;
 1979 
 1980         if ((fp->f_flag & FMARK) == 0) {
 1981                 ++info->defer;
 1982                 atomic_set_int(&fp->f_flag, FMARK | FDEFER);
 1983         } else if (fp->f_flag & FDEFER) {
 1984                 ++info->defer;
 1985         }
 1986 }
 1987 
 1988 /*
 1989  * Discard a fp previously held in a unix domain socket mbuf.  To
 1990  * avoid blowing out the kernel stack due to contrived chain-reactions
 1991  * we may have to defer the operation to a higher procedural level.
 1992  *
 1993  * Caller holds unp_token
 1994  */
 1995 static void
 1996 unp_discard(struct file *fp, void *data __unused)
 1997 {
 1998         unp_defdiscard_t dds;
 1999 
 2000         spin_lock(&unp_spin);
 2001         fp->f_msgcount--;
 2002         unp_rights--;
 2003         spin_unlock(&unp_spin);
 2004 
 2005         if (unp_defdiscard_nest) {
 2006                 dds = kmalloc(sizeof(*dds), M_UNPCB, M_WAITOK|M_ZERO);
 2007                 dds->fp = fp;
 2008                 dds->next = unp_defdiscard_base;
 2009                 unp_defdiscard_base = dds;
 2010         } else {
 2011                 closef(fp, NULL);
 2012         }
 2013 }
 2014 



This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.