The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   34  * $FreeBSD: releng/5.1/sys/kern/uipc_usrreq.c 112516 2003-03-23 19:41:34Z cognet $
   35  */
   36 
   37 #include "opt_mac.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/domain.h>
   41 #include <sys/fcntl.h>
   42 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
   43 #include <sys/file.h>
   44 #include <sys/filedesc.h>
   45 #include <sys/jail.h>
   46 #include <sys/kernel.h>
   47 #include <sys/lock.h>
   48 #include <sys/mac.h>
   49 #include <sys/mbuf.h>
   50 #include <sys/mutex.h>
   51 #include <sys/namei.h>
   52 #include <sys/proc.h>
   53 #include <sys/protosw.h>
   54 #include <sys/resourcevar.h>
   55 #include <sys/socket.h>
   56 #include <sys/socketvar.h>
   57 #include <sys/signalvar.h>
   58 #include <sys/stat.h>
   59 #include <sys/sx.h>
   60 #include <sys/sysctl.h>
   61 #include <sys/systm.h>
   62 #include <sys/un.h>
   63 #include <sys/unpcb.h>
   64 #include <sys/vnode.h>
   65 
   66 #include <vm/uma.h>
   67 
   68 static uma_zone_t unp_zone;         /* UMA zone unp_attach() allocates unpcbs from */
   69 static  unp_gen_t unp_gencnt;       /* generation count, bumped on every attach and detach */
   70 static  u_int unp_count;            /* unpcbs currently attached (++ in attach, -- in detach) */
   71 
      /* Lists of attached pcbs: unp_shead for stream, unp_dhead for datagram. */
   72 static  struct unp_head unp_shead, unp_dhead;
   73 
   74 /*
   75  * Unix communications domain.
   76  *
   77  * TODO:
   78  *      SEQPACKET, RDM
   79  *      rethink name space problems
   80  *      need a proper out-of-band
   81  *      lock pushdown
   82  */
   83 static struct   sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; /* address reported when no bound name exists */
   84 static ino_t    unp_ino;                /* prototype for fake inode numbers */
   85 
      /*
       * Forward declarations of the file-local helpers.  They are static,
       * so each one is defined somewhere in this translation unit.
       */
   86 static int     unp_attach(struct socket *);
   87 static void    unp_detach(struct unpcb *);
   88 static int     unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
   89 static int     unp_connect(struct socket *,struct sockaddr *, struct thread *);
   90 static void    unp_disconnect(struct unpcb *);
   91 static void    unp_shutdown(struct unpcb *);
   92 static void    unp_drop(struct unpcb *, int);
   93 static void    unp_gc(void);
   94 static void    unp_scan(struct mbuf *, void (*)(struct file *));
   95 static void    unp_mark(struct file *);
   96 static void    unp_discard(struct file *);
   97 static void    unp_freerights(struct file **, int);
   98 static int     unp_internalize(struct mbuf **, struct thread *);
   99 static int     unp_listen(struct unpcb *, struct thread *);
  100 
  101 static int
  102 uipc_abort(struct socket *so)
  103 {
  104         struct unpcb *unp = sotounpcb(so);
  105 
  106         if (unp == 0)
  107                 return EINVAL;
  108         unp_drop(unp, ECONNABORTED);
  109         unp_detach(unp);
  110         sotryfree(so);
  111         return 0;
  112 }
  113 
  114 static int
  115 uipc_accept(struct socket *so, struct sockaddr **nam)
  116 {
  117         struct unpcb *unp = sotounpcb(so);
  118 
  119         if (unp == 0)
  120                 return EINVAL;
  121 
  122         /*
  123          * Pass back name of connected socket,
  124          * if it was bound and we are still connected
  125          * (our peer may have closed already!).
  126          */
  127         if (unp->unp_conn && unp->unp_conn->unp_addr) {
  128                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
  129                                     1);
  130         } else {
  131                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
  132         }
  133         return 0;
  134 }
  135 
  136 static int
  137 uipc_attach(struct socket *so, int proto, struct thread *td)
  138 {
  139         struct unpcb *unp = sotounpcb(so);
  140 
  141         if (unp != 0)
  142                 return EISCONN;
  143         return unp_attach(so);
  144 }
  145 
  146 static int
  147 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  148 {
  149         struct unpcb *unp = sotounpcb(so);
  150 
  151         if (unp == 0)
  152                 return EINVAL;
  153 
  154         return unp_bind(unp, nam, td);
  155 }
  156 
  157 static int
  158 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  159 {
  160         struct unpcb *unp = sotounpcb(so);
  161 
  162         if (unp == 0)
  163                 return EINVAL;
  164         return unp_connect(so, nam, curthread);
  165 }
  166 
  167 static int
  168 uipc_connect2(struct socket *so1, struct socket *so2)
  169 {
  170         struct unpcb *unp = sotounpcb(so1);
  171 
  172         if (unp == 0)
  173                 return EINVAL;
  174 
  175         return unp_connect2(so1, so2);
  176 }
  177 
  178 /* pru_control is EOPNOTSUPP */
  179 
  180 static int
  181 uipc_detach(struct socket *so)
  182 {
  183         struct unpcb *unp = sotounpcb(so);
  184 
  185         if (unp == 0)
  186                 return EINVAL;
  187 
  188         unp_detach(unp);
  189         return 0;
  190 }
  191 
  192 static int
  193 uipc_disconnect(struct socket *so)
  194 {
  195         struct unpcb *unp = sotounpcb(so);
  196 
  197         if (unp == 0)
  198                 return EINVAL;
  199         unp_disconnect(unp);
  200         return 0;
  201 }
  202 
  203 static int
  204 uipc_listen(struct socket *so, struct thread *td)
  205 {
  206         struct unpcb *unp = sotounpcb(so);
  207 
  208         if (unp == 0 || unp->unp_vnode == 0)
  209                 return EINVAL;
  210         return unp_listen(unp, td);
  211 }
  212 
  213 static int
  214 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  215 {
  216         struct unpcb *unp = sotounpcb(so);
  217 
  218         if (unp == 0)
  219                 return EINVAL;
  220         if (unp->unp_conn && unp->unp_conn->unp_addr)
  221                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
  222                                     1);
  223         else {
  224                 /*
  225                  * XXX: It seems that this test always fails even when
  226                  * connection is established.  So, this else clause is
  227                  * added as workaround to return PF_LOCAL sockaddr.
  228                  */
  229                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
  230         }
  231         return 0;
  232 }
  233 
  234 static int
  235 uipc_rcvd(struct socket *so, int flags)
  236 {
  237         struct unpcb *unp = sotounpcb(so);
  238         struct socket *so2;
  239         u_long newhiwat;
  240 
  241         if (unp == 0)
  242                 return EINVAL;
  243         switch (so->so_type) {
  244         case SOCK_DGRAM:
  245                 panic("uipc_rcvd DGRAM?");
  246                 /*NOTREACHED*/
  247 
  248         case SOCK_STREAM:
  249                 if (unp->unp_conn == 0)
  250                         break;
  251                 so2 = unp->unp_conn->unp_socket;
  252                 /*
  253                  * Adjust backpressure on sender
  254                  * and wakeup any waiting to write.
  255                  */
  256                 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt;
  257                 unp->unp_mbcnt = so->so_rcv.sb_mbcnt;
  258                 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc -
  259                     so->so_rcv.sb_cc;
  260                 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
  261                     newhiwat, RLIM_INFINITY);
  262                 unp->unp_cc = so->so_rcv.sb_cc;
  263                 sowwakeup(so2);
  264                 break;
  265 
  266         default:
  267                 panic("uipc_rcvd unknown socktype");
  268         }
  269         return 0;
  270 }
  271 
  272 /* pru_rcvoob is EOPNOTSUPP */
  273 
      /*
       * Send entry of uipc_usrreqs: deliver the data chain m, plus the
       * optional control chain, to the socket so is connected to.
       *
       * Datagram sockets append to the peer's receive buffer together with
       * the sender's address, connecting for the duration of the call when
       * nam is given.  Stream sockets append to the peer's receive buffer
       * and then lower their own send-buffer limits by the amount that was
       * queued, which is what applies backpressure to the sender.
       *
       * m and control are consumed on every path: whatever was not handed
       * over to the peer's receive buffer (and zeroed here) is freed at the
       * release label.  Returns 0 or an errno value; PRUS_OOB is rejected
       * with EOPNOTSUPP.
       */
  274 static int
  275 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  276           struct mbuf *control, struct thread *td)
  277 {
  278         int error = 0;
  279         struct unpcb *unp = sotounpcb(so);
  280         struct socket *so2;
  281         u_long newhiwat;
  282 
  283         if (unp == 0) {
  284                 error = EINVAL;
  285                 goto release;
  286         }
  287         if (flags & PRUS_OOB) {
  288                 error = EOPNOTSUPP;
  289                 goto release;
  290         }
  291 
              /* control is passed by reference: unp_internalize() may replace it. */
  292         if (control && (error = unp_internalize(&control, td)))
  293                 goto release;
  294 
  295         switch (so->so_type) {
  296         case SOCK_DGRAM: 
  297         {
  298                 struct sockaddr *from;
  299 
                      /*
                       * An explicit destination is only accepted on an
                       * unconnected socket; connect just for this send.
                       */
  300                 if (nam) {
  301                         if (unp->unp_conn) {
  302                                 error = EISCONN;
  303                                 break;
  304                         }
  305                         error = unp_connect(so, nam, td);
  306                         if (error)
  307                                 break;
  308                 } else {
  309                         if (unp->unp_conn == 0) {
  310                                 error = ENOTCONN;
  311                                 break;
  312                         }
  313                 }
  314                 so2 = unp->unp_conn->unp_socket;
  315                 if (unp->unp_addr)
  316                         from = (struct sockaddr *)unp->unp_addr;
  317                 else
  318                         from = &sun_noname;
                      /*
                       * On success the chains are zeroed here so that the
                       * release code below does not free them.
                       */
  319                 if (sbappendaddr(&so2->so_rcv, from, m, control)) {
  320                         sorwakeup(so2);
  321                         m = 0;
  322                         control = 0;
  323                 } else
  324                         error = ENOBUFS;
                      /* Undo the temporary connection made above. */
  325                 if (nam)
  326                         unp_disconnect(unp);
  327                 break;
  328         }
  329 
  330         case SOCK_STREAM:
  331                 /* Connect if not connected yet. */
  332                 /*
  333                  * Note: A better implementation would complain
  334                  * if not equal to the peer's address.
  335                  */
  336                 if ((so->so_state & SS_ISCONNECTED) == 0) {
  337                         if (nam) {
  338                                 error = unp_connect(so, nam, td);
  339                                 if (error)
  340                                         break;  /* XXX */
  341                         } else {
  342                                 error = ENOTCONN;
  343                                 break;
  344                         }
  345                 }
  346 
  347                 if (so->so_state & SS_CANTSENDMORE) {
  348                         error = EPIPE;
  349                         break;
  350                 }
  351                 if (unp->unp_conn == 0)
  352                         panic("uipc_send connected but no connection?");
  353                 so2 = unp->unp_conn->unp_socket;
  354                 /*
  355                  * Send to paired receive port, and then reduce
  356                  * send buffer hiwater marks to maintain backpressure.
  357                  * Wake up readers.
  358                  */
                      /*
                       * NOTE(review): when sbappendcontrol() returns 0, control
                       * stays set while error is still 0, so it is freed at
                       * release without unp_dispose(), and m is zeroed below
                       * without being freed here.  Presumably the data was not
                       * queued in that case -- confirm against sbappendcontrol().
                       */
  359                 if (control) {
  360                         if (sbappendcontrol(&so2->so_rcv, m, control))
  361                                 control = 0;
  362                 } else
  363                         sbappend(&so2->so_rcv, m);
                      /*
                       * Charge the sender for the growth of the receiver's
                       * buffer since the last accounting; the peer pcb's
                       * unp_mbcnt/unp_cc hold the previously recorded usage.
                       */
  364                 so->so_snd.sb_mbmax -=
  365                         so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
  366                 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt;
  367                 newhiwat = so->so_snd.sb_hiwat -
  368                     (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc);
  369                 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
  370                     newhiwat, RLIM_INFINITY);
  371                 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
  372                 sorwakeup(so2);
  373                 m = 0;
  374                 break;
  375 
  376         default:
  377                 panic("uipc_send unknown socktype");
  378         }
  379 
  380         /*
  381          * SEND_EOF is equivalent to a SEND followed by
  382          * a SHUTDOWN.
  383          */
              /* This also runs on the error paths that break out of the switch. */
  384         if (flags & PRUS_EOF) {
  385                 socantsendmore(so);
  386                 unp_shutdown(unp);
  387         }
  388 
              /*
               * Only reached with control already internalized; unp_dispose()
               * presumably releases what the control chain refers to before
               * the mbufs are freed below -- confirm in unp_dispose().
               */
  389         if (control && error != 0)
  390                 unp_dispose(control);
  391 
  392 release:
  393         if (control)
  394                 m_freem(control);
  395         if (m)
  396                 m_freem(m);
  397         return error;
  398 }
  399 
  400 static int
  401 uipc_sense(struct socket *so, struct stat *sb)
  402 {
  403         struct unpcb *unp = sotounpcb(so);
  404         struct socket *so2;
  405 
  406         if (unp == 0)
  407                 return EINVAL;
  408         sb->st_blksize = so->so_snd.sb_hiwat;
  409         if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
  410                 so2 = unp->unp_conn->unp_socket;
  411                 sb->st_blksize += so2->so_rcv.sb_cc;
  412         }
  413         sb->st_dev = NOUDEV;
  414         if (unp->unp_ino == 0)
  415                 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
  416         sb->st_ino = unp->unp_ino;
  417         return (0);
  418 }
  419 
  420 static int
  421 uipc_shutdown(struct socket *so)
  422 {
  423         struct unpcb *unp = sotounpcb(so);
  424 
  425         if (unp == 0)
  426                 return EINVAL;
  427         socantsendmore(so);
  428         unp_shutdown(unp);
  429         return 0;
  430 }
  431 
  432 static int
  433 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
  434 {
  435         struct unpcb *unp = sotounpcb(so);
  436 
  437         if (unp == 0)
  438                 return EINVAL;
  439         if (unp->unp_addr)
  440                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
  441         else
  442                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
  443         return 0;
  444 }
  445 
  446 struct pr_usrreqs uipc_usrreqs = {
              /*
               * Positional initializer: the entries presumably have to follow
               * the member order of struct pr_usrreqs, which is declared
               * outside this file -- confirm before reordering.  The control
               * and rcvoob slots are filled with the *_notsupp handlers, and
               * the last three entries (sosend, soreceive, sopoll) are not
               * defined in this file either.
               */
  447         uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
  448         uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
  449         uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
  450         uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
  451         sosend, soreceive, sopoll
  452 };
  453 
  454 int
  455 uipc_ctloutput(so, sopt)
  456         struct socket *so;
  457         struct sockopt *sopt;
  458 {
  459         struct unpcb *unp = sotounpcb(so);
  460         int error;
  461 
  462         switch (sopt->sopt_dir) {
  463         case SOPT_GET:
  464                 switch (sopt->sopt_name) {
  465                 case LOCAL_PEERCRED:
  466                         if (unp->unp_flags & UNP_HAVEPC)
  467                                 error = sooptcopyout(sopt, &unp->unp_peercred,
  468                                     sizeof(unp->unp_peercred));
  469                         else {
  470                                 if (so->so_type == SOCK_STREAM)
  471                                         error = ENOTCONN;
  472                                 else
  473                                         error = EINVAL;
  474                         }
  475                         break;
  476                 default:
  477                         error = EOPNOTSUPP;
  478                         break;
  479                 }
  480                 break;
  481         case SOPT_SET:
  482         default:
  483                 error = EOPNOTSUPP;
  484                 break;
  485         }
  486         return (error);
  487 }
  488         
  489 /*
  490  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
  491  * for stream sockets, although the total for sender and receiver is
  492  * actually only PIPSIZ.
  493  * Datagram sockets really use the sendspace as the maximum datagram size,
  494  * and don't really want to reserve the sendspace.  Their recvspace should
  495  * be large enough for at least one max-size datagram plus address.
  496  */
  497 #ifndef PIPSIZ
  498 #define PIPSIZ  8192
  499 #endif
  500 static u_long   unpst_sendspace = PIPSIZ;
  501 static u_long   unpst_recvspace = PIPSIZ;
  502 static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
  503 static u_long   unpdg_recvspace = 4*1024;
  504 
  505 static int      unp_rights;                     /* file descriptors in flight */
  506 
  507 SYSCTL_DECL(_net_local_stream);
  508 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 
  509            &unpst_sendspace, 0, "");
  510 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
  511            &unpst_recvspace, 0, "");
  512 SYSCTL_DECL(_net_local_dgram);
  513 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
  514            &unpdg_sendspace, 0, "");
  515 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
  516            &unpdg_recvspace, 0, "");
  517 SYSCTL_DECL(_net_local);
  518 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
  519 
  520 static int
  521 unp_attach(so)
  522         struct socket *so;
  523 {
  524         register struct unpcb *unp;
  525         int error;
  526 
  527         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  528                 switch (so->so_type) {
  529 
  530                 case SOCK_STREAM:
  531                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
  532                         break;
  533 
  534                 case SOCK_DGRAM:
  535                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
  536                         break;
  537 
  538                 default:
  539                         panic("unp_attach");
  540                 }
  541                 if (error)
  542                         return (error);
  543         }
  544         unp = uma_zalloc(unp_zone, M_WAITOK);
  545         if (unp == NULL)
  546                 return (ENOBUFS);
  547         bzero(unp, sizeof *unp);
  548         unp->unp_gencnt = ++unp_gencnt;
  549         unp_count++;
  550         LIST_INIT(&unp->unp_refs);
  551         unp->unp_socket = so;
  552         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
  553                          : &unp_shead, unp, unp_link);
  554         so->so_pcb = unp;
  555         return (0);
  556 }
  557 
  558 static void
  559 unp_detach(unp)
  560         register struct unpcb *unp;
  561 {
  562         LIST_REMOVE(unp, unp_link);
  563         unp->unp_gencnt = ++unp_gencnt;
  564         --unp_count;
  565         if (unp->unp_vnode) {
  566                 unp->unp_vnode->v_socket = 0;
  567                 vrele(unp->unp_vnode);
  568                 unp->unp_vnode = 0;
  569         }
  570         if (unp->unp_conn)
  571                 unp_disconnect(unp);
  572         while (!LIST_EMPTY(&unp->unp_refs))
  573                 unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET);
  574         soisdisconnected(unp->unp_socket);
  575         unp->unp_socket->so_pcb = 0;
  576         if (unp_rights) {
  577                 /*
  578                  * Normally the receive buffer is flushed later,
  579                  * in sofree, but if our receive buffer holds references
  580                  * to descriptors that are now garbage, we will dispose
  581                  * of those descriptor references after the garbage collector
  582                  * gets them (resulting in a "panic: closef: count < 0").
  583                  */
  584                 sorflush(unp->unp_socket);
  585                 unp_gc();
  586         }
  587         if (unp->unp_addr)
  588                 FREE(unp->unp_addr, M_SONAME);
  589         uma_zfree(unp_zone, unp);
  590 }
  591 
  592 static int
  593 unp_bind(unp, nam, td)
  594         struct unpcb *unp;
  595         struct sockaddr *nam;
  596         struct thread *td;
  597 {
  598         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  599         struct vnode *vp;
  600         struct mount *mp;
  601         struct vattr vattr;
  602         int error, namelen;
  603         struct nameidata nd;
  604         char *buf;
  605 
  606         if (unp->unp_vnode != NULL)
  607                 return (EINVAL);
  608 
  609         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  610         if (namelen <= 0)
  611                 return EINVAL;
  612 
  613         buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
  614         strlcpy(buf, soun->sun_path, namelen + 1);
  615 
  616 restart:
  617         NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
  618             buf, td);
  619 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  620         error = namei(&nd);
  621         if (error) {
  622                 free(buf, M_TEMP);
  623                 return (error);
  624         }
  625         vp = nd.ni_vp;
  626         if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
  627                 NDFREE(&nd, NDF_ONLY_PNBUF);
  628                 if (nd.ni_dvp == vp)
  629                         vrele(nd.ni_dvp);
  630                 else
  631                         vput(nd.ni_dvp);
  632                 if (vp != NULL) {
  633                         vrele(vp);
  634                         free(buf, M_TEMP);
  635                         return (EADDRINUSE);
  636                 }
  637                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
  638                 if (error) {
  639                         free(buf, M_TEMP);
  640                         return (error);
  641                 }
  642                 goto restart;
  643         }
  644         VATTR_NULL(&vattr);
  645         vattr.va_type = VSOCK;
  646         vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
  647 #ifdef MAC
  648         error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  649             &vattr);
  650 #endif
  651         if (error == 0) {
  652                 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
  653                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  654         }
  655         NDFREE(&nd, NDF_ONLY_PNBUF);
  656         vput(nd.ni_dvp);
  657         if (error) {
  658                 free(buf, M_TEMP);
  659                 return (error);
  660         }
  661         vp = nd.ni_vp;
  662         vp->v_socket = unp->unp_socket;
  663         unp->unp_vnode = vp;
  664         unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
  665         VOP_UNLOCK(vp, 0, td);
  666         vn_finished_write(mp);
  667         free(buf, M_TEMP);
  668         return (0);
  669 }
  670 
  /*
   * Connect so to the socket bound to the filesystem name in nam.
   *
   *   so   socket to be connected.
   *   nam  sockaddr_un holding the path; it need not be NUL-terminated,
   *        only the sa_len bytes it announces are looked at.
   *   td   calling thread; supplies the credentials for the lookup, the
   *        write-access check and the peer credentials recorded below.
   *
   * For protocols with PR_CONNREQUIRED set, the target has to be
   * accepting connections; a new socket is spawned from it with
   * sonewconn() and that one becomes the peer.
   *
   * Returns 0 on success, EINVAL if the address carries no path, ENOTSOCK
   * if the name is not a socket, ECONNREFUSED if nobody is bound or
   * listening there (or no new connection could be set up), EPROTOTYPE
   * if the socket types differ, or the error from namei(), VOP_ACCESS()
   * or unp_connect2().
   */
  static int
  unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  {
          struct sockaddr_un *soun = (struct sockaddr_un *)nam;
          struct vnode *vp;
          struct socket *so2, *so3;
          struct unpcb *unp, *unp2, *unp3;
          int error, len;
          struct nameidata nd;
          char buf[SOCK_MAXADDRLEN];

          len = (int)nam->sa_len - (int)offsetof(struct sockaddr_un, sun_path);
          if (len <= 0)
                  return (EINVAL);
          /*
           * Copy exactly the bytes the address announces and terminate
           * them ourselves.  strlcpy() would keep scanning the source for
           * a NUL and so read past the end of a path that fills the
           * address completely.  At most len + 1 bytes are written, the
           * same bound the strlcpy() call used.
           */
          memcpy(buf, soun->sun_path, len);
          buf[len] = '\0';

          NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
          error = namei(&nd);
          if (error)
                  return (error);
          vp = nd.ni_vp;
          NDFREE(&nd, NDF_ONLY_PNBUF);
          if (vp->v_type != VSOCK) {
                  error = ENOTSOCK;
                  goto bad;
          }
          error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
          if (error)
                  goto bad;
          so2 = vp->v_socket;
          if (so2 == NULL) {
                  error = ECONNREFUSED;
                  goto bad;
          }
          if (so->so_type != so2->so_type) {
                  error = EPROTOTYPE;
                  goto bad;
          }
          if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
                  if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
                      (so3 = sonewconn(so2, 0)) == NULL) {
                          error = ECONNREFUSED;
                          goto bad;
                  }
                  unp = sotounpcb(so);
                  unp2 = sotounpcb(so2);
                  unp3 = sotounpcb(so3);
                  /* The new socket inherits the listener's bound name. */
                  if (unp2->unp_addr)
                          unp3->unp_addr = (struct sockaddr_un *)
                                  dup_sockaddr((struct sockaddr *)
                                               unp2->unp_addr, 1);

                  /*
                   * unp_peercred management:
                   *
                   * The connecter's (client's) credentials are copied
                   * from its process structure at the time of connect()
                   * (which is now).
                   */
                  cru2x(td->td_ucred, &unp3->unp_peercred);
                  unp3->unp_flags |= UNP_HAVEPC;
                  /*
                   * The receiver's (server's) credentials are copied
                   * from the unp_peercred member of socket on which the
                   * former called listen(); unp_listen() cached that
                   * process's credentials at that time so we can use
                   * them now.
                   */
                  KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
                      ("unp_connect: listener without cached peercred"));
                  memcpy(&unp->unp_peercred, &unp2->unp_peercred,
                      sizeof(unp->unp_peercred));
                  unp->unp_flags |= UNP_HAVEPC;
  #ifdef MAC
                  mac_set_socket_peer_from_socket(so, so3);
                  mac_set_socket_peer_from_socket(so3, so);
  #endif

                  /* From here on the peer is the freshly spawned socket. */
                  so2 = so3;
          }
          error = unp_connect2(so, so2);
  bad:
          /* vp was returned locked (LOCKLEAF): unlock and release it. */
          vput(vp);
          return (error);
  }
  759 
  760 int
  761 unp_connect2(so, so2)
  762         register struct socket *so;
  763         register struct socket *so2;
  764 {
  765         register struct unpcb *unp = sotounpcb(so);
  766         register struct unpcb *unp2;
  767 
  768         if (so2->so_type != so->so_type)
  769                 return (EPROTOTYPE);
  770         unp2 = sotounpcb(so2);
  771         unp->unp_conn = unp2;
  772         switch (so->so_type) {
  773 
  774         case SOCK_DGRAM:
  775                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
  776                 soisconnected(so);
  777                 break;
  778 
  779         case SOCK_STREAM:
  780                 unp2->unp_conn = unp;
  781                 soisconnected(so);
  782                 soisconnected(so2);
  783                 break;
  784 
  785         default:
  786                 panic("unp_connect2");
  787         }
  788         return (0);
  789 }
  790 
  791 static void
  792 unp_disconnect(unp)
  793         struct unpcb *unp;
  794 {
  795         register struct unpcb *unp2 = unp->unp_conn;
  796 
  797         if (unp2 == 0)
  798                 return;
  799         unp->unp_conn = 0;
  800         switch (unp->unp_socket->so_type) {
  801 
  802         case SOCK_DGRAM:
  803                 LIST_REMOVE(unp, unp_reflink);
  804                 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
  805                 break;
  806 
  807         case SOCK_STREAM:
  808                 soisdisconnected(unp->unp_socket);
  809                 unp2->unp_conn = 0;
  810                 soisdisconnected(unp2->unp_socket);
  811                 break;
  812         }
  813 }
  814 
  815 #ifdef notdef
  /* Compiled out (notdef): aborting a pcb is the same as detaching it. */
  void
  unp_abort(struct unpcb *unp)
  {
          unp_detach(unp);
  }
  823 #endif
  824 
  825 static int
  826 unp_pcblist(SYSCTL_HANDLER_ARGS)
  827 {
  828         int error, i, n;
  829         struct unpcb *unp, **unp_list;
  830         unp_gen_t gencnt;
  831         struct xunpgen *xug;
  832         struct unp_head *head;
  833         struct xunpcb *xu;
  834 
  835         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
  836 
  837         /*
  838          * The process of preparing the PCB list is too time-consuming and
  839          * resource-intensive to repeat twice on every request.
  840          */
  841         if (req->oldptr == 0) {
  842                 n = unp_count;
  843                 req->oldidx = 2 * (sizeof *xug)
  844                         + (n + n/8) * sizeof(struct xunpcb);
  845                 return 0;
  846         }
  847 
  848         if (req->newptr != 0)
  849                 return EPERM;
  850 
  851         /*
  852          * OK, now we're committed to doing something.
  853          */
  854         xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
  855         gencnt = unp_gencnt;
  856         n = unp_count;
  857 
  858         xug->xug_len = sizeof *xug;
  859         xug->xug_count = n;
  860         xug->xug_gen = gencnt;
  861         xug->xug_sogen = so_gencnt;
  862         error = SYSCTL_OUT(req, xug, sizeof *xug);
  863         if (error) {
  864                 free(xug, M_TEMP);
  865                 return error;
  866         }
  867 
  868         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
  869         
  870         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
  871              unp = LIST_NEXT(unp, unp_link)) {
  872                 if (unp->unp_gencnt <= gencnt) {
  873                         if (cr_cansee(req->td->td_ucred,
  874                             unp->unp_socket->so_cred))
  875                                 continue;
  876                         unp_list[i++] = unp;
  877                 }
  878         }
  879         n = i;                  /* in case we lost some during malloc */
  880 
  881         error = 0;
  882         xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK);
  883         for (i = 0; i < n; i++) {
  884                 unp = unp_list[i];
  885                 if (unp->unp_gencnt <= gencnt) {
  886                         xu->xu_len = sizeof *xu;
  887                         xu->xu_unpp = unp;
  888                         /*
  889                          * XXX - need more locking here to protect against
  890                          * connect/disconnect races for SMP.
  891                          */
  892                         if (unp->unp_addr)
  893                                 bcopy(unp->unp_addr, &xu->xu_addr, 
  894                                       unp->unp_addr->sun_len);
  895                         if (unp->unp_conn && unp->unp_conn->unp_addr)
  896                                 bcopy(unp->unp_conn->unp_addr,
  897                                       &xu->xu_caddr,
  898                                       unp->unp_conn->unp_addr->sun_len);
  899                         bcopy(unp, &xu->xu_unp, sizeof *unp);
  900                         sotoxsocket(unp->unp_socket, &xu->xu_socket);
  901                         error = SYSCTL_OUT(req, xu, sizeof *xu);
  902                 }
  903         }
  904         free(xu, M_TEMP);
  905         if (!error) {
  906                 /*
  907                  * Give the user an updated idea of our state.
  908                  * If the generation differs from what we told
  909                  * her before, she knows that something happened
  910                  * while we were processing this request, and it
  911                  * might be necessary to retry.
  912                  */
  913                 xug->xug_gen = unp_gencnt;
  914                 xug->xug_sogen = so_gencnt;
  915                 xug->xug_count = unp_count;
  916                 error = SYSCTL_OUT(req, xug, sizeof *xug);
  917         }
  918         free(unp_list, M_TEMP);
  919         free(xug, M_TEMP);
  920         return error;
  921 }
  922 
/*
 * Read-only "pcblist" sysctl nodes for local datagram and stream sockets.
 * Both are served by unp_pcblist; the socket type is passed as the first
 * handler argument, which the handler uses to pick the PCB list to walk.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 
            (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
            "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 
            (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
            "List of active local stream sockets");
  929 
  930 static void
  931 unp_shutdown(unp)
  932         struct unpcb *unp;
  933 {
  934         struct socket *so;
  935 
  936         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
  937             (so = unp->unp_conn->unp_socket))
  938                 socantrcvmore(so);
  939 }
  940 
  941 static void
  942 unp_drop(unp, errno)
  943         struct unpcb *unp;
  944         int errno;
  945 {
  946         struct socket *so = unp->unp_socket;
  947 
  948         so->so_error = errno;
  949         unp_disconnect(unp);
  950 }
  951 
  952 #ifdef notdef
  953 void
  954 unp_drain()
  955 {
  956 
  957 }
  958 #endif
  959 
  960 static void
  961 unp_freerights(rp, fdcount)
  962         struct file **rp;
  963         int fdcount;
  964 {
  965         int i;
  966         struct file *fp;
  967 
  968         for (i = 0; i < fdcount; i++) {
  969                 fp = *rp;
  970                 /*
  971                  * zero the pointer before calling
  972                  * unp_discard since it may end up
  973                  * in unp_gc()..
  974                  */
  975                 *rp++ = 0;
  976                 unp_discard(fp);
  977         }
  978 }
  979 
/*
 * Convert received control messages from their in-kernel form into the
 * form handed to the receiving process.
 *
 * control  - mbuf holding one or more control messages; always freed
 *            before returning.
 * controlp - where the converted message chain is stored, one mbuf per
 *            message linked through m_next.  If NULL nothing is produced
 *            and any passed file references are released instead.
 *
 * SCM_RIGHTS messages carry struct file pointers; each one is installed in
 * a newly allocated descriptor of the current thread's process and the
 * pointer is replaced by that descriptor number.  All other messages are
 * copied across unchanged.
 *
 * Returns 0 on success, otherwise EINVAL (malformed message length),
 * EMSGSIZE (not enough free descriptors), E2BIG or ENOBUFS (no mbuf for
 * the converted message).  After an error the remaining messages are
 * still walked so that the file references they hold can be released.
 */
int
unp_externalize(control, controlp)
        struct mbuf *control, **controlp;
{
        struct thread *td = curthread;          /* XXX */
        struct cmsghdr *cm = mtod(control, struct cmsghdr *);
        int i;
        int *fdp;
        struct file **rp;
        struct file *fp;
        void *data;
        socklen_t clen = control->m_len, datalen;
        int error, newfds;
        int f;
        u_int newlen;

        error = 0;
        if (controlp != NULL) /* controlp == NULL => free control messages */
                *controlp = NULL;

        while (cm != NULL) {
                /* The header and the claimed length must fit in what is left. */
                if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
                        error = EINVAL;
                        break;
                }

                data = CMSG_DATA(cm);
                datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;

                if (cm->cmsg_level == SOL_SOCKET
                    && cm->cmsg_type == SCM_RIGHTS) {
                        newfds = datalen / sizeof(struct file *);
                        rp = data;

                        /* If we're not outputting the descriptors free them. */
                        if (error || controlp == NULL) {
                                unp_freerights(rp, newfds);
                                goto next;
                        }
                        FILEDESC_LOCK(td->td_proc->p_fd);
                        /* if the new FD's will not fit free them.  */
                        if (!fdavail(td, newfds)) {
                                FILEDESC_UNLOCK(td->td_proc->p_fd);
                                error = EMSGSIZE;
                                unp_freerights(rp, newfds);
                                goto next;
                        }
                        /*
                         * now change each pointer to an fd in the global
                         * table to an integer that is the index to the
                         * local fd table entry that we set up to point
                         * to the global one we are transferring.
                         */
                        newlen = newfds * sizeof(int);
                        *controlp = sbcreatecontrol(NULL, newlen,
                            SCM_RIGHTS, SOL_SOCKET);
                        if (*controlp == NULL) {
                                FILEDESC_UNLOCK(td->td_proc->p_fd);
                                error = E2BIG;
                                unp_freerights(rp, newfds);
                                goto next;
                        }

                        fdp = (int *)
                            CMSG_DATA(mtod(*controlp, struct cmsghdr *));
                        for (i = 0; i < newfds; i++) {
                                /* fdavail() succeeded above, so this must not fail. */
                                if (fdalloc(td, 0, &f))
                                        panic("unp_externalize fdalloc failed");
                                fp = *rp++;
                                td->td_proc->p_fd->fd_ofiles[f] = fp;
                                /* The file is no longer in transit. */
                                FILE_LOCK(fp);
                                fp->f_msgcount--;
                                FILE_UNLOCK(fp);
                                unp_rights--;
                                *fdp++ = f;
                        }
                        FILEDESC_UNLOCK(td->td_proc->p_fd);
                } else { /* We can just copy anything else across */
                        if (error || controlp == NULL)
                                goto next;
                        *controlp = sbcreatecontrol(NULL, datalen,
                            cm->cmsg_type, cm->cmsg_level);
                        if (*controlp == NULL) {
                                error = ENOBUFS;
                                goto next;
                        }
                        bcopy(data,
                            CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
                            datalen);
                }

                /* Chain the next converted message after the one just built. */
                controlp = &(*controlp)->m_next;

next:
                /* Step to the next message, or stop if this was the last. */
                if (CMSG_SPACE(datalen) < clen) {
                        clen -= CMSG_SPACE(datalen);
                        cm = (struct cmsghdr *)
                            ((caddr_t)cm + CMSG_SPACE(datalen));
                } else {
                        clen = 0;
                        cm = NULL;
                }
        }

        m_freem(control);

        return (error);
}
 1088 
 1089 void
 1090 unp_init(void)
 1091 {
 1092         unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
 1093             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 1094         uma_zone_set_max(unp_zone, nmbclusters);
 1095         if (unp_zone == 0)
 1096                 panic("unp_init");
 1097         LIST_INIT(&unp_dhead);
 1098         LIST_INIT(&unp_shead);
 1099 }
 1100 
 1101 static int
 1102 unp_internalize(controlp, td)
 1103         struct mbuf **controlp;
 1104         struct thread *td;
 1105 {
 1106         struct mbuf *control = *controlp;
 1107         struct proc *p = td->td_proc;
 1108         struct filedesc *fdescp = p->p_fd;
 1109         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1110         struct cmsgcred *cmcred;
 1111         struct file **rp;
 1112         struct file *fp;
 1113         struct timeval *tv;
 1114         int i, fd, *fdp;
 1115         void *data;
 1116         socklen_t clen = control->m_len, datalen;
 1117         int error, oldfds;
 1118         u_int newlen;
 1119 
 1120         error = 0;
 1121         *controlp = NULL;
 1122 
 1123         while (cm != NULL) {
 1124                 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 1125                     || cm->cmsg_len > clen) {
 1126                         error = EINVAL;
 1127                         goto out;
 1128                 }
 1129 
 1130                 data = CMSG_DATA(cm);
 1131                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1132 
 1133                 switch (cm->cmsg_type) {
 1134                 /*
 1135                  * Fill in credential information.
 1136                  */
 1137                 case SCM_CREDS:
 1138                         *controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 1139                             SCM_CREDS, SOL_SOCKET);
 1140                         if (*controlp == NULL) {
 1141                                 error = ENOBUFS;
 1142                                 goto out;
 1143                         }
 1144 
 1145                         cmcred = (struct cmsgcred *)
 1146                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1147                         cmcred->cmcred_pid = p->p_pid;
 1148                         cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 1149                         cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 1150                         cmcred->cmcred_euid = td->td_ucred->cr_uid;
 1151                         cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 1152                                                         CMGROUP_MAX);
 1153                         for (i = 0; i < cmcred->cmcred_ngroups; i++)
 1154                                 cmcred->cmcred_groups[i] =
 1155                                     td->td_ucred->cr_groups[i];
 1156                         break;
 1157 
 1158                 case SCM_RIGHTS:
 1159                         oldfds = datalen / sizeof (int);
 1160                         /*
 1161                          * check that all the FDs passed in refer to legal files
 1162                          * If not, reject the entire operation.
 1163                          */
 1164                         fdp = data;
 1165                         FILEDESC_LOCK(fdescp);
 1166                         for (i = 0; i < oldfds; i++) {
 1167                                 fd = *fdp++;
 1168                                 if ((unsigned)fd >= fdescp->fd_nfiles ||
 1169                                     fdescp->fd_ofiles[fd] == NULL) {
 1170                                         FILEDESC_UNLOCK(fdescp);
 1171                                         error = EBADF;
 1172                                         goto out;
 1173                                 }
 1174                                 fp = fdescp->fd_ofiles[fd];
 1175                                 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 1176                                         FILEDESC_UNLOCK(fdescp);
 1177                                         error = EOPNOTSUPP;
 1178                                         goto out;
 1179                                 }
 1180 
 1181                         }
 1182                         /*
 1183                          * Now replace the integer FDs with pointers to
 1184                          * the associated global file table entry..
 1185                          */
 1186                         newlen = oldfds * sizeof(struct file *);
 1187                         *controlp = sbcreatecontrol(NULL, newlen,
 1188                             SCM_RIGHTS, SOL_SOCKET);
 1189                         if (*controlp == NULL) {
 1190                                 FILEDESC_UNLOCK(fdescp);
 1191                                 error = E2BIG;
 1192                                 goto out;
 1193                         }
 1194 
 1195                         fdp = data;
 1196                         rp = (struct file **)
 1197                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1198                         for (i = 0; i < oldfds; i++) {
 1199                                 fp = fdescp->fd_ofiles[*fdp++];
 1200                                 *rp++ = fp;
 1201                                 FILE_LOCK(fp);
 1202                                 fp->f_count++;
 1203                                 fp->f_msgcount++;
 1204                                 FILE_UNLOCK(fp);
 1205                                 unp_rights++;
 1206                         }
 1207                         FILEDESC_UNLOCK(fdescp);
 1208                         break;
 1209 
 1210                 case SCM_TIMESTAMP:
 1211                         *controlp = sbcreatecontrol(NULL, sizeof(*tv),
 1212                             SCM_TIMESTAMP, SOL_SOCKET);
 1213                         if (*controlp == NULL) {
 1214                                 error = ENOBUFS;
 1215                                 goto out;
 1216                         }
 1217                         tv = (struct timeval *)
 1218                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1219                         microtime(tv);
 1220                         break;
 1221 
 1222                 default:
 1223                         error = EINVAL;
 1224                         goto out;
 1225                 }
 1226 
 1227                 controlp = &(*controlp)->m_next;
 1228 
 1229                 if (CMSG_SPACE(datalen) < clen) {
 1230                         clen -= CMSG_SPACE(datalen);
 1231                         cm = (struct cmsghdr *)
 1232                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1233                 } else {
 1234                         clen = 0;
 1235                         cm = NULL;
 1236                 }
 1237         }
 1238 
 1239 out:
 1240         m_freem(control);
 1241 
 1242         return (error);
 1243 }
 1244 
 1245 static int      unp_defer, unp_gcing;
 1246 
 1247 static void
 1248 unp_gc()
 1249 {
 1250         register struct file *fp, *nextfp;
 1251         register struct socket *so;
 1252         struct file **extra_ref, **fpp;
 1253         int nunref, i;
 1254 
 1255         if (unp_gcing)
 1256                 return;
 1257         unp_gcing = 1;
 1258         unp_defer = 0;
 1259         /* 
 1260          * before going through all this, set all FDs to 
 1261          * be NOT defered and NOT externally accessible
 1262          */
 1263         sx_slock(&filelist_lock);
 1264         LIST_FOREACH(fp, &filehead, f_list)
 1265                 fp->f_gcflag &= ~(FMARK|FDEFER);
 1266         do {
 1267                 LIST_FOREACH(fp, &filehead, f_list) {
 1268                         FILE_LOCK(fp);
 1269                         /*
 1270                          * If the file is not open, skip it
 1271                          */
 1272                         if (fp->f_count == 0) {
 1273                                 FILE_UNLOCK(fp);
 1274                                 continue;
 1275                         }
 1276                         /*
 1277                          * If we already marked it as 'defer'  in a
 1278                          * previous pass, then try process it this time
 1279                          * and un-mark it
 1280                          */
 1281                         if (fp->f_gcflag & FDEFER) {
 1282                                 fp->f_gcflag &= ~FDEFER;
 1283                                 unp_defer--;
 1284                         } else {
 1285                                 /*
 1286                                  * if it's not defered, then check if it's
 1287                                  * already marked.. if so skip it
 1288                                  */
 1289                                 if (fp->f_gcflag & FMARK) {
 1290                                         FILE_UNLOCK(fp);
 1291                                         continue;
 1292                                 }
 1293                                 /* 
 1294                                  * If all references are from messages
 1295                                  * in transit, then skip it. it's not 
 1296                                  * externally accessible.
 1297                                  */ 
 1298                                 if (fp->f_count == fp->f_msgcount) {
 1299                                         FILE_UNLOCK(fp);
 1300                                         continue;
 1301                                 }
 1302                                 /* 
 1303                                  * If it got this far then it must be
 1304                                  * externally accessible.
 1305                                  */
 1306                                 fp->f_gcflag |= FMARK;
 1307                         }
 1308                         /*
 1309                          * either it was defered, or it is externally 
 1310                          * accessible and not already marked so.
 1311                          * Now check if it is possibly one of OUR sockets.
 1312                          */ 
 1313                         if (fp->f_type != DTYPE_SOCKET ||
 1314                             (so = fp->f_data) == NULL) {
 1315                                 FILE_UNLOCK(fp);
 1316                                 continue;
 1317                         }
 1318                         FILE_UNLOCK(fp);
 1319                         if (so->so_proto->pr_domain != &localdomain ||
 1320                             (so->so_proto->pr_flags&PR_RIGHTS) == 0)
 1321                                 continue;
 1322 #ifdef notdef
 1323                         if (so->so_rcv.sb_flags & SB_LOCK) {
 1324                                 /*
 1325                                  * This is problematical; it's not clear
 1326                                  * we need to wait for the sockbuf to be
 1327                                  * unlocked (on a uniprocessor, at least),
 1328                                  * and it's also not clear what to do
 1329                                  * if sbwait returns an error due to receipt
 1330                                  * of a signal.  If sbwait does return
 1331                                  * an error, we'll go into an infinite
 1332                                  * loop.  Delete all of this for now.
 1333                                  */
 1334                                 (void) sbwait(&so->so_rcv);
 1335                                 goto restart;
 1336                         }
 1337 #endif
 1338                         /*
 1339                          * So, Ok, it's one of our sockets and it IS externally
 1340                          * accessible (or was defered). Now we look
 1341                          * to see if we hold any file descriptors in its
 1342                          * message buffers. Follow those links and mark them 
 1343                          * as accessible too.
 1344                          */
 1345                         unp_scan(so->so_rcv.sb_mb, unp_mark);
 1346                 }
 1347         } while (unp_defer);
 1348         sx_sunlock(&filelist_lock);
 1349         /*
 1350          * We grab an extra reference to each of the file table entries
 1351          * that are not otherwise accessible and then free the rights
 1352          * that are stored in messages on them.
 1353          *
 1354          * The bug in the orginal code is a little tricky, so I'll describe
 1355          * what's wrong with it here.
 1356          *
 1357          * It is incorrect to simply unp_discard each entry for f_msgcount
 1358          * times -- consider the case of sockets A and B that contain
 1359          * references to each other.  On a last close of some other socket,
 1360          * we trigger a gc since the number of outstanding rights (unp_rights)
 1361          * is non-zero.  If during the sweep phase the gc code un_discards,
 1362          * we end up doing a (full) closef on the descriptor.  A closef on A
 1363          * results in the following chain.  Closef calls soo_close, which
 1364          * calls soclose.   Soclose calls first (through the switch
 1365          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
 1366          * returns because the previous instance had set unp_gcing, and
 1367          * we return all the way back to soclose, which marks the socket
 1368          * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
 1369          * to free up the rights that are queued in messages on the socket A,
 1370          * i.e., the reference on B.  The sorflush calls via the dom_dispose
 1371          * switch unp_dispose, which unp_scans with unp_discard.  This second
 1372          * instance of unp_discard just calls closef on B.
 1373          *
 1374          * Well, a similar chain occurs on B, resulting in a sorflush on B,
 1375          * which results in another closef on A.  Unfortunately, A is already
 1376          * being closed, and the descriptor has already been marked with
 1377          * SS_NOFDREF, and soclose panics at this point.
 1378          *
 1379          * Here, we first take an extra reference to each inaccessible
 1380          * descriptor.  Then, we call sorflush ourself, since we know
 1381          * it is a Unix domain socket anyhow.  After we destroy all the
 1382          * rights carried in messages, we do a last closef to get rid
 1383          * of our extra reference.  This is the last close, and the
 1384          * unp_detach etc will shut down the socket.
 1385          *
 1386          * 91/09/19, bsy@cs.cmu.edu
 1387          */
 1388         extra_ref = malloc(nfiles * sizeof(struct file *), M_TEMP, M_WAITOK);
 1389         sx_slock(&filelist_lock);
 1390         for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
 1391             fp = nextfp) {
 1392                 nextfp = LIST_NEXT(fp, f_list);
 1393                 FILE_LOCK(fp);
 1394                 /* 
 1395                  * If it's not open, skip it
 1396                  */
 1397                 if (fp->f_count == 0) {
 1398                         FILE_UNLOCK(fp);
 1399                         continue;
 1400                 }
 1401                 /* 
 1402                  * If all refs are from msgs, and it's not marked accessible
 1403                  * then it must be referenced from some unreachable cycle
 1404                  * of (shut-down) FDs, so include it in our
 1405                  * list of FDs to remove
 1406                  */
 1407                 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) {
 1408                         *fpp++ = fp;
 1409                         nunref++;
 1410                         fp->f_count++;
 1411                 }
 1412                 FILE_UNLOCK(fp);
 1413         }
 1414         sx_sunlock(&filelist_lock);
 1415         /* 
 1416          * for each FD on our hit list, do the following two things
 1417          */
 1418         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 1419                 struct file *tfp = *fpp;
 1420                 FILE_LOCK(tfp);
 1421                 if (tfp->f_type == DTYPE_SOCKET &&
 1422                     tfp->f_data != NULL) {
 1423                         FILE_UNLOCK(tfp);
 1424                         sorflush(tfp->f_data);
 1425                 } else
 1426                         FILE_UNLOCK(tfp);
 1427         }
 1428         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
 1429                 closef(*fpp, (struct thread *) NULL);
 1430         free(extra_ref, M_TEMP);
 1431         unp_gcing = 0;
 1432 }
 1433 
 1434 void
 1435 unp_dispose(m)
 1436         struct mbuf *m;
 1437 {
 1438 
 1439         if (m)
 1440                 unp_scan(m, unp_discard);
 1441 }
 1442 
 1443 static int
 1444 unp_listen(unp, td)
 1445         struct unpcb *unp;
 1446         struct thread *td;
 1447 {
 1448 
 1449         cru2x(td->td_ucred, &unp->unp_peercred);
 1450         unp->unp_flags |= UNP_HAVEPCCACHED;
 1451         return (0);
 1452 }
 1453 
 1454 static void
 1455 unp_scan(m0, op)
 1456         register struct mbuf *m0;
 1457         void (*op)(struct file *);
 1458 {
 1459         struct mbuf *m;
 1460         struct file **rp;
 1461         struct cmsghdr *cm;
 1462         void *data;
 1463         int i;
 1464         socklen_t clen, datalen;
 1465         int qfds;
 1466 
 1467         while (m0) {
 1468                 for (m = m0; m; m = m->m_next) {
 1469                         if (m->m_type != MT_CONTROL)
 1470                                 continue;
 1471 
 1472                         cm = mtod(m, struct cmsghdr *);
 1473                         clen = m->m_len;
 1474 
 1475                         while (cm != NULL) {
 1476                                 if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 1477                                         break;
 1478 
 1479                                 data = CMSG_DATA(cm);
 1480                                 datalen = (caddr_t)cm + cm->cmsg_len
 1481                                     - (caddr_t)data;
 1482 
 1483                                 if (cm->cmsg_level == SOL_SOCKET &&
 1484                                     cm->cmsg_type == SCM_RIGHTS) {
 1485                                         qfds = datalen / sizeof (struct file *);
 1486                                         rp = data;
 1487                                         for (i = 0; i < qfds; i++)
 1488                                                 (*op)(*rp++);
 1489                                 }
 1490 
 1491                                 if (CMSG_SPACE(datalen) < clen) {
 1492                                         clen -= CMSG_SPACE(datalen);
 1493                                         cm = (struct cmsghdr *)
 1494                                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1495                                 } else {
 1496                                         clen = 0;
 1497                                         cm = NULL;
 1498                                 }
 1499                         }
 1500                 }
 1501                 m0 = m0->m_act;
 1502         }
 1503 }
 1504 
 1505 static void
 1506 unp_mark(fp)
 1507         struct file *fp;
 1508 {
 1509         if (fp->f_gcflag & FMARK)
 1510                 return;
 1511         unp_defer++;
 1512         fp->f_gcflag |= (FMARK|FDEFER);
 1513 }
 1514 
 1515 static void
 1516 unp_discard(fp)
 1517         struct file *fp;
 1518 {
 1519         FILE_LOCK(fp);
 1520         fp->f_msgcount--;
 1521         unp_rights--;
 1522         FILE_UNLOCK(fp);
 1523         (void) closef(fp, (struct thread *)NULL);
 1524 }

Cache object: f66a4ce3bc3438d8ed2a983bc330543b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.