The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   34  * $FreeBSD: releng/5.0/sys/kern/uipc_usrreq.c 106096 2002-10-28 21:17:53Z rwatson $
   35  */
   36 
   37 #include "opt_mac.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/domain.h>
   41 #include <sys/fcntl.h>
   42 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
   43 #include <sys/file.h>
   44 #include <sys/filedesc.h>
   45 #include <sys/jail.h>
   46 #include <sys/kernel.h>
   47 #include <sys/lock.h>
   48 #include <sys/mac.h>
   49 #include <sys/mbuf.h>
   50 #include <sys/mutex.h>
   51 #include <sys/namei.h>
   52 #include <sys/proc.h>
   53 #include <sys/protosw.h>
   54 #include <sys/resourcevar.h>
   55 #include <sys/socket.h>
   56 #include <sys/socketvar.h>
   57 #include <sys/signalvar.h>
   58 #include <sys/stat.h>
   59 #include <sys/sx.h>
   60 #include <sys/sysctl.h>
   61 #include <sys/systm.h>
   62 #include <sys/un.h>
   63 #include <sys/unpcb.h>
   64 #include <sys/vnode.h>
   65 
   66 #include <vm/uma.h>
   67 
      /* UMA zone from which unp_attach() allocates and unp_detach() frees unpcbs. */
   68 static uma_zone_t unp_zone;
      /* Generation counter; bumped and stamped into the pcb on attach and detach. */
   69 static  unp_gen_t unp_gencnt;
      /* Number of unpcbs currently attached (incremented in attach, decremented in detach). */
   70 static  u_int unp_count;
   71 
      /* PCB lists: unp_attach() links SOCK_DGRAM pcbs on unp_dhead, all others on unp_shead. */
   72 static  struct unp_head unp_shead, unp_dhead;
   73 
   74 /*
   75  * Unix communications domain.
   76  *
   77  * TODO:
   78  *      SEQPACKET, RDM
   79  *      rethink name space problems
   80  *      need a proper out-of-band
   81  *      lock pushdown
   82  */
      /* Placeholder AF_LOCAL address handed out when a socket has no bound name. */
   83 static struct   sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
   84 static ino_t    unp_ino;                /* prototype for fake inode numbers */
   85 
      /*
       * Forward declarations for the file-local helpers used by the
       * uipc_* protocol entry points below.
       */
   86 static int     unp_attach(struct socket *);
   87 static void    unp_detach(struct unpcb *);
   88 static int     unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
   89 static int     unp_connect(struct socket *,struct sockaddr *, struct thread *);
   90 static void    unp_disconnect(struct unpcb *);
   91 static void    unp_shutdown(struct unpcb *);
   92 static void    unp_drop(struct unpcb *, int);
   93 static void    unp_gc(void);
   94 static void    unp_scan(struct mbuf *, void (*)(struct file *));
   95 static void    unp_mark(struct file *);
   96 static void    unp_discard(struct file *);
   97 static void    unp_freerights(struct file **, int);
   98 static int     unp_internalize(struct mbuf **, struct thread *);
   99 static int     unp_listen(struct unpcb *, struct thread *);
  100 
  101 static int
  102 uipc_abort(struct socket *so)
  103 {
  104         struct unpcb *unp = sotounpcb(so);
  105 
  106         if (unp == 0)
  107                 return EINVAL;
  108         unp_drop(unp, ECONNABORTED);
  109         unp_detach(unp);
  110         sotryfree(so);
  111         return 0;
  112 }
  113 
  114 static int
  115 uipc_accept(struct socket *so, struct sockaddr **nam)
  116 {
  117         struct unpcb *unp = sotounpcb(so);
  118 
  119         if (unp == 0)
  120                 return EINVAL;
  121 
  122         /*
  123          * Pass back name of connected socket,
  124          * if it was bound and we are still connected
  125          * (our peer may have closed already!).
  126          */
  127         if (unp->unp_conn && unp->unp_conn->unp_addr) {
  128                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
  129                                     1);
  130         } else {
  131                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
  132         }
  133         return 0;
  134 }
  135 
  136 static int
  137 uipc_attach(struct socket *so, int proto, struct thread *td)
  138 {
  139         struct unpcb *unp = sotounpcb(so);
  140 
  141         if (unp != 0)
  142                 return EISCONN;
  143         return unp_attach(so);
  144 }
  145 
  146 static int
  147 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  148 {
  149         struct unpcb *unp = sotounpcb(so);
  150 
  151         if (unp == 0)
  152                 return EINVAL;
  153 
  154         return unp_bind(unp, nam, td);
  155 }
  156 
  157 static int
  158 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  159 {
  160         struct unpcb *unp = sotounpcb(so);
  161 
  162         if (unp == 0)
  163                 return EINVAL;
  164         return unp_connect(so, nam, curthread);
  165 }
  166 
  167 static int
  168 uipc_connect2(struct socket *so1, struct socket *so2)
  169 {
  170         struct unpcb *unp = sotounpcb(so1);
  171 
  172         if (unp == 0)
  173                 return EINVAL;
  174 
  175         return unp_connect2(so1, so2);
  176 }
  177 
  178 /* control is EOPNOTSUPP */
  179 
  180 static int
  181 uipc_detach(struct socket *so)
  182 {
  183         struct unpcb *unp = sotounpcb(so);
  184 
  185         if (unp == 0)
  186                 return EINVAL;
  187 
  188         unp_detach(unp);
  189         return 0;
  190 }
  191 
  192 static int
  193 uipc_disconnect(struct socket *so)
  194 {
  195         struct unpcb *unp = sotounpcb(so);
  196 
  197         if (unp == 0)
  198                 return EINVAL;
  199         unp_disconnect(unp);
  200         return 0;
  201 }
  202 
  203 static int
  204 uipc_listen(struct socket *so, struct thread *td)
  205 {
  206         struct unpcb *unp = sotounpcb(so);
  207 
  208         if (unp == 0 || unp->unp_vnode == 0)
  209                 return EINVAL;
  210         return unp_listen(unp, td);
  211 }
  212 
  213 static int
  214 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  215 {
  216         struct unpcb *unp = sotounpcb(so);
  217 
  218         if (unp == 0)
  219                 return EINVAL;
  220         if (unp->unp_conn && unp->unp_conn->unp_addr)
  221                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
  222                                     1);
  223         return 0;
  224 }
  225 
  226 static int
  227 uipc_rcvd(struct socket *so, int flags)
  228 {
  229         struct unpcb *unp = sotounpcb(so);
  230         struct socket *so2;
  231         u_long newhiwat;
  232 
  233         if (unp == 0)
  234                 return EINVAL;
  235         switch (so->so_type) {
  236         case SOCK_DGRAM:
  237                 panic("uipc_rcvd DGRAM?");
  238                 /*NOTREACHED*/
  239 
  240         case SOCK_STREAM:
  241                 if (unp->unp_conn == 0)
  242                         break;
  243                 so2 = unp->unp_conn->unp_socket;
  244                 /*
  245                  * Adjust backpressure on sender
  246                  * and wakeup any waiting to write.
  247                  */
  248                 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt;
  249                 unp->unp_mbcnt = so->so_rcv.sb_mbcnt;
  250                 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc -
  251                     so->so_rcv.sb_cc;
  252                 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
  253                     newhiwat, RLIM_INFINITY);
  254                 unp->unp_cc = so->so_rcv.sb_cc;
  255                 sowwakeup(so2);
  256                 break;
  257 
  258         default:
  259                 panic("uipc_rcvd unknown socktype");
  260         }
  261         return 0;
  262 }
  263 
  264 /* pru_rcvoob is EOPNOTSUPP */
  265 
      /*
       * Send data (m) and optional control information (control) on a
       * local-domain socket, optionally to the address nam.
       *
       * Errors: EINVAL if the socket has no unpcb, EOPNOTSUPP for PRUS_OOB,
       * any error from unp_internalize() or unp_connect(), EISCONN/ENOTCONN
       * for address/connection mismatches, ENOBUFS if a datagram cannot be
       * appended to the peer's receive buffer, and EPIPE if a stream socket
       * can no longer send.
       *
       * Whatever is still referenced by m and control when the function
       * finishes is freed at the release label; pointers are cleared to 0
       * once the mbufs have been handed to the peer's receive buffer.
       */
  266 static int
  267 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  268           struct mbuf *control, struct thread *td)
  269 {
  270         int error = 0;
  271         struct unpcb *unp = sotounpcb(so);
  272         struct socket *so2;
  273         u_long newhiwat;
  274 
  275         if (unp == 0) {
  276                 error = EINVAL;
  277                 goto release;
  278         }
  279         if (flags & PRUS_OOB) {
  280                 error = EOPNOTSUPP;
  281                 goto release;
  282         }
  283 
              /* Run any control message through unp_internalize() before sending. */
  284         if (control && (error = unp_internalize(&control, td)))
  285                 goto release;
  286 
  287         switch (so->so_type) {
  288         case SOCK_DGRAM: 
  289         {
  290                 struct sockaddr *from;
  291 
                      /*
                       * An explicit destination is only allowed on an
                       * unconnected socket; it is connected for the duration
                       * of this send and disconnected again below.
                       */
  292                 if (nam) {
  293                         if (unp->unp_conn) {
  294                                 error = EISCONN;
  295                                 break;
  296                         }
  297                         error = unp_connect(so, nam, td);
  298                         if (error)
  299                                 break;
  300                 } else {
  301                         if (unp->unp_conn == 0) {
  302                                 error = ENOTCONN;
  303                                 break;
  304                         }
  305                 }
  306                 so2 = unp->unp_conn->unp_socket;
                      /* Source address is our bound name, or sun_noname if unbound. */
  307                 if (unp->unp_addr)
  308                         from = (struct sockaddr *)unp->unp_addr;
  309                 else
  310                         from = &sun_noname;
  311                 if (sbappendaddr(&so2->so_rcv, from, m, control)) {
  312                         sorwakeup(so2);
  313                         m = 0;
  314                         control = 0;
  315                 } else
  316                         error = ENOBUFS;
  317                 if (nam)
  318                         unp_disconnect(unp);
  319                 break;
  320         }
  321 
  322         case SOCK_STREAM:
  323                 /* Connect if not connected yet. */
  324                 /*
  325                  * Note: A better implementation would complain
  326                  * if not equal to the peer's address.
  327                  */
  328                 if ((so->so_state & SS_ISCONNECTED) == 0) {
  329                         if (nam) {
  330                                 error = unp_connect(so, nam, td);
  331                                 if (error)
  332                                         break;  /* XXX */
  333                         } else {
  334                                 error = ENOTCONN;
  335                                 break;
  336                         }
  337                 }
  338 
  339                 if (so->so_state & SS_CANTSENDMORE) {
  340                         error = EPIPE;
  341                         break;
  342                 }
  343                 if (unp->unp_conn == 0)
  344                         panic("uipc_send connected but no connection?");
  345                 so2 = unp->unp_conn->unp_socket;
  346                 /*
  347                  * Send to paired receive port, and then reduce
  348                  * send buffer hiwater marks to maintain backpressure.
  349                  * Wake up readers.
  350                  */
  351                 if (control) {
  352                         if (sbappendcontrol(&so2->so_rcv, m, control))
  353                                 control = 0;
  354                 } else
  355                         sbappend(&so2->so_rcv, m);
                      /*
                       * Shrink our send limits by the amount the peer's receive
                       * buffer grew, and record the peer's new usage in its pcb.
                       */
  356                 so->so_snd.sb_mbmax -=
  357                         so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
  358                 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt;
  359                 newhiwat = so->so_snd.sb_hiwat -
  360                     (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc);
  361                 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
  362                     newhiwat, RLIM_INFINITY);
  363                 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
  364                 sorwakeup(so2);
                      /*
                       * NOTE(review): m is cleared here even when
                       * sbappendcontrol() returned 0 above; confirm the data
                       * mbufs are consumed (or otherwise freed) in that case.
                       */
  365                 m = 0;
  366                 break;
  367 
  368         default:
  369                 panic("uipc_send unknown socktype");
  370         }
  371 
  372         /*
  373          * SEND_EOF is equivalent to a SEND followed by
  374          * a SHUTDOWN.
  375          */
  376         if (flags & PRUS_EOF) {
  377                 socantsendmore(so);
  378                 unp_shutdown(unp);
  379         }
  380 
              /*
               * Control data that was not delivered because of an error is
               * passed to unp_dispose() before being freed below.
               * NOTE(review): presumably this releases the in-flight
               * descriptor references taken by unp_internalize() -- confirm.
               */
  381         if (control && error != 0)
  382                 unp_dispose(control);
  383 
  384 release:
  385         if (control)
  386                 m_freem(control);
  387         if (m)
  388                 m_freem(m);
  389         return error;
  390 }
  391 
  392 static int
  393 uipc_sense(struct socket *so, struct stat *sb)
  394 {
  395         struct unpcb *unp = sotounpcb(so);
  396         struct socket *so2;
  397 
  398         if (unp == 0)
  399                 return EINVAL;
  400         sb->st_blksize = so->so_snd.sb_hiwat;
  401         if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
  402                 so2 = unp->unp_conn->unp_socket;
  403                 sb->st_blksize += so2->so_rcv.sb_cc;
  404         }
  405         sb->st_dev = NOUDEV;
  406         if (unp->unp_ino == 0)
  407                 unp->unp_ino = unp_ino++;
  408         sb->st_ino = unp->unp_ino;
  409         return (0);
  410 }
  411 
  412 static int
  413 uipc_shutdown(struct socket *so)
  414 {
  415         struct unpcb *unp = sotounpcb(so);
  416 
  417         if (unp == 0)
  418                 return EINVAL;
  419         socantsendmore(so);
  420         unp_shutdown(unp);
  421         return 0;
  422 }
  423 
  424 static int
  425 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
  426 {
  427         struct unpcb *unp = sotounpcb(so);
  428 
  429         if (unp == 0)
  430                 return EINVAL;
  431         if (unp->unp_addr)
  432                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
  433         else
  434                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
  435         return 0;
  436 }
  437 
      /*
       * User-request dispatch table for local-domain sockets.  The entries
       * are positional, so their order must match the member order of
       * struct pr_usrreqs.  Control and out-of-band receive use the generic
       * "not supported" handlers; send, receive and poll use the generic
       * socket-layer routines.
       */
  438 struct pr_usrreqs uipc_usrreqs = {
  439         uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
  440         uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
  441         uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
  442         uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
  443         sosend, soreceive, sopoll
  444 };
  445 
  446 int
  447 uipc_ctloutput(so, sopt)
  448         struct socket *so;
  449         struct sockopt *sopt;
  450 {
  451         struct unpcb *unp = sotounpcb(so);
  452         int error;
  453 
  454         switch (sopt->sopt_dir) {
  455         case SOPT_GET:
  456                 switch (sopt->sopt_name) {
  457                 case LOCAL_PEERCRED:
  458                         if (unp->unp_flags & UNP_HAVEPC)
  459                                 error = sooptcopyout(sopt, &unp->unp_peercred,
  460                                     sizeof(unp->unp_peercred));
  461                         else {
  462                                 if (so->so_type == SOCK_STREAM)
  463                                         error = ENOTCONN;
  464                                 else
  465                                         error = EINVAL;
  466                         }
  467                         break;
  468                 default:
  469                         error = EOPNOTSUPP;
  470                         break;
  471                 }
  472                 break;
  473         case SOPT_SET:
  474         default:
  475                 error = EOPNOTSUPP;
  476                 break;
  477         }
  478         return (error);
  479 }
  480         
  481 /*
  482  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
  483  * for stream sockets, although the total for sender and receiver is
  484  * actually only PIPSIZ.
  485  * Datagram sockets really use the sendspace as the maximum datagram size,
  486  * and don't really want to reserve the sendspace.  Their recvspace should
  487  * be large enough for at least one max-size datagram plus address.
  488  */
  489 #ifndef PIPSIZ
  490 #define PIPSIZ  8192
  491 #endif
  492 static u_long   unpst_sendspace = PIPSIZ;
  493 static u_long   unpst_recvspace = PIPSIZ;
  494 static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
  495 static u_long   unpdg_recvspace = 4*1024;
  496 
  497 static int      unp_rights;                     /* file descriptors in flight */
  498 
  499 SYSCTL_DECL(_net_local_stream);
  500 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 
  501            &unpst_sendspace, 0, "");
  502 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
  503            &unpst_recvspace, 0, "");
  504 SYSCTL_DECL(_net_local_dgram);
  505 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
  506            &unpdg_sendspace, 0, "");
  507 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
  508            &unpdg_recvspace, 0, "");
  509 SYSCTL_DECL(_net_local);
  510 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
  511 
  512 static int
  513 unp_attach(so)
  514         struct socket *so;
  515 {
  516         register struct unpcb *unp;
  517         int error;
  518 
  519         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  520                 switch (so->so_type) {
  521 
  522                 case SOCK_STREAM:
  523                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
  524                         break;
  525 
  526                 case SOCK_DGRAM:
  527                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
  528                         break;
  529 
  530                 default:
  531                         panic("unp_attach");
  532                 }
  533                 if (error)
  534                         return (error);
  535         }
  536         unp = uma_zalloc(unp_zone, M_WAITOK);
  537         if (unp == NULL)
  538                 return (ENOBUFS);
  539         bzero(unp, sizeof *unp);
  540         unp->unp_gencnt = ++unp_gencnt;
  541         unp_count++;
  542         LIST_INIT(&unp->unp_refs);
  543         unp->unp_socket = so;
  544         FILEDESC_LOCK(curproc->p_fd);
  545         unp->unp_rvnode = curthread->td_proc->p_fd->fd_rdir;
  546         FILEDESC_UNLOCK(curproc->p_fd);
  547         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
  548                          : &unp_shead, unp, unp_link);
  549         so->so_pcb = unp;
  550         return (0);
  551 }
  552 
  553 static void
  554 unp_detach(unp)
  555         register struct unpcb *unp;
  556 {
  557         LIST_REMOVE(unp, unp_link);
  558         unp->unp_gencnt = ++unp_gencnt;
  559         --unp_count;
  560         if (unp->unp_vnode) {
  561                 unp->unp_vnode->v_socket = 0;
  562                 vrele(unp->unp_vnode);
  563                 unp->unp_vnode = 0;
  564         }
  565         if (unp->unp_conn)
  566                 unp_disconnect(unp);
  567         while (!LIST_EMPTY(&unp->unp_refs))
  568                 unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET);
  569         soisdisconnected(unp->unp_socket);
  570         unp->unp_socket->so_pcb = 0;
  571         if (unp_rights) {
  572                 /*
  573                  * Normally the receive buffer is flushed later,
  574                  * in sofree, but if our receive buffer holds references
  575                  * to descriptors that are now garbage, we will dispose
  576                  * of those descriptor references after the garbage collector
  577                  * gets them (resulting in a "panic: closef: count < 0").
  578                  */
  579                 sorflush(unp->unp_socket);
  580                 unp_gc();
  581         }
  582         if (unp->unp_addr)
  583                 FREE(unp->unp_addr, M_SONAME);
  584         uma_zfree(unp_zone, unp);
  585 }
  586 
  587 static int
  588 unp_bind(unp, nam, td)
  589         struct unpcb *unp;
  590         struct sockaddr *nam;
  591         struct thread *td;
  592 {
  593         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  594         struct vnode *vp;
  595         struct mount *mp;
  596         struct vattr vattr;
  597         int error, namelen;
  598         struct nameidata nd;
  599         char *buf;
  600 
  601         if (unp->unp_vnode != NULL)
  602                 return (EINVAL);
  603 
  604         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  605         if (namelen <= 0)
  606                 return EINVAL;
  607 
  608         buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
  609         strlcpy(buf, soun->sun_path, namelen + 1);
  610 
  611 restart:
  612         NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
  613             buf, td);
  614 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  615         error = namei(&nd);
  616         if (error) {
  617                 free(buf, M_TEMP);
  618                 return (error);
  619         }
  620         vp = nd.ni_vp;
  621         if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
  622                 NDFREE(&nd, NDF_ONLY_PNBUF);
  623                 if (nd.ni_dvp == vp)
  624                         vrele(nd.ni_dvp);
  625                 else
  626                         vput(nd.ni_dvp);
  627                 if (vp != NULL) {
  628                         vrele(vp);
  629                         free(buf, M_TEMP);
  630                         return (EADDRINUSE);
  631                 }
  632                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
  633                 if (error) {
  634                         free(buf, M_TEMP);
  635                         return (error);
  636                 }
  637                 goto restart;
  638         }
  639         VATTR_NULL(&vattr);
  640         vattr.va_type = VSOCK;
  641         FILEDESC_LOCK(td->td_proc->p_fd);
  642         vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
  643         FILEDESC_UNLOCK(td->td_proc->p_fd);
  644 #ifdef MAC
  645         error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  646             &vattr);
  647 #endif
  648         if (error == 0) {
  649                 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
  650                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  651         }
  652         NDFREE(&nd, NDF_ONLY_PNBUF);
  653         vput(nd.ni_dvp);
  654         if (error) {
  655                 free(buf, M_TEMP);
  656                 return (error);
  657         }
  658         vp = nd.ni_vp;
  659         vp->v_socket = unp->unp_socket;
  660         unp->unp_vnode = vp;
  661         unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
  662         VOP_UNLOCK(vp, 0, td);
  663         vn_finished_write(mp);
  664         free(buf, M_TEMP);
  665         return (0);
  666 }
  667 
  668 static int
  669 unp_connect(so, nam, td)
  670         struct socket *so;
  671         struct sockaddr *nam;
  672         struct thread *td;
  673 {
  674         register struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  675         register struct vnode *vp;
  676         register struct socket *so2, *so3;
  677         struct unpcb *unp, *unp2, *unp3;
  678         int error, len;
  679         struct nameidata nd;
  680         char buf[SOCK_MAXADDRLEN];
  681 
  682         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
  683         if (len <= 0)
  684                 return EINVAL;
  685         strlcpy(buf, soun->sun_path, len + 1);
  686 
  687         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
  688         error = namei(&nd);
  689         if (error)
  690                 return (error);
  691         vp = nd.ni_vp;
  692         NDFREE(&nd, NDF_ONLY_PNBUF);
  693         if (vp->v_type != VSOCK) {
  694                 error = ENOTSOCK;
  695                 goto bad;
  696         }
  697         error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
  698         if (error)
  699                 goto bad;
  700         so2 = vp->v_socket;
  701         if (so2 == 0) {
  702                 error = ECONNREFUSED;
  703                 goto bad;
  704         }
  705         if (so->so_type != so2->so_type) {
  706                 error = EPROTOTYPE;
  707                 goto bad;
  708         }
  709         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
  710                 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
  711                     (so3 = sonewconn(so2, 0)) == 0) {
  712                         error = ECONNREFUSED;
  713                         goto bad;
  714                 }
  715                 unp = sotounpcb(so);
  716                 unp2 = sotounpcb(so2);
  717                 unp3 = sotounpcb(so3);
  718                 if (unp2->unp_addr)
  719                         unp3->unp_addr = (struct sockaddr_un *)
  720                                 dup_sockaddr((struct sockaddr *)
  721                                              unp2->unp_addr, 1);
  722 
  723                 /*
  724                  * unp_peercred management:
  725                  *
  726                  * The connecter's (client's) credentials are copied
  727                  * from its process structure at the time of connect()
  728                  * (which is now).
  729                  */
  730                 cru2x(td->td_ucred, &unp3->unp_peercred);
  731                 unp3->unp_flags |= UNP_HAVEPC;
  732                 /*
  733                  * The receiver's (server's) credentials are copied
  734                  * from the unp_peercred member of socket on which the
  735                  * former called listen(); unp_listen() cached that
  736                  * process's credentials at that time so we can use
  737                  * them now.
  738                  */
  739                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
  740                     ("unp_connect: listener without cached peercred"));
  741                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
  742                     sizeof(unp->unp_peercred));
  743                 unp->unp_flags |= UNP_HAVEPC;
  744 #ifdef MAC
  745                 mac_set_socket_peer_from_socket(so, so3);
  746                 mac_set_socket_peer_from_socket(so3, so);
  747 #endif
  748 
  749                 so2 = so3;
  750         }
  751         error = unp_connect2(so, so2);
  752 bad:
  753         vput(vp);
  754         return (error);
  755 }
  756 
  757 int
  758 unp_connect2(so, so2)
  759         register struct socket *so;
  760         register struct socket *so2;
  761 {
  762         register struct unpcb *unp = sotounpcb(so);
  763         register struct unpcb *unp2;
  764 
  765         if (so2->so_type != so->so_type)
  766                 return (EPROTOTYPE);
  767         unp2 = sotounpcb(so2);
  768         unp->unp_conn = unp2;
  769         switch (so->so_type) {
  770 
  771         case SOCK_DGRAM:
  772                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
  773                 soisconnected(so);
  774                 break;
  775 
  776         case SOCK_STREAM:
  777                 unp2->unp_conn = unp;
  778                 soisconnected(so);
  779                 soisconnected(so2);
  780                 break;
  781 
  782         default:
  783                 panic("unp_connect2");
  784         }
  785         return (0);
  786 }
  787 
  788 static void
  789 unp_disconnect(unp)
  790         struct unpcb *unp;
  791 {
  792         register struct unpcb *unp2 = unp->unp_conn;
  793 
  794         if (unp2 == 0)
  795                 return;
  796         unp->unp_conn = 0;
  797         switch (unp->unp_socket->so_type) {
  798 
  799         case SOCK_DGRAM:
  800                 LIST_REMOVE(unp, unp_reflink);
  801                 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
  802                 break;
  803 
  804         case SOCK_STREAM:
  805                 soisdisconnected(unp->unp_socket);
  806                 unp2->unp_conn = 0;
  807                 soisdisconnected(unp2->unp_socket);
  808                 break;
  809         }
  810 }
  811 
      /*
       * Compiled only when "notdef" is defined.  unp_abort() simply detaches
       * the pcb via unp_detach().
       */
  812 #ifdef notdef
  813 void
  814 unp_abort(unp)
  815         struct unpcb *unp;
  816 {
  817 
  818         unp_detach(unp);
  819 }
  820 #endif
  821 
  822 static int
  823 unp_pcblist(SYSCTL_HANDLER_ARGS)
  824 {
  825         int error, i, n;
  826         struct unpcb *unp, **unp_list;
  827         unp_gen_t gencnt;
  828         struct xunpgen *xug;
  829         struct unp_head *head;
  830         struct xunpcb *xu;
  831 
  832         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
  833 
  834         /*
  835          * The process of preparing the PCB list is too time-consuming and
  836          * resource-intensive to repeat twice on every request.
  837          */
  838         if (req->oldptr == 0) {
  839                 n = unp_count;
  840                 req->oldidx = 2 * (sizeof *xug)
  841                         + (n + n/8) * sizeof(struct xunpcb);
  842                 return 0;
  843         }
  844 
  845         if (req->newptr != 0)
  846                 return EPERM;
  847 
  848         /*
  849          * OK, now we're committed to doing something.
  850          */
  851         xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
  852         gencnt = unp_gencnt;
  853         n = unp_count;
  854 
  855         xug->xug_len = sizeof *xug;
  856         xug->xug_count = n;
  857         xug->xug_gen = gencnt;
  858         xug->xug_sogen = so_gencnt;
  859         error = SYSCTL_OUT(req, xug, sizeof *xug);
  860         if (error) {
  861                 free(xug, M_TEMP);
  862                 return error;
  863         }
  864 
  865         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
  866         
  867         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
  868              unp = LIST_NEXT(unp, unp_link)) {
  869                 if (unp->unp_gencnt <= gencnt) {
  870                         if (cr_cansee(req->td->td_ucred,
  871                             unp->unp_socket->so_cred))
  872                                 continue;
  873                         unp_list[i++] = unp;
  874                 }
  875         }
  876         n = i;                  /* in case we lost some during malloc */
  877 
  878         error = 0;
  879         xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK);
  880         for (i = 0; i < n; i++) {
  881                 unp = unp_list[i];
  882                 if (unp->unp_gencnt <= gencnt) {
  883                         xu->xu_len = sizeof *xu;
  884                         xu->xu_unpp = unp;
  885                         /*
  886                          * XXX - need more locking here to protect against
  887                          * connect/disconnect races for SMP.
  888                          */
  889                         if (unp->unp_addr)
  890                                 bcopy(unp->unp_addr, &xu->xu_addr, 
  891                                       unp->unp_addr->sun_len);
  892                         if (unp->unp_conn && unp->unp_conn->unp_addr)
  893                                 bcopy(unp->unp_conn->unp_addr,
  894                                       &xu->xu_caddr,
  895                                       unp->unp_conn->unp_addr->sun_len);
  896                         bcopy(unp, &xu->xu_unp, sizeof *unp);
  897                         sotoxsocket(unp->unp_socket, &xu->xu_socket);
  898                         error = SYSCTL_OUT(req, xu, sizeof *xu);
  899                 }
  900         }
  901         free(xu, M_TEMP);
  902         if (!error) {
  903                 /*
  904                  * Give the user an updated idea of our state.
  905                  * If the generation differs from what we told
  906                  * her before, she knows that something happened
  907                  * while we were processing this request, and it
  908                  * might be necessary to retry.
  909                  */
  910                 xug->xug_gen = unp_gencnt;
  911                 xug->xug_sogen = so_gencnt;
  912                 xug->xug_count = unp_count;
  913                 error = SYSCTL_OUT(req, xug, sizeof *xug);
  914         }
  915         free(unp_list, M_TEMP);
  916         free(xug, M_TEMP);
  917         return error;
  918 }
  919 
  920 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 
  921             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
  922             "List of active local datagram sockets");
  923 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 
  924             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
  925             "List of active local stream sockets");
  926 
  927 static void
  928 unp_shutdown(unp)
  929         struct unpcb *unp;
  930 {
  931         struct socket *so;
  932 
  933         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
  934             (so = unp->unp_conn->unp_socket))
  935                 socantrcvmore(so);
  936 }
  937 
  938 static void
  939 unp_drop(unp, errno)
  940         struct unpcb *unp;
  941         int errno;
  942 {
  943         struct socket *so = unp->unp_socket;
  944 
  945         so->so_error = errno;
  946         unp_disconnect(unp);
  947 }
  948 
  949 #ifdef notdef
  950 void
  951 unp_drain()
  952 {
  953 
  954 }
  955 #endif
  956 
  957 static void
  958 unp_freerights(rp, fdcount)
  959         struct file **rp;
  960         int fdcount;
  961 {
  962         int i;
  963         struct file *fp;
  964 
  965         for (i = 0; i < fdcount; i++) {
  966                 fp = *rp;
  967                 /*
  968                  * zero the pointer before calling
  969                  * unp_discard since it may end up
  970                  * in unp_gc()..
  971                  */
  972                 *rp++ = 0;
  973                 unp_discard(fp);
  974         }
  975 }
  976 
  977 int
  978 unp_externalize(control, controlp)
  979         struct mbuf *control, **controlp;
  980 {
  981         struct thread *td = curthread;          /* XXX */
  982         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
  983         int i;
  984         int *fdp;
  985         struct file **rp;
  986         struct file *fp;
  987         void *data;
  988         socklen_t clen = control->m_len, datalen;
  989         int error, newfds;
  990         int f;
  991         u_int newlen;
  992 
  993         error = 0;
  994         if (controlp != NULL) /* controlp == NULL => free control messages */
  995                 *controlp = NULL;
  996 
  997         while (cm != NULL) {
  998                 if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
  999                         error = EINVAL;
 1000                         break;
 1001                 }
 1002 
 1003                 data = CMSG_DATA(cm);
 1004                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1005 
 1006                 if (cm->cmsg_level == SOL_SOCKET
 1007                     && cm->cmsg_type == SCM_RIGHTS) {
 1008                         newfds = datalen / sizeof(struct file *);
 1009                         rp = data;
 1010 
 1011                         /* If we're not outputting the discriptors free them. */
 1012                         if (error || controlp == NULL) {
 1013                                 unp_freerights(rp, newfds);
 1014                                 goto next;
 1015                         }
 1016                         FILEDESC_LOCK(td->td_proc->p_fd);
 1017                         /* if the new FD's will not fit free them.  */
 1018                         if (!fdavail(td, newfds)) {
 1019                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1020                                 error = EMSGSIZE;
 1021                                 unp_freerights(rp, newfds);
 1022                                 goto next;
 1023                         }
 1024                         /*
 1025                          * now change each pointer to an fd in the global
 1026                          * table to an integer that is the index to the
 1027                          * local fd table entry that we set up to point
 1028                          * to the global one we are transferring.
 1029                          */
 1030                         newlen = newfds * sizeof(int);
 1031                         *controlp = sbcreatecontrol(NULL, newlen,
 1032                             SCM_RIGHTS, SOL_SOCKET);
 1033                         if (*controlp == NULL) {
 1034                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1035                                 error = E2BIG;
 1036                                 unp_freerights(rp, newfds);
 1037                                 goto next;
 1038                         }
 1039 
 1040                         fdp = (int *)
 1041                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1042                         for (i = 0; i < newfds; i++) {
 1043                                 if (fdalloc(td, 0, &f))
 1044                                         panic("unp_externalize fdalloc failed");
 1045                                 fp = *rp++;
 1046                                 td->td_proc->p_fd->fd_ofiles[f] = fp;
 1047                                 FILE_LOCK(fp);
 1048                                 fp->f_msgcount--;
 1049                                 FILE_UNLOCK(fp);
 1050                                 unp_rights--;
 1051                                 *fdp++ = f;
 1052                         }
 1053                         FILEDESC_UNLOCK(td->td_proc->p_fd);
 1054                 } else { /* We can just copy anything else across */
 1055                         if (error || controlp == NULL)
 1056                                 goto next;
 1057                         *controlp = sbcreatecontrol(NULL, datalen,
 1058                             cm->cmsg_type, cm->cmsg_level);
 1059                         if (*controlp == NULL) {
 1060                                 error = ENOBUFS;
 1061                                 goto next;
 1062                         }
 1063                         bcopy(data,
 1064                             CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 1065                             datalen);
 1066                 }
 1067 
 1068                 controlp = &(*controlp)->m_next;
 1069 
 1070 next:
 1071                 if (CMSG_SPACE(datalen) < clen) {
 1072                         clen -= CMSG_SPACE(datalen);
 1073                         cm = (struct cmsghdr *)
 1074                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1075                 } else {
 1076                         clen = 0;
 1077                         cm = NULL;
 1078                 }
 1079         }
 1080 
 1081         m_freem(control);
 1082 
 1083         return (error);
 1084 }
 1085 
 1086 void
 1087 unp_init(void)
 1088 {
 1089         unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
 1090             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 1091         uma_zone_set_max(unp_zone, nmbclusters);
 1092         if (unp_zone == 0)
 1093                 panic("unp_init");
 1094         LIST_INIT(&unp_dhead);
 1095         LIST_INIT(&unp_shead);
 1096 }
 1097 
 1098 #ifndef MIN
 1099 #define MIN(a,b) (((a)<(b))?(a):(b))
 1100 #endif
 1101 
 1102 static int
 1103 unp_internalize(controlp, td)
 1104         struct mbuf **controlp;
 1105         struct thread *td;
 1106 {
 1107         struct mbuf *control = *controlp;
 1108         struct proc *p = td->td_proc;
 1109         struct filedesc *fdescp = p->p_fd;
 1110         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1111         struct cmsgcred *cmcred;
 1112         struct file **rp;
 1113         struct file *fp;
 1114         struct timeval *tv;
 1115         int i, fd, *fdp;
 1116         void *data;
 1117         socklen_t clen = control->m_len, datalen;
 1118         int error, oldfds;
 1119         u_int newlen;
 1120 
 1121         error = 0;
 1122         *controlp = NULL;
 1123 
 1124         while (cm != NULL) {
 1125                 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 1126                     || cm->cmsg_len > clen) {
 1127                         error = EINVAL;
 1128                         goto out;
 1129                 }
 1130 
 1131                 data = CMSG_DATA(cm);
 1132                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1133 
 1134                 switch (cm->cmsg_type) {
 1135                 /*
 1136                  * Fill in credential information.
 1137                  */
 1138                 case SCM_CREDS:
 1139                         *controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 1140                             SCM_CREDS, SOL_SOCKET);
 1141                         if (*controlp == NULL) {
 1142                                 error = ENOBUFS;
 1143                                 goto out;
 1144                         }
 1145 
 1146                         cmcred = (struct cmsgcred *)
 1147                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1148                         cmcred->cmcred_pid = p->p_pid;
 1149                         cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 1150                         cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 1151                         cmcred->cmcred_euid = td->td_ucred->cr_uid;
 1152                         cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 1153                                                         CMGROUP_MAX);
 1154                         for (i = 0; i < cmcred->cmcred_ngroups; i++)
 1155                                 cmcred->cmcred_groups[i] =
 1156                                     td->td_ucred->cr_groups[i];
 1157                         break;
 1158 
 1159                 case SCM_RIGHTS:
 1160                         oldfds = datalen / sizeof (int);
 1161                         /*
 1162                          * check that all the FDs passed in refer to legal files
 1163                          * If not, reject the entire operation.
 1164                          */
 1165                         fdp = data;
 1166                         FILEDESC_LOCK(fdescp);
 1167                         for (i = 0; i < oldfds; i++) {
 1168                                 fd = *fdp++;
 1169                                 if ((unsigned)fd >= fdescp->fd_nfiles ||
 1170                                     fdescp->fd_ofiles[fd] == NULL) {
 1171                                         FILEDESC_UNLOCK(fdescp);
 1172                                         error = EBADF;
 1173                                         goto out;
 1174                                 }
 1175                         }
 1176                         /*
 1177                          * Now replace the integer FDs with pointers to
 1178                          * the associated global file table entry..
 1179                          */
 1180                         newlen = oldfds * sizeof(struct file *);
 1181                         *controlp = sbcreatecontrol(NULL, newlen,
 1182                             SCM_RIGHTS, SOL_SOCKET);
 1183                         if (*controlp == NULL) {
 1184                                 FILEDESC_UNLOCK(fdescp);
 1185                                 error = E2BIG;
 1186                                 goto out;
 1187                         }
 1188 
 1189                         fdp = data;
 1190                         rp = (struct file **)
 1191                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1192                         for (i = 0; i < oldfds; i++) {
 1193                                 fp = fdescp->fd_ofiles[*fdp++];
 1194                                 *rp++ = fp;
 1195                                 FILE_LOCK(fp);
 1196                                 fp->f_count++;
 1197                                 fp->f_msgcount++;
 1198                                 FILE_UNLOCK(fp);
 1199                                 unp_rights++;
 1200                         }
 1201                         FILEDESC_UNLOCK(fdescp);
 1202                         break;
 1203 
 1204                 case SCM_TIMESTAMP:
 1205                         *controlp = sbcreatecontrol(NULL, sizeof(*tv),
 1206                             SCM_TIMESTAMP, SOL_SOCKET);
 1207                         if (*controlp == NULL) {
 1208                                 error = ENOBUFS;
 1209                                 goto out;
 1210                         }
 1211                         tv = (struct timeval *)
 1212                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1213                         microtime(tv);
 1214                         break;
 1215 
 1216                 default:
 1217                         error = EINVAL;
 1218                         goto out;
 1219                 }
 1220 
 1221                 controlp = &(*controlp)->m_next;
 1222 
 1223                 if (CMSG_SPACE(datalen) < clen) {
 1224                         clen -= CMSG_SPACE(datalen);
 1225                         cm = (struct cmsghdr *)
 1226                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1227                 } else {
 1228                         clen = 0;
 1229                         cm = NULL;
 1230                 }
 1231         }
 1232 
 1233 out:
 1234         m_freem(control);
 1235 
 1236         return (error);
 1237 }
 1238 
 1239 static int      unp_defer, unp_gcing;
 1240 
 1241 static void
 1242 unp_gc()
 1243 {
 1244         register struct file *fp, *nextfp;
 1245         register struct socket *so;
 1246         struct file **extra_ref, **fpp;
 1247         int nunref, i;
 1248 
 1249         if (unp_gcing)
 1250                 return;
 1251         unp_gcing = 1;
 1252         unp_defer = 0;
 1253         /* 
 1254          * before going through all this, set all FDs to 
 1255          * be NOT defered and NOT externally accessible
 1256          */
 1257         sx_slock(&filelist_lock);
 1258         LIST_FOREACH(fp, &filehead, f_list)
 1259                 fp->f_gcflag &= ~(FMARK|FDEFER);
 1260         do {
 1261                 LIST_FOREACH(fp, &filehead, f_list) {
 1262                         FILE_LOCK(fp);
 1263                         /*
 1264                          * If the file is not open, skip it
 1265                          */
 1266                         if (fp->f_count == 0) {
 1267                                 FILE_UNLOCK(fp);
 1268                                 continue;
 1269                         }
 1270                         /*
 1271                          * If we already marked it as 'defer'  in a
 1272                          * previous pass, then try process it this time
 1273                          * and un-mark it
 1274                          */
 1275                         if (fp->f_gcflag & FDEFER) {
 1276                                 fp->f_gcflag &= ~FDEFER;
 1277                                 unp_defer--;
 1278                         } else {
 1279                                 /*
 1280                                  * if it's not defered, then check if it's
 1281                                  * already marked.. if so skip it
 1282                                  */
 1283                                 if (fp->f_gcflag & FMARK) {
 1284                                         FILE_UNLOCK(fp);
 1285                                         continue;
 1286                                 }
 1287                                 /* 
 1288                                  * If all references are from messages
 1289                                  * in transit, then skip it. it's not 
 1290                                  * externally accessible.
 1291                                  */ 
 1292                                 if (fp->f_count == fp->f_msgcount) {
 1293                                         FILE_UNLOCK(fp);
 1294                                         continue;
 1295                                 }
 1296                                 /* 
 1297                                  * If it got this far then it must be
 1298                                  * externally accessible.
 1299                                  */
 1300                                 fp->f_gcflag |= FMARK;
 1301                         }
 1302                         /*
 1303                          * either it was defered, or it is externally 
 1304                          * accessible and not already marked so.
 1305                          * Now check if it is possibly one of OUR sockets.
 1306                          */ 
 1307                         if (fp->f_type != DTYPE_SOCKET ||
 1308                             (so = (struct socket *)fp->f_data) == 0) {
 1309                                 FILE_UNLOCK(fp);
 1310                                 continue;
 1311                         }
 1312                         FILE_UNLOCK(fp);
 1313                         if (so->so_proto->pr_domain != &localdomain ||
 1314                             (so->so_proto->pr_flags&PR_RIGHTS) == 0)
 1315                                 continue;
 1316 #ifdef notdef
 1317                         if (so->so_rcv.sb_flags & SB_LOCK) {
 1318                                 /*
 1319                                  * This is problematical; it's not clear
 1320                                  * we need to wait for the sockbuf to be
 1321                                  * unlocked (on a uniprocessor, at least),
 1322                                  * and it's also not clear what to do
 1323                                  * if sbwait returns an error due to receipt
 1324                                  * of a signal.  If sbwait does return
 1325                                  * an error, we'll go into an infinite
 1326                                  * loop.  Delete all of this for now.
 1327                                  */
 1328                                 (void) sbwait(&so->so_rcv);
 1329                                 goto restart;
 1330                         }
 1331 #endif
 1332                         /*
 1333                          * So, Ok, it's one of our sockets and it IS externally
 1334                          * accessible (or was defered). Now we look
 1335                          * to see if we hold any file descriptors in its
 1336                          * message buffers. Follow those links and mark them 
 1337                          * as accessible too.
 1338                          */
 1339                         unp_scan(so->so_rcv.sb_mb, unp_mark);
 1340                 }
 1341         } while (unp_defer);
 1342         sx_sunlock(&filelist_lock);
 1343         /*
 1344          * We grab an extra reference to each of the file table entries
 1345          * that are not otherwise accessible and then free the rights
 1346          * that are stored in messages on them.
 1347          *
 1348          * The bug in the orginal code is a little tricky, so I'll describe
 1349          * what's wrong with it here.
 1350          *
 1351          * It is incorrect to simply unp_discard each entry for f_msgcount
 1352          * times -- consider the case of sockets A and B that contain
 1353          * references to each other.  On a last close of some other socket,
 1354          * we trigger a gc since the number of outstanding rights (unp_rights)
 1355          * is non-zero.  If during the sweep phase the gc code un_discards,
 1356          * we end up doing a (full) closef on the descriptor.  A closef on A
 1357          * results in the following chain.  Closef calls soo_close, which
 1358          * calls soclose.   Soclose calls first (through the switch
 1359          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
 1360          * returns because the previous instance had set unp_gcing, and
 1361          * we return all the way back to soclose, which marks the socket
 1362          * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
 1363          * to free up the rights that are queued in messages on the socket A,
 1364          * i.e., the reference on B.  The sorflush calls via the dom_dispose
 1365          * switch unp_dispose, which unp_scans with unp_discard.  This second
 1366          * instance of unp_discard just calls closef on B.
 1367          *
 1368          * Well, a similar chain occurs on B, resulting in a sorflush on B,
 1369          * which results in another closef on A.  Unfortunately, A is already
 1370          * being closed, and the descriptor has already been marked with
 1371          * SS_NOFDREF, and soclose panics at this point.
 1372          *
 1373          * Here, we first take an extra reference to each inaccessible
 1374          * descriptor.  Then, we call sorflush ourself, since we know
 1375          * it is a Unix domain socket anyhow.  After we destroy all the
 1376          * rights carried in messages, we do a last closef to get rid
 1377          * of our extra reference.  This is the last close, and the
 1378          * unp_detach etc will shut down the socket.
 1379          *
 1380          * 91/09/19, bsy@cs.cmu.edu
 1381          */
 1382         extra_ref = malloc(nfiles * sizeof(struct file *), M_TEMP, M_WAITOK);
 1383         sx_slock(&filelist_lock);
 1384         for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
 1385             fp = nextfp) {
 1386                 nextfp = LIST_NEXT(fp, f_list);
 1387                 FILE_LOCK(fp);
 1388                 /* 
 1389                  * If it's not open, skip it
 1390                  */
 1391                 if (fp->f_count == 0) {
 1392                         FILE_UNLOCK(fp);
 1393                         continue;
 1394                 }
 1395                 /* 
 1396                  * If all refs are from msgs, and it's not marked accessible
 1397                  * then it must be referenced from some unreachable cycle
 1398                  * of (shut-down) FDs, so include it in our
 1399                  * list of FDs to remove
 1400                  */
 1401                 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) {
 1402                         *fpp++ = fp;
 1403                         nunref++;
 1404                         fp->f_count++;
 1405                 }
 1406                 FILE_UNLOCK(fp);
 1407         }
 1408         sx_sunlock(&filelist_lock);
 1409         /* 
 1410          * for each FD on our hit list, do the following two things
 1411          */
 1412         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 1413                 struct file *tfp = *fpp;
 1414                 FILE_LOCK(tfp);
 1415                 if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) {
 1416                         FILE_UNLOCK(tfp);
 1417                         sorflush((struct socket *)(tfp->f_data));
 1418                 } else
 1419                         FILE_UNLOCK(tfp);
 1420         }
 1421         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
 1422                 closef(*fpp, (struct thread *) NULL);
 1423         free(extra_ref, M_TEMP);
 1424         unp_gcing = 0;
 1425 }
 1426 
 1427 void
 1428 unp_dispose(m)
 1429         struct mbuf *m;
 1430 {
 1431 
 1432         if (m)
 1433                 unp_scan(m, unp_discard);
 1434 }
 1435 
 1436 static int
 1437 unp_listen(unp, td)
 1438         struct unpcb *unp;
 1439         struct thread *td;
 1440 {
 1441 
 1442         cru2x(td->td_ucred, &unp->unp_peercred);
 1443         unp->unp_flags |= UNP_HAVEPCCACHED;
 1444         return (0);
 1445 }
 1446 
 1447 static void
 1448 unp_scan(m0, op)
 1449         register struct mbuf *m0;
 1450         void (*op)(struct file *);
 1451 {
 1452         struct mbuf *m;
 1453         struct file **rp;
 1454         struct cmsghdr *cm;
 1455         void *data;
 1456         int i;
 1457         socklen_t clen, datalen;
 1458         int qfds;
 1459 
 1460         while (m0) {
 1461                 for (m = m0; m; m = m->m_next) {
 1462                         if (m->m_type != MT_CONTROL)
 1463                                 continue;
 1464 
 1465                         cm = mtod(m, struct cmsghdr *);
 1466                         clen = m->m_len;
 1467 
 1468                         while (cm != NULL) {
 1469                                 if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 1470                                         break;
 1471 
 1472                                 data = CMSG_DATA(cm);
 1473                                 datalen = (caddr_t)cm + cm->cmsg_len
 1474                                     - (caddr_t)data;
 1475 
 1476                                 if (cm->cmsg_level == SOL_SOCKET &&
 1477                                     cm->cmsg_type == SCM_RIGHTS) {
 1478                                         qfds = datalen / sizeof (struct file *);
 1479                                         rp = data;
 1480                                         for (i = 0; i < qfds; i++)
 1481                                                 (*op)(*rp++);
 1482                                 }
 1483 
 1484                                 if (CMSG_SPACE(datalen) < clen) {
 1485                                         clen -= CMSG_SPACE(datalen);
 1486                                         cm = (struct cmsghdr *)
 1487                                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1488                                 } else {
 1489                                         clen = 0;
 1490                                         cm = NULL;
 1491                                 }
 1492                         }
 1493                 }
 1494                 m0 = m0->m_act;
 1495         }
 1496 }
 1497 
 1498 static void
 1499 unp_mark(fp)
 1500         struct file *fp;
 1501 {
 1502         if (fp->f_gcflag & FMARK)
 1503                 return;
 1504         unp_defer++;
 1505         fp->f_gcflag |= (FMARK|FDEFER);
 1506 }
 1507 
 1508 static void
 1509 unp_discard(fp)
 1510         struct file *fp;
 1511 {
 1512         FILE_LOCK(fp);
 1513         fp->f_msgcount--;
 1514         unp_rights--;
 1515         FILE_UNLOCK(fp);
 1516         (void) closef(fp, (struct thread *)NULL);
 1517 }

Cache object: 195c3b28f7c36cdee9ac347afb87ef7d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.