The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   34  */
   35 
   36 #include <sys/cdefs.h>
   37 __FBSDID("$FreeBSD: releng/5.2/sys/kern/uipc_usrreq.c 122875 2003-11-18 00:39:07Z rwatson $");
   38 
   39 #include "opt_mac.h"
   40 
   41 #include <sys/param.h>
   42 #include <sys/domain.h>
   43 #include <sys/fcntl.h>
   44 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
   45 #include <sys/file.h>
   46 #include <sys/filedesc.h>
   47 #include <sys/jail.h>
   48 #include <sys/kernel.h>
   49 #include <sys/lock.h>
   50 #include <sys/mac.h>
   51 #include <sys/mbuf.h>
   52 #include <sys/mutex.h>
   53 #include <sys/namei.h>
   54 #include <sys/proc.h>
   55 #include <sys/protosw.h>
   56 #include <sys/resourcevar.h>
   57 #include <sys/socket.h>
   58 #include <sys/socketvar.h>
   59 #include <sys/signalvar.h>
   60 #include <sys/stat.h>
   61 #include <sys/sx.h>
   62 #include <sys/sysctl.h>
   63 #include <sys/systm.h>
   64 #include <sys/un.h>
   65 #include <sys/unpcb.h>
   66 #include <sys/vnode.h>
   67 
   68 #include <vm/uma.h>
   69 
   70 static uma_zone_t unp_zone;
   71 static  unp_gen_t unp_gencnt;
   72 static  u_int unp_count;
   73 
   74 static  struct unp_head unp_shead, unp_dhead;
   75 
   76 /*
   77  * Unix communications domain.
   78  *
   79  * TODO:
   80  *      SEQPACKET, RDM
   81  *      rethink name space problems
   82  *      need a proper out-of-band
   83  *      lock pushdown
   84  */
   85 static struct   sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
   86 static ino_t    unp_ino;                /* prototype for fake inode numbers */
   87 
   88 static int     unp_attach(struct socket *);
   89 static void    unp_detach(struct unpcb *);
   90 static int     unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
   91 static int     unp_connect(struct socket *,struct sockaddr *, struct thread *);
   92 static void    unp_disconnect(struct unpcb *);
   93 static void    unp_shutdown(struct unpcb *);
   94 static void    unp_drop(struct unpcb *, int);
   95 static void    unp_gc(void);
   96 static void    unp_scan(struct mbuf *, void (*)(struct file *));
   97 static void    unp_mark(struct file *);
   98 static void    unp_discard(struct file *);
   99 static void    unp_freerights(struct file **, int);
  100 static int     unp_internalize(struct mbuf **, struct thread *);
  101 static int     unp_listen(struct unpcb *, struct thread *);
  102 
  103 static int
  104 uipc_abort(struct socket *so)
  105 {
  106         struct unpcb *unp = sotounpcb(so);
  107 
  108         if (unp == 0)
  109                 return EINVAL;
  110         unp_drop(unp, ECONNABORTED);
  111         unp_detach(unp);
  112         sotryfree(so);
  113         return 0;
  114 }
  115 
  116 static int
  117 uipc_accept(struct socket *so, struct sockaddr **nam)
  118 {
  119         struct unpcb *unp = sotounpcb(so);
  120 
  121         if (unp == 0)
  122                 return EINVAL;
  123 
  124         /*
  125          * Pass back name of connected socket,
  126          * if it was bound and we are still connected
  127          * (our peer may have closed already!).
  128          */
  129         if (unp->unp_conn && unp->unp_conn->unp_addr) {
  130                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
  131                                     1);
  132         } else {
  133                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
  134         }
  135         return 0;
  136 }
  137 
  138 static int
  139 uipc_attach(struct socket *so, int proto, struct thread *td)
  140 {
  141         struct unpcb *unp = sotounpcb(so);
  142 
  143         if (unp != 0)
  144                 return EISCONN;
  145         return unp_attach(so);
  146 }
  147 
  148 static int
  149 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  150 {
  151         struct unpcb *unp = sotounpcb(so);
  152 
  153         if (unp == 0)
  154                 return EINVAL;
  155 
  156         return unp_bind(unp, nam, td);
  157 }
  158 
  159 static int
  160 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  161 {
  162         struct unpcb *unp = sotounpcb(so);
  163 
  164         if (unp == 0)
  165                 return EINVAL;
  166         return unp_connect(so, nam, curthread);
  167 }
  168 
  169 static int
  170 uipc_connect2(struct socket *so1, struct socket *so2)
  171 {
  172         struct unpcb *unp = sotounpcb(so1);
  173 
  174         if (unp == 0)
  175                 return EINVAL;
  176 
  177         return unp_connect2(so1, so2);
  178 }
  179 
  180 /* control is EOPNOTSUPP */
  181 
  182 static int
  183 uipc_detach(struct socket *so)
  184 {
  185         struct unpcb *unp = sotounpcb(so);
  186 
  187         if (unp == 0)
  188                 return EINVAL;
  189 
  190         unp_detach(unp);
  191         return 0;
  192 }
  193 
  194 static int
  195 uipc_disconnect(struct socket *so)
  196 {
  197         struct unpcb *unp = sotounpcb(so);
  198 
  199         if (unp == 0)
  200                 return EINVAL;
  201         unp_disconnect(unp);
  202         return 0;
  203 }
  204 
  205 static int
  206 uipc_listen(struct socket *so, struct thread *td)
  207 {
  208         struct unpcb *unp = sotounpcb(so);
  209 
  210         if (unp == 0 || unp->unp_vnode == 0)
  211                 return EINVAL;
  212         return unp_listen(unp, td);
  213 }
  214 
  215 static int
  216 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  217 {
  218         struct unpcb *unp = sotounpcb(so);
  219 
  220         if (unp == 0)
  221                 return EINVAL;
  222         if (unp->unp_conn && unp->unp_conn->unp_addr)
  223                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
  224                                     1);
  225         else {
  226                 /*
  227                  * XXX: It seems that this test always fails even when
  228                  * connection is established.  So, this else clause is
  229                  * added as workaround to return PF_LOCAL sockaddr.
  230                  */
  231                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
  232         }
  233         return 0;
  234 }
  235 
  236 static int
  237 uipc_rcvd(struct socket *so, int flags)
  238 {
  239         struct unpcb *unp = sotounpcb(so);
  240         struct socket *so2;
  241         u_long newhiwat;
  242 
  243         if (unp == 0)
  244                 return EINVAL;
  245         switch (so->so_type) {
  246         case SOCK_DGRAM:
  247                 panic("uipc_rcvd DGRAM?");
  248                 /*NOTREACHED*/
  249 
  250         case SOCK_STREAM:
  251                 if (unp->unp_conn == 0)
  252                         break;
  253                 so2 = unp->unp_conn->unp_socket;
  254                 /*
  255                  * Adjust backpressure on sender
  256                  * and wakeup any waiting to write.
  257                  */
  258                 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt;
  259                 unp->unp_mbcnt = so->so_rcv.sb_mbcnt;
  260                 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc -
  261                     so->so_rcv.sb_cc;
  262                 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
  263                     newhiwat, RLIM_INFINITY);
  264                 unp->unp_cc = so->so_rcv.sb_cc;
  265                 sowwakeup(so2);
  266                 break;
  267 
  268         default:
  269                 panic("uipc_rcvd unknown socktype");
  270         }
  271         return 0;
  272 }
  273 
  274 /* pru_rcvoob is EOPNOTSUPP */
  275 
  276 static int
  277 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  278           struct mbuf *control, struct thread *td)
  279 {
  280         int error = 0;
  281         struct unpcb *unp = sotounpcb(so);
  282         struct socket *so2;
  283         u_long newhiwat;
  284 
  285         if (unp == 0) {
  286                 error = EINVAL;
  287                 goto release;
  288         }
  289         if (flags & PRUS_OOB) {
  290                 error = EOPNOTSUPP;
  291                 goto release;
  292         }
  293 
  294         if (control && (error = unp_internalize(&control, td)))
  295                 goto release;
  296 
  297         switch (so->so_type) {
  298         case SOCK_DGRAM: 
  299         {
  300                 struct sockaddr *from;
  301 
  302                 if (nam) {
  303                         if (unp->unp_conn) {
  304                                 error = EISCONN;
  305                                 break;
  306                         }
  307                         error = unp_connect(so, nam, td);
  308                         if (error)
  309                                 break;
  310                 } else {
  311                         if (unp->unp_conn == 0) {
  312                                 error = ENOTCONN;
  313                                 break;
  314                         }
  315                 }
  316                 so2 = unp->unp_conn->unp_socket;
  317                 if (unp->unp_addr)
  318                         from = (struct sockaddr *)unp->unp_addr;
  319                 else
  320                         from = &sun_noname;
  321                 if (sbappendaddr(&so2->so_rcv, from, m, control)) {
  322                         sorwakeup(so2);
  323                         m = 0;
  324                         control = 0;
  325                 } else
  326                         error = ENOBUFS;
  327                 if (nam)
  328                         unp_disconnect(unp);
  329                 break;
  330         }
  331 
  332         case SOCK_STREAM:
  333                 /* Connect if not connected yet. */
  334                 /*
  335                  * Note: A better implementation would complain
  336                  * if not equal to the peer's address.
  337                  */
  338                 if ((so->so_state & SS_ISCONNECTED) == 0) {
  339                         if (nam) {
  340                                 error = unp_connect(so, nam, td);
  341                                 if (error)
  342                                         break;  /* XXX */
  343                         } else {
  344                                 error = ENOTCONN;
  345                                 break;
  346                         }
  347                 }
  348 
  349                 if (so->so_state & SS_CANTSENDMORE) {
  350                         error = EPIPE;
  351                         break;
  352                 }
  353                 if (unp->unp_conn == 0)
  354                         panic("uipc_send connected but no connection?");
  355                 so2 = unp->unp_conn->unp_socket;
  356                 /*
  357                  * Send to paired receive port, and then reduce
  358                  * send buffer hiwater marks to maintain backpressure.
  359                  * Wake up readers.
  360                  */
  361                 if (control) {
  362                         if (sbappendcontrol(&so2->so_rcv, m, control))
  363                                 control = 0;
  364                 } else
  365                         sbappend(&so2->so_rcv, m);
  366                 so->so_snd.sb_mbmax -=
  367                         so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
  368                 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt;
  369                 newhiwat = so->so_snd.sb_hiwat -
  370                     (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc);
  371                 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
  372                     newhiwat, RLIM_INFINITY);
  373                 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
  374                 sorwakeup(so2);
  375                 m = 0;
  376                 break;
  377 
  378         default:
  379                 panic("uipc_send unknown socktype");
  380         }
  381 
  382         /*
  383          * SEND_EOF is equivalent to a SEND followed by
  384          * a SHUTDOWN.
  385          */
  386         if (flags & PRUS_EOF) {
  387                 socantsendmore(so);
  388                 unp_shutdown(unp);
  389         }
  390 
  391         if (control && error != 0)
  392                 unp_dispose(control);
  393 
  394 release:
  395         if (control)
  396                 m_freem(control);
  397         if (m)
  398                 m_freem(m);
  399         return error;
  400 }
  401 
  402 static int
  403 uipc_sense(struct socket *so, struct stat *sb)
  404 {
  405         struct unpcb *unp = sotounpcb(so);
  406         struct socket *so2;
  407 
  408         if (unp == 0)
  409                 return EINVAL;
  410         sb->st_blksize = so->so_snd.sb_hiwat;
  411         if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
  412                 so2 = unp->unp_conn->unp_socket;
  413                 sb->st_blksize += so2->so_rcv.sb_cc;
  414         }
  415         sb->st_dev = NOUDEV;
  416         if (unp->unp_ino == 0)
  417                 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
  418         sb->st_ino = unp->unp_ino;
  419         return (0);
  420 }
  421 
  422 static int
  423 uipc_shutdown(struct socket *so)
  424 {
  425         struct unpcb *unp = sotounpcb(so);
  426 
  427         if (unp == 0)
  428                 return EINVAL;
  429         socantsendmore(so);
  430         unp_shutdown(unp);
  431         return 0;
  432 }
  433 
  434 static int
  435 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
  436 {
  437         struct unpcb *unp = sotounpcb(so);
  438 
  439         if (unp == 0)
  440                 return EINVAL;
  441         if (unp->unp_addr)
  442                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
  443         else
  444                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
  445         return 0;
  446 }
  447 
  448 struct pr_usrreqs uipc_usrreqs = {
  449         uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
  450         uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
  451         uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
  452         uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
  453         sosend, soreceive, sopoll, pru_sosetlabel_null
  454 };
  455 
  456 int
  457 uipc_ctloutput(so, sopt)
  458         struct socket *so;
  459         struct sockopt *sopt;
  460 {
  461         struct unpcb *unp = sotounpcb(so);
  462         int error;
  463 
  464         switch (sopt->sopt_dir) {
  465         case SOPT_GET:
  466                 switch (sopt->sopt_name) {
  467                 case LOCAL_PEERCRED:
  468                         if (unp->unp_flags & UNP_HAVEPC)
  469                                 error = sooptcopyout(sopt, &unp->unp_peercred,
  470                                     sizeof(unp->unp_peercred));
  471                         else {
  472                                 if (so->so_type == SOCK_STREAM)
  473                                         error = ENOTCONN;
  474                                 else
  475                                         error = EINVAL;
  476                         }
  477                         break;
  478                 default:
  479                         error = EOPNOTSUPP;
  480                         break;
  481                 }
  482                 break;
  483         case SOPT_SET:
  484         default:
  485                 error = EOPNOTSUPP;
  486                 break;
  487         }
  488         return (error);
  489 }
  490         
  491 /*
  492  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
  493  * for stream sockets, although the total for sender and receiver is
  494  * actually only PIPSIZ.
  495  * Datagram sockets really use the sendspace as the maximum datagram size,
  496  * and don't really want to reserve the sendspace.  Their recvspace should
  497  * be large enough for at least one max-size datagram plus address.
  498  */
  499 #ifndef PIPSIZ
  500 #define PIPSIZ  8192
  501 #endif
  502 static u_long   unpst_sendspace = PIPSIZ;
  503 static u_long   unpst_recvspace = PIPSIZ;
  504 static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
  505 static u_long   unpdg_recvspace = 4*1024;
  506 
  507 static int      unp_rights;                     /* file descriptors in flight */
  508 
  509 SYSCTL_DECL(_net_local_stream);
  510 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 
  511            &unpst_sendspace, 0, "");
  512 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
  513            &unpst_recvspace, 0, "");
  514 SYSCTL_DECL(_net_local_dgram);
  515 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
  516            &unpdg_sendspace, 0, "");
  517 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
  518            &unpdg_recvspace, 0, "");
  519 SYSCTL_DECL(_net_local);
  520 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
  521 
  522 static int
  523 unp_attach(so)
  524         struct socket *so;
  525 {
  526         register struct unpcb *unp;
  527         int error;
  528 
  529         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  530                 switch (so->so_type) {
  531 
  532                 case SOCK_STREAM:
  533                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
  534                         break;
  535 
  536                 case SOCK_DGRAM:
  537                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
  538                         break;
  539 
  540                 default:
  541                         panic("unp_attach");
  542                 }
  543                 if (error)
  544                         return (error);
  545         }
  546         unp = uma_zalloc(unp_zone, M_WAITOK);
  547         if (unp == NULL)
  548                 return (ENOBUFS);
  549         bzero(unp, sizeof *unp);
  550         unp->unp_gencnt = ++unp_gencnt;
  551         unp_count++;
  552         LIST_INIT(&unp->unp_refs);
  553         unp->unp_socket = so;
  554         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
  555                          : &unp_shead, unp, unp_link);
  556         so->so_pcb = unp;
  557         return (0);
  558 }
  559 
  560 static void
  561 unp_detach(unp)
  562         register struct unpcb *unp;
  563 {
  564         LIST_REMOVE(unp, unp_link);
  565         unp->unp_gencnt = ++unp_gencnt;
  566         --unp_count;
  567         if (unp->unp_vnode) {
  568                 unp->unp_vnode->v_socket = 0;
  569                 vrele(unp->unp_vnode);
  570                 unp->unp_vnode = 0;
  571         }
  572         if (unp->unp_conn)
  573                 unp_disconnect(unp);
  574         while (!LIST_EMPTY(&unp->unp_refs))
  575                 unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET);
  576         soisdisconnected(unp->unp_socket);
  577         unp->unp_socket->so_pcb = 0;
  578         if (unp_rights) {
  579                 /*
  580                  * Normally the receive buffer is flushed later,
  581                  * in sofree, but if our receive buffer holds references
  582                  * to descriptors that are now garbage, we will dispose
  583                  * of those descriptor references after the garbage collector
  584                  * gets them (resulting in a "panic: closef: count < 0").
  585                  */
  586                 sorflush(unp->unp_socket);
  587                 unp_gc();
  588         }
  589         if (unp->unp_addr)
  590                 FREE(unp->unp_addr, M_SONAME);
  591         uma_zfree(unp_zone, unp);
  592 }
  593 
  594 static int
  595 unp_bind(unp, nam, td)
  596         struct unpcb *unp;
  597         struct sockaddr *nam;
  598         struct thread *td;
  599 {
  600         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  601         struct vnode *vp;
  602         struct mount *mp;
  603         struct vattr vattr;
  604         int error, namelen;
  605         struct nameidata nd;
  606         char *buf;
  607 
  608         if (unp->unp_vnode != NULL)
  609                 return (EINVAL);
  610 
  611         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  612         if (namelen <= 0)
  613                 return EINVAL;
  614 
  615         buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
  616         strlcpy(buf, soun->sun_path, namelen + 1);
  617 
  618 restart:
  619         NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
  620             buf, td);
  621 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  622         error = namei(&nd);
  623         if (error) {
  624                 free(buf, M_TEMP);
  625                 return (error);
  626         }
  627         vp = nd.ni_vp;
  628         if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
  629                 NDFREE(&nd, NDF_ONLY_PNBUF);
  630                 if (nd.ni_dvp == vp)
  631                         vrele(nd.ni_dvp);
  632                 else
  633                         vput(nd.ni_dvp);
  634                 if (vp != NULL) {
  635                         vrele(vp);
  636                         free(buf, M_TEMP);
  637                         return (EADDRINUSE);
  638                 }
  639                 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
  640                 if (error) {
  641                         free(buf, M_TEMP);
  642                         return (error);
  643                 }
  644                 goto restart;
  645         }
  646         VATTR_NULL(&vattr);
  647         vattr.va_type = VSOCK;
  648         vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
  649 #ifdef MAC
  650         error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  651             &vattr);
  652 #endif
  653         if (error == 0) {
  654                 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
  655                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  656         }
  657         NDFREE(&nd, NDF_ONLY_PNBUF);
  658         vput(nd.ni_dvp);
  659         if (error) {
  660                 free(buf, M_TEMP);
  661                 return (error);
  662         }
  663         vp = nd.ni_vp;
  664         vp->v_socket = unp->unp_socket;
  665         unp->unp_vnode = vp;
  666         unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
  667         VOP_UNLOCK(vp, 0, td);
  668         vn_finished_write(mp);
  669         free(buf, M_TEMP);
  670         return (0);
  671 }
  672 
  673 static int
  674 unp_connect(so, nam, td)
  675         struct socket *so;
  676         struct sockaddr *nam;
  677         struct thread *td;
  678 {
  679         register struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  680         register struct vnode *vp;
  681         register struct socket *so2, *so3;
  682         struct unpcb *unp, *unp2, *unp3;
  683         int error, len;
  684         struct nameidata nd;
  685         char buf[SOCK_MAXADDRLEN];
  686 
  687         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
  688         if (len <= 0)
  689                 return EINVAL;
  690         strlcpy(buf, soun->sun_path, len + 1);
  691 
  692         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
  693         error = namei(&nd);
  694         if (error)
  695                 return (error);
  696         vp = nd.ni_vp;
  697         NDFREE(&nd, NDF_ONLY_PNBUF);
  698         if (vp->v_type != VSOCK) {
  699                 error = ENOTSOCK;
  700                 goto bad;
  701         }
  702         error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
  703         if (error)
  704                 goto bad;
  705         so2 = vp->v_socket;
  706         if (so2 == 0) {
  707                 error = ECONNREFUSED;
  708                 goto bad;
  709         }
  710         if (so->so_type != so2->so_type) {
  711                 error = EPROTOTYPE;
  712                 goto bad;
  713         }
  714         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
  715                 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
  716                     (so3 = sonewconn(so2, 0)) == 0) {
  717                         error = ECONNREFUSED;
  718                         goto bad;
  719                 }
  720                 unp = sotounpcb(so);
  721                 unp2 = sotounpcb(so2);
  722                 unp3 = sotounpcb(so3);
  723                 if (unp2->unp_addr)
  724                         unp3->unp_addr = (struct sockaddr_un *)
  725                                 dup_sockaddr((struct sockaddr *)
  726                                              unp2->unp_addr, 1);
  727 
  728                 /*
  729                  * unp_peercred management:
  730                  *
  731                  * The connecter's (client's) credentials are copied
  732                  * from its process structure at the time of connect()
  733                  * (which is now).
  734                  */
  735                 cru2x(td->td_ucred, &unp3->unp_peercred);
  736                 unp3->unp_flags |= UNP_HAVEPC;
  737                 /*
  738                  * The receiver's (server's) credentials are copied
  739                  * from the unp_peercred member of socket on which the
  740                  * former called listen(); unp_listen() cached that
  741                  * process's credentials at that time so we can use
  742                  * them now.
  743                  */
  744                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
  745                     ("unp_connect: listener without cached peercred"));
  746                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
  747                     sizeof(unp->unp_peercred));
  748                 unp->unp_flags |= UNP_HAVEPC;
  749 #ifdef MAC
  750                 mac_set_socket_peer_from_socket(so, so3);
  751                 mac_set_socket_peer_from_socket(so3, so);
  752 #endif
  753 
  754                 so2 = so3;
  755         }
  756         error = unp_connect2(so, so2);
  757 bad:
  758         vput(vp);
  759         return (error);
  760 }
  761 
  762 int
  763 unp_connect2(so, so2)
  764         register struct socket *so;
  765         register struct socket *so2;
  766 {
  767         register struct unpcb *unp = sotounpcb(so);
  768         register struct unpcb *unp2;
  769 
  770         if (so2->so_type != so->so_type)
  771                 return (EPROTOTYPE);
  772         unp2 = sotounpcb(so2);
  773         unp->unp_conn = unp2;
  774         switch (so->so_type) {
  775 
  776         case SOCK_DGRAM:
  777                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
  778                 soisconnected(so);
  779                 break;
  780 
  781         case SOCK_STREAM:
  782                 unp2->unp_conn = unp;
  783                 soisconnected(so);
  784                 soisconnected(so2);
  785                 break;
  786 
  787         default:
  788                 panic("unp_connect2");
  789         }
  790         return (0);
  791 }
  792 
  793 static void
  794 unp_disconnect(unp)
  795         struct unpcb *unp;
  796 {
  797         register struct unpcb *unp2 = unp->unp_conn;
  798 
  799         if (unp2 == 0)
  800                 return;
  801         unp->unp_conn = 0;
  802         switch (unp->unp_socket->so_type) {
  803 
  804         case SOCK_DGRAM:
  805                 LIST_REMOVE(unp, unp_reflink);
  806                 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
  807                 break;
  808 
  809         case SOCK_STREAM:
  810                 soisdisconnected(unp->unp_socket);
  811                 unp2->unp_conn = 0;
  812                 soisdisconnected(unp2->unp_socket);
  813                 break;
  814         }
  815 }
  816 
  817 #ifdef notdef
  818 void
  819 unp_abort(unp)
  820         struct unpcb *unp;
  821 {
  822 
  823         unp_detach(unp);
  824 }
  825 #endif
  826 
  827 static int
  828 unp_pcblist(SYSCTL_HANDLER_ARGS)
  829 {
  830         int error, i, n;
  831         struct unpcb *unp, **unp_list;
  832         unp_gen_t gencnt;
  833         struct xunpgen *xug;
  834         struct unp_head *head;
  835         struct xunpcb *xu;
  836 
  837         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
  838 
  839         /*
  840          * The process of preparing the PCB list is too time-consuming and
  841          * resource-intensive to repeat twice on every request.
  842          */
  843         if (req->oldptr == 0) {
  844                 n = unp_count;
  845                 req->oldidx = 2 * (sizeof *xug)
  846                         + (n + n/8) * sizeof(struct xunpcb);
  847                 return 0;
  848         }
  849 
  850         if (req->newptr != 0)
  851                 return EPERM;
  852 
  853         /*
  854          * OK, now we're committed to doing something.
  855          */
  856         xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
  857         gencnt = unp_gencnt;
  858         n = unp_count;
  859 
  860         xug->xug_len = sizeof *xug;
  861         xug->xug_count = n;
  862         xug->xug_gen = gencnt;
  863         xug->xug_sogen = so_gencnt;
  864         error = SYSCTL_OUT(req, xug, sizeof *xug);
  865         if (error) {
  866                 free(xug, M_TEMP);
  867                 return error;
  868         }
  869 
  870         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
  871         
  872         for (unp = LIST_FIRST(head), i = 0; unp && i < n;
  873              unp = LIST_NEXT(unp, unp_link)) {
  874                 if (unp->unp_gencnt <= gencnt) {
  875                         if (cr_cansee(req->td->td_ucred,
  876                             unp->unp_socket->so_cred))
  877                                 continue;
  878                         unp_list[i++] = unp;
  879                 }
  880         }
  881         n = i;                  /* in case we lost some during malloc */
  882 
  883         error = 0;
  884         xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK);
  885         for (i = 0; i < n; i++) {
  886                 unp = unp_list[i];
  887                 if (unp->unp_gencnt <= gencnt) {
  888                         xu->xu_len = sizeof *xu;
  889                         xu->xu_unpp = unp;
  890                         /*
  891                          * XXX - need more locking here to protect against
  892                          * connect/disconnect races for SMP.
  893                          */
  894                         if (unp->unp_addr)
  895                                 bcopy(unp->unp_addr, &xu->xu_addr, 
  896                                       unp->unp_addr->sun_len);
  897                         if (unp->unp_conn && unp->unp_conn->unp_addr)
  898                                 bcopy(unp->unp_conn->unp_addr,
  899                                       &xu->xu_caddr,
  900                                       unp->unp_conn->unp_addr->sun_len);
  901                         bcopy(unp, &xu->xu_unp, sizeof *unp);
  902                         sotoxsocket(unp->unp_socket, &xu->xu_socket);
  903                         error = SYSCTL_OUT(req, xu, sizeof *xu);
  904                 }
  905         }
  906         free(xu, M_TEMP);
  907         if (!error) {
  908                 /*
  909                  * Give the user an updated idea of our state.
  910                  * If the generation differs from what we told
  911                  * her before, she knows that something happened
  912                  * while we were processing this request, and it
  913                  * might be necessary to retry.
  914                  */
  915                 xug->xug_gen = unp_gencnt;
  916                 xug->xug_sogen = so_gencnt;
  917                 xug->xug_count = unp_count;
  918                 error = SYSCTL_OUT(req, xug, sizeof *xug);
  919         }
  920         free(unp_list, M_TEMP);
  921         free(xug, M_TEMP);
  922         return error;
  923 }
  924 
  925 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 
  926             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
  927             "List of active local datagram sockets");
  928 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 
  929             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
  930             "List of active local stream sockets");
  931 
  932 static void
  933 unp_shutdown(unp)
  934         struct unpcb *unp;
  935 {
  936         struct socket *so;
  937 
  938         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
  939             (so = unp->unp_conn->unp_socket))
  940                 socantrcvmore(so);
  941 }
  942 
  943 static void
  944 unp_drop(unp, errno)
  945         struct unpcb *unp;
  946         int errno;
  947 {
  948         struct socket *so = unp->unp_socket;
  949 
  950         so->so_error = errno;
  951         unp_disconnect(unp);
  952 }
  953 
  954 #ifdef notdef
  955 void
  956 unp_drain()
  957 {
  958 
  959 }
  960 #endif
  961 
  962 static void
  963 unp_freerights(rp, fdcount)
  964         struct file **rp;
  965         int fdcount;
  966 {
  967         int i;
  968         struct file *fp;
  969 
  970         for (i = 0; i < fdcount; i++) {
  971                 fp = *rp;
  972                 /*
  973                  * zero the pointer before calling
  974                  * unp_discard since it may end up
  975                  * in unp_gc()..
  976                  */
  977                 *rp++ = 0;
  978                 unp_discard(fp);
  979         }
  980 }
  981 
  982 int
  983 unp_externalize(control, controlp)
  984         struct mbuf *control, **controlp;
  985 {
  986         struct thread *td = curthread;          /* XXX */
  987         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
  988         int i;
  989         int *fdp;
  990         struct file **rp;
  991         struct file *fp;
  992         void *data;
  993         socklen_t clen = control->m_len, datalen;
  994         int error, newfds;
  995         int f;
  996         u_int newlen;
  997 
  998         error = 0;
  999         if (controlp != NULL) /* controlp == NULL => free control messages */
 1000                 *controlp = NULL;
 1001 
 1002         while (cm != NULL) {
 1003                 if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
 1004                         error = EINVAL;
 1005                         break;
 1006                 }
 1007 
 1008                 data = CMSG_DATA(cm);
 1009                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1010 
 1011                 if (cm->cmsg_level == SOL_SOCKET
 1012                     && cm->cmsg_type == SCM_RIGHTS) {
 1013                         newfds = datalen / sizeof(struct file *);
 1014                         rp = data;
 1015 
 1016                         /* If we're not outputting the descriptors free them. */
 1017                         if (error || controlp == NULL) {
 1018                                 unp_freerights(rp, newfds);
 1019                                 goto next;
 1020                         }
 1021                         FILEDESC_LOCK(td->td_proc->p_fd);
 1022                         /* if the new FD's will not fit free them.  */
 1023                         if (!fdavail(td, newfds)) {
 1024                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1025                                 error = EMSGSIZE;
 1026                                 unp_freerights(rp, newfds);
 1027                                 goto next;
 1028                         }
 1029                         /*
 1030                          * now change each pointer to an fd in the global
 1031                          * table to an integer that is the index to the
 1032                          * local fd table entry that we set up to point
 1033                          * to the global one we are transferring.
 1034                          */
 1035                         newlen = newfds * sizeof(int);
 1036                         *controlp = sbcreatecontrol(NULL, newlen,
 1037                             SCM_RIGHTS, SOL_SOCKET);
 1038                         if (*controlp == NULL) {
 1039                                 FILEDESC_UNLOCK(td->td_proc->p_fd);
 1040                                 error = E2BIG;
 1041                                 unp_freerights(rp, newfds);
 1042                                 goto next;
 1043                         }
 1044 
 1045                         fdp = (int *)
 1046                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1047                         for (i = 0; i < newfds; i++) {
 1048                                 if (fdalloc(td, 0, &f))
 1049                                         panic("unp_externalize fdalloc failed");
 1050                                 fp = *rp++;
 1051                                 td->td_proc->p_fd->fd_ofiles[f] = fp;
 1052                                 FILE_LOCK(fp);
 1053                                 fp->f_msgcount--;
 1054                                 FILE_UNLOCK(fp);
 1055                                 unp_rights--;
 1056                                 *fdp++ = f;
 1057                         }
 1058                         FILEDESC_UNLOCK(td->td_proc->p_fd);
 1059                 } else { /* We can just copy anything else across */
 1060                         if (error || controlp == NULL)
 1061                                 goto next;
 1062                         *controlp = sbcreatecontrol(NULL, datalen,
 1063                             cm->cmsg_type, cm->cmsg_level);
 1064                         if (*controlp == NULL) {
 1065                                 error = ENOBUFS;
 1066                                 goto next;
 1067                         }
 1068                         bcopy(data,
 1069                             CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 1070                             datalen);
 1071                 }
 1072 
 1073                 controlp = &(*controlp)->m_next;
 1074 
 1075 next:
 1076                 if (CMSG_SPACE(datalen) < clen) {
 1077                         clen -= CMSG_SPACE(datalen);
 1078                         cm = (struct cmsghdr *)
 1079                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1080                 } else {
 1081                         clen = 0;
 1082                         cm = NULL;
 1083                 }
 1084         }
 1085 
 1086         m_freem(control);
 1087 
 1088         return (error);
 1089 }
 1090 
 1091 void
 1092 unp_init(void)
 1093 {
 1094         unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
 1095             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 1096         uma_zone_set_max(unp_zone, nmbclusters);
 1097         if (unp_zone == 0)
 1098                 panic("unp_init");
 1099         LIST_INIT(&unp_dhead);
 1100         LIST_INIT(&unp_shead);
 1101 }
 1102 
 1103 static int
 1104 unp_internalize(controlp, td)
 1105         struct mbuf **controlp;
 1106         struct thread *td;
 1107 {
 1108         struct mbuf *control = *controlp;
 1109         struct proc *p = td->td_proc;
 1110         struct filedesc *fdescp = p->p_fd;
 1111         struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 1112         struct cmsgcred *cmcred;
 1113         struct file **rp;
 1114         struct file *fp;
 1115         struct timeval *tv;
 1116         int i, fd, *fdp;
 1117         void *data;
 1118         socklen_t clen = control->m_len, datalen;
 1119         int error, oldfds;
 1120         u_int newlen;
 1121 
 1122         error = 0;
 1123         *controlp = NULL;
 1124 
 1125         while (cm != NULL) {
 1126                 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 1127                     || cm->cmsg_len > clen) {
 1128                         error = EINVAL;
 1129                         goto out;
 1130                 }
 1131 
 1132                 data = CMSG_DATA(cm);
 1133                 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 1134 
 1135                 switch (cm->cmsg_type) {
 1136                 /*
 1137                  * Fill in credential information.
 1138                  */
 1139                 case SCM_CREDS:
 1140                         *controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 1141                             SCM_CREDS, SOL_SOCKET);
 1142                         if (*controlp == NULL) {
 1143                                 error = ENOBUFS;
 1144                                 goto out;
 1145                         }
 1146 
 1147                         cmcred = (struct cmsgcred *)
 1148                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1149                         cmcred->cmcred_pid = p->p_pid;
 1150                         cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 1151                         cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 1152                         cmcred->cmcred_euid = td->td_ucred->cr_uid;
 1153                         cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 1154                                                         CMGROUP_MAX);
 1155                         for (i = 0; i < cmcred->cmcred_ngroups; i++)
 1156                                 cmcred->cmcred_groups[i] =
 1157                                     td->td_ucred->cr_groups[i];
 1158                         break;
 1159 
 1160                 case SCM_RIGHTS:
 1161                         oldfds = datalen / sizeof (int);
 1162                         /*
 1163                          * check that all the FDs passed in refer to legal files
 1164                          * If not, reject the entire operation.
 1165                          */
 1166                         fdp = data;
 1167                         FILEDESC_LOCK(fdescp);
 1168                         for (i = 0; i < oldfds; i++) {
 1169                                 fd = *fdp++;
 1170                                 if ((unsigned)fd >= fdescp->fd_nfiles ||
 1171                                     fdescp->fd_ofiles[fd] == NULL) {
 1172                                         FILEDESC_UNLOCK(fdescp);
 1173                                         error = EBADF;
 1174                                         goto out;
 1175                                 }
 1176                                 fp = fdescp->fd_ofiles[fd];
 1177                                 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 1178                                         FILEDESC_UNLOCK(fdescp);
 1179                                         error = EOPNOTSUPP;
 1180                                         goto out;
 1181                                 }
 1182 
 1183                         }
 1184                         /*
 1185                          * Now replace the integer FDs with pointers to
 1186                          * the associated global file table entry..
 1187                          */
 1188                         newlen = oldfds * sizeof(struct file *);
 1189                         *controlp = sbcreatecontrol(NULL, newlen,
 1190                             SCM_RIGHTS, SOL_SOCKET);
 1191                         if (*controlp == NULL) {
 1192                                 FILEDESC_UNLOCK(fdescp);
 1193                                 error = E2BIG;
 1194                                 goto out;
 1195                         }
 1196 
 1197                         fdp = data;
 1198                         rp = (struct file **)
 1199                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1200                         for (i = 0; i < oldfds; i++) {
 1201                                 fp = fdescp->fd_ofiles[*fdp++];
 1202                                 *rp++ = fp;
 1203                                 FILE_LOCK(fp);
 1204                                 fp->f_count++;
 1205                                 fp->f_msgcount++;
 1206                                 FILE_UNLOCK(fp);
 1207                                 unp_rights++;
 1208                         }
 1209                         FILEDESC_UNLOCK(fdescp);
 1210                         break;
 1211 
 1212                 case SCM_TIMESTAMP:
 1213                         *controlp = sbcreatecontrol(NULL, sizeof(*tv),
 1214                             SCM_TIMESTAMP, SOL_SOCKET);
 1215                         if (*controlp == NULL) {
 1216                                 error = ENOBUFS;
 1217                                 goto out;
 1218                         }
 1219                         tv = (struct timeval *)
 1220                             CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 1221                         microtime(tv);
 1222                         break;
 1223 
 1224                 default:
 1225                         error = EINVAL;
 1226                         goto out;
 1227                 }
 1228 
 1229                 controlp = &(*controlp)->m_next;
 1230 
 1231                 if (CMSG_SPACE(datalen) < clen) {
 1232                         clen -= CMSG_SPACE(datalen);
 1233                         cm = (struct cmsghdr *)
 1234                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1235                 } else {
 1236                         clen = 0;
 1237                         cm = NULL;
 1238                 }
 1239         }
 1240 
 1241 out:
 1242         m_freem(control);
 1243 
 1244         return (error);
 1245 }
 1246 
 1247 static int      unp_defer, unp_gcing;
 1248 
 1249 static void
 1250 unp_gc()
 1251 {
 1252         register struct file *fp, *nextfp;
 1253         register struct socket *so;
 1254         struct file **extra_ref, **fpp;
 1255         int nunref, i;
 1256 
 1257         if (unp_gcing)
 1258                 return;
 1259         unp_gcing = 1;
 1260         unp_defer = 0;
 1261         /* 
 1262          * before going through all this, set all FDs to 
 1263          * be NOT defered and NOT externally accessible
 1264          */
 1265         sx_slock(&filelist_lock);
 1266         LIST_FOREACH(fp, &filehead, f_list)
 1267                 fp->f_gcflag &= ~(FMARK|FDEFER);
 1268         do {
 1269                 LIST_FOREACH(fp, &filehead, f_list) {
 1270                         FILE_LOCK(fp);
 1271                         /*
 1272                          * If the file is not open, skip it
 1273                          */
 1274                         if (fp->f_count == 0) {
 1275                                 FILE_UNLOCK(fp);
 1276                                 continue;
 1277                         }
 1278                         /*
 1279                          * If we already marked it as 'defer'  in a
 1280                          * previous pass, then try process it this time
 1281                          * and un-mark it
 1282                          */
 1283                         if (fp->f_gcflag & FDEFER) {
 1284                                 fp->f_gcflag &= ~FDEFER;
 1285                                 unp_defer--;
 1286                         } else {
 1287                                 /*
 1288                                  * if it's not defered, then check if it's
 1289                                  * already marked.. if so skip it
 1290                                  */
 1291                                 if (fp->f_gcflag & FMARK) {
 1292                                         FILE_UNLOCK(fp);
 1293                                         continue;
 1294                                 }
 1295                                 /* 
 1296                                  * If all references are from messages
 1297                                  * in transit, then skip it. it's not 
 1298                                  * externally accessible.
 1299                                  */ 
 1300                                 if (fp->f_count == fp->f_msgcount) {
 1301                                         FILE_UNLOCK(fp);
 1302                                         continue;
 1303                                 }
 1304                                 /* 
 1305                                  * If it got this far then it must be
 1306                                  * externally accessible.
 1307                                  */
 1308                                 fp->f_gcflag |= FMARK;
 1309                         }
 1310                         /*
 1311                          * either it was defered, or it is externally 
 1312                          * accessible and not already marked so.
 1313                          * Now check if it is possibly one of OUR sockets.
 1314                          */ 
 1315                         if (fp->f_type != DTYPE_SOCKET ||
 1316                             (so = fp->f_data) == NULL) {
 1317                                 FILE_UNLOCK(fp);
 1318                                 continue;
 1319                         }
 1320                         FILE_UNLOCK(fp);
 1321                         if (so->so_proto->pr_domain != &localdomain ||
 1322                             (so->so_proto->pr_flags&PR_RIGHTS) == 0)
 1323                                 continue;
 1324 #ifdef notdef
 1325                         if (so->so_rcv.sb_flags & SB_LOCK) {
 1326                                 /*
 1327                                  * This is problematical; it's not clear
 1328                                  * we need to wait for the sockbuf to be
 1329                                  * unlocked (on a uniprocessor, at least),
 1330                                  * and it's also not clear what to do
 1331                                  * if sbwait returns an error due to receipt
 1332                                  * of a signal.  If sbwait does return
 1333                                  * an error, we'll go into an infinite
 1334                                  * loop.  Delete all of this for now.
 1335                                  */
 1336                                 (void) sbwait(&so->so_rcv);
 1337                                 goto restart;
 1338                         }
 1339 #endif
 1340                         /*
 1341                          * So, Ok, it's one of our sockets and it IS externally
 1342                          * accessible (or was defered). Now we look
 1343                          * to see if we hold any file descriptors in its
 1344                          * message buffers. Follow those links and mark them 
 1345                          * as accessible too.
 1346                          */
 1347                         unp_scan(so->so_rcv.sb_mb, unp_mark);
 1348                 }
 1349         } while (unp_defer);
 1350         sx_sunlock(&filelist_lock);
 1351         /*
 1352          * We grab an extra reference to each of the file table entries
 1353          * that are not otherwise accessible and then free the rights
 1354          * that are stored in messages on them.
 1355          *
 1356          * The bug in the orginal code is a little tricky, so I'll describe
 1357          * what's wrong with it here.
 1358          *
 1359          * It is incorrect to simply unp_discard each entry for f_msgcount
 1360          * times -- consider the case of sockets A and B that contain
 1361          * references to each other.  On a last close of some other socket,
 1362          * we trigger a gc since the number of outstanding rights (unp_rights)
 1363          * is non-zero.  If during the sweep phase the gc code un_discards,
 1364          * we end up doing a (full) closef on the descriptor.  A closef on A
 1365          * results in the following chain.  Closef calls soo_close, which
 1366          * calls soclose.   Soclose calls first (through the switch
 1367          * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
 1368          * returns because the previous instance had set unp_gcing, and
 1369          * we return all the way back to soclose, which marks the socket
 1370          * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
 1371          * to free up the rights that are queued in messages on the socket A,
 1372          * i.e., the reference on B.  The sorflush calls via the dom_dispose
 1373          * switch unp_dispose, which unp_scans with unp_discard.  This second
 1374          * instance of unp_discard just calls closef on B.
 1375          *
 1376          * Well, a similar chain occurs on B, resulting in a sorflush on B,
 1377          * which results in another closef on A.  Unfortunately, A is already
 1378          * being closed, and the descriptor has already been marked with
 1379          * SS_NOFDREF, and soclose panics at this point.
 1380          *
 1381          * Here, we first take an extra reference to each inaccessible
 1382          * descriptor.  Then, we call sorflush ourself, since we know
 1383          * it is a Unix domain socket anyhow.  After we destroy all the
 1384          * rights carried in messages, we do a last closef to get rid
 1385          * of our extra reference.  This is the last close, and the
 1386          * unp_detach etc will shut down the socket.
 1387          *
 1388          * 91/09/19, bsy@cs.cmu.edu
 1389          */
 1390         extra_ref = malloc(nfiles * sizeof(struct file *), M_TEMP, M_WAITOK);
 1391         sx_slock(&filelist_lock);
 1392         for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
 1393             fp = nextfp) {
 1394                 nextfp = LIST_NEXT(fp, f_list);
 1395                 FILE_LOCK(fp);
 1396                 /* 
 1397                  * If it's not open, skip it
 1398                  */
 1399                 if (fp->f_count == 0) {
 1400                         FILE_UNLOCK(fp);
 1401                         continue;
 1402                 }
 1403                 /* 
 1404                  * If all refs are from msgs, and it's not marked accessible
 1405                  * then it must be referenced from some unreachable cycle
 1406                  * of (shut-down) FDs, so include it in our
 1407                  * list of FDs to remove
 1408                  */
 1409                 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) {
 1410                         *fpp++ = fp;
 1411                         nunref++;
 1412                         fp->f_count++;
 1413                 }
 1414                 FILE_UNLOCK(fp);
 1415         }
 1416         sx_sunlock(&filelist_lock);
 1417         /* 
 1418          * for each FD on our hit list, do the following two things
 1419          */
 1420         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
 1421                 struct file *tfp = *fpp;
 1422                 FILE_LOCK(tfp);
 1423                 if (tfp->f_type == DTYPE_SOCKET &&
 1424                     tfp->f_data != NULL) {
 1425                         FILE_UNLOCK(tfp);
 1426                         sorflush(tfp->f_data);
 1427                 } else
 1428                         FILE_UNLOCK(tfp);
 1429         }
 1430         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
 1431                 closef(*fpp, (struct thread *) NULL);
 1432         free(extra_ref, M_TEMP);
 1433         unp_gcing = 0;
 1434 }
 1435 
 1436 void
 1437 unp_dispose(m)
 1438         struct mbuf *m;
 1439 {
 1440 
 1441         if (m)
 1442                 unp_scan(m, unp_discard);
 1443 }
 1444 
 1445 static int
 1446 unp_listen(unp, td)
 1447         struct unpcb *unp;
 1448         struct thread *td;
 1449 {
 1450 
 1451         cru2x(td->td_ucred, &unp->unp_peercred);
 1452         unp->unp_flags |= UNP_HAVEPCCACHED;
 1453         return (0);
 1454 }
 1455 
 1456 static void
 1457 unp_scan(m0, op)
 1458         register struct mbuf *m0;
 1459         void (*op)(struct file *);
 1460 {
 1461         struct mbuf *m;
 1462         struct file **rp;
 1463         struct cmsghdr *cm;
 1464         void *data;
 1465         int i;
 1466         socklen_t clen, datalen;
 1467         int qfds;
 1468 
 1469         while (m0) {
 1470                 for (m = m0; m; m = m->m_next) {
 1471                         if (m->m_type != MT_CONTROL)
 1472                                 continue;
 1473 
 1474                         cm = mtod(m, struct cmsghdr *);
 1475                         clen = m->m_len;
 1476 
 1477                         while (cm != NULL) {
 1478                                 if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 1479                                         break;
 1480 
 1481                                 data = CMSG_DATA(cm);
 1482                                 datalen = (caddr_t)cm + cm->cmsg_len
 1483                                     - (caddr_t)data;
 1484 
 1485                                 if (cm->cmsg_level == SOL_SOCKET &&
 1486                                     cm->cmsg_type == SCM_RIGHTS) {
 1487                                         qfds = datalen / sizeof (struct file *);
 1488                                         rp = data;
 1489                                         for (i = 0; i < qfds; i++)
 1490                                                 (*op)(*rp++);
 1491                                 }
 1492 
 1493                                 if (CMSG_SPACE(datalen) < clen) {
 1494                                         clen -= CMSG_SPACE(datalen);
 1495                                         cm = (struct cmsghdr *)
 1496                                             ((caddr_t)cm + CMSG_SPACE(datalen));
 1497                                 } else {
 1498                                         clen = 0;
 1499                                         cm = NULL;
 1500                                 }
 1501                         }
 1502                 }
 1503                 m0 = m0->m_act;
 1504         }
 1505 }
 1506 
 1507 static void
 1508 unp_mark(fp)
 1509         struct file *fp;
 1510 {
 1511         if (fp->f_gcflag & FMARK)
 1512                 return;
 1513         unp_defer++;
 1514         fp->f_gcflag |= (FMARK|FDEFER);
 1515 }
 1516 
 1517 static void
 1518 unp_discard(fp)
 1519         struct file *fp;
 1520 {
 1521         FILE_LOCK(fp);
 1522         fp->f_msgcount--;
 1523         unp_rights--;
 1524         FILE_UNLOCK(fp);
 1525         (void) closef(fp, (struct thread *)NULL);
 1526 }

Cache object: af2cc6ad9ec22514488b1edab64be9d1


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.