The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
   34  * $FreeBSD$
   35  */
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/kernel.h>
   40 #include <sys/domain.h>
   41 #include <sys/fcntl.h>
   42 #include <sys/malloc.h>         /* XXX must be before <sys/file.h> */
   43 #include <sys/file.h>
   44 #include <sys/filedesc.h>
   45 #include <sys/lock.h>
   46 #include <sys/mbuf.h>
   47 #include <sys/namei.h>
   48 #include <sys/proc.h>
   49 #include <sys/protosw.h>
   50 #include <sys/socket.h>
   51 #include <sys/socketvar.h>
   52 #include <sys/stat.h>
   53 #include <sys/sysctl.h>
   54 #include <sys/un.h>
   55 #include <sys/unpcb.h>
   56 #include <sys/vnode.h>
   57 
   58 #include <vm/vm_zone.h>
   59 
   60 struct  vm_zone *unp_zone;
   61 static  unp_gen_t unp_gencnt;
   62 static  u_int unp_count;
   63 
   64 static  struct unp_head unp_shead, unp_dhead;
   65 
   66 /*
   67  * Unix communications domain.
   68  *
   69  * TODO:
   70  *      SEQPACKET, RDM
   71  *      rethink name space problems
   72  *      need a proper out-of-band
   73  *      lock pushdown
   74  */
   75 static struct   sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
   76 static ino_t    unp_ino;                /* prototype for fake inode numbers */
   77 
   78 static int     unp_attach __P((struct socket *));
   79 static void    unp_detach __P((struct unpcb *));
   80 static int     unp_bind __P((struct unpcb *,struct sockaddr *, struct proc *));
   81 static int     unp_connect __P((struct socket *,struct sockaddr *,
   82                                 struct proc *));
   83 static void    unp_disconnect __P((struct unpcb *));
   84 static void    unp_shutdown __P((struct unpcb *));
   85 static void    unp_drop __P((struct unpcb *, int));
   86 static void    unp_gc __P((void));
   87 static void    unp_scan __P((struct mbuf *, void (*)(struct file *)));
   88 static void    unp_mark __P((struct file *));
   89 static void    unp_discard __P((struct file *));
   90 static int     unp_internalize __P((struct mbuf *, struct proc *));
   91 
   92 static int
   93 uipc_abort(struct socket *so)
   94 {
   95         struct unpcb *unp = sotounpcb(so);
   96 
   97         if (unp == 0)
   98                 return EINVAL;
   99         unp_drop(unp, ECONNABORTED);
  100         return 0;
  101 }
  102 
  103 static int
  104 uipc_accept(struct socket *so, struct sockaddr **nam)
  105 {
  106         struct unpcb *unp = sotounpcb(so);
  107 
  108         if (unp == 0)
  109                 return EINVAL;
  110 
  111         /*
  112          * Pass back name of connected socket,
  113          * if it was bound and we are still connected
  114          * (our peer may have closed already!).
  115          */
  116         if (unp->unp_conn && unp->unp_conn->unp_addr) {
  117                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
  118                                     1);
  119         } else {
  120                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
  121         }
  122         return 0;
  123 }
  124 
  125 static int
  126 uipc_attach(struct socket *so, int proto, struct proc *p)
  127 {
  128         struct unpcb *unp = sotounpcb(so);
  129 
  130         if (unp != 0)
  131                 return EISCONN;
  132         return unp_attach(so);
  133 }
  134 
  135 static int
  136 uipc_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
  137 {
  138         struct unpcb *unp = sotounpcb(so);
  139 
  140         if (unp == 0)
  141                 return EINVAL;
  142 
  143         return unp_bind(unp, nam, p);
  144 }
  145 
  146 static int
  147 uipc_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
  148 {
  149         struct unpcb *unp = sotounpcb(so);
  150 
  151         if (unp == 0)
  152                 return EINVAL;
  153         return unp_connect(so, nam, curproc);
  154 }
  155 
  156 static int
  157 uipc_connect2(struct socket *so1, struct socket *so2)
  158 {
  159         struct unpcb *unp = sotounpcb(so1);
  160 
  161         if (unp == 0)
  162                 return EINVAL;
  163 
  164         return unp_connect2(so1, so2);
  165 }
  166 
  167 /* control is EOPNOTSUPP */
  168 
  169 static int
  170 uipc_detach(struct socket *so)
  171 {
  172         struct unpcb *unp = sotounpcb(so);
  173 
  174         if (unp == 0)
  175                 return EINVAL;
  176 
  177         unp_detach(unp);
  178         return 0;
  179 }
  180 
  181 static int
  182 uipc_disconnect(struct socket *so)
  183 {
  184         struct unpcb *unp = sotounpcb(so);
  185 
  186         if (unp == 0)
  187                 return EINVAL;
  188         unp_disconnect(unp);
  189         return 0;
  190 }
  191 
  192 static int
  193 uipc_listen(struct socket *so, struct proc *p)
  194 {
  195         struct unpcb *unp = sotounpcb(so);
  196 
  197         if (unp == 0 || unp->unp_vnode == 0)
  198                 return EINVAL;
  199         return 0;
  200 }
  201 
  202 static int
  203 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
  204 {
  205         struct unpcb *unp = sotounpcb(so);
  206 
  207         if (unp == 0)
  208                 return EINVAL;
  209         if (unp->unp_conn && unp->unp_conn->unp_addr)
  210                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
  211                                     1);
  212         return 0;
  213 }
  214 
  215 static int
  216 uipc_rcvd(struct socket *so, int flags)
  217 {
  218         struct unpcb *unp = sotounpcb(so);
  219         struct socket *so2;
  220 
  221         if (unp == 0)
  222                 return EINVAL;
  223         switch (so->so_type) {
  224         case SOCK_DGRAM:
  225                 panic("uipc_rcvd DGRAM?");
  226                 /*NOTREACHED*/
  227 
  228         case SOCK_STREAM:
  229 #define rcv (&so->so_rcv)
  230 #define snd (&so2->so_snd)
  231                 if (unp->unp_conn == 0)
  232                         break;
  233                 so2 = unp->unp_conn->unp_socket;
  234                 /*
  235                  * Adjust backpressure on sender
  236                  * and wakeup any waiting to write.
  237                  */
  238                 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
  239                 unp->unp_mbcnt = rcv->sb_mbcnt;
  240                 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
  241                 unp->unp_cc = rcv->sb_cc;
  242                 sowwakeup(so2);
  243 #undef snd
  244 #undef rcv
  245                 break;
  246 
  247         default:
  248                 panic("uipc_rcvd unknown socktype");
  249         }
  250         return 0;
  251 }
  252 
  253 /* pru_rcvoob is EOPNOTSUPP */
  254 
  255 static int
  256 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
  257           struct mbuf *control, struct proc *p)
  258 {
  259         int error = 0;
  260         struct unpcb *unp = sotounpcb(so);
  261         struct socket *so2;
  262 
  263         if (unp == 0) {
  264                 error = EINVAL;
  265                 goto release;
  266         }
  267         if (flags & PRUS_OOB) {
  268                 error = EOPNOTSUPP;
  269                 goto release;
  270         }
  271 
  272         if (control && (error = unp_internalize(control, p)))
  273                 goto release;
  274 
  275         switch (so->so_type) {
  276         case SOCK_DGRAM: 
  277         {
  278                 struct sockaddr *from;
  279 
  280                 if (nam) {
  281                         if (unp->unp_conn) {
  282                                 error = EISCONN;
  283                                 break;
  284                         }
  285                         error = unp_connect(so, nam, p);
  286                         if (error)
  287                                 break;
  288                 } else {
  289                         if (unp->unp_conn == 0) {
  290                                 error = ENOTCONN;
  291                                 break;
  292                         }
  293                 }
  294                 so2 = unp->unp_conn->unp_socket;
  295                 if (unp->unp_addr)
  296                         from = (struct sockaddr *)unp->unp_addr;
  297                 else
  298                         from = &sun_noname;
  299                 if (sbappendaddr(&so2->so_rcv, from, m, control)) {
  300                         sorwakeup(so2);
  301                         m = 0;
  302                         control = 0;
  303                 } else
  304                         error = ENOBUFS;
  305                 if (nam)
  306                         unp_disconnect(unp);
  307                 break;
  308         }
  309 
  310         case SOCK_STREAM:
  311 #define rcv (&so2->so_rcv)
  312 #define snd (&so->so_snd)
  313                 /* Connect if not connected yet. */
  314                 /*
  315                  * Note: A better implementation would complain
  316                  * if not equal to the peer's address.
  317                  */
  318                 if ((so->so_state & SS_ISCONNECTED) == 0) {
  319                         if (nam) {
  320                                 error = unp_connect(so, nam, p);
  321                                 if (error)
  322                                         break;  /* XXX */
  323                         } else {
  324                                 error = ENOTCONN;
  325                                 break;
  326                         }
  327                 }
  328 
  329                 if (so->so_state & SS_CANTSENDMORE) {
  330                         error = EPIPE;
  331                         break;
  332                 }
  333                 if (unp->unp_conn == 0)
  334                         panic("uipc_send connected but no connection?");
  335                 so2 = unp->unp_conn->unp_socket;
  336                 /*
  337                  * Send to paired receive port, and then reduce
  338                  * send buffer hiwater marks to maintain backpressure.
  339                  * Wake up readers.
  340                  */
  341                 if (control) {
  342                         if (sbappendcontrol(rcv, m, control))
  343                                 control = 0;
  344                 } else
  345                         sbappend(rcv, m);
  346                 snd->sb_mbmax -=
  347                         rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
  348                 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
  349                 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
  350                 unp->unp_conn->unp_cc = rcv->sb_cc;
  351                 sorwakeup(so2);
  352                 m = 0;
  353 #undef snd
  354 #undef rcv
  355                 break;
  356 
  357         default:
  358                 panic("uipc_send unknown socktype");
  359         }
  360 
  361         /*
  362          * SEND_EOF is equivalent to a SEND followed by
  363          * a SHUTDOWN.
  364          */
  365         if (flags & PRUS_EOF) {
  366                 socantsendmore(so);
  367                 unp_shutdown(unp);
  368         }
  369 
  370         if (control && error != 0)
  371                 unp_dispose(control);
  372 
  373 release:
  374         if (control)
  375                 m_freem(control);
  376         if (m)
  377                 m_freem(m);
  378         return error;
  379 }
  380 
  381 static int
  382 uipc_sense(struct socket *so, struct stat *sb)
  383 {
  384         struct unpcb *unp = sotounpcb(so);
  385         struct socket *so2;
  386 
  387         if (unp == 0)
  388                 return EINVAL;
  389         sb->st_blksize = so->so_snd.sb_hiwat;
  390         if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
  391                 so2 = unp->unp_conn->unp_socket;
  392                 sb->st_blksize += so2->so_rcv.sb_cc;
  393         }
  394         sb->st_dev = NODEV;
  395         if (unp->unp_ino == 0)
  396                 unp->unp_ino = unp_ino++;
  397         sb->st_ino = unp->unp_ino;
  398         return (0);
  399 }
  400 
  401 static int
  402 uipc_shutdown(struct socket *so)
  403 {
  404         struct unpcb *unp = sotounpcb(so);
  405 
  406         if (unp == 0)
  407                 return EINVAL;
  408         socantsendmore(so);
  409         unp_shutdown(unp);
  410         return 0;
  411 }
  412 
  413 static int
  414 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
  415 {
  416         struct unpcb *unp = sotounpcb(so);
  417 
  418         if (unp == 0)
  419                 return EINVAL;
  420         if (unp->unp_addr)
  421                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
  422         return 0;
  423 }
  424 
  425 struct pr_usrreqs uipc_usrreqs = {
  426         uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
  427         uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
  428         uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
  429         uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
  430         sosend, soreceive, sopoll
  431 };
  432         
  433 /*
  434  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
  435  * for stream sockets, although the total for sender and receiver is
  436  * actually only PIPSIZ.
  437  * Datagram sockets really use the sendspace as the maximum datagram size,
  438  * and don't really want to reserve the sendspace.  Their recvspace should
  439  * be large enough for at least one max-size datagram plus address.
  440  */
  441 #ifndef PIPSIZ
  442 #define PIPSIZ  8192
  443 #endif
  444 static u_long   unpst_sendspace = PIPSIZ;
  445 static u_long   unpst_recvspace = PIPSIZ;
  446 static u_long   unpdg_sendspace = 2*1024;       /* really max datagram size */
  447 static u_long   unpdg_recvspace = 4*1024;
  448 
  449 static int      unp_rights;                     /* file descriptors in flight */
  450 
  451 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 
  452            &unpst_sendspace, 0, "");
  453 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
  454            &unpst_recvspace, 0, "");
  455 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
  456            &unpdg_sendspace, 0, "");
  457 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
  458            &unpdg_recvspace, 0, "");
  459 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
  460 
  461 static int
  462 unp_attach(so)
  463         struct socket *so;
  464 {
  465         register struct unpcb *unp;
  466         int error;
  467 
  468         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
  469                 switch (so->so_type) {
  470 
  471                 case SOCK_STREAM:
  472                         error = soreserve(so, unpst_sendspace, unpst_recvspace);
  473                         break;
  474 
  475                 case SOCK_DGRAM:
  476                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
  477                         break;
  478 
  479                 default:
  480                         panic("unp_attach");
  481                 }
  482                 if (error)
  483                         return (error);
  484         }
  485         unp = zalloc(unp_zone);
  486         if (unp == NULL)
  487                 return (ENOBUFS);
  488         bzero(unp, sizeof *unp);
  489         unp->unp_gencnt = ++unp_gencnt;
  490         unp_count++;
  491         LIST_INIT(&unp->unp_refs);
  492         unp->unp_socket = so;
  493         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
  494                          : &unp_shead, unp, unp_link);
  495         so->so_pcb = (caddr_t)unp;
  496         return (0);
  497 }
  498 
  499 static void
  500 unp_detach(unp)
  501         register struct unpcb *unp;
  502 {
  503         LIST_REMOVE(unp, unp_link);
  504         unp->unp_gencnt = ++unp_gencnt;
  505         --unp_count;
  506         if (unp->unp_vnode) {
  507                 unp->unp_vnode->v_socket = 0;
  508                 vrele(unp->unp_vnode);
  509                 unp->unp_vnode = 0;
  510         }
  511         if (unp->unp_conn)
  512                 unp_disconnect(unp);
  513         while (unp->unp_refs.lh_first)
  514                 unp_drop(unp->unp_refs.lh_first, ECONNRESET);
  515         soisdisconnected(unp->unp_socket);
  516         unp->unp_socket->so_pcb = 0;
  517         if (unp_rights) {
  518                 /*
  519                  * Normally the receive buffer is flushed later,
  520                  * in sofree, but if our receive buffer holds references
  521                  * to descriptors that are now garbage, we will dispose
  522                  * of those descriptor references after the garbage collector
  523                  * gets them (resulting in a "panic: closef: count < 0").
  524                  */
  525                 sorflush(unp->unp_socket);
  526                 unp_gc();
  527         }
  528         if (unp->unp_addr)
  529                 FREE(unp->unp_addr, M_SONAME);
  530         zfree(unp_zone, unp);
  531 }
  532 
  533 static int
  534 unp_bind(unp, nam, p)
  535         struct unpcb *unp;
  536         struct sockaddr *nam;
  537         struct proc *p;
  538 {
  539         struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  540         register struct vnode *vp;
  541         struct vattr vattr;
  542         int error, namelen;
  543         struct nameidata nd;
  544         char buf[SOCK_MAXADDRLEN];
  545 
  546         if (unp->unp_vnode != NULL)
  547                 return (EINVAL);
  548 #define offsetof(s, e) ((char *)&((s *)0)->e - (char *)((s *)0))
  549         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
  550         if (namelen <= 0)
  551                 return EINVAL;
  552         strncpy(buf, soun->sun_path, namelen);
  553         buf[namelen] = 0;       /* null-terminate the string */
  554         NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
  555             buf, p);
  556 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
  557         error = namei(&nd);
  558         if (error)
  559                 return (error);
  560         vp = nd.ni_vp;
  561         if (vp != NULL) {
  562                 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
  563                 if (nd.ni_dvp == vp)
  564                         vrele(nd.ni_dvp);
  565                 else
  566                         vput(nd.ni_dvp);
  567                 vrele(vp);
  568                 return (EADDRINUSE);
  569         }
  570         VATTR_NULL(&vattr);
  571         vattr.va_type = VSOCK;
  572         vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask);
  573         VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
  574         error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
  575         vput(nd.ni_dvp);
  576         if (error)
  577                 return (error);
  578         vp = nd.ni_vp;
  579         vp->v_socket = unp->unp_socket;
  580         unp->unp_vnode = vp;
  581         unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
  582         VOP_UNLOCK(vp, 0, p);
  583         return (0);
  584 }
  585 
  586 static int
  587 unp_connect(so, nam, p)
  588         struct socket *so;
  589         struct sockaddr *nam;
  590         struct proc *p;
  591 {
  592         register struct sockaddr_un *soun = (struct sockaddr_un *)nam;
  593         register struct vnode *vp;
  594         register struct socket *so2, *so3;
  595         struct unpcb *unp2, *unp3;
  596         int error, len;
  597         struct nameidata nd;
  598         char buf[SOCK_MAXADDRLEN];
  599 
  600         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
  601         if (len <= 0)
  602                 return EINVAL;
  603         strncpy(buf, soun->sun_path, len);
  604         buf[len] = 0;
  605 
  606         NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, p);
  607         error = namei(&nd);
  608         if (error)
  609                 return (error);
  610         vp = nd.ni_vp;
  611         if (vp->v_type != VSOCK) {
  612                 error = ENOTSOCK;
  613                 goto bad;
  614         }
  615         error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
  616         if (error)
  617                 goto bad;
  618         so2 = vp->v_socket;
  619         if (so2 == 0) {
  620                 error = ECONNREFUSED;
  621                 goto bad;
  622         }
  623         if (so->so_type != so2->so_type) {
  624                 error = EPROTOTYPE;
  625                 goto bad;
  626         }
  627         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
  628                 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
  629                     (so3 = sonewconn(so2, 0)) == 0) {
  630                         error = ECONNREFUSED;
  631                         goto bad;
  632                 }
  633                 unp2 = sotounpcb(so2);
  634                 unp3 = sotounpcb(so3);
  635                 if (unp2->unp_addr)
  636                         unp3->unp_addr = (struct sockaddr_un *)
  637                                 dup_sockaddr((struct sockaddr *)
  638                                              unp2->unp_addr, 1);
  639                 so2 = so3;
  640         }
  641         error = unp_connect2(so, so2);
  642 bad:
  643         vput(vp);
  644         return (error);
  645 }
  646 
  647 int
  648 unp_connect2(so, so2)
  649         register struct socket *so;
  650         register struct socket *so2;
  651 {
  652         register struct unpcb *unp = sotounpcb(so);
  653         register struct unpcb *unp2;
  654 
  655         if (so2->so_type != so->so_type)
  656                 return (EPROTOTYPE);
  657         unp2 = sotounpcb(so2);
  658         unp->unp_conn = unp2;
  659         switch (so->so_type) {
  660 
  661         case SOCK_DGRAM:
  662                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
  663                 soisconnected(so);
  664                 break;
  665 
  666         case SOCK_STREAM:
  667                 unp2->unp_conn = unp;
  668                 soisconnected(so);
  669                 soisconnected(so2);
  670                 break;
  671 
  672         default:
  673                 panic("unp_connect2");
  674         }
  675         return (0);
  676 }
  677 
  678 static void
  679 unp_disconnect(unp)
  680         struct unpcb *unp;
  681 {
  682         register struct unpcb *unp2 = unp->unp_conn;
  683 
  684         if (unp2 == 0)
  685                 return;
  686         unp->unp_conn = 0;
  687         switch (unp->unp_socket->so_type) {
  688 
  689         case SOCK_DGRAM:
  690                 LIST_REMOVE(unp, unp_reflink);
  691                 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
  692                 break;
  693 
  694         case SOCK_STREAM:
  695                 soisdisconnected(unp->unp_socket);
  696                 unp2->unp_conn = 0;
  697                 soisdisconnected(unp2->unp_socket);
  698                 break;
  699         }
  700 }
  701 
  702 #ifdef notdef
  703 void
  704 unp_abort(unp)
  705         struct unpcb *unp;
  706 {
  707 
  708         unp_detach(unp);
  709 }
  710 #endif
  711 
  712 static int
  713 unp_pcblist SYSCTL_HANDLER_ARGS
  714 {
  715         int error, i, n;
  716         struct unpcb *unp, **unp_list;
  717         unp_gen_t gencnt;
  718         struct xunpgen xug;
  719         struct unp_head *head;
  720 
  721         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
  722 
  723         /*
  724          * The process of preparing the PCB list is too time-consuming and
  725          * resource-intensive to repeat twice on every request.
  726          */
  727         if (req->oldptr == 0) {
  728                 n = unp_count;
  729                 req->oldidx = 2 * (sizeof xug)
  730                         + (n + n/8) * sizeof(struct xunpcb);
  731                 return 0;
  732         }
  733 
  734         if (req->newptr != 0)
  735                 return EPERM;
  736 
  737         /*
  738          * OK, now we're committed to doing something.
  739          */
  740         gencnt = unp_gencnt;
  741         n = unp_count;
  742 
  743         xug.xug_len = sizeof xug;
  744         xug.xug_count = n;
  745         xug.xug_gen = gencnt;
  746         xug.xug_sogen = so_gencnt;
  747         error = SYSCTL_OUT(req, &xug, sizeof xug);
  748         if (error)
  749                 return error;
  750 
  751         unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
  752         if (unp_list == 0)
  753                 return ENOMEM;
  754         
  755         for (unp = head->lh_first, i = 0; unp && i < n;
  756              unp = unp->unp_link.le_next) {
  757                 if (unp->unp_gencnt <= gencnt)
  758                         unp_list[i++] = unp;
  759         }
  760         n = i;                  /* in case we lost some during malloc */
  761 
  762         error = 0;
  763         for (i = 0; i < n; i++) {
  764                 unp = unp_list[i];
  765                 if (unp->unp_gencnt <= gencnt) {
  766                         struct xunpcb xu;
  767                         xu.xu_len = sizeof xu;
  768                         xu.xu_unpp = unp;
  769                         /*
  770                          * XXX - need more locking here to protect against
  771                          * connect/disconnect races for SMP.
  772                          */
  773                         if (unp->unp_addr)
  774                                 bcopy(unp->unp_addr, &xu.xu_addr, 
  775                                       unp->unp_addr->sun_len);
  776                         if (unp->unp_conn && unp->unp_conn->unp_addr)
  777                                 bcopy(unp->unp_conn->unp_addr,
  778                                       &xu.xu_caddr,
  779                                       unp->unp_conn->unp_addr->sun_len);
  780                         bcopy(unp, &xu.xu_unp, sizeof *unp);
  781                         sotoxsocket(unp->unp_socket, &xu.xu_socket);
  782                         error = SYSCTL_OUT(req, &xu, sizeof xu);
  783                 }
  784         }
  785         if (!error) {
  786                 /*
  787                  * Give the user an updated idea of our state.
  788                  * If the generation differs from what we told
  789                  * her before, she knows that something happened
  790                  * while we were processing this request, and it
  791                  * might be necessary to retry.
  792                  */
  793                 xug.xug_gen = unp_gencnt;
  794                 xug.xug_sogen = so_gencnt;
  795                 xug.xug_count = unp_count;
  796                 error = SYSCTL_OUT(req, &xug, sizeof xug);
  797         }
  798         free(unp_list, M_TEMP);
  799         return error;
  800 }
  801 
  802 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 
  803             (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
  804             "List of active local datagram sockets");
  805 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 
  806             (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
  807             "List of active local stream sockets");
  808 
  809 static void
  810 unp_shutdown(unp)
  811         struct unpcb *unp;
  812 {
  813         struct socket *so;
  814 
  815         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
  816             (so = unp->unp_conn->unp_socket))
  817                 socantrcvmore(so);
  818 }
  819 
  820 static void
  821 unp_drop(unp, errno)
  822         struct unpcb *unp;
  823         int errno;
  824 {
  825         struct socket *so = unp->unp_socket;
  826 
  827         so->so_error = errno;
  828         unp_disconnect(unp);
  829         if (so->so_head) {
  830                 LIST_REMOVE(unp, unp_link);
  831                 unp->unp_gencnt = ++unp_gencnt;
  832                 unp_count--;
  833                 so->so_pcb = (caddr_t) 0;
  834                 if (unp->unp_addr)
  835                         FREE(unp->unp_addr, M_SONAME);
  836                 zfree(unp_zone, unp);
  837                 sofree(so);
  838         }
  839 }
  840 
  841 #ifdef notdef
  842 void
  843 unp_drain()
  844 {
  845 
  846 }
  847 #endif
  848 
  849 int
  850 unp_externalize(rights)
  851         struct mbuf *rights;
  852 {
  853         struct proc *p = curproc;               /* XXX */
  854         register int i;
  855         register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
  856         register struct file **rp = (struct file **)(cm + 1);
  857         register struct file *fp;
  858         int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
  859         int f;
  860 
  861         /*
  862          * if the new FD's will not fit, then we free them all
  863          */
  864         if (!fdavail(p, newfds)) {
  865                 for (i = 0; i < newfds; i++) {
  866                         fp = *rp;
  867                         unp_discard(fp);
  868                         *rp++ = 0;
  869                 }
  870                 return (EMSGSIZE);
  871         }
  872         /*
  873          * now change each pointer to an fd in the global table to 
  874          * an integer that is the index to the local fd table entry
  875          * that we set up to point to the global one we are transferring.
  876          * XXX this assumes a pointer and int are the same size...!
  877          */
  878         for (i = 0; i < newfds; i++) {
  879                 if (fdalloc(p, 0, &f))
  880                         panic("unp_externalize");
  881                 fp = *rp;
  882                 p->p_fd->fd_ofiles[f] = fp;
  883                 fp->f_msgcount--;
  884                 unp_rights--;
  885                 *(int *)rp++ = f;
  886         }
  887         return (0);
  888 }
  889 
  890 void
  891 unp_init(void)
  892 {
  893         unp_zone = zinit("unpcb", sizeof(struct unpcb), nmbclusters, 0, 0);
  894         if (unp_zone == 0)
  895                 panic("unp_init");
  896         LIST_INIT(&unp_dhead);
  897         LIST_INIT(&unp_shead);
  898 }
  899 
  900 #ifndef MIN
  901 #define MIN(a,b) (((a)<(b))?(a):(b))
  902 #endif
  903 
  904 static int
  905 unp_internalize(control, p)
  906         struct mbuf *control;
  907         struct proc *p;
  908 {
  909         struct filedesc *fdp = p->p_fd;
  910         register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
  911         register struct file **rp;
  912         register struct file *fp;
  913         register int i, fd;
  914         register struct cmsgcred *cmcred;
  915         int oldfds;
  916 
  917         if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) ||
  918             cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len)
  919                 return (EINVAL);
  920 
  921         /*
  922          * Fill in credential information.
  923          */
  924         if (cm->cmsg_type == SCM_CREDS) {
  925                 cmcred = (struct cmsgcred *)(cm + 1);
  926                 cmcred->cmcred_pid = p->p_pid;
  927                 cmcred->cmcred_uid = p->p_cred->p_ruid;
  928                 cmcred->cmcred_gid = p->p_cred->p_rgid;
  929                 cmcred->cmcred_euid = p->p_ucred->cr_uid;
  930                 cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups,
  931                                                         CMGROUP_MAX);
  932                 for (i = 0; i < cmcred->cmcred_ngroups; i++)
  933                         cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i];
  934                 return(0);
  935         }
  936 
  937         oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
  938         /*
  939          * check that all the FDs passed in refer to legal OPEN files
  940          * If not, reject the entire operation.
  941          */
  942         rp = (struct file **)(cm + 1);
  943         for (i = 0; i < oldfds; i++) {
  944                 fd = *(int *)rp++;
  945                 if ((unsigned)fd >= fdp->fd_nfiles ||
  946                     fdp->fd_ofiles[fd] == NULL)
  947                         return (EBADF);
  948         }
  949         /*
  950          * Now replace the integer FDs with pointers to
  951          * the associated global file table entry..
  952          * XXX this assumes a pointer and an int are the same size!
  953          */
  954         rp = (struct file **)(cm + 1);
  955         for (i = 0; i < oldfds; i++) {
  956                 fp = fdp->fd_ofiles[*(int *)rp];
  957                 *rp++ = fp;
  958                 fp->f_count++;
  959                 fp->f_msgcount++;
  960                 unp_rights++;
  961         }
  962         return (0);
  963 }
  964 
/*
 * Garbage collector state.
 *   unp_defer - number of files currently flagged FDEFER, i.e. marked
 *               by unp_mark() but not yet scanned by unp_gc().
 *   unp_gcing - non-zero while unp_gc() is running; used to make a
 *               nested call return immediately.
 */
static int      unp_defer, unp_gcing;

/*
 * Garbage-collect files that are referenced only by rights messages
 * sitting in socket receive buffers.
 *
 * Mark phase: every file on the global file list has FMARK/FDEFER
 * cleared.  A file whose f_count differs from its f_msgcount is
 * considered externally accessible and is marked.  For each marked (or
 * deferred) file that is a local-domain socket supporting PR_RIGHTS,
 * the files carried in its receive buffer are marked via unp_mark(),
 * which also flags them FDEFER so they are scanned on a later pass.
 * The passes repeat until no deferred files remain.
 *
 * Sweep phase: every open, unmarked file whose references all come
 * from messages gets an extra reference, its socket (if any) is
 * flushed with sorflush(), and the extra reference is dropped with
 * closef().
 */
static void
unp_gc()
{
        register struct file *fp, *nextfp;
        register struct socket *so;
        struct file **extra_ref, **fpp;
        int nunref, i;

        /* Do not re-enter: closef() below can lead back here. */
        if (unp_gcing)
                return;
        unp_gcing = 1;
        unp_defer = 0;
        /*
         * before going through all this, set all FDs to
         * be NOT deferred and NOT externally accessible
         */
        for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
                fp->f_flag &= ~(FMARK|FDEFER);
        do {
                for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
                        /*
                         * If the file is not open, skip it
                         */
                        if (fp->f_count == 0)
                                continue;
                        /*
                         * If we already marked it as 'defer' in a
                         * previous pass, then try to process it this
                         * time and un-mark it
                         */
                        if (fp->f_flag & FDEFER) {
                                fp->f_flag &= ~FDEFER;
                                unp_defer--;
                        } else {
                                /*
                                 * if it's not deferred, then check if it's
                                 * already marked.. if so skip it
                                 */
                                if (fp->f_flag & FMARK)
                                        continue;
                                /*
                                 * If all references are from messages
                                 * in transit, then skip it. it's not
                                 * externally accessible.
                                 */
                                if (fp->f_count == fp->f_msgcount)
                                        continue;
                                /*
                                 * If it got this far then it must be
                                 * externally accessible.
                                 */
                                fp->f_flag |= FMARK;
                        }
                        /*
                         * either it was deferred, or it is externally
                         * accessible and not already marked so.
                         * Now check if it is possibly one of OUR sockets.
                         */
                        if (fp->f_type != DTYPE_SOCKET ||
                            (so = (struct socket *)fp->f_data) == 0)
                                continue;
                        if (so->so_proto->pr_domain != &localdomain ||
                            (so->so_proto->pr_flags&PR_RIGHTS) == 0)
                                continue;
#ifdef notdef
                        if (so->so_rcv.sb_flags & SB_LOCK) {
                                /*
                                 * This is problematical; it's not clear
                                 * we need to wait for the sockbuf to be
                                 * unlocked (on a uniprocessor, at least),
                                 * and it's also not clear what to do
                                 * if sbwait returns an error due to receipt
                                 * of a signal.  If sbwait does return
                                 * an error, we'll go into an infinite
                                 * loop.  Delete all of this for now.
                                 */
                                (void) sbwait(&so->so_rcv);
                                goto restart;
                        }
#endif
                        /*
                         * So, Ok, it's one of our sockets and it IS externally
                         * accessible (or was deferred). Now we look
                         * to see if we hold any file descriptors in its
                         * message buffers. Follow those links and mark them
                         * as accessible too.
                         */
                        unp_scan(so->so_rcv.sb_mb, unp_mark);
                }
        } while (unp_defer);
        /*
         * We grab an extra reference to each of the file table entries
         * that are not otherwise accessible and then free the rights
         * that are stored in messages on them.
         *
         * The bug in the original code is a little tricky, so I'll describe
         * what's wrong with it here.
         *
         * It is incorrect to simply unp_discard each entry for f_msgcount
         * times -- consider the case of sockets A and B that contain
         * references to each other.  On a last close of some other socket,
         * we trigger a gc since the number of outstanding rights (unp_rights)
         * is non-zero.  If during the sweep phase the gc code unp_discards,
         * we end up doing a (full) closef on the descriptor.  A closef on A
         * results in the following chain.  Closef calls soo_close, which
         * calls soclose.   Soclose calls first (through the switch
         * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
         * returns because the previous instance had set unp_gcing, and
         * we return all the way back to soclose, which marks the socket
         * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
         * to free up the rights that are queued in messages on the socket A,
         * i.e., the reference on B.  The sorflush calls via the dom_dispose
         * switch unp_dispose, which unp_scans with unp_discard.  This second
         * instance of unp_discard just calls closef on B.
         *
         * Well, a similar chain occurs on B, resulting in a sorflush on B,
         * which results in another closef on A.  Unfortunately, A is already
         * being closed, and the descriptor has already been marked with
         * SS_NOFDREF, and soclose panics at this point.
         *
         * Here, we first take an extra reference to each inaccessible
         * descriptor.  Then, we call sorflush ourselves, since we know
         * it is a Unix domain socket anyhow.  After we destroy all the
         * rights carried in messages, we do a last closef to get rid
         * of our extra reference.  This is the last close, and the
         * unp_detach etc will shut down the socket.
         *
         * 91/09/19, bsy@cs.cmu.edu
         */
        /*
         * The array is sized from nfiles as read at the time of this call.
         * NOTE(review): confirm the file list cannot grow past that size
         * while malloc(M_WAITOK) is waiting.
         */
        extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
        for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
            fp = nextfp) {
                nextfp = fp->f_list.le_next;
                /*
                 * If it's not open, skip it
                 */
                if (fp->f_count == 0)
                        continue;
                /*
                 * If all refs are from msgs, and it's not marked accessible
                 * then it must be referenced from some unreachable cycle
                 * of (shut-down) FDs, so include it in our
                 * list of FDs to remove
                 */
                if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
                        *fpp++ = fp;
                        nunref++;
                        fp->f_count++;
                }
        }
        /*
         * For each FD on our hit list, do the following two things:
         * first flush the receive side of every socket (releasing the
         * rights queued on it), then, in a second pass, drop the extra
         * reference taken above.
         */
        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
                struct file *tfp = *fpp;
                if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
                        sorflush((struct socket *)(tfp->f_data));
        }


        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
                closef(*fpp, (struct proc *) NULL);
        free((caddr_t)extra_ref, M_FILE);
        unp_gcing = 0;
}
 1132 
 1133 void
 1134 unp_dispose(m)
 1135         struct mbuf *m;
 1136 {
 1137 
 1138         if (m)
 1139                 unp_scan(m, unp_discard);
 1140 }
 1141 
 1142 static void
 1143 unp_scan(m0, op)
 1144         register struct mbuf *m0;
 1145         void (*op) __P((struct file *));
 1146 {
 1147         register struct mbuf *m;
 1148         register struct file **rp;
 1149         register struct cmsghdr *cm;
 1150         register int i;
 1151         int qfds;
 1152 
 1153         while (m0) {
 1154                 for (m = m0; m; m = m->m_next)
 1155                         if (m->m_type == MT_CONTROL &&
 1156                             m->m_len >= sizeof(*cm)) {
 1157                                 cm = mtod(m, struct cmsghdr *);
 1158                                 if (cm->cmsg_level != SOL_SOCKET ||
 1159                                     cm->cmsg_type != SCM_RIGHTS)
 1160                                         continue;
 1161                                 qfds = (cm->cmsg_len - sizeof *cm)
 1162                                                 / sizeof (struct file *);
 1163                                 rp = (struct file **)(cm + 1);
 1164                                 for (i = 0; i < qfds; i++)
 1165                                         (*op)(*rp++);
 1166                                 break;          /* XXX, but saves time */
 1167                         }
 1168                 m0 = m0->m_act;
 1169         }
 1170 }
 1171 
 1172 static void
 1173 unp_mark(fp)
 1174         struct file *fp;
 1175 {
 1176 
 1177         if (fp->f_flag & FMARK)
 1178                 return;
 1179         unp_defer++;
 1180         fp->f_flag |= (FMARK|FDEFER);
 1181 }
 1182 
 1183 static void
 1184 unp_discard(fp)
 1185         struct file *fp;
 1186 {
 1187 
 1188         fp->f_msgcount--;
 1189         unp_rights--;
 1190         (void) closef(fp, (struct proc *)NULL);
 1191 }

Cache object: e02fb7904faf3804365d81a10830979d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.