The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_socket.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      @(#)uipc_socket.c       8.3 (Berkeley) 4/15/94
   34  * $FreeBSD$
   35  */
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/fcntl.h>
   40 #include <sys/malloc.h>
   41 #include <sys/mbuf.h>
   42 #include <sys/domain.h>
   43 #include <sys/kernel.h>
   44 #include <sys/malloc.h>
   45 #include <sys/poll.h>
   46 #include <sys/proc.h>
   47 #include <sys/protosw.h>
   48 #include <sys/socket.h>
   49 #include <sys/socketvar.h>
   50 #include <sys/resourcevar.h>
   51 #include <sys/signalvar.h>
   52 #include <sys/sysctl.h>
   53 #include <sys/uio.h>
   54 #include <vm/vm_zone.h>
   55 
   56 #include <machine/limits.h>
   57 
   58 struct  vm_zone *socket_zone;   /* zone that soalloc() draws sockets from */
   59 so_gen_t        so_gencnt;      /* generation count for sockets */
   60 
   61 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
   62 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
   63 
   64 static int somaxconn = SOMAXCONN;       /* ceiling applied to the backlog in solisten() */
      /* Exported read-write (CTLFLAG_RW); the description string is empty. */
   65 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn,
   66            0, "");
   67 
   68 /*
   69  * Socket operation routines.
   70  * These routines are called by the routines in
   71  * sys_socket.c or from a system process, and
   72  * implement the semantics of socket operations by
   73  * switching out to the protocol specific routines.
   74  */
   75 
   76 /*
   77  * Get a socket structure from our zone, and initialize it.
   78  * We don't implement `waitok' yet (see comments in uipc_domain.c).
   79  * Note that it would probably be better to allocate socket
   80  * and PCB at the same time, but I'm not convinced that all
   81  * the protocols can be easily modified to do this.
   82  */
   83 struct socket *
   84 soalloc(waitok)
   85         int waitok;
   86 {
   87         struct socket *so;
   88 
   89         so = zalloci(socket_zone);
   90         if (so) {
   91                 /* XXX race condition for reentrant kernel */
   92                 bzero(so, sizeof *so);
   93                 so->so_gencnt = ++so_gencnt;
   94                 so->so_zone = socket_zone;
   95         }
   96         return so;
   97 }
   98 
   99 int
  100 socreate(dom, aso, type, proto, p)
  101         int dom;
  102         struct socket **aso;
  103         register int type;
  104         int proto;
  105         struct proc *p;
  106 {
  107         register struct protosw *prp;
  108         register struct socket *so;
  109         register int error;
  110 
  111         if (proto)
  112                 prp = pffindproto(dom, proto, type);
  113         else
  114                 prp = pffindtype(dom, type);
  115         if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
  116                 return (EPROTONOSUPPORT);
  117         if (prp->pr_type != type)
  118                 return (EPROTOTYPE);
  119         so = soalloc(p != 0);
  120         if (so == 0)
  121                 return (ENOBUFS);
  122 
  123         TAILQ_INIT(&so->so_incomp);
  124         TAILQ_INIT(&so->so_comp);
  125         so->so_type = type;
  126         if (p != NULL) {
  127                 so->so_cred = p->p_cred;
  128                 so->so_cred->p_refcnt++;
  129         } else
  130                 so->so_cred = NULL;
  131         so->so_proto = prp;
  132         error = (*prp->pr_usrreqs->pru_attach)(so, proto, p);
  133         if (error) {
  134                 so->so_state |= SS_NOFDREF;
  135                 sofree(so);
  136                 return (error);
  137         }
  138         *aso = so;
  139         return (0);
  140 }
  141 
  142 int
  143 sobind(so, nam, p)
  144         struct socket *so;
  145         struct sockaddr *nam;
  146         struct proc *p;
  147 {
  148         int s = splnet();
  149         int error;
  150 
  151         error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
  152         splx(s);
  153         return (error);
  154 }
  155 
  156 void
  157 sodealloc(so)
  158         struct socket *so;
  159 {
  160         so->so_gencnt = ++so_gencnt;
  161         if (so->so_cred && --so->so_cred->p_refcnt == 0) {
  162                 crfree(so->so_cred->pc_ucred);
  163                 FREE(so->so_cred, M_SUBPROC);
  164         }
  165         zfreei(so->so_zone, so);
  166 }
  167 
  168 int
  169 solisten(so, backlog, p)
  170         register struct socket *so;
  171         int backlog;
  172         struct proc *p;
  173 {
  174         int s, error;
  175 
  176         s = splnet();
  177         error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
  178         if (error) {
  179                 splx(s);
  180                 return (error);
  181         }
  182         if (so->so_comp.tqh_first == NULL)
  183                 so->so_options |= SO_ACCEPTCONN;
  184         if (backlog < 0 || backlog > somaxconn)
  185                 backlog = somaxconn;
  186         so->so_qlimit = backlog;
  187         splx(s);
  188         return (0);
  189 }
  190 
  191 void
  192 sofree(so)
  193         register struct socket *so;
  194 {
  195         struct socket *head = so->so_head;
  196 
  197         if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
  198                 return;
  199         if (head != NULL) {
  200                 if (so->so_state & SS_INCOMP) {
  201                         TAILQ_REMOVE(&head->so_incomp, so, so_list);
  202                         head->so_incqlen--;
  203                 } else if (so->so_state & SS_COMP) {
  204                         TAILQ_REMOVE(&head->so_comp, so, so_list);
  205                 } else {
  206                         panic("sofree: not queued");
  207                 }
  208                 head->so_qlen--;
  209                 so->so_state &= ~(SS_INCOMP|SS_COMP);
  210                 so->so_head = NULL;
  211         }
  212         sbrelease(&so->so_snd);
  213         sorflush(so);
  214         sodealloc(so);
  215 }
  216 
  217 /*
  218  * Close a socket on last file table reference removal.
  219  * Initiate disconnect if connected.
  220  * Free socket when disconnect complete.
  221  */
  222 int
  223 soclose(so)
  224         register struct socket *so;
  225 {
  226         int s = splnet();               /* conservative */
  227         int error = 0;
  228 
  229         funsetown(so->so_sigio);
  230         if (so->so_options & SO_ACCEPTCONN) {
  231                 struct socket *sp, *sonext;
  232 
  233                 for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) {
  234                         sonext = sp->so_list.tqe_next;
  235                         (void) soabort(sp);
  236                 }
  237                 for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) {
  238                         sonext = sp->so_list.tqe_next;
  239                         (void) soabort(sp);
  240                 }
  241         }
  242         if (so->so_pcb == 0)
  243                 goto discard;
  244         if (so->so_state & SS_ISCONNECTED) {
  245                 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
  246                         error = sodisconnect(so);
  247                         if (error)
  248                                 goto drop;
  249                 }
  250                 if (so->so_options & SO_LINGER) {
  251                         if ((so->so_state & SS_ISDISCONNECTING) &&
  252                             (so->so_state & SS_NBIO))
  253                                 goto drop;
  254                         while (so->so_state & SS_ISCONNECTED) {
  255                                 error = tsleep((caddr_t)&so->so_timeo,
  256                                     PSOCK | PCATCH, "soclos", so->so_linger * hz);
  257                                 if (error)
  258                                         break;
  259                         }
  260                 }
  261         }
  262 drop:
  263         if (so->so_pcb) {
  264                 int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
  265                 if (error == 0)
  266                         error = error2;
  267         }
  268 discard:
  269         if (so->so_state & SS_NOFDREF)
  270                 panic("soclose: NOFDREF");
  271         so->so_state |= SS_NOFDREF;
  272         sofree(so);
  273         splx(s);
  274         return (error);
  275 }
  276 
  277 /*
  278  * Must be called at splnet...
  279  */
  280 int
  281 soabort(so)
  282         struct socket *so;
  283 {
  284         int error;
  285 
  286         error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
  287         if (error) {
  288                 sofree(so);
  289                 return error;
  290         }
  291         return (0);
  292 }
  293 
  294 int
  295 soaccept(so, nam)
  296         register struct socket *so;
  297         struct sockaddr **nam;
  298 {
  299         int s = splnet();
  300         int error;
  301 
  302         if ((so->so_state & SS_NOFDREF) == 0)
  303                 panic("soaccept: !NOFDREF");
  304         so->so_state &= ~SS_NOFDREF;
  305         error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
  306         splx(s);
  307         return (error);
  308 }
  309 
  310 int
  311 soconnect(so, nam, p)
  312         register struct socket *so;
  313         struct sockaddr *nam;
  314         struct proc *p;
  315 {
  316         int s;
  317         int error;
  318 
  319         if (so->so_options & SO_ACCEPTCONN)
  320                 return (EOPNOTSUPP);
  321         s = splnet();
  322         /*
  323          * If protocol is connection-based, can only connect once.
  324          * Otherwise, if connected, try to disconnect first.
  325          * This allows user to disconnect by connecting to, e.g.,
  326          * a null address.
  327          */
  328         if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
  329             ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
  330             (error = sodisconnect(so))))
  331                 error = EISCONN;
  332         else
  333                 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
  334         splx(s);
  335         return (error);
  336 }
  337 
  338 int
  339 soconnect2(so1, so2)
  340         register struct socket *so1;
  341         struct socket *so2;
  342 {
  343         int s = splnet();
  344         int error;
  345 
  346         error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
  347         splx(s);
  348         return (error);
  349 }
  350 
  351 int
  352 sodisconnect(so)
  353         register struct socket *so;
  354 {
  355         int s = splnet();
  356         int error;
  357 
  358         if ((so->so_state & SS_ISCONNECTED) == 0) {
  359                 error = ENOTCONN;
  360                 goto bad;
  361         }
  362         if (so->so_state & SS_ISDISCONNECTING) {
  363                 error = EALREADY;
  364                 goto bad;
  365         }
  366         error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
  367 bad:
  368         splx(s);
  369         return (error);
  370 }
  371 
  372 #define SBLOCKWAIT(f)   (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
  373 /*
  374  * Send on a socket.
  375  * If send must go all at once and message is larger than
  376  * send buffering, then hard error.
  377  * Lock against other senders.
  378  * If must go all at once and not enough room now, then
  379  * inform user that this would block and do nothing.
  380  * Otherwise, if nonblocking, send as much as possible.
  381  * The data to be sent is described by "uio" if nonzero,
  382  * otherwise by the mbuf chain "top" (which must be null
  383  * if uio is not).  Data provided in mbuf chain must be small
  384  * enough to send all at once.
  385  *
  386  * Returns nonzero on error, timeout or signal; callers
  387  * must check for short counts if EINTR/ERESTART are returned.
  388  * Data and control buffers are freed on return.
       *
       * SBLOCKWAIT(), defined just above, turns MSG_DONTWAIT in `flags'
       * into M_NOWAIT for sblock() and anything else into M_WAITOK.
       *
       * Arguments, as used below:
       *   so       socket to send on.
       *   addr     destination handed through to pru_send; an unconnected
       *            socket with a null addr can fail with ENOTCONN or
       *            EDESTADDRREQ.
       *   uio      source of the data to copy in, or null when `top' is used.
       *   top      prebuilt mbuf chain; its m_pkthdr.len is the byte count.
       *   control  optional control mbuf; its m_len is charged against the
       *            available send-buffer space.
       *   flags    MSG_* flags; MSG_EOR, MSG_OOB, MSG_DONTROUTE, MSG_EOF and
       *            MSG_DONTWAIT are examined here.
       *   p        handed through to pru_send; if non-null, its ru_msgsnd
       *            counter is incremented.
  389  */
  390 int
  391 sosend(so, addr, uio, top, control, flags, p)
  392         register struct socket *so;
  393         struct sockaddr *addr;
  394         struct uio *uio;
  395         struct mbuf *top;
  396         struct mbuf *control;
  397         int flags;
  398         struct proc *p;
  399 {
  400         struct mbuf **mp;
  401         register struct mbuf *m;
  402         register long space, len, resid;
  403         int clen = 0, error, s, dontroute, mlen;
              /* A caller-supplied chain is always treated as all-or-nothing. */
  404         int atomic = sosendallatonce(so) || top;
  405 
              /* Bytes to send: from the uio if given, else the chain's length. */
  406         if (uio)
  407                 resid = uio->uio_resid;
  408         else
  409                 resid = top->m_pkthdr.len;
  410         /*
  411          * In theory resid should be unsigned.
  412          * However, space must be signed, as it might be less than 0
  413          * if we over-committed, and we must use a signed comparison
  414          * of space and resid.  On the other hand, a negative resid
  415          * causes us to loop sending 0-length segments to the protocol.
  416          *
  417          * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
  418          * type sockets since that's an error.
  419          */
  420         if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
  421                 error = EINVAL;
  422                 goto out;
  423         }
  424 
              /*
               * A per-call MSG_DONTROUTE is implemented by setting
               * SO_DONTROUTE around each pru_send call below, but only
               * when the socket option is not already set and the
               * protocol is PR_ATOMIC.
               */
  425         dontroute =
  426             (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
  427             (so->so_proto->pr_flags & PR_ATOMIC);
  428         if (p)
  429                 p->p_stats->p_ru.ru_msgsnd++;
  430         if (control)
  431                 clen = control->m_len;
      /* snderr: record the error, restore the saved spl and leave via release. */
  432 #define snderr(errno)   { error = errno; splx(s); goto release; }
  433 
      /* Lock the send buffer; control returns here after sleeping in sbwait(). */
  434 restart:
  435         error = sblock(&so->so_snd, SBLOCKWAIT(flags));
  436         if (error)
  437                 goto out;
  438         do {
  439                 s = splnet();
  440                 if (so->so_state & SS_CANTSENDMORE)
  441                         snderr(EPIPE);
                      /* A pending socket error is reported once, then cleared. */
  442                 if (so->so_error) {
  443                         error = so->so_error;
  444                         so->so_error = 0;
  445                         splx(s);
  446                         goto release;
  447                 }
  448                 if ((so->so_state & SS_ISCONNECTED) == 0) {
  449                         /*
  450                          * `sendto' and `sendmsg' are allowed on a connection-
  451                          * based socket if it supports implied connect.
  452                          * Return ENOTCONN if not connected and no address is
  453                          * supplied.
  454                          */
  455                         if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
  456                             (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
  457                                 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
  458                                     !(resid == 0 && clen != 0))
  459                                         snderr(ENOTCONN);
  460                         } else if (addr == 0)
  461                             snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
  462                                    ENOTCONN : EDESTADDRREQ);
  463                 }
  464                 space = sbspace(&so->so_snd);
                      /* Out-of-band sends get 1024 bytes of extra allowance. */
  465                 if (flags & MSG_OOB)
  466                         space += 1024;
                      /* Can never fit: data or control exceeds the high-water mark. */
  467                 if ((atomic && resid > so->so_snd.sb_hiwat) ||
  468                     clen > so->so_snd.sb_hiwat)
  469                         snderr(EMSGSIZE);
                      /*
                       * Too little room right now for a uio-sourced send:
                       * fail with EWOULDBLOCK on a non-blocking socket,
                       * otherwise unlock, sleep in sbwait() and start over.
                       */
  470                 if (space < resid + clen && uio &&
  471                     (atomic || space < so->so_snd.sb_lowat || space < clen)) {
  472                         if (so->so_state & SS_NBIO)
  473                                 snderr(EWOULDBLOCK);
  474                         sbunlock(&so->so_snd);
  475                         error = sbwait(&so->so_snd);
  476                         splx(s);
                              /* The lock is already dropped, so bypass release. */
  477                         if (error)
  478                                 goto out;
  479                         goto restart;
  480                 }
  481                 splx(s);
  482                 mp = &top;
                      /* Control data is charged against the space first. */
  483                 space -= clen;
  484                 do {
  485                     if (uio == NULL) {
  486                         /*
  487                          * Data is prepackaged in "top".
  488                          */
  489                         resid = 0;
  490                         if (flags & MSG_EOR)
  491                                 top->m_flags |= M_EOR;
  492                     } else do {
                              /*
                               * Copy user data into newly allocated mbufs and
                               * append them to the chain; the first mbuf of a
                               * chain carries the packet header.
                               */
  493                         if (top == 0) {
  494                                 MGETHDR(m, M_WAIT, MT_DATA);
  495                                 if (m == NULL) {
  496                                         error = ENOBUFS;
  497                                         goto release;
  498                                 }
  499                                 mlen = MHLEN;
  500                                 m->m_pkthdr.len = 0;
  501                                 m->m_pkthdr.rcvif = (struct ifnet *)0;
  502                         } else {
  503                                 MGET(m, M_WAIT, MT_DATA);
  504                                 if (m == NULL) {
  505                                         error = ENOBUFS;
  506                                         goto release;
  507                                 }
  508                                 mlen = MLEN;
  509                         }
                              /*
                               * Remainders of MINCLSIZE or more go into a
                               * cluster; if MCLGET leaves M_EXT clear, fall
                               * back to the plain-mbuf path at nopages.
                               */
  510                         if (resid >= MINCLSIZE) {
  511                                 MCLGET(m, M_WAIT);
  512                                 if ((m->m_flags & M_EXT) == 0) 
  513                                         goto nopages;
  514                                 mlen = MCLBYTES;
  515                                 len = min(min(mlen, resid), space);
  516                         } else {
  517 nopages:
  518                                 len = min(min(mlen, resid), space);
  519                                 /*
  520                                  * For datagram protocols, leave room
  521                                  * for protocol headers in first mbuf.
  522                                  */
  523                                 if (atomic && top == 0 && len < mlen)
  524                                         MH_ALIGN(m, len);
  525                         }
  526                         space -= len;
  527                         error = uiomove(mtod(m, caddr_t), (int)len, uio);
  528                         resid = uio->uio_resid;
  529                         m->m_len = len;
                              /*
                               * m is linked into the chain before the error
                               * check, so after a failed uiomove() it is still
                               * freed together with top at out.
                               */
  530                         *mp = m;
  531                         top->m_pkthdr.len += len;
  532                         if (error)
  533                                 goto release;
  534                         mp = &m->m_next;
  535                         if (resid <= 0) {
  536                                 if (flags & MSG_EOR)
  537                                         top->m_flags |= M_EOR;
  538                                 break;
  539                         }
                          /* Non-atomic sends pass one mbuf at a time to the protocol. */
  540                     } while (space > 0 && atomic);
  541                     if (dontroute)
  542                             so->so_options |= SO_DONTROUTE;
  543                     s = splnet();                               /* XXX */
  544                     error = (*so->so_proto->pr_usrreqs->pru_send)(so,
  545                         (flags & MSG_OOB) ? PRUS_OOB :
  546                         /*
  547                          * If the user set MSG_EOF, the protocol
  548                          * understands this flag and nothing left to
  549                          * send then use PRU_SEND_EOF instead of PRU_SEND.
  550                          */
  551                         ((flags & MSG_EOF) &&
  552                          (so->so_proto->pr_flags & PR_IMPLOPCL) &&
  553                          (resid <= 0)) ?
  554                                 PRUS_EOF :
  555                         /* If there is more to send set PRUS_MORETOCOME */
  556                         (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
  557                         top, addr, control, p);
  558                     splx(s);
  559                     if (dontroute)
  560                             so->so_options &= ~SO_DONTROUTE;
                          /*
                           * Forget top and control so the cleanup at out does
                           * not m_freem() them after they were handed to
                           * pru_send (which presumably owns them now).
                           */
  561                     clen = 0;
  562                     control = 0;
  563                     top = 0;
  564                     mp = &top;
  565                     if (error)
  566                         goto release;
  567                 } while (resid && space > 0);
  568         } while (resid);
  569 
      /* release: drop the send-buffer lock taken at restart. */
  570 release:
  571         sbunlock(&so->so_snd);
      /* out: free any chain or control mbuf still held here. */
  572 out:
  573         if (top)
  574                 m_freem(top);
  575         if (control)
  576                 m_freem(control);
  577         return (error);
  578 }
  579 
  580 /*
  581  * Implement receive operations on a socket.
  582  * We depend on the way that records are added to the sockbuf
  583  * by sbappend*.  In particular, each record (mbufs linked through m_next)
  584  * must begin with an address if the protocol so specifies,
  585  * followed by an optional mbuf or mbufs containing ancillary data,
  586  * and then zero or more mbufs of data.
  587  * In order to avoid blocking network interrupts for the entire time here,
  588  * we splx() while doing the actual copy to user space.
  589  * Although the sockbuf is locked, new data may still be appended,
  590  * and thus we must maintain consistency of the sockbuf during that time.
  591  *
  592  * The caller may receive the data as a single mbuf chain by supplying
  593  * an mbuf **mp0 for use in returning the chain.  The uio is then used
  594  * only for the count in uio_resid.
  595  */
  596 int
  597 soreceive(so, psa, uio, mp0, controlp, flagsp)
  598         register struct socket *so;
  599         struct sockaddr **psa;
  600         struct uio *uio;
  601         struct mbuf **mp0;
  602         struct mbuf **controlp;
  603         int *flagsp;
  604 {
  605         register struct mbuf *m, **mp;
  606         register int flags, len, error, s, offset;
  607         struct protosw *pr = so->so_proto;
  608         struct mbuf *nextrecord;
  609         int moff, type = 0;
  610         int orig_resid = uio->uio_resid;
  611 
  612         mp = mp0;
  613         if (psa)
  614                 *psa = 0;
  615         if (controlp)
  616                 *controlp = 0;
  617         if (flagsp)
  618                 flags = *flagsp &~ MSG_EOR;
  619         else
  620                 flags = 0;
  621         if (flags & MSG_OOB) {
  622                 m = m_get(M_WAIT, MT_DATA);
  623                 if (m == NULL)
  624                         return (ENOBUFS);
  625                 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
  626                 if (error)
  627                         goto bad;
  628                 do {
  629                         error = uiomove(mtod(m, caddr_t),
  630                             (int) min(uio->uio_resid, m->m_len), uio);
  631                         m = m_free(m);
  632                 } while (uio->uio_resid && error == 0 && m);
  633 bad:
  634                 if (m)
  635                         m_freem(m);
  636                 return (error);
  637         }
  638         if (mp)
  639                 *mp = (struct mbuf *)0;
  640         if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
  641                 (*pr->pr_usrreqs->pru_rcvd)(so, 0);
  642 
  643 restart:
  644         error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
  645         if (error)
  646                 return (error);
  647         s = splnet();
  648 
  649         m = so->so_rcv.sb_mb;
  650         /*
  651          * If we have less data than requested, block awaiting more
  652          * (subject to any timeout) if:
  653          *   1. the current count is less than the low water mark, or
  654          *   2. MSG_WAITALL is set, and it is possible to do the entire
  655          *      receive operation at once if we block (resid <= hiwat).
  656          *   3. MSG_DONTWAIT is not set
  657          * If MSG_WAITALL is set but resid is larger than the receive buffer,
  658          * we have to do the receive in sections, and thus risk returning
  659          * a short count if a timeout or signal occurs after we start.
  660          */
  661         if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
  662             so->so_rcv.sb_cc < uio->uio_resid) &&
  663             (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
  664             ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
  665             m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
  666                 KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
  667                 if (so->so_error) {
  668                         if (m)
  669                                 goto dontblock;
  670                         error = so->so_error;
  671                         if ((flags & MSG_PEEK) == 0)
  672                                 so->so_error = 0;
  673                         goto release;
  674                 }
  675                 if (so->so_state & SS_CANTRCVMORE) {
  676                         if (m)
  677                                 goto dontblock;
  678                         else
  679                                 goto release;
  680                 }
  681                 for (; m; m = m->m_next)
  682                         if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
  683                                 m = so->so_rcv.sb_mb;
  684                                 goto dontblock;
  685                         }
  686                 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
  687                     (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
  688                         error = ENOTCONN;
  689                         goto release;
  690                 }
  691                 if (uio->uio_resid == 0)
  692                         goto release;
  693                 if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
  694                         error = EWOULDBLOCK;
  695                         goto release;
  696                 }
  697                 sbunlock(&so->so_rcv);
  698                 error = sbwait(&so->so_rcv);
  699                 splx(s);
  700                 if (error)
  701                         return (error);
  702                 goto restart;
  703         }
  704 dontblock:
  705         if (uio->uio_procp)
  706                 uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
  707         nextrecord = m->m_nextpkt;
  708         if (pr->pr_flags & PR_ADDR) {
  709                 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
  710                 orig_resid = 0;
  711                 if (psa)
  712                         *psa = dup_sockaddr(mtod(m, struct sockaddr *),
  713                                             mp0 == 0);
  714                 if (flags & MSG_PEEK) {
  715                         m = m->m_next;
  716                 } else {
  717                         sbfree(&so->so_rcv, m);
  718                         MFREE(m, so->so_rcv.sb_mb);
  719                         m = so->so_rcv.sb_mb;
  720                 }
  721         }
  722         while (m && m->m_type == MT_CONTROL && error == 0) {
  723                 if (flags & MSG_PEEK) {
  724                         if (controlp)
  725                                 *controlp = m_copy(m, 0, m->m_len);
  726                         m = m->m_next;
  727                 } else {
  728                         sbfree(&so->so_rcv, m);
  729                         if (controlp) {
  730                                 if (pr->pr_domain->dom_externalize &&
  731                                     mtod(m, struct cmsghdr *)->cmsg_type ==
  732                                     SCM_RIGHTS)
  733                                    error = (*pr->pr_domain->dom_externalize)(m);
  734                                 *controlp = m;
  735                                 so->so_rcv.sb_mb = m->m_next;
  736                                 m->m_next = 0;
  737                                 m = so->so_rcv.sb_mb;
  738                         } else {
  739                                 MFREE(m, so->so_rcv.sb_mb);
  740                                 m = so->so_rcv.sb_mb;
  741                         }
  742                 }
  743                 if (controlp) {
  744                         orig_resid = 0;
  745                         controlp = &(*controlp)->m_next;
  746                 }
  747         }
  748         if (m) {
  749                 if ((flags & MSG_PEEK) == 0)
  750                         m->m_nextpkt = nextrecord;
  751                 type = m->m_type;
  752                 if (type == MT_OOBDATA)
  753                         flags |= MSG_OOB;
  754         }
  755         moff = 0;
  756         offset = 0;
  757         while (m && uio->uio_resid > 0 && error == 0) {
  758                 if (m->m_type == MT_OOBDATA) {
  759                         if (type != MT_OOBDATA)
  760                                 break;
  761                 } else if (type == MT_OOBDATA)
  762                         break;
  763                 else
  764                     KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
  765                         ("receive 3"));
  766                 so->so_state &= ~SS_RCVATMARK;
  767                 len = uio->uio_resid;
  768                 if (so->so_oobmark && len > so->so_oobmark - offset)
  769                         len = so->so_oobmark - offset;
  770                 if (len > m->m_len - moff)
  771                         len = m->m_len - moff;
  772                 /*
  773                  * If mp is set, just pass back the mbufs.
  774                  * Otherwise copy them out via the uio, then free.
  775                  * Sockbuf must be consistent here (points to current mbuf,
  776                  * it points to next record) when we drop priority;
  777                  * we must note any additions to the sockbuf when we
  778                  * block interrupts again.
  779                  */
  780                 if (mp == 0) {
  781                         splx(s);
  782                         error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
  783                         s = splnet();
  784                         if (error)
  785                                 goto release;
  786                 } else
  787                         uio->uio_resid -= len;
  788                 if (len == m->m_len - moff) {
  789                         if (m->m_flags & M_EOR)
  790                                 flags |= MSG_EOR;
  791                         if (flags & MSG_PEEK) {
  792                                 m = m->m_next;
  793                                 moff = 0;
  794                         } else {
  795                                 nextrecord = m->m_nextpkt;
  796                                 sbfree(&so->so_rcv, m);
  797                                 if (mp) {
  798                                         *mp = m;
  799                                         mp = &m->m_next;
  800                                         so->so_rcv.sb_mb = m = m->m_next;
  801                                         *mp = (struct mbuf *)0;
  802                                 } else {
  803                                         MFREE(m, so->so_rcv.sb_mb);
  804                                         m = so->so_rcv.sb_mb;
  805                                 }
  806                                 if (m)
  807                                         m->m_nextpkt = nextrecord;
  808                         }
  809                 } else {
  810                         if (flags & MSG_PEEK)
  811                                 moff += len;
  812                         else {
  813                                 if (mp)
  814                                         *mp = m_copym(m, 0, len, M_WAIT);
  815                                 m->m_data += len;
  816                                 m->m_len -= len;
  817                                 so->so_rcv.sb_cc -= len;
  818                         }
  819                 }
  820                 if (so->so_oobmark) {
  821                         if ((flags & MSG_PEEK) == 0) {
  822                                 so->so_oobmark -= len;
  823                                 if (so->so_oobmark == 0) {
  824                                         so->so_state |= SS_RCVATMARK;
  825                                         break;
  826                                 }
  827                         } else {
  828                                 offset += len;
  829                                 if (offset == so->so_oobmark)
  830                                         break;
  831                         }
  832                 }
  833                 if (flags & MSG_EOR)
  834                         break;
  835                 /*
  836                  * If the MSG_WAITALL flag is set (for non-atomic socket),
  837                  * we must not quit until "uio->uio_resid == 0" or an error
  838                  * termination.  If a signal/timeout occurs, return
  839                  * with a short count but without error.
  840                  * Keep sockbuf locked against other readers.
  841                  */
  842                 while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
  843                     !sosendallatonce(so) && !nextrecord) {
  844                         if (so->so_error || so->so_state & SS_CANTRCVMORE)
  845                                 break;
  846                         error = sbwait(&so->so_rcv);
  847                         if (error) {
  848                                 sbunlock(&so->so_rcv);
  849                                 splx(s);
  850                                 return (0);
  851                         }
  852                         m = so->so_rcv.sb_mb;
  853                         if (m)
  854                                 nextrecord = m->m_nextpkt;
  855                 }
  856         }
  857 
  858         if (m && pr->pr_flags & PR_ATOMIC) {
  859                 flags |= MSG_TRUNC;
  860                 if ((flags & MSG_PEEK) == 0)
  861                         (void) sbdroprecord(&so->so_rcv);
  862         }
  863         if ((flags & MSG_PEEK) == 0) {
  864                 if (m == 0)
  865                         so->so_rcv.sb_mb = nextrecord;
  866                 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
  867                         (*pr->pr_usrreqs->pru_rcvd)(so, flags);
  868         }
  869         if (orig_resid == uio->uio_resid && orig_resid &&
  870             (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
  871                 sbunlock(&so->so_rcv);
  872                 splx(s);
  873                 goto restart;
  874         }
  875 
  876         if (flagsp)
  877                 *flagsp |= flags;
  878 release:
  879         sbunlock(&so->so_rcv);
  880         splx(s);
  881         return (error);
  882 }
  883 
  884 int
  885 soshutdown(so, how)
  886         register struct socket *so;
  887         register int how;
  888 {
  889         register struct protosw *pr = so->so_proto;
  890 
  891         how++;
  892         if (how & FREAD)
  893                 sorflush(so);
  894         if (how & FWRITE)
  895                 return ((*pr->pr_usrreqs->pru_shutdown)(so));
  896         return (0);
  897 }
  898 
  899 void
  900 sorflush(so)
  901         register struct socket *so;
  902 {
  903         register struct sockbuf *sb = &so->so_rcv;
  904         register struct protosw *pr = so->so_proto;
  905         register int s;
  906         struct sockbuf asb;
  907 
  908         sb->sb_flags |= SB_NOINTR;
  909         (void) sblock(sb, M_WAITOK);
  910         s = splimp();
  911         socantrcvmore(so);
  912         sbunlock(sb);
  913         asb = *sb;
  914         bzero((caddr_t)sb, sizeof (*sb));
  915         splx(s);
  916         if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
  917                 (*pr->pr_domain->dom_dispose)(asb.sb_mb);
  918         sbrelease(&asb);
  919 }
  920 
  921 /*
  922  * Perhaps this routine, and sooptcopyout(), below, ought to come in
  923  * an additional variant to handle the case where the option value needs
  924  * to be some kind of integer, but not a specific size.
  925  * In addition to their use here, these functions are also called by the
  926  * protocol-level pr_ctloutput() routines.
  927  */
  928 int
  929 sooptcopyin(sopt, buf, len, minlen)
  930         struct  sockopt *sopt;
  931         void    *buf;
  932         size_t  len;
  933         size_t  minlen;
  934 {
  935         size_t  valsize;
  936 
  937         /*
  938          * If the user gives us more than we wanted, we ignore it,
  939          * but if we don't get the minimum length the caller
  940          * wants, we return EINVAL.  On success, sopt->sopt_valsize
  941          * is set to however much we actually retrieved.
  942          */
  943         if ((valsize = sopt->sopt_valsize) < minlen)
  944                 return EINVAL;
  945         if (valsize > len)
  946                 sopt->sopt_valsize = valsize = len;
  947 
  948         if (sopt->sopt_p != 0)
  949                 return (copyin(sopt->sopt_val, buf, valsize));
  950 
  951         bcopy(sopt->sopt_val, buf, valsize);
  952         return 0;
  953 }
  954 
  955 int
  956 sosetopt(so, sopt)
  957         struct socket *so;
  958         struct sockopt *sopt;
  959 {
  960         int     error, optval;
  961         struct  linger l;
  962         struct  timeval tv;
  963         short   val;
  964 
  965         error = 0;
  966         if (sopt->sopt_level != SOL_SOCKET) {
  967                 if (so->so_proto && so->so_proto->pr_ctloutput)
  968                         return ((*so->so_proto->pr_ctloutput)
  969                                   (so, sopt));
  970                 error = ENOPROTOOPT;
  971         } else {
  972                 switch (sopt->sopt_name) {
  973                 case SO_LINGER:
  974                         error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
  975                         if (error)
  976                                 goto bad;
  977 
  978                         so->so_linger = l.l_linger;
  979                         if (l.l_onoff)
  980                                 so->so_options |= SO_LINGER;
  981                         else
  982                                 so->so_options &= ~SO_LINGER;
  983                         break;
  984 
  985                 case SO_DEBUG:
  986                 case SO_KEEPALIVE:
  987                 case SO_DONTROUTE:
  988                 case SO_USELOOPBACK:
  989                 case SO_BROADCAST:
  990                 case SO_REUSEADDR:
  991                 case SO_REUSEPORT:
  992                 case SO_OOBINLINE:
  993                 case SO_TIMESTAMP:
  994                         error = sooptcopyin(sopt, &optval, sizeof optval,
  995                                             sizeof optval);
  996                         if (error)
  997                                 goto bad;
  998                         if (optval)
  999                                 so->so_options |= sopt->sopt_name;
 1000                         else
 1001                                 so->so_options &= ~sopt->sopt_name;
 1002                         break;
 1003 
 1004                 case SO_SNDBUF:
 1005                 case SO_RCVBUF:
 1006                 case SO_SNDLOWAT:
 1007                 case SO_RCVLOWAT:
 1008                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1009                                             sizeof optval);
 1010                         if (error)
 1011                                 goto bad;
 1012 
 1013                         /*
 1014                          * Values < 1 make no sense for any of these
 1015                          * options, so disallow them.
 1016                          */
 1017                         if (optval < 1) {
 1018                                 error = EINVAL;
 1019                                 goto bad;
 1020                         }
 1021 
 1022                         switch (sopt->sopt_name) {
 1023                         case SO_SNDBUF:
 1024                         case SO_RCVBUF:
 1025                                 if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
 1026                                               &so->so_snd : &so->so_rcv,
 1027                                               (u_long) optval) == 0) {
 1028                                         error = ENOBUFS;
 1029                                         goto bad;
 1030                                 }
 1031                                 break;
 1032 
 1033                         /*
 1034                          * Make sure the low-water is never greater than
 1035                          * the high-water.
 1036                          */
 1037                         case SO_SNDLOWAT:
 1038                                 so->so_snd.sb_lowat =
 1039                                     (optval > so->so_snd.sb_hiwat) ?
 1040                                     so->so_snd.sb_hiwat : optval;
 1041                                 break;
 1042                         case SO_RCVLOWAT:
 1043                                 so->so_rcv.sb_lowat =
 1044                                     (optval > so->so_rcv.sb_hiwat) ?
 1045                                     so->so_rcv.sb_hiwat : optval;
 1046                                 break;
 1047                         }
 1048                         break;
 1049 
 1050                 case SO_SNDTIMEO:
 1051                 case SO_RCVTIMEO:
 1052                         error = sooptcopyin(sopt, &tv, sizeof tv,
 1053                                             sizeof tv);
 1054                         if (error)
 1055                                 goto bad;
 1056 
 1057                         if (tv.tv_sec > SHRT_MAX / hz - hz) {
 1058                                 error = EDOM;
 1059                                 goto bad;
 1060                         }
 1061                         val = tv.tv_sec * hz + tv.tv_usec / tick;
 1062 
 1063                         switch (sopt->sopt_name) {
 1064                         case SO_SNDTIMEO:
 1065                                 so->so_snd.sb_timeo = val;
 1066                                 break;
 1067                         case SO_RCVTIMEO:
 1068                                 so->so_rcv.sb_timeo = val;
 1069                                 break;
 1070                         }
 1071                         break;
 1072 
 1073                 default:
 1074                         error = ENOPROTOOPT;
 1075                         break;
 1076                 }
 1077                 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
 1078                         (void) ((*so->so_proto->pr_ctloutput)
 1079                                   (so, sopt));
 1080                 }
 1081         }
 1082 bad:
 1083         return (error);
 1084 }
 1085 
 1086 /* Helper routine for getsockopt */
 1087 int
 1088 sooptcopyout(sopt, buf, len)
 1089         struct  sockopt *sopt;
 1090         void    *buf;
 1091         size_t  len;
 1092 {
 1093         int     error;
 1094         size_t  valsize;
 1095 
 1096         error = 0;
 1097 
 1098         /*
 1099          * Documented get behavior is that we always return a value,
 1100          * possibly truncated to fit in the user's buffer.
 1101          * Traditional behavior is that we always tell the user
 1102          * precisely how much we copied, rather than something useful
 1103          * like the total amount we had available for her.
 1104          * Note that this interface is not idempotent; the entire answer must
 1105          * generated ahead of time.
 1106          */
 1107         valsize = min(len, sopt->sopt_valsize);
 1108         sopt->sopt_valsize = valsize;
 1109         if (sopt->sopt_val != 0) {
 1110                 if (sopt->sopt_p != 0)
 1111                         error = copyout(buf, sopt->sopt_val, valsize);
 1112                 else
 1113                         bcopy(buf, sopt->sopt_val, valsize);
 1114         }
 1115         return error;
 1116 }
 1117 
 1118 int
 1119 sogetopt(so, sopt)
 1120         struct socket *so;
 1121         struct sockopt *sopt;
 1122 {
 1123         int     error, optval;
 1124         struct  linger l;
 1125         struct  timeval tv;
 1126 
 1127         error = 0;
 1128         if (sopt->sopt_level != SOL_SOCKET) {
 1129                 if (so->so_proto && so->so_proto->pr_ctloutput) {
 1130                         return ((*so->so_proto->pr_ctloutput)
 1131                                   (so, sopt));
 1132                 } else
 1133                         return (ENOPROTOOPT);
 1134         } else {
 1135                 switch (sopt->sopt_name) {
 1136                 case SO_LINGER:
 1137                         l.l_onoff = so->so_options & SO_LINGER;
 1138                         l.l_linger = so->so_linger;
 1139                         error = sooptcopyout(sopt, &l, sizeof l);
 1140                         break;
 1141 
 1142                 case SO_USELOOPBACK:
 1143                 case SO_DONTROUTE:
 1144                 case SO_DEBUG:
 1145                 case SO_KEEPALIVE:
 1146                 case SO_REUSEADDR:
 1147                 case SO_REUSEPORT:
 1148                 case SO_BROADCAST:
 1149                 case SO_OOBINLINE:
 1150                 case SO_TIMESTAMP:
 1151                         optval = so->so_options & sopt->sopt_name;
 1152 integer:
 1153                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1154                         break;
 1155 
 1156                 case SO_TYPE:
 1157                         optval = so->so_type;
 1158                         goto integer;
 1159 
 1160                 case SO_ERROR:
 1161                         optval = so->so_error;
 1162                         so->so_error = 0;
 1163                         goto integer;
 1164 
 1165                 case SO_SNDBUF:
 1166                         optval = so->so_snd.sb_hiwat;
 1167                         goto integer;
 1168 
 1169                 case SO_RCVBUF:
 1170                         optval = so->so_rcv.sb_hiwat;
 1171                         goto integer;
 1172 
 1173                 case SO_SNDLOWAT:
 1174                         optval = so->so_snd.sb_lowat;
 1175                         goto integer;
 1176 
 1177                 case SO_RCVLOWAT:
 1178                         optval = so->so_rcv.sb_lowat;
 1179                         goto integer;
 1180 
 1181                 case SO_SNDTIMEO:
 1182                 case SO_RCVTIMEO:
 1183                         optval = (sopt->sopt_name == SO_SNDTIMEO ?
 1184                                   so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
 1185 
 1186                         tv.tv_sec = optval / hz;
 1187                         tv.tv_usec = (optval % hz) * tick;
 1188                         error = sooptcopyout(sopt, &tv, sizeof tv);
 1189                         break;                  
 1190 
 1191                 default:
 1192                         error = ENOPROTOOPT;
 1193                         break;
 1194                 }
 1195                 return (error);
 1196         }
 1197 }
 1198 
 1199 void
 1200 sohasoutofband(so)
 1201         register struct socket *so;
 1202 {
 1203         if (so->so_sigio != NULL)
 1204                 pgsigio(so->so_sigio, SIGURG, 0);
 1205         selwakeup(&so->so_rcv.sb_sel);
 1206 }
 1207 
 1208 int
 1209 sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
 1210 {
 1211         int revents = 0;
 1212         int s = splnet();
 1213 
 1214         if (events & (POLLIN | POLLRDNORM))
 1215                 if (soreadable(so))
 1216                         revents |= events & (POLLIN | POLLRDNORM);
 1217 
 1218         if (events & (POLLOUT | POLLWRNORM))
 1219                 if (sowriteable(so))
 1220                         revents |= events & (POLLOUT | POLLWRNORM);
 1221 
 1222         if (events & (POLLPRI | POLLRDBAND))
 1223                 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
 1224                         revents |= events & (POLLPRI | POLLRDBAND);
 1225 
 1226         if (revents == 0) {
 1227                 if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
 1228                         selrecord(p, &so->so_rcv.sb_sel);
 1229                         so->so_rcv.sb_flags |= SB_SEL;
 1230                 }
 1231 
 1232                 if (events & (POLLOUT | POLLWRNORM)) {
 1233                         selrecord(p, &so->so_snd.sb_sel);
 1234                         so->so_snd.sb_flags |= SB_SEL;
 1235                 }
 1236         }
 1237 
 1238         splx(s);
 1239         return (revents);
 1240 }

Cache object: acfcfc6094f4dd0c24cd132219a48303


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.