The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_socket2.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: uipc_socket2.c,v 1.66 2005/02/26 21:34:55 perry Exp $  */
    2 
    3 /*
    4  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)uipc_socket2.c      8.2 (Berkeley) 2/14/95
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.66 2005/02/26 21:34:55 perry Exp $");
   36 
   37 #include "opt_mbuftrace.h"
   38 #include "opt_sb_max.h"
   39 
   40 #include <sys/param.h>
   41 #include <sys/systm.h>
   42 #include <sys/proc.h>
   43 #include <sys/file.h>
   44 #include <sys/buf.h>
   45 #include <sys/malloc.h>
   46 #include <sys/mbuf.h>
   47 #include <sys/protosw.h>
   48 #include <sys/poll.h>
   49 #include <sys/socket.h>
   50 #include <sys/socketvar.h>
   51 #include <sys/signalvar.h>
   52 
   53 /*
   54  * Primitive routines for operating on sockets and socket buffers
   55  */
   56 
   57 /* strings for sleep message: */
   58 const char      netcon[] = "netcon";
   59 const char      netcls[] = "netcls";
   60 const char      netio[] = "netio";      /* wait message used by sbwait() */
   61 const char      netlck[] = "netlck";    /* wait message used by sb_lock() */
   62 
   63 u_long  sb_max = SB_MAX;        /* maximum socket buffer size */
   64 static u_long sb_max_adj;       /* sb_max * MCLBYTES / (MSIZE + MCLBYTES); set by sb_max_set(), caps cc in sbreserve() */
   65 
   66 /*
   67  * Procedures to manipulate state flags of socket
   68  * and do appropriate wakeups.  Normal sequence from the
   69  * active (originating) side is that soisconnecting() is
   70  * called during processing of connect() call,
   71  * resulting in an eventual call to soisconnected() if/when the
   72  * connection is established.  When the connection is torn down
   73  * soisdisconnecting() is called during processing of disconnect() call,
   74  * and soisdisconnected() is called when the connection to the peer
   75  * is totally severed.  The semantics of these routines are such that
   76  * connectionless protocols can call soisconnected() and soisdisconnected()
   77  * only, bypassing the in-progress calls when setting up a ``connection''
   78  * takes no time.
   79  *
   80  * From the passive side, a socket is created with
   81  * two queues of sockets: so_q0 for connections in progress
   82  * and so_q for connections already made and awaiting user acceptance.
   83  * As a protocol is preparing incoming connections, it creates a socket
   84  * structure queued on so_q0 by calling sonewconn().  When the connection
   85  * is established, soisconnected() is called, and transfers the
   86  * socket structure to so_q, making it available to accept().
   87  *
   88  * If a socket is closed with sockets on either
   89  * so_q0 or so_q, these sockets are dropped.
   90  *
   91  * If higher level protocols are implemented in
   92  * the kernel, the wakeups done here will sometimes
   93  * cause software-interrupt process scheduling.
   94  */
   95 
   96 void
   97 soisconnecting(struct socket *so)
   98 {
   99 
  100         so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
  101         so->so_state |= SS_ISCONNECTING;
  102 }
  103 
  104 void
  105 soisconnected(struct socket *so)
  106 {
  107         struct socket   *head;
  108 
  109         head = so->so_head;
  110         so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
  111         so->so_state |= SS_ISCONNECTED;
  112         if (head && soqremque(so, 0)) {
  113                 soqinsque(head, so, 1);
  114                 sorwakeup(head);
  115                 wakeup((caddr_t)&head->so_timeo);
  116         } else {
  117                 wakeup((caddr_t)&so->so_timeo);
  118                 sorwakeup(so);
  119                 sowwakeup(so);
  120         }
  121 }
  122 
  123 void
  124 soisdisconnecting(struct socket *so)
  125 {
  126 
  127         so->so_state &= ~SS_ISCONNECTING;
  128         so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
  129         wakeup((caddr_t)&so->so_timeo);
  130         sowwakeup(so);
  131         sorwakeup(so);
  132 }
  133 
  134 void
  135 soisdisconnected(struct socket *so)
  136 {
  137 
  138         so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
  139         so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
  140         wakeup((caddr_t)&so->so_timeo);
  141         sowwakeup(so);
  142         sorwakeup(so);
  143 }
  144 
  145 /*
  146  * When an attempt at a new connection is noted on a socket
  147  * which accepts connections, sonewconn is called.  If the
  148  * connection is possible (subject to space constraints, etc.)
  149  * then we allocate a new structure, propoerly linked into the
  150  * data structure of the original socket, and return this.
  151  * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
  152  *
  153  * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
  154  * to catch calls that are missing the (new) second parameter.
  155  */
  156 struct socket *
  157 sonewconn1(struct socket *head, int connstatus)
  158 {
  159         struct socket   *so;
  160         int             soqueue;
  161 
  162         soqueue = connstatus ? 1 : 0;
  163         if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
  164                 return ((struct socket *)0);
  165         so = pool_get(&socket_pool, PR_NOWAIT);
  166         if (so == NULL)
  167                 return (NULL);
  168         memset((caddr_t)so, 0, sizeof(*so));
  169         so->so_type = head->so_type;
  170         so->so_options = head->so_options &~ SO_ACCEPTCONN;
  171         so->so_linger = head->so_linger;
  172         so->so_state = head->so_state | SS_NOFDREF;
  173         so->so_proto = head->so_proto;
  174         so->so_timeo = head->so_timeo;
  175         so->so_pgid = head->so_pgid;
  176         so->so_send = head->so_send;
  177         so->so_receive = head->so_receive;
  178         so->so_uid = head->so_uid;
  179 #ifdef MBUFTRACE
  180         so->so_mowner = head->so_mowner;
  181         so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
  182         so->so_snd.sb_mowner = head->so_snd.sb_mowner;
  183 #endif
  184         (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
  185         soqinsque(head, so, soqueue);
  186         if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
  187             (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
  188             (struct proc *)0)) {
  189                 (void) soqremque(so, soqueue);
  190                 pool_put(&socket_pool, so);
  191                 return (NULL);
  192         }
  193         if (connstatus) {
  194                 sorwakeup(head);
  195                 wakeup((caddr_t)&head->so_timeo);
  196                 so->so_state |= connstatus;
  197         }
  198         return (so);
  199 }
  200 
  201 void
  202 soqinsque(struct socket *head, struct socket *so, int q)
  203 {
  204 
  205 #ifdef DIAGNOSTIC
  206         if (so->so_onq != NULL)
  207                 panic("soqinsque");
  208 #endif
  209 
  210         so->so_head = head;
  211         if (q == 0) {
  212                 head->so_q0len++;
  213                 so->so_onq = &head->so_q0;
  214         } else {
  215                 head->so_qlen++;
  216                 so->so_onq = &head->so_q;
  217         }
  218         TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
  219 }
  220 
  221 int
  222 soqremque(struct socket *so, int q)
  223 {
  224         struct socket   *head;
  225 
  226         head = so->so_head;
  227         if (q == 0) {
  228                 if (so->so_onq != &head->so_q0)
  229                         return (0);
  230                 head->so_q0len--;
  231         } else {
  232                 if (so->so_onq != &head->so_q)
  233                         return (0);
  234                 head->so_qlen--;
  235         }
  236         TAILQ_REMOVE(so->so_onq, so, so_qe);
  237         so->so_onq = NULL;
  238         so->so_head = NULL;
  239         return (1);
  240 }
  241 
  242 /*
  243  * Socantsendmore indicates that no more data will be sent on the
  244  * socket; it would normally be applied to a socket when the user
  245  * informs the system that no more data is to be sent, by the protocol
  246  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
  247  * will be received, and will normally be applied to the socket by a
  248  * protocol when it detects that the peer will send no more data.
  249  * Data queued for reading in the socket may yet be read.
  250  */
  251 
  252 void
  253 socantsendmore(struct socket *so)
  254 {
  255 
  256         so->so_state |= SS_CANTSENDMORE;
  257         sowwakeup(so);
  258 }
  259 
  260 void
  261 socantrcvmore(struct socket *so)
  262 {
  263 
  264         so->so_state |= SS_CANTRCVMORE;
  265         sorwakeup(so);
  266 }
  267 
  268 /*
  269  * Wait for data to arrive at/drain from a socket buffer.
  270  */
  271 int
  272 sbwait(struct sockbuf *sb)
  273 {
  274 
  275         sb->sb_flags |= SB_WAIT;
  276         return (tsleep((caddr_t)&sb->sb_cc,
  277             (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
  278             sb->sb_timeo));
  279 }
  280 
  281 /*
  282  * Lock a sockbuf already known to be locked;
  283  * return any error returned from sleep (EINTR).
  284  */
  285 int
  286 sb_lock(struct sockbuf *sb)
  287 {
  288         int     error;
  289 
  290         while (sb->sb_flags & SB_LOCK) {
  291                 sb->sb_flags |= SB_WANT;
  292                 error = tsleep((caddr_t)&sb->sb_flags,
  293                     (sb->sb_flags & SB_NOINTR) ?  PSOCK : PSOCK|PCATCH,
  294                     netlck, 0);
  295                 if (error)
  296                         return (error);
  297         }
  298         sb->sb_flags |= SB_LOCK;
  299         return (0);
  300 }
  301 
  302 /*
  303  * Wakeup processes waiting on a socket buffer.
  304  * Do asynchronous notification via SIGIO
  305  * if the socket buffer has the SB_ASYNC flag set.
  306  */
  307 void
  308 sowakeup(struct socket *so, struct sockbuf *sb, int code)
  309 {
  310         selnotify(&sb->sb_sel, 0);
  311         sb->sb_flags &= ~SB_SEL;
  312         if (sb->sb_flags & SB_WAIT) {
  313                 sb->sb_flags &= ~SB_WAIT;
  314                 wakeup((caddr_t)&sb->sb_cc);
  315         }
  316         if (sb->sb_flags & SB_ASYNC) {
  317                 int band;
  318                 if (code == POLL_IN)
  319                         band = POLLIN|POLLRDNORM;
  320                 else
  321                         band = POLLOUT|POLLWRNORM;
  322                 fownsignal(so->so_pgid, SIGIO, code, band, so);
  323         }
  324         if (sb->sb_flags & SB_UPCALL)
  325                 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
  326 }
  327 
  328 /*
  329  * Socket buffer (struct sockbuf) utility routines.
  330  *
  331  * Each socket contains two socket buffers: one for sending data and
  332  * one for receiving data.  Each buffer contains a queue of mbufs,
  333  * information about the number of mbufs and amount of data in the
  334  * queue, and other fields allowing poll() statements and notification
  335  * on data availability to be implemented.
  336  *
  337  * Data stored in a socket buffer is maintained as a list of records.
  338  * Each record is a list of mbufs chained together with the m_next
  339  * field.  Records are chained together with the m_nextpkt field. The upper
  340  * level routine soreceive() expects the following conventions to be
  341  * observed when placing information in the receive buffer:
  342  *
  343  * 1. If the protocol requires each message be preceded by the sender's
  344  *    name, then a record containing that name must be present before
  345  *    any associated data (mbuf's must be of type MT_SONAME).
  346  * 2. If the protocol supports the exchange of ``access rights'' (really
  347  *    just additional data associated with the message), and there are
  348  *    ``rights'' to be received, then a record containing this data
  349  *    should be present (mbuf's must be of type MT_CONTROL).
  350  * 3. If a name or rights record exists, then it must be followed by
  351  *    a data record, perhaps of zero length.
  352  *
  353  * Before using a new socket structure it is first necessary to reserve
  354  * buffer space to the socket, by calling sbreserve().  This should commit
  355  * some of the available buffer space in the system buffer pool for the
  356  * socket (currently, it does nothing but enforce limits).  The space
  357  * should be released by calling sbrelease() when the socket is destroyed.
  358  */
  359 
  360 int
  361 sb_max_set(u_long new_sbmax)
  362 {
  363         int s;
  364 
  365         if (new_sbmax < (16 * 1024))
  366                 return (EINVAL);
  367 
  368         s = splsoftnet();
  369         sb_max = new_sbmax;
  370         sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
  371         splx(s);
  372 
  373         return (0);
  374 }
  375 
  376 int
  377 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
  378 {
  379 
  380         if (sbreserve(&so->so_snd, sndcc, so) == 0)
  381                 goto bad;
  382         if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
  383                 goto bad2;
  384         if (so->so_rcv.sb_lowat == 0)
  385                 so->so_rcv.sb_lowat = 1;
  386         if (so->so_snd.sb_lowat == 0)
  387                 so->so_snd.sb_lowat = MCLBYTES;
  388         if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
  389                 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
  390         return (0);
  391  bad2:
  392         sbrelease(&so->so_snd, so);
  393  bad:
  394         return (ENOBUFS);
  395 }
  396 
  397 /*
  398  * Allot mbufs to a sockbuf.
  399  * Attempt to scale mbmax so that mbcnt doesn't become limiting
  400  * if buffering efficiency is near the normal case.
  401  */
  402 int
  403 sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
  404 {
  405         struct proc *p = curproc; /* XXX */
  406         rlim_t maxcc;
  407         uid_t uid;
  408 
  409         KDASSERT(sb_max_adj != 0);
  410         if (cc == 0 || cc > sb_max_adj)
  411                 return (0);
  412         if (so) {
  413                 if (p && p->p_ucred->cr_uid == so->so_uid)
  414                         maxcc = p->p_rlimit[RLIMIT_SBSIZE].rlim_cur;
  415                 else
  416                         maxcc = RLIM_INFINITY;
  417                 uid = so->so_uid;
  418         } else {
  419                 uid = 0;        /* XXX: nothing better */
  420                 maxcc = RLIM_INFINITY;
  421         }
  422         if (!chgsbsize(uid, &sb->sb_hiwat, cc, maxcc))
  423                 return 0;
  424         sb->sb_mbmax = min(cc * 2, sb_max);
  425         if (sb->sb_lowat > sb->sb_hiwat)
  426                 sb->sb_lowat = sb->sb_hiwat;
  427         return (1);
  428 }
  429 
  430 /*
  431  * Free mbufs held by a socket, and reserved mbuf space.
  432  */
  433 void
  434 sbrelease(struct sockbuf *sb, struct socket *so)
  435 {
  436 
  437         sbflush(sb);
  438         (void)chgsbsize(so->so_uid, &sb->sb_hiwat, 0,
  439             RLIM_INFINITY);
  440         sb->sb_mbmax = 0;
  441 }
  442 
  443 /*
  444  * Routines to add and remove
  445  * data from an mbuf queue.
  446  *
  447  * The routines sbappend() or sbappendrecord() are normally called to
  448  * append new mbufs to a socket buffer, after checking that adequate
  449  * space is available, comparing the function sbspace() with the amount
  450  * of data to be added.  sbappendrecord() differs from sbappend() in
  451  * that data supplied is treated as the beginning of a new record.
  452  * To place a sender's address, optional access rights, and data in a
  453  * socket receive buffer, sbappendaddr() should be used.  To place
  454  * access rights and data in a socket receive buffer, sbappendrights()
  455  * should be used.  In either case, the new data begins a new record.
  456  * Note that unlike sbappend() and sbappendrecord(), these routines check
  457  * for the caller that there will be enough space to store the data.
  458  * Each fails if there is not enough space, or if it cannot find mbufs
  459  * to store additional information in.
  460  *
  461  * Reliable protocols may use the socket send buffer to hold data
  462  * awaiting acknowledgement.  Data is normally copied from a socket
  463  * send buffer in a protocol with m_copy for output to a peer,
   464  * and is then removed from the socket buffer with sbdrop()
  465  * or sbdroprecord() when the data is acknowledged by the peer.
  466  */
  467 
  468 #ifdef SOCKBUF_DEBUG
  469 void
  470 sblastrecordchk(struct sockbuf *sb, const char *where)
  471 {
  472         struct mbuf *m = sb->sb_mb;
  473 
  474         while (m && m->m_nextpkt)
  475                 m = m->m_nextpkt;
  476 
  477         if (m != sb->sb_lastrecord) {
  478                 printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
  479                     sb->sb_mb, sb->sb_lastrecord, m);
  480                 printf("packet chain:\n");
  481                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
  482                         printf("\t%p\n", m);
  483                 panic("sblastrecordchk from %s", where);
  484         }
  485 }
  486 
  487 void
  488 sblastmbufchk(struct sockbuf *sb, const char *where)
  489 {
  490         struct mbuf *m = sb->sb_mb;
  491         struct mbuf *n;
  492 
  493         while (m && m->m_nextpkt)
  494                 m = m->m_nextpkt;
  495 
  496         while (m && m->m_next)
  497                 m = m->m_next;
  498 
  499         if (m != sb->sb_mbtail) {
  500                 printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
  501                     sb->sb_mb, sb->sb_mbtail, m);
  502                 printf("packet tree:\n");
  503                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
  504                         printf("\t");
  505                         for (n = m; n != NULL; n = n->m_next)
  506                                 printf("%p ", n);
  507                         printf("\n");
  508                 }
  509                 panic("sblastmbufchk from %s", where);
  510         }
  511 }
  512 #endif /* SOCKBUF_DEBUG */
  513 
  514 /*
  515  * Link a chain of records onto a socket buffer
  516  */
  517 #define SBLINKRECORDCHAIN(sb, m0, mlast)                                \
  518 do {                                                                    \
  519         if ((sb)->sb_lastrecord != NULL)                                \
  520                 (sb)->sb_lastrecord->m_nextpkt = (m0);                  \
  521         else                                                            \
  522                 (sb)->sb_mb = (m0);                                     \
  523         (sb)->sb_lastrecord = (mlast);                                  \
  524 } while (/*CONSTCOND*/0)
  525 
  526 
  527 #define SBLINKRECORD(sb, m0)                                            \
  528     SBLINKRECORDCHAIN(sb, m0, m0)
  529 
  530 /*
  531  * Append mbuf chain m to the last record in the
  532  * socket buffer sb.  The additional space associated
  533  * the mbuf chain is recorded in sb.  Empty mbufs are
  534  * discarded and mbufs are compacted where possible.
  535  */
  536 void
  537 sbappend(struct sockbuf *sb, struct mbuf *m)
  538 {
  539         struct mbuf     *n;
  540 
  541         if (m == 0)
  542                 return;
  543 
  544 #ifdef MBUFTRACE
  545         m_claimm(m, sb->sb_mowner);
  546 #endif
  547 
  548         SBLASTRECORDCHK(sb, "sbappend 1");
  549 
  550         if ((n = sb->sb_lastrecord) != NULL) {
  551                 /*
  552                  * XXX Would like to simply use sb_mbtail here, but
  553                  * XXX I need to verify that I won't miss an EOR that
  554                  * XXX way.
  555                  */
  556                 do {
  557                         if (n->m_flags & M_EOR) {
  558                                 sbappendrecord(sb, m); /* XXXXXX!!!! */
  559                                 return;
  560                         }
  561                 } while (n->m_next && (n = n->m_next));
  562         } else {
  563                 /*
  564                  * If this is the first record in the socket buffer, it's
  565                  * also the last record.
  566                  */
  567                 sb->sb_lastrecord = m;
  568         }
  569         sbcompress(sb, m, n);
  570         SBLASTRECORDCHK(sb, "sbappend 2");
  571 }
  572 
  573 /*
  574  * This version of sbappend() should only be used when the caller
  575  * absolutely knows that there will never be more than one record
  576  * in the socket buffer, that is, a stream protocol (such as TCP).
  577  */
  578 void
  579 sbappendstream(struct sockbuf *sb, struct mbuf *m)
  580 {
  581 
  582         KDASSERT(m->m_nextpkt == NULL);
  583         KASSERT(sb->sb_mb == sb->sb_lastrecord);
  584 
  585         SBLASTMBUFCHK(sb, __func__);
  586 
  587 #ifdef MBUFTRACE
  588         m_claimm(m, sb->sb_mowner);
  589 #endif
  590 
  591         sbcompress(sb, m, sb->sb_mbtail);
  592 
  593         sb->sb_lastrecord = sb->sb_mb;
  594         SBLASTRECORDCHK(sb, __func__);
  595 }
  596 
  597 #ifdef SOCKBUF_DEBUG
  598 void
  599 sbcheck(struct sockbuf *sb)
  600 {
  601         struct mbuf     *m;
  602         u_long          len, mbcnt;
  603 
  604         len = 0;
  605         mbcnt = 0;
  606         for (m = sb->sb_mb; m; m = m->m_next) {
  607                 len += m->m_len;
  608                 mbcnt += MSIZE;
  609                 if (m->m_flags & M_EXT)
  610                         mbcnt += m->m_ext.ext_size;
  611                 if (m->m_nextpkt)
  612                         panic("sbcheck nextpkt");
  613         }
  614         if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
  615                 printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
  616                     mbcnt, sb->sb_mbcnt);
  617                 panic("sbcheck");
  618         }
  619 }
  620 #endif
  621 
  622 /*
  623  * As above, except the mbuf chain
  624  * begins a new record.
  625  */
  626 void
  627 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
  628 {
  629         struct mbuf     *m;
  630 
  631         if (m0 == 0)
  632                 return;
  633 
  634 #ifdef MBUFTRACE
  635         m_claimm(m0, sb->sb_mowner);
  636 #endif
  637         /*
  638          * Put the first mbuf on the queue.
  639          * Note this permits zero length records.
  640          */
  641         sballoc(sb, m0);
  642         SBLASTRECORDCHK(sb, "sbappendrecord 1");
  643         SBLINKRECORD(sb, m0);
  644         m = m0->m_next;
  645         m0->m_next = 0;
  646         if (m && (m0->m_flags & M_EOR)) {
  647                 m0->m_flags &= ~M_EOR;
  648                 m->m_flags |= M_EOR;
  649         }
  650         sbcompress(sb, m, m0);
  651         SBLASTRECORDCHK(sb, "sbappendrecord 2");
  652 }
  653 
  654 /*
  655  * As above except that OOB data
  656  * is inserted at the beginning of the sockbuf,
  657  * but after any other OOB data.
  658  */
  659 void
  660 sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
  661 {
  662         struct mbuf     *m, **mp;
  663 
  664         if (m0 == 0)
  665                 return;
  666 
  667         SBLASTRECORDCHK(sb, "sbinsertoob 1");
  668 
  669         for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
  670             again:
  671                 switch (m->m_type) {
  672 
  673                 case MT_OOBDATA:
  674                         continue;               /* WANT next train */
  675 
  676                 case MT_CONTROL:
  677                         if ((m = m->m_next) != NULL)
  678                                 goto again;     /* inspect THIS train further */
  679                 }
  680                 break;
  681         }
  682         /*
  683          * Put the first mbuf on the queue.
  684          * Note this permits zero length records.
  685          */
  686         sballoc(sb, m0);
  687         m0->m_nextpkt = *mp;
  688         if (*mp == NULL) {
  689                 /* m0 is actually the new tail */
  690                 sb->sb_lastrecord = m0;
  691         }
  692         *mp = m0;
  693         m = m0->m_next;
  694         m0->m_next = 0;
  695         if (m && (m0->m_flags & M_EOR)) {
  696                 m0->m_flags &= ~M_EOR;
  697                 m->m_flags |= M_EOR;
  698         }
  699         sbcompress(sb, m, m0);
  700         SBLASTRECORDCHK(sb, "sbinsertoob 2");
  701 }
  702 
  703 /*
  704  * Append address and data, and optionally, control (ancillary) data
  705  * to the receive queue of a socket.  If present,
  706  * m0 must include a packet header with total length.
  707  * Returns 0 if no space in sockbuf or insufficient mbufs.
  708  */
  709 int
  710 sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
  711         struct mbuf *control)
  712 {
  713         struct mbuf     *m, *n, *nlast;
  714         int             space, len;
  715 
  716         space = asa->sa_len;
  717 
  718         if (m0 != NULL) {
  719                 if ((m0->m_flags & M_PKTHDR) == 0)
  720                         panic("sbappendaddr");
  721                 space += m0->m_pkthdr.len;
  722 #ifdef MBUFTRACE
  723                 m_claimm(m0, sb->sb_mowner);
  724 #endif
  725         }
  726         for (n = control; n; n = n->m_next) {
  727                 space += n->m_len;
  728                 MCLAIM(n, sb->sb_mowner);
  729                 if (n->m_next == 0)     /* keep pointer to last control buf */
  730                         break;
  731         }
  732         if (space > sbspace(sb))
  733                 return (0);
  734         MGET(m, M_DONTWAIT, MT_SONAME);
  735         if (m == 0)
  736                 return (0);
  737         MCLAIM(m, sb->sb_mowner);
  738         /*
  739          * XXX avoid 'comparison always true' warning which isn't easily
  740          * avoided.
  741          */
  742         len = asa->sa_len;
  743         if (len > MLEN) {
  744                 MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
  745                 if ((m->m_flags & M_EXT) == 0) {
  746                         m_free(m);
  747                         return (0);
  748                 }
  749         }
  750         m->m_len = asa->sa_len;
  751         memcpy(mtod(m, caddr_t), (caddr_t)asa, asa->sa_len);
  752         if (n)
  753                 n->m_next = m0;         /* concatenate data to control */
  754         else
  755                 control = m0;
  756         m->m_next = control;
  757 
  758         SBLASTRECORDCHK(sb, "sbappendaddr 1");
  759 
  760         for (n = m; n->m_next != NULL; n = n->m_next)
  761                 sballoc(sb, n);
  762         sballoc(sb, n);
  763         nlast = n;
  764         SBLINKRECORD(sb, m);
  765 
  766         sb->sb_mbtail = nlast;
  767         SBLASTMBUFCHK(sb, "sbappendaddr");
  768 
  769         SBLASTRECORDCHK(sb, "sbappendaddr 2");
  770 
  771         return (1);
  772 }
  773 
  774 /*
  775  * Helper for sbappendchainaddr: prepend a struct sockaddr* to
  776  * an mbuf chain.
  777  */
  778 static __inline struct mbuf *
  779 m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
  780                    const struct sockaddr *asa)
  781 {
  782         struct mbuf *m;
  783         const int salen = asa->sa_len;
  784 
  785         /* only the first in each chain need be a pkthdr */
  786         MGETHDR(m, M_DONTWAIT, MT_SONAME);
  787         if (m == 0)
  788                 return (0);
  789         MCLAIM(m, sb->sb_mowner);
  790 #ifdef notyet
  791         if (salen > MHLEN) {
  792                 MEXTMALLOC(m, salen, M_NOWAIT);
  793                 if ((m->m_flags & M_EXT) == 0) {
  794                         m_free(m);
  795                         return (0);
  796                 }
  797         }
  798 #else
  799         KASSERT(salen <= MHLEN);
  800 #endif
  801         m->m_len = salen;
  802         memcpy(mtod(m, caddr_t), (caddr_t)asa, salen);
  803         m->m_next = m0;
  804         m->m_pkthdr.len = salen + m0->m_pkthdr.len;
  805 
  806         return m;
  807 }
  808 
  809 int
  810 sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
  811                   struct mbuf *m0, int sbprio)
  812 {
  813         int space;
  814         struct mbuf *m, *n, *n0, *nlast;
  815         int error;
  816 
  817         /*
  818          * XXX sbprio reserved for encoding priority of this* request:
  819          *  SB_PRIO_NONE --> honour normal sb limits
  820          *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
  821          *      take whole chain. Intended for large requests
  822          *      that should be delivered atomically (all, or none).
  823          * SB_PRIO_OVERDRAFT -- allow a small (2*MLEN) overflow
  824          *       over normal socket limits, for messages indicating
  825          *       buffer overflow in earlier normal/lower-priority messages
  826          * SB_PRIO_BESTEFFORT -->  ignore limits entirely.
  827          *       Intended for  kernel-generated messages only.
  828          *        Up to generator to avoid total mbuf resource exhaustion.
  829          */
  830         (void)sbprio;
  831 
  832         if (m0 && (m0->m_flags & M_PKTHDR) == 0)
  833                 panic("sbappendaddrchain");
  834 
  835         space = sbspace(sb);
  836 
  837 #ifdef notyet
  838         /*
  839          * Enforce SB_PRIO_* limits as described above.
  840          */
  841 #endif
  842 
  843         n0 = NULL;
  844         nlast = NULL;
  845         for (m = m0; m; m = m->m_nextpkt) {
  846                 struct mbuf *np;
  847 
  848 #ifdef MBUFTRACE
  849                 m_claimm(m, sb->sb_mowner);
  850 #endif
  851 
  852                 /* Prepend sockaddr to this record (m) of input chain m0 */
  853                 n = m_prepend_sockaddr(sb, m, asa);
  854                 if (n == NULL) {
  855                         error = ENOBUFS;
  856                         goto bad;
  857                 }
  858 
  859                 /* Append record (asa+m) to end of new chain n0 */
  860                 if (n0 == NULL) {
  861                         n0 = n;
  862                 } else {
  863                         nlast->m_nextpkt = n;
  864                 }
  865                 /* Keep track of last record on new chain */
  866                 nlast = n;
  867 
  868                 for (np = n; np; np = np->m_next)
  869                         sballoc(sb, np);
  870         }
  871 
  872         SBLASTRECORDCHK(sb, "sbappendaddrchain 1");
  873 
  874         /* Drop the entire chain of (asa+m) records onto the socket */
  875         SBLINKRECORDCHAIN(sb, n0, nlast);
  876 
  877         SBLASTRECORDCHK(sb, "sbappendaddrchain 2");
  878 
  879         for (m = nlast; m->m_next; m = m->m_next)
  880                 ;
  881         sb->sb_mbtail = m;
  882         SBLASTMBUFCHK(sb, "sbappendaddrchain");
  883 
  884         return (1);
  885 
  886 bad:
  887         /*
  888          * On error, free the prepended addreseses. For consistency
  889          * with sbappendaddr(), leave it to our caller to free
  890          * the input record chain passed to us as m0.
  891          */
  892         while ((n = n0) != NULL) {
  893                 struct mbuf *np;
  894 
  895                 /* Undo the sballoc() of this record */
  896                 for (np = n; np; np = np->m_next)
  897                         sbfree(sb, np);
  898 
  899                 n0 = n->m_nextpkt;      /* iterate at next prepended address */
  900                 MFREE(n, np);           /* free prepended address (not data) */
  901         }
  902         return 0;
  903 }
  904 
  905 
  906 int
  907 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
  908 {
  909         struct mbuf     *m, *mlast, *n;
  910         int             space;
  911 
  912         space = 0;
  913         if (control == 0)
  914                 panic("sbappendcontrol");
  915         for (m = control; ; m = m->m_next) {
  916                 space += m->m_len;
  917                 MCLAIM(m, sb->sb_mowner);
  918                 if (m->m_next == 0)
  919                         break;
  920         }
  921         n = m;                  /* save pointer to last control buffer */
  922         for (m = m0; m; m = m->m_next) {
  923                 MCLAIM(m, sb->sb_mowner);
  924                 space += m->m_len;
  925         }
  926         if (space > sbspace(sb))
  927                 return (0);
  928         n->m_next = m0;                 /* concatenate data to control */
  929 
  930         SBLASTRECORDCHK(sb, "sbappendcontrol 1");
  931 
  932         for (m = control; m->m_next != NULL; m = m->m_next)
  933                 sballoc(sb, m);
  934         sballoc(sb, m);
  935         mlast = m;
  936         SBLINKRECORD(sb, control);
  937 
  938         sb->sb_mbtail = mlast;
  939         SBLASTMBUFCHK(sb, "sbappendcontrol");
  940 
  941         SBLASTRECORDCHK(sb, "sbappendcontrol 2");
  942 
  943         return (1);
  944 }
  945 
  946 /*
  947  * Compress mbuf chain m into the socket
  948  * buffer sb following mbuf n.  If n
  949  * is null, the buffer is presumed empty.
  950  */
  951 void
  952 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
  953 {
  954         int             eor;
  955         struct mbuf     *o;
  956 
  957         eor = 0;
  958         while (m) {
  959                 eor |= m->m_flags & M_EOR;
  960                 if (m->m_len == 0 &&
  961                     (eor == 0 ||
  962                      (((o = m->m_next) || (o = n)) &&
  963                       o->m_type == m->m_type))) {
  964                         if (sb->sb_lastrecord == m)
  965                                 sb->sb_lastrecord = m->m_next;
  966                         m = m_free(m);
  967                         continue;
  968                 }
  969                 if (n && (n->m_flags & M_EOR) == 0 &&
  970                     /* M_TRAILINGSPACE() checks buffer writeability */
  971                     m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
  972                     m->m_len <= M_TRAILINGSPACE(n) &&
  973                     n->m_type == m->m_type) {
  974                         memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
  975                             (unsigned)m->m_len);
  976                         n->m_len += m->m_len;
  977                         sb->sb_cc += m->m_len;
  978                         m = m_free(m);
  979                         continue;
  980                 }
  981                 if (n)
  982                         n->m_next = m;
  983                 else
  984                         sb->sb_mb = m;
  985                 sb->sb_mbtail = m;
  986                 sballoc(sb, m);
  987                 n = m;
  988                 m->m_flags &= ~M_EOR;
  989                 m = m->m_next;
  990                 n->m_next = 0;
  991         }
  992         if (eor) {
  993                 if (n)
  994                         n->m_flags |= eor;
  995                 else
  996                         printf("semi-panic: sbcompress\n");
  997         }
  998         SBLASTMBUFCHK(sb, __func__);
  999 }
 1000 
 1001 /*
 1002  * Free all mbufs in a sockbuf.
 1003  * Check that all resources are reclaimed.
 1004  */
 1005 void
 1006 sbflush(struct sockbuf *sb)
 1007 {
 1008 
 1009         KASSERT((sb->sb_flags & SB_LOCK) == 0);
 1010 
 1011         while (sb->sb_mbcnt)
 1012                 sbdrop(sb, (int)sb->sb_cc);
 1013 
 1014         KASSERT(sb->sb_cc == 0);
 1015         KASSERT(sb->sb_mb == NULL);
 1016         KASSERT(sb->sb_mbtail == NULL);
 1017         KASSERT(sb->sb_lastrecord == NULL);
 1018 }
 1019 
 1020 /*
 1021  * Drop data from (the front of) a sockbuf.
 1022  */
 1023 void
 1024 sbdrop(struct sockbuf *sb, int len)
 1025 {
 1026         struct mbuf     *m, *mn, *next;
 1027 
 1028         next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
 1029         while (len > 0) {
 1030                 if (m == 0) {
 1031                         if (next == 0)
 1032                                 panic("sbdrop");
 1033                         m = next;
 1034                         next = m->m_nextpkt;
 1035                         continue;
 1036                 }
 1037                 if (m->m_len > len) {
 1038                         m->m_len -= len;
 1039                         m->m_data += len;
 1040                         sb->sb_cc -= len;
 1041                         break;
 1042                 }
 1043                 len -= m->m_len;
 1044                 sbfree(sb, m);
 1045                 MFREE(m, mn);
 1046                 m = mn;
 1047         }
 1048         while (m && m->m_len == 0) {
 1049                 sbfree(sb, m);
 1050                 MFREE(m, mn);
 1051                 m = mn;
 1052         }
 1053         if (m) {
 1054                 sb->sb_mb = m;
 1055                 m->m_nextpkt = next;
 1056         } else
 1057                 sb->sb_mb = next;
 1058         /*
 1059          * First part is an inline SB_EMPTY_FIXUP().  Second part
 1060          * makes sure sb_lastrecord is up-to-date if we dropped
 1061          * part of the last record.
 1062          */
 1063         m = sb->sb_mb;
 1064         if (m == NULL) {
 1065                 sb->sb_mbtail = NULL;
 1066                 sb->sb_lastrecord = NULL;
 1067         } else if (m->m_nextpkt == NULL)
 1068                 sb->sb_lastrecord = m;
 1069 }
 1070 
 1071 /*
 1072  * Drop a record off the front of a sockbuf
 1073  * and move the next record to the front.
 1074  */
 1075 void
 1076 sbdroprecord(struct sockbuf *sb)
 1077 {
 1078         struct mbuf     *m, *mn;
 1079 
 1080         m = sb->sb_mb;
 1081         if (m) {
 1082                 sb->sb_mb = m->m_nextpkt;
 1083                 do {
 1084                         sbfree(sb, m);
 1085                         MFREE(m, mn);
 1086                 } while ((m = mn) != NULL);
 1087         }
 1088         SB_EMPTY_FIXUP(sb);
 1089 }
 1090 
 1091 /*
 1092  * Create a "control" mbuf containing the specified data
 1093  * with the specified type for presentation on a socket buffer.
 1094  */
 1095 struct mbuf *
 1096 sbcreatecontrol(caddr_t p, int size, int type, int level)
 1097 {
 1098         struct cmsghdr  *cp;
 1099         struct mbuf     *m;
 1100 
 1101         if (CMSG_SPACE(size) > MCLBYTES) {
 1102                 printf("sbcreatecontrol: message too large %d\n", size);
 1103                 return NULL;
 1104         }
 1105 
 1106         if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
 1107                 return ((struct mbuf *) NULL);
 1108         if (CMSG_SPACE(size) > MLEN) {
 1109                 MCLGET(m, M_DONTWAIT);
 1110                 if ((m->m_flags & M_EXT) == 0) {
 1111                         m_free(m);
 1112                         return NULL;
 1113                 }
 1114         }
 1115         cp = mtod(m, struct cmsghdr *);
 1116         memcpy(CMSG_DATA(cp), p, size);
 1117         m->m_len = CMSG_SPACE(size);
 1118         cp->cmsg_len = CMSG_LEN(size);
 1119         cp->cmsg_level = level;
 1120         cp->cmsg_type = type;
 1121         return (m);
 1122 }

Cache object: 6b757dea0e5457259374264406cbe9a0


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.