The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_sockbuf.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      @(#)uipc_socket2.c      8.1 (Berkeley) 6/10/93
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: releng/7.3/sys/kern/uipc_sockbuf.c 196538 2009-08-25 12:32:16Z bz $");
   34 
   35 #include "opt_param.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/aio.h> /* for aio_swake proto */
   39 #include <sys/kernel.h>
   40 #include <sys/lock.h>
   41 #include <sys/mbuf.h>
   42 #include <sys/mutex.h>
   43 #include <sys/proc.h>
   44 #include <sys/protosw.h>
   45 #include <sys/resourcevar.h>
   46 #include <sys/signalvar.h>
   47 #include <sys/socket.h>
   48 #include <sys/socketvar.h>
   49 #include <sys/sx.h>
   50 #include <sys/sysctl.h>
   51 
   52 /*
   53  * Function pointer set by the AIO routines so that the socket buffer code
   54  * can call back into the AIO module if it is loaded.
   55  */
   56 void    (*aio_swake)(struct socket *, struct sockbuf *);
   57 
   58 /*
   59  * Primitive routines for operating on socket buffers
   60  */
   61 
   62 u_long  sb_max = SB_MAX;
   63 u_long sb_max_adj =
   64        SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
   65 
   66 static  u_long sb_efficiency = 8;       /* parameter for sbreserve() */
   67 
   68 static void     sbdrop_internal(struct sockbuf *sb, int len);
   69 static void     sbflush_internal(struct sockbuf *sb);
   70 
   71 /*
   72  * Socantsendmore indicates that no more data will be sent on the socket; it
   73  * would normally be applied to a socket when the user informs the system
   74  * that no more data is to be sent, by the protocol code (in case
   75  * PRU_SHUTDOWN).  Socantrcvmore indicates that no more data will be
   76  * received, and will normally be applied to the socket by a protocol when it
   77  * detects that the peer will send no more data.  Data queued for reading in
   78  * the socket may yet be read.
   79  */
   80 void
   81 socantsendmore_locked(struct socket *so)
   82 {
   83 
   84         SOCKBUF_LOCK_ASSERT(&so->so_snd);
   85 
   86         so->so_snd.sb_state |= SBS_CANTSENDMORE;
   87         sowwakeup_locked(so);
   88         mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
   89 }
   90 
/*
 * Unlocked wrapper for socantsendmore_locked().  No explicit unlock is
 * needed: the locked variant drops the send buffer lock via its wakeup
 * path, as the trailing assertion verifies.
 */
void
socantsendmore(struct socket *so)
{

        SOCKBUF_LOCK(&so->so_snd);
        socantsendmore_locked(so);
        mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
}
   99 
/*
 * Mark the receive side as shut down (no more data will arrive) and wake
 * any readers.  Caller must hold the receive buffer lock; the wakeup path
 * releases it, as the trailing assertion verifies.
 */
void
socantrcvmore_locked(struct socket *so)
{

        SOCKBUF_LOCK_ASSERT(&so->so_rcv);

        so->so_rcv.sb_state |= SBS_CANTRCVMORE;
        sorwakeup_locked(so);
        mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
  110 
/*
 * Unlocked wrapper for socantrcvmore_locked().  The lock taken here is
 * released inside the locked variant's wakeup path.
 */
void
socantrcvmore(struct socket *so)
{

        SOCKBUF_LOCK(&so->so_rcv);
        socantrcvmore_locked(so);
        mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
  119 
  120 /*
  121  * Wait for data to arrive at/drain from a socket buffer.
  122  */
  123 int
  124 sbwait(struct sockbuf *sb)
  125 {
  126 
  127         SOCKBUF_LOCK_ASSERT(sb);
  128 
  129         sb->sb_flags |= SB_WAIT;
  130         return (msleep(&sb->sb_cc, &sb->sb_mtx,
  131             (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
  132             sb->sb_timeo));
  133 }
  134 
  135 int
  136 sblock(struct sockbuf *sb, int flags)
  137 {
  138 
  139         KASSERT((flags & SBL_VALID) == flags,
  140             ("sblock: flags invalid (0x%x)", flags));
  141 
  142         if (flags & SBL_WAIT) {
  143                 if ((sb->sb_flags & SB_NOINTR) ||
  144                     (flags & SBL_NOINTR)) {
  145                         sx_xlock(&sb->sb_sx);
  146                         return (0);
  147                 }
  148                 return (sx_xlock_sig(&sb->sb_sx));
  149         } else {
  150                 if (sx_try_xlock(&sb->sb_sx) == 0)
  151                         return (EWOULDBLOCK);
  152                 return (0);
  153         }
  154 }
  155 
/*
 * Release the sockbuf I/O serialization lock acquired by sblock().
 */
void
sbunlock(struct sockbuf *sb)
{

        sx_xunlock(&sb->sb_sx);
}
  162 
  163 /*
  164  * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
  165  * via SIGIO if the socket has the SS_ASYNC flag set.
  166  *
  167  * Called with the socket buffer lock held; will release the lock by the end
  168  * of the function.  This allows the caller to acquire the socket buffer lock
  169  * while testing for the need for various sorts of wakeup and hold it through
  170  * to the point where it's no longer required.  We currently hold the lock
  171  * through calls out to other subsystems (with the exception of kqueue), and
  172  * then release it to avoid lock order issues.  It's not clear that's
  173  * correct.
  174  */
  175 void
  176 sowakeup(struct socket *so, struct sockbuf *sb)
  177 {
  178 
  179         SOCKBUF_LOCK_ASSERT(sb);
  180 
  181         selwakeuppri(&sb->sb_sel, PSOCK);
  182         sb->sb_flags &= ~SB_SEL;
  183         if (sb->sb_flags & SB_WAIT) {
  184                 sb->sb_flags &= ~SB_WAIT;
  185                 wakeup(&sb->sb_cc);
  186         }
  187         KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
  188         SOCKBUF_UNLOCK(sb);
  189         if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
  190                 pgsigio(&so->so_sigio, SIGIO, 0);
  191         if (sb->sb_flags & SB_UPCALL)
  192                 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
  193         if (sb->sb_flags & SB_AIO)
  194                 aio_swake(so, sb);
  195         mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
  196 }
  197 
  198 /*
  199  * Socket buffer (struct sockbuf) utility routines.
  200  *
  201  * Each socket contains two socket buffers: one for sending data and one for
  202  * receiving data.  Each buffer contains a queue of mbufs, information about
  203  * the number of mbufs and amount of data in the queue, and other fields
  204  * allowing select() statements and notification on data availability to be
  205  * implemented.
  206  *
  207  * Data stored in a socket buffer is maintained as a list of records.  Each
  208  * record is a list of mbufs chained together with the m_next field.  Records
  209  * are chained together with the m_nextpkt field. The upper level routine
  210  * soreceive() expects the following conventions to be observed when placing
  211  * information in the receive buffer:
  212  *
  213  * 1. If the protocol requires each message be preceded by the sender's name,
  214  *    then a record containing that name must be present before any
  215  *    associated data (mbuf's must be of type MT_SONAME).
  216  * 2. If the protocol supports the exchange of ``access rights'' (really just
  217  *    additional data associated with the message), and there are ``rights''
  218  *    to be received, then a record containing this data should be present
  219  *    (mbuf's must be of type MT_RIGHTS).
  220  * 3. If a name or rights record exists, then it must be followed by a data
  221  *    record, perhaps of zero length.
  222  *
  223  * Before using a new socket structure it is first necessary to reserve
  224  * buffer space to the socket, by calling sbreserve().  This should commit
  225  * some of the available buffer space in the system buffer pool for the
  226  * socket (currently, it does nothing but enforce limits).  The space should
  227  * be released by calling sbrelease() when the socket is destroyed.
  228  */
  229 int
  230 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
  231 {
  232         struct thread *td = curthread;
  233 
  234         SOCKBUF_LOCK(&so->so_snd);
  235         SOCKBUF_LOCK(&so->so_rcv);
  236         if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
  237                 goto bad;
  238         if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
  239                 goto bad2;
  240         if (so->so_rcv.sb_lowat == 0)
  241                 so->so_rcv.sb_lowat = 1;
  242         if (so->so_snd.sb_lowat == 0)
  243                 so->so_snd.sb_lowat = MCLBYTES;
  244         if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
  245                 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
  246         SOCKBUF_UNLOCK(&so->so_rcv);
  247         SOCKBUF_UNLOCK(&so->so_snd);
  248         return (0);
  249 bad2:
  250         sbrelease_locked(&so->so_snd, so);
  251 bad:
  252         SOCKBUF_UNLOCK(&so->so_rcv);
  253         SOCKBUF_UNLOCK(&so->so_snd);
  254         return (ENOBUFS);
  255 }
  256 
  257 static int
  258 sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
  259 {
  260         int error = 0;
  261         u_long tmp_sb_max = sb_max;
  262 
  263         error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
  264         if (error || !req->newptr)
  265                 return (error);
  266         if (tmp_sb_max < MSIZE + MCLBYTES)
  267                 return (EINVAL);
  268         sb_max = tmp_sb_max;
  269         sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
  270         return (0);
  271 }
  272         
  273 /*
  274  * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
  275  * become limiting if buffering efficiency is near the normal case.
  276  */
  277 int
  278 sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
  279     struct thread *td)
  280 {
  281         rlim_t sbsize_limit;
  282 
  283         SOCKBUF_LOCK_ASSERT(sb);
  284 
  285         /*
  286          * When a thread is passed, we take into account the thread's socket
  287          * buffer size limit.  The caller will generally pass curthread, but
  288          * in the TCP input path, NULL will be passed to indicate that no
  289          * appropriate thread resource limits are available.  In that case,
  290          * we don't apply a process limit.
  291          */
  292         if (cc > sb_max_adj)
  293                 return (0);
  294         if (td != NULL) {
  295                 PROC_LOCK(td->td_proc);
  296                 sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE);
  297                 PROC_UNLOCK(td->td_proc);
  298         } else
  299                 sbsize_limit = RLIM_INFINITY;
  300         if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
  301             sbsize_limit))
  302                 return (0);
  303         sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
  304         if (sb->sb_lowat > sb->sb_hiwat)
  305                 sb->sb_lowat = sb->sb_hiwat;
  306         return (1);
  307 }
  308 
/*
 * Unlocked wrapper for sbreserve_locked().  Note the return value is the
 * locked variant's success flag (1 on success, 0 on failure), not an errno.
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so, 
    struct thread *td)
{
        int error;

        SOCKBUF_LOCK(sb);
        error = sbreserve_locked(sb, cc, so, td);
        SOCKBUF_UNLOCK(sb);
        return (error);
}
  320 
  321 /*
  322  * Free mbufs held by a socket, and reserved mbuf space.
  323  */
  324 void
  325 sbrelease_internal(struct sockbuf *sb, struct socket *so)
  326 {
  327 
  328         sbflush_internal(sb);
  329         (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
  330             RLIM_INFINITY);
  331         sb->sb_mbmax = 0;
  332 }
  333 
/*
 * Locked entry point for releasing a socket buffer's data and reservation;
 * asserts the socket buffer lock before delegating to the internal helper.
 */
void
sbrelease_locked(struct sockbuf *sb, struct socket *so)
{

        SOCKBUF_LOCK_ASSERT(sb);

        sbrelease_internal(sb, so);
}
  342 
/*
 * Unlocked wrapper for sbrelease_locked().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

        SOCKBUF_LOCK(sb);
        sbrelease_locked(sb, so);
        SOCKBUF_UNLOCK(sb);
}
  351 
/*
 * Final teardown of a socket buffer; calls the internal release path
 * directly, without taking or asserting the socket buffer lock
 * (presumably safe because no other references remain at destroy time —
 * NOTE(review): confirm against callers).
 */
void
sbdestroy(struct sockbuf *sb, struct socket *so)
{

        sbrelease_internal(sb, so);
}
  358 
  359 /*
  360  * Routines to add and remove data from an mbuf queue.
  361  *
  362  * The routines sbappend() or sbappendrecord() are normally called to append
  363  * new mbufs to a socket buffer, after checking that adequate space is
  364  * available, comparing the function sbspace() with the amount of data to be
  365  * added.  sbappendrecord() differs from sbappend() in that data supplied is
  366  * treated as the beginning of a new record.  To place a sender's address,
  367  * optional access rights, and data in a socket receive buffer,
  368  * sbappendaddr() should be used.  To place access rights and data in a
  369  * socket receive buffer, sbappendrights() should be used.  In either case,
  370  * the new data begins a new record.  Note that unlike sbappend() and
  371  * sbappendrecord(), these routines check for the caller that there will be
  372  * enough space to store the data.  Each fails if there is not enough space,
  373  * or if it cannot find mbufs to store additional information in.
  374  *
  375  * Reliable protocols may use the socket send buffer to hold data awaiting
  376  * acknowledgement.  Data is normally copied from a socket send buffer in a
  377  * protocol with m_copy for output to a peer, and then removing the data from
  378  * the socket buffer with sbdrop() or sbdroprecord() when the data is
  379  * acknowledged by the peer.
  380  */
  381 #ifdef SOCKBUF_DEBUG
  382 void
  383 sblastrecordchk(struct sockbuf *sb, const char *file, int line)
  384 {
  385         struct mbuf *m = sb->sb_mb;
  386 
  387         SOCKBUF_LOCK_ASSERT(sb);
  388 
  389         while (m && m->m_nextpkt)
  390                 m = m->m_nextpkt;
  391 
  392         if (m != sb->sb_lastrecord) {
  393                 printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
  394                         __func__, sb->sb_mb, sb->sb_lastrecord, m);
  395                 printf("packet chain:\n");
  396                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
  397                         printf("\t%p\n", m);
  398                 panic("%s from %s:%u", __func__, file, line);
  399         }
  400 }
  401 
/*
 * Debug check: walk to the last mbuf of the last record and panic if
 * sb_mbtail does not point at it, dumping the full mbuf tree first.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *file, int line)
{
        struct mbuf *m = sb->sb_mb;
        struct mbuf *n;

        SOCKBUF_LOCK_ASSERT(sb);

        /* Last record ... */
        while (m && m->m_nextpkt)
                m = m->m_nextpkt;

        /* ... then last mbuf within that record. */
        while (m && m->m_next)
                m = m->m_next;

        if (m != sb->sb_mbtail) {
                printf("%s: sb_mb %p sb_mbtail %p last %p\n",
                        __func__, sb->sb_mb, sb->sb_mbtail, m);
                printf("packet tree:\n");
                for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
                        printf("\t");
                        for (n = m; n != NULL; n = n->m_next)
                                printf("%p ", n);
                        printf("\n");
                }
                panic("%s from %s:%u", __func__, file, line);
        }
}
#endif /* SOCKBUF_DEBUG */
  430 
/*
 * Link mbuf chain (m0) into the socket buffer as a new record: chain it
 * after the current last record (or install it as the first record if the
 * buffer is empty) and make it the new sb_lastrecord.  Caller must hold
 * the socket buffer lock.
 */
#define SBLINKRECORD(sb, m0) do {                                       \
        SOCKBUF_LOCK_ASSERT(sb);                                        \
        if ((sb)->sb_lastrecord != NULL)                                \
                (sb)->sb_lastrecord->m_nextpkt = (m0);                  \
        else                                                            \
                (sb)->sb_mb = (m0);                                     \
        (sb)->sb_lastrecord = (m0);                                     \
} while (/*CONSTCOND*/0)
  439 
  440 /*
  441  * Append mbuf chain m to the last record in the socket buffer sb.  The
  442  * additional space associated the mbuf chain is recorded in sb.  Empty mbufs
  443  * are discarded and mbufs are compacted where possible.
  444  */
  445 void
  446 sbappend_locked(struct sockbuf *sb, struct mbuf *m)
  447 {
  448         struct mbuf *n;
  449 
  450         SOCKBUF_LOCK_ASSERT(sb);
  451 
  452         if (m == 0)
  453                 return;
  454 
  455         SBLASTRECORDCHK(sb);
  456         n = sb->sb_mb;
  457         if (n) {
  458                 while (n->m_nextpkt)
  459                         n = n->m_nextpkt;
  460                 do {
  461                         if (n->m_flags & M_EOR) {
  462                                 sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
  463                                 return;
  464                         }
  465                 } while (n->m_next && (n = n->m_next));
  466         } else {
  467                 /*
  468                  * XXX Would like to simply use sb_mbtail here, but
  469                  * XXX I need to verify that I won't miss an EOR that
  470                  * XXX way.
  471                  */
  472                 if ((n = sb->sb_lastrecord) != NULL) {
  473                         do {
  474                                 if (n->m_flags & M_EOR) {
  475                                         sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
  476                                         return;
  477                                 }
  478                         } while (n->m_next && (n = n->m_next));
  479                 } else {
  480                         /*
  481                          * If this is the first record in the socket buffer,
  482                          * it's also the last record.
  483                          */
  484                         sb->sb_lastrecord = m;
  485                 }
  486         }
  487         sbcompress(sb, m, n);
  488         SBLASTRECORDCHK(sb);
  489 }
  490 
  491 /*
  492  * Append mbuf chain m to the last record in the socket buffer sb.  The
  493  * additional space associated the mbuf chain is recorded in sb.  Empty mbufs
  494  * are discarded and mbufs are compacted where possible.
  495  */
  496 void
  497 sbappend(struct sockbuf *sb, struct mbuf *m)
  498 {
  499 
  500         SOCKBUF_LOCK(sb);
  501         sbappend_locked(sb, m);
  502         SOCKBUF_UNLOCK(sb);
  503 }
  504 
  505 /*
  506  * This version of sbappend() should only be used when the caller absolutely
  507  * knows that there will never be more than one record in the socket buffer,
  508  * that is, a stream protocol (such as TCP).
  509  */
  510 void
  511 sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
  512 {
  513         SOCKBUF_LOCK_ASSERT(sb);
  514 
  515         KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
  516         KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
  517 
  518         SBLASTMBUFCHK(sb);
  519 
  520         sbcompress(sb, m, sb->sb_mbtail);
  521 
  522         sb->sb_lastrecord = sb->sb_mb;
  523         SBLASTRECORDCHK(sb);
  524 }
  525 
  526 /*
  527  * This version of sbappend() should only be used when the caller absolutely
  528  * knows that there will never be more than one record in the socket buffer,
  529  * that is, a stream protocol (such as TCP).
  530  */
  531 void
  532 sbappendstream(struct sockbuf *sb, struct mbuf *m)
  533 {
  534 
  535         SOCKBUF_LOCK(sb);
  536         sbappendstream_locked(sb, m);
  537         SOCKBUF_UNLOCK(sb);
  538 }
  539 
  540 #ifdef SOCKBUF_DEBUG
  541 void
  542 sbcheck(struct sockbuf *sb)
  543 {
  544         struct mbuf *m;
  545         struct mbuf *n = 0;
  546         u_long len = 0, mbcnt = 0;
  547 
  548         SOCKBUF_LOCK_ASSERT(sb);
  549 
  550         for (m = sb->sb_mb; m; m = n) {
  551             n = m->m_nextpkt;
  552             for (; m; m = m->m_next) {
  553                 len += m->m_len;
  554                 mbcnt += MSIZE;
  555                 if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
  556                         mbcnt += m->m_ext.ext_size;
  557             }
  558         }
  559         if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
  560                 printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
  561                     mbcnt, sb->sb_mbcnt);
  562                 panic("sbcheck");
  563         }
  564 }
  565 #endif
  566 
  567 /*
  568  * As above, except the mbuf chain begins a new record.
  569  */
  570 void
  571 sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
  572 {
  573         struct mbuf *m;
  574 
  575         SOCKBUF_LOCK_ASSERT(sb);
  576 
  577         if (m0 == 0)
  578                 return;
  579         /*
  580          * Put the first mbuf on the queue.  Note this permits zero length
  581          * records.
  582          */
  583         sballoc(sb, m0);
  584         SBLASTRECORDCHK(sb);
  585         SBLINKRECORD(sb, m0);
  586         sb->sb_mbtail = m0;
  587         m = m0->m_next;
  588         m0->m_next = 0;
  589         if (m && (m0->m_flags & M_EOR)) {
  590                 m0->m_flags &= ~M_EOR;
  591                 m->m_flags |= M_EOR;
  592         }
  593         /* always call sbcompress() so it can do SBLASTMBUFCHK() */
  594         sbcompress(sb, m, m0);
  595 }
  596 
  597 /*
  598  * As above, except the mbuf chain begins a new record.
  599  */
  600 void
  601 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
  602 {
  603 
  604         SOCKBUF_LOCK(sb);
  605         sbappendrecord_locked(sb, m0);
  606         SOCKBUF_UNLOCK(sb);
  607 }
  608 
  609 /*
  610  * Append address and data, and optionally, control (ancillary) data to the
  611  * receive queue of a socket.  If present, m0 must include a packet header
  612  * with total length.  Returns 0 if no space in sockbuf or insufficient
  613  * mbufs.
  614  */
  615 int
  616 sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
  617     struct mbuf *m0, struct mbuf *control)
  618 {
  619         struct mbuf *m, *n, *nlast;
  620         int space = asa->sa_len;
  621 
  622         SOCKBUF_LOCK_ASSERT(sb);
  623 
  624         if (m0 && (m0->m_flags & M_PKTHDR) == 0)
  625                 panic("sbappendaddr_locked");
  626         if (m0)
  627                 space += m0->m_pkthdr.len;
  628         space += m_length(control, &n);
  629 
  630         if (space > sbspace(sb))
  631                 return (0);
  632 #if MSIZE <= 256
  633         if (asa->sa_len > MLEN)
  634                 return (0);
  635 #endif
  636         MGET(m, M_DONTWAIT, MT_SONAME);
  637         if (m == 0)
  638                 return (0);
  639         m->m_len = asa->sa_len;
  640         bcopy(asa, mtod(m, caddr_t), asa->sa_len);
  641         if (n)
  642                 n->m_next = m0;         /* concatenate data to control */
  643         else
  644                 control = m0;
  645         m->m_next = control;
  646         for (n = m; n->m_next != NULL; n = n->m_next)
  647                 sballoc(sb, n);
  648         sballoc(sb, n);
  649         nlast = n;
  650         SBLINKRECORD(sb, m);
  651 
  652         sb->sb_mbtail = nlast;
  653         SBLASTMBUFCHK(sb);
  654 
  655         SBLASTRECORDCHK(sb);
  656         return (1);
  657 }
  658 
  659 /*
  660  * Append address and data, and optionally, control (ancillary) data to the
  661  * receive queue of a socket.  If present, m0 must include a packet header
  662  * with total length.  Returns 0 if no space in sockbuf or insufficient
  663  * mbufs.
  664  */
  665 int
  666 sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
  667     struct mbuf *m0, struct mbuf *control)
  668 {
  669         int retval;
  670 
  671         SOCKBUF_LOCK(sb);
  672         retval = sbappendaddr_locked(sb, asa, m0, control);
  673         SOCKBUF_UNLOCK(sb);
  674         return (retval);
  675 }
  676 
/*
 * Append a record consisting of control (ancillary) data followed by
 * optional data m0 to the socket buffer.  control must be non-NULL.
 * Returns 0 if there is insufficient space, 1 on success.
 */
int
sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
    struct mbuf *control)
{
        struct mbuf *m, *n, *mlast;
        int space;

        SOCKBUF_LOCK_ASSERT(sb);

        if (control == 0)
                panic("sbappendcontrol_locked");
        /* n is left pointing at the last mbuf of the control chain. */
        space = m_length(control, &n) + m_length(m0, NULL);

        if (space > sbspace(sb))
                return (0);
        n->m_next = m0;                 /* concatenate data to control */

        SBLASTRECORDCHK(sb);

        /* Charge every mbuf of the combined chain to the buffer. */
        for (m = control; m->m_next; m = m->m_next)
                sballoc(sb, m);
        sballoc(sb, m);
        mlast = m;
        SBLINKRECORD(sb, control);

        sb->sb_mbtail = mlast;
        SBLASTMBUFCHK(sb);

        SBLASTRECORDCHK(sb);
        return (1);
}
  708 
/*
 * Unlocked wrapper for sbappendcontrol_locked().
 */
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
        int retval;

        SOCKBUF_LOCK(sb);
        retval = sbappendcontrol_locked(sb, m0, control);
        SOCKBUF_UNLOCK(sb);
        return (retval);
}
  719 
  720 /*
  721  * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
  722  * (n).  If (n) is NULL, the buffer is presumed empty.
  723  *
  724  * When the data is compressed, mbufs in the chain may be handled in one of
  725  * three ways:
  726  *
  727  * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
  728  *     record boundary, and no change in data type).
  729  *
  730  * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
  731  *     an mbuf already in the socket buffer.  This can occur if an
  732  *     appropriate mbuf exists, there is room, and no merging of data types
  733  *     will occur.
  734  *
  735  * (3) The mbuf may be appended to the end of the existing mbuf chain.
  736  *
  737  * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
  738  * end-of-record.
  739  */
  740 void
  741 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
  742 {
  743         int eor = 0;
  744         struct mbuf *o;
  745 
  746         SOCKBUF_LOCK_ASSERT(sb);
  747 
  748         while (m) {
  749                 eor |= m->m_flags & M_EOR;
  750                 if (m->m_len == 0 &&
  751                     (eor == 0 ||
  752                      (((o = m->m_next) || (o = n)) &&
  753                       o->m_type == m->m_type))) {
  754                         if (sb->sb_lastrecord == m)
  755                                 sb->sb_lastrecord = m->m_next;
  756                         m = m_free(m);
  757                         continue;
  758                 }
  759                 if (n && (n->m_flags & M_EOR) == 0 &&
  760                     M_WRITABLE(n) &&
  761                     m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
  762                     m->m_len <= M_TRAILINGSPACE(n) &&
  763                     n->m_type == m->m_type) {
  764                         bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
  765                             (unsigned)m->m_len);
  766                         n->m_len += m->m_len;
  767                         sb->sb_cc += m->m_len;
  768                         if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
  769                                 /* XXX: Probably don't need.*/
  770                                 sb->sb_ctl += m->m_len;
  771                         m = m_free(m);
  772                         continue;
  773                 }
  774                 if (n)
  775                         n->m_next = m;
  776                 else
  777                         sb->sb_mb = m;
  778                 sb->sb_mbtail = m;
  779                 sballoc(sb, m);
  780                 n = m;
  781                 m->m_flags &= ~M_EOR;
  782                 m = m->m_next;
  783                 n->m_next = 0;
  784         }
  785         if (eor) {
  786                 KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
  787                 n->m_flags |= eor;
  788         }
  789         SBLASTMBUFCHK(sb);
  790 }
  791 
  792 /*
  793  * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
  794  */
  795 static void
  796 sbflush_internal(struct sockbuf *sb)
  797 {
  798 
  799         while (sb->sb_mbcnt) {
  800                 /*
  801                  * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
  802                  * we would loop forever. Panic instead.
  803                  */
  804                 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
  805                         break;
  806                 sbdrop_internal(sb, (int)sb->sb_cc);
  807         }
  808         if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
  809                 panic("sbflush_internal: cc %u || mb %p || mbcnt %u",
  810                     sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
  811 }
  812 
/*
 * Locked entry point for flushing a socket buffer; asserts the socket
 * buffer lock before delegating to the internal helper.
 */
void
sbflush_locked(struct sockbuf *sb)
{

        SOCKBUF_LOCK_ASSERT(sb);
        sbflush_internal(sb);
}
  820 
/*
 * Unlocked wrapper for sbflush_locked().
 */
void
sbflush(struct sockbuf *sb)
{

        SOCKBUF_LOCK(sb);
        sbflush_locked(sb);
        SOCKBUF_UNLOCK(sb);
}
  829 
  830 /*
  831  * Drop data from (the front of) a sockbuf.
  832  */
  833 static void
  834 sbdrop_internal(struct sockbuf *sb, int len)
  835 {
  836         struct mbuf *m;
  837         struct mbuf *next;
  838 
  839         next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
  840         while (len > 0) {
  841                 if (m == 0) {
  842                         if (next == 0)
  843                                 panic("sbdrop");
  844                         m = next;
  845                         next = m->m_nextpkt;
  846                         continue;
  847                 }
  848                 if (m->m_len > len) {
  849                         m->m_len -= len;
  850                         m->m_data += len;
  851                         sb->sb_cc -= len;
  852                         if (sb->sb_sndptroff != 0)
  853                                 sb->sb_sndptroff -= len;
  854                         if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
  855                                 sb->sb_ctl -= len;
  856                         break;
  857                 }
  858                 len -= m->m_len;
  859                 sbfree(sb, m);
  860                 m = m_free(m);
  861         }
  862         while (m && m->m_len == 0) {
  863                 sbfree(sb, m);
  864                 m = m_free(m);
  865         }
  866         if (m) {
  867                 sb->sb_mb = m;
  868                 m->m_nextpkt = next;
  869         } else
  870                 sb->sb_mb = next;
  871         /*
  872          * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
  873          * sb_lastrecord is up-to-date if we dropped part of the last record.
  874          */
  875         m = sb->sb_mb;
  876         if (m == NULL) {
  877                 sb->sb_mbtail = NULL;
  878                 sb->sb_lastrecord = NULL;
  879         } else if (m->m_nextpkt == NULL) {
  880                 sb->sb_lastrecord = m;
  881         }
  882 }
  883 
  884 /*
  885  * Drop data from (the front of) a sockbuf.
  886  */
  887 void
  888 sbdrop_locked(struct sockbuf *sb, int len)
  889 {
  890 
  891         SOCKBUF_LOCK_ASSERT(sb);
  892 
  893         sbdrop_internal(sb, len);
  894 }
  895 
  896 void
  897 sbdrop(struct sockbuf *sb, int len)
  898 {
  899 
  900         SOCKBUF_LOCK(sb);
  901         sbdrop_locked(sb, len);
  902         SOCKBUF_UNLOCK(sb);
  903 }
  904 
/*
 * Maintain a pointer and offset pair into the socket buffer mbuf chain to
 * avoid traversal of the entire socket buffer for larger offsets.
 *
 * Returns the mbuf containing byte 'off' of the buffer and stores that
 * byte's offset within the returned mbuf in *moff.  As a side effect the
 * cached pair (sb_sndptr, sb_sndptroff) is advanced past the 'len' bytes
 * about to be transmitted, so the next call starts close to its target.
 *
 * NOTE(review): the loop seed 'off - sb_sndptroff + len - 1' underflows
 * (u_int wraparound) when len == 0 -- presumably callers always pass
 * len > 0; verify before reusing this interface.
 */
struct mbuf *
sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
{
	struct mbuf *m, *ret;

	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
	KASSERT(off + len <= sb->sb_cc, ("%s: beyond sb", __func__));
	KASSERT(sb->sb_sndptroff <= sb->sb_cc, ("%s: sndptroff broken", __func__));

	/*
	 * Is off below stored offset? Happens on retransmits.
	 * Just return, we can't help here.
	 */
	if (sb->sb_sndptroff > off) {
		*moff = off;
		return (sb->sb_mb);
	}

	/* Return closest mbuf in chain for current offset. */
	*moff = off - sb->sb_sndptroff;
	m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb;

	/* Advance by len to be as close as possible for the next transmit. */
	for (off = off - sb->sb_sndptroff + len - 1;
	     off > 0 && m != NULL && off >= m->m_len;
	     m = m->m_next) {
		sb->sb_sndptroff += m->m_len;
		off -= m->m_len;
	}
	/* Walking off the end of the chain means the accounting is broken. */
	if (off > 0 && m == NULL)
		panic("%s: sockbuf %p and mbuf %p clashing", __func__, sb, ret);
	sb->sb_sndptr = m;

	return (ret);
}
  944 
  945 /*
  946  * Drop a record off the front of a sockbuf and move the next record to the
  947  * front.
  948  */
  949 void
  950 sbdroprecord_locked(struct sockbuf *sb)
  951 {
  952         struct mbuf *m;
  953 
  954         SOCKBUF_LOCK_ASSERT(sb);
  955 
  956         m = sb->sb_mb;
  957         if (m) {
  958                 sb->sb_mb = m->m_nextpkt;
  959                 do {
  960                         sbfree(sb, m);
  961                         m = m_free(m);
  962                 } while (m);
  963         }
  964         SB_EMPTY_FIXUP(sb);
  965 }
  966 
  967 /*
  968  * Drop a record off the front of a sockbuf and move the next record to the
  969  * front.
  970  */
  971 void
  972 sbdroprecord(struct sockbuf *sb)
  973 {
  974 
  975         SOCKBUF_LOCK(sb);
  976         sbdroprecord_locked(sb);
  977         SOCKBUF_UNLOCK(sb);
  978 }
  979 
  980 /*
  981  * Create a "control" mbuf containing the specified data with the specified
  982  * type for presentation on a socket buffer.
  983  */
  984 struct mbuf *
  985 sbcreatecontrol(caddr_t p, int size, int type, int level)
  986 {
  987         struct cmsghdr *cp;
  988         struct mbuf *m;
  989 
  990         if (CMSG_SPACE((u_int)size) > MCLBYTES)
  991                 return ((struct mbuf *) NULL);
  992         if (CMSG_SPACE((u_int)size) > MLEN)
  993                 m = m_getcl(M_DONTWAIT, MT_CONTROL, 0);
  994         else
  995                 m = m_get(M_DONTWAIT, MT_CONTROL);
  996         if (m == NULL)
  997                 return ((struct mbuf *) NULL);
  998         cp = mtod(m, struct cmsghdr *);
  999         m->m_len = 0;
 1000         KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
 1001             ("sbcreatecontrol: short mbuf"));
 1002         if (p != NULL)
 1003                 (void)memcpy(CMSG_DATA(cp), p, size);
 1004         m->m_len = CMSG_SPACE(size);
 1005         cp->cmsg_len = CMSG_LEN(size);
 1006         cp->cmsg_level = level;
 1007         cp->cmsg_type = type;
 1008         return (m);
 1009 }
 1010 
 1011 /*
 1012  * This does the same for socket buffers that sotoxsocket does for sockets:
 1013  * generate an user-format data structure describing the socket buffer.  Note
 1014  * that the xsockbuf structure, since it is always embedded in a socket, does
 1015  * not include a self pointer nor a length.  We make this entry point public
 1016  * in case some other mechanism needs it.
 1017  */
 1018 void
 1019 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
 1020 {
 1021 
 1022         xsb->sb_cc = sb->sb_cc;
 1023         xsb->sb_hiwat = sb->sb_hiwat;
 1024         xsb->sb_mbcnt = sb->sb_mbcnt;
 1025         xsb->sb_mbmax = sb->sb_mbmax;
 1026         xsb->sb_lowat = sb->sb_lowat;
 1027         xsb->sb_flags = sb->sb_flags;
 1028         xsb->sb_timeo = sb->sb_timeo;
 1029 }
 1030 
/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
/* kern.ipc.maxsockbuf: socket buffer size ceiling, set via handler. */
SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW,
    &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
/* kern.ipc.sockbuf_waste_factor: presumably an mbuf-overhead multiplier
 * (sb_efficiency is defined elsewhere in this file) -- verify there. */
SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "");

Cache object: d2a4bd42e45bb6219c0689a516ec97b8


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.