uipc_sockbuf.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      @(#)uipc_socket2.c      8.1 (Berkeley) 6/10/93
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD$");
   34 
   35 #include "opt_param.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/aio.h> /* for aio_swake proto */
   39 #include <sys/kernel.h>
   40 #include <sys/lock.h>
   41 #include <sys/mbuf.h>
   42 #include <sys/mutex.h>
   43 #include <sys/proc.h>
   44 #include <sys/protosw.h>
   45 #include <sys/resourcevar.h>
   46 #include <sys/signalvar.h>
   47 #include <sys/socket.h>
   48 #include <sys/socketvar.h>
   49 #include <sys/sx.h>
   50 #include <sys/sysctl.h>
   51 
   52 /*
   53  * Function pointer set by the AIO routines so that the socket buffer code
   54  * can call back into the AIO module if it is loaded.
   55  */
   56 void    (*aio_swake)(struct socket *, struct sockbuf *);
   57 
   58 /*
   59  * Primitive routines for operating on socket buffers
   60  */
   61 
   62 u_long  sb_max = SB_MAX;
   63 u_long sb_max_adj =
   64        SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
   65 
   66 static  u_long sb_efficiency = 8;       /* parameter for sbreserve() */
   67 
   68 static void     sbdrop_internal(struct sockbuf *sb, int len);
   69 static void     sbflush_internal(struct sockbuf *sb);
   70 
   71 /*
   72  * Socantsendmore indicates that no more data will be sent on the socket; it
   73  * would normally be applied to a socket when the user informs the system
   74  * that no more data is to be sent, by the protocol code (in case
   75  * PRU_SHUTDOWN).  Socantrcvmore indicates that no more data will be
   76  * received, and will normally be applied to the socket by a protocol when it
   77  * detects that the peer will send no more data.  Data queued for reading in
   78  * the socket may yet be read.
   79  */
   80 void
   81 socantsendmore_locked(struct socket *so)
   82 {
   83 
   84         SOCKBUF_LOCK_ASSERT(&so->so_snd);
   85 
   86         so->so_snd.sb_state |= SBS_CANTSENDMORE;
   87         sowwakeup_locked(so);
   88         mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
   89 }
   90 
   91 void
   92 socantsendmore(struct socket *so)
   93 {
   94 
   95         SOCKBUF_LOCK(&so->so_snd);
   96         socantsendmore_locked(so);
   97         mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
   98 }
   99 
  100 void
  101 socantrcvmore_locked(struct socket *so)
  102 {
  103 
  104         SOCKBUF_LOCK_ASSERT(&so->so_rcv);
  105 
  106         so->so_rcv.sb_state |= SBS_CANTRCVMORE;
  107         sorwakeup_locked(so);
  108         mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
  109 }
  110 
  111 void
  112 socantrcvmore(struct socket *so)
  113 {
  114 
  115         SOCKBUF_LOCK(&so->so_rcv);
  116         socantrcvmore_locked(so);
  117         mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
  118 }
  119 
  120 /*
  121  * Wait for data to arrive at/drain from a socket buffer.
  122  */
  123 int
  124 sbwait(struct sockbuf *sb)
  125 {
  126 
  127         SOCKBUF_LOCK_ASSERT(sb);
  128 
  129         sb->sb_flags |= SB_WAIT;
  130         return (msleep(&sb->sb_cc, &sb->sb_mtx,
  131             (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
  132             sb->sb_timeo));
  133 }
  134 
  135 int
  136 sblock(struct sockbuf *sb, int flags)
  137 {
  138 
  139         KASSERT((flags & SBL_VALID) == flags,
  140             ("sblock: flags invalid (0x%x)", flags));
  141 
  142         if (flags & SBL_WAIT) {
  143                 if ((sb->sb_flags & SB_NOINTR) ||
  144                     (flags & SBL_NOINTR)) {
  145                         sx_xlock(&sb->sb_sx);
  146                         return (0);
  147                 }
  148                 return (sx_xlock_sig(&sb->sb_sx));
  149         } else {
  150                 if (sx_try_xlock(&sb->sb_sx) == 0)
  151                         return (EWOULDBLOCK);
  152                 return (0);
  153         }
  154 }
  155 
  156 void
  157 sbunlock(struct sockbuf *sb)
  158 {
  159 
  160         sx_xunlock(&sb->sb_sx);
  161 }
  162 
  163 /*
  164  * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
  165  * via SIGIO if the socket has the SS_ASYNC flag set.
  166  *
  167  * Called with the socket buffer lock held; will release the lock by the end
  168  * of the function.  This allows the caller to acquire the socket buffer lock
  169  * while testing for the need for various sorts of wakeup and hold it through
  170  * to the point where it's no longer required.  We currently hold the lock
  171  * through calls out to other subsystems (with the exception of kqueue), and
  172  * then release it to avoid lock order issues.  It's not clear that's
  173  * correct.
  174  */
  175 void
  176 sowakeup(struct socket *so, struct sockbuf *sb)
  177 {
  178 
  179         SOCKBUF_LOCK_ASSERT(sb);
  180 
  181         selwakeuppri(&sb->sb_sel, PSOCK);
  182         sb->sb_flags &= ~SB_SEL;
  183         if (sb->sb_flags & SB_WAIT) {
  184                 sb->sb_flags &= ~SB_WAIT;
  185                 wakeup(&sb->sb_cc);
  186         }
  187         KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
  188         SOCKBUF_UNLOCK(sb);
  189         if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
  190                 pgsigio(&so->so_sigio, SIGIO, 0);
  191         if (sb->sb_flags & SB_UPCALL)
  192                 (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
  193         if (sb->sb_flags & SB_AIO)
  194                 aio_swake(so, sb);
  195         mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
  196 }
  197 
  198 /*
  199  * Socket buffer (struct sockbuf) utility routines.
  200  *
  201  * Each socket contains two socket buffers: one for sending data and one for
  202  * receiving data.  Each buffer contains a queue of mbufs, information about
  203  * the number of mbufs and amount of data in the queue, and other fields
  204  * allowing select() statements and notification on data availability to be
  205  * implemented.
  206  *
  207  * Data stored in a socket buffer is maintained as a list of records.  Each
  208  * record is a list of mbufs chained together with the m_next field.  Records
  209  * are chained together with the m_nextpkt field. The upper level routine
  210  * soreceive() expects the following conventions to be observed when placing
  211  * information in the receive buffer:
  212  *
  213  * 1. If the protocol requires each message be preceded by the sender's name,
  214  *    then a record containing that name must be present before any
  215  *    associated data (mbuf's must be of type MT_SONAME).
  216  * 2. If the protocol supports the exchange of ``access rights'' (really just
  217  *    additional data associated with the message), and there are ``rights''
  218  *    to be received, then a record containing this data should be present
  219  *    (mbuf's must be of type MT_RIGHTS).
  220  * 3. If a name or rights record exists, then it must be followed by a data
  221  *    record, perhaps of zero length.
  222  *
  223  * Before using a new socket structure it is first necessary to reserve
  224  * buffer space to the socket, by calling sbreserve().  This should commit
  225  * some of the available buffer space in the system buffer pool for the
  226  * socket (currently, it does nothing but enforce limits).  The space should
  227  * be released by calling sbrelease() when the socket is destroyed.
  228  */
  229 int
  230 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
  231 {
  232         struct thread *td = curthread;
  233 
  234         SOCKBUF_LOCK(&so->so_snd);
  235         SOCKBUF_LOCK(&so->so_rcv);
  236         if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
  237                 goto bad;
  238         if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
  239                 goto bad2;
  240         if (so->so_rcv.sb_lowat == 0)
  241                 so->so_rcv.sb_lowat = 1;
  242         if (so->so_snd.sb_lowat == 0)
  243                 so->so_snd.sb_lowat = MCLBYTES;
  244         if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
  245                 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
  246         SOCKBUF_UNLOCK(&so->so_rcv);
  247         SOCKBUF_UNLOCK(&so->so_snd);
  248         return (0);
  249 bad2:
  250         sbrelease_locked(&so->so_snd, so);
  251 bad:
  252         SOCKBUF_UNLOCK(&so->so_rcv);
  253         SOCKBUF_UNLOCK(&so->so_snd);
  254         return (ENOBUFS);
  255 }
  256 
  257 static int
  258 sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
  259 {
  260         int error = 0;
  261         u_long tmp_sb_max = sb_max;
  262 
  263         error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
  264         if (error || !req->newptr)
  265                 return (error);
  266         if (tmp_sb_max < MSIZE + MCLBYTES)
  267                 return (EINVAL);
  268         sb_max = tmp_sb_max;
  269         sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
  270         return (0);
  271 }
  272         
  273 /*
  274  * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
  275  * become limiting if buffering efficiency is near the normal case.
  276  */
  277 int
  278 sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
  279     struct thread *td)
  280 {
  281         rlim_t sbsize_limit;
  282 
  283         SOCKBUF_LOCK_ASSERT(sb);
  284 
  285         /*
  286          * When a thread is passed, we take into account the thread's socket
  287          * buffer size limit.  The caller will generally pass curthread, but
  288          * in the TCP input path, NULL will be passed to indicate that no
  289          * appropriate thread resource limits are available.  In that case,
  290          * we don't apply a process limit.
  291          */
  292         if (cc > sb_max_adj)
  293                 return (0);
  294         if (td != NULL) {
  295                 PROC_LOCK(td->td_proc);
  296                 sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE);
  297                 PROC_UNLOCK(td->td_proc);
  298         } else
  299                 sbsize_limit = RLIM_INFINITY;
  300         if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
  301             sbsize_limit))
  302                 return (0);
  303         sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
  304         if (sb->sb_lowat > sb->sb_hiwat)
  305                 sb->sb_lowat = sb->sb_hiwat;
  306         return (1);
  307 }
  308 
  309 int
  310 sbreserve(struct sockbuf *sb, u_long cc, struct socket *so, 
  311     struct thread *td)
  312 {
  313         int error;
  314 
  315         SOCKBUF_LOCK(sb);
  316         error = sbreserve_locked(sb, cc, so, td);
  317         SOCKBUF_UNLOCK(sb);
  318         return (error);
  319 }
  320 
  321 /*
  322  * Free mbufs held by a socket, and reserved mbuf space.
  323  */
  324 void
  325 sbrelease_internal(struct sockbuf *sb, struct socket *so)
  326 {
  327 
  328         sbflush_internal(sb);
  329         (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
  330             RLIM_INFINITY);
  331         sb->sb_mbmax = 0;
  332 }
  333 
  334 void
  335 sbrelease_locked(struct sockbuf *sb, struct socket *so)
  336 {
  337 
  338         SOCKBUF_LOCK_ASSERT(sb);
  339 
  340         sbrelease_internal(sb, so);
  341 }
  342 
  343 void
  344 sbrelease(struct sockbuf *sb, struct socket *so)
  345 {
  346 
  347         SOCKBUF_LOCK(sb);
  348         sbrelease_locked(sb, so);
  349         SOCKBUF_UNLOCK(sb);
  350 }
  351 
  352 void
  353 sbdestroy(struct sockbuf *sb, struct socket *so)
  354 {
  355 
  356         sbrelease_internal(sb, so);
  357 }
  358 
  359 /*
  360  * Routines to add and remove data from an mbuf queue.
  361  *
  362  * The routines sbappend() or sbappendrecord() are normally called to append
  363  * new mbufs to a socket buffer, after checking that adequate space is
  364  * available, comparing the function sbspace() with the amount of data to be
  365  * added.  sbappendrecord() differs from sbappend() in that data supplied is
  366  * treated as the beginning of a new record.  To place a sender's address,
  367  * optional access rights, and data in a socket receive buffer,
  368  * sbappendaddr() should be used.  To place access rights and data in a
  369  * socket receive buffer, sbappendrights() should be used.  In either case,
  370  * the new data begins a new record.  Note that unlike sbappend() and
  371  * sbappendrecord(), these routines check for the caller that there will be
  372  * enough space to store the data.  Each fails if there is not enough space,
  373  * or if it cannot find mbufs to store additional information in.
  374  *
  375  * Reliable protocols may use the socket send buffer to hold data awaiting
  376  * acknowledgement.  Data is normally copied from a socket send buffer in a
  377  * protocol with m_copy for output to a peer, and then removing the data from
  378  * the socket buffer with sbdrop() or sbdroprecord() when the data is
  379  * acknowledged by the peer.
  380  */
  381 #ifdef SOCKBUF_DEBUG
  382 void
  383 sblastrecordchk(struct sockbuf *sb, const char *file, int line)
  384 {
  385         struct mbuf *m = sb->sb_mb;
  386 
  387         SOCKBUF_LOCK_ASSERT(sb);
  388 
  389         while (m && m->m_nextpkt)
  390                 m = m->m_nextpkt;
  391 
  392         if (m != sb->sb_lastrecord) {
  393                 printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
  394                         __func__, sb->sb_mb, sb->sb_lastrecord, m);
  395                 printf("packet chain:\n");
  396                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
  397                         printf("\t%p\n", m);
  398                 panic("%s from %s:%u", __func__, file, line);
  399         }
  400 }
  401 
  402 void
  403 sblastmbufchk(struct sockbuf *sb, const char *file, int line)
  404 {
  405         struct mbuf *m = sb->sb_mb;
  406         struct mbuf *n;
  407 
  408         SOCKBUF_LOCK_ASSERT(sb);
  409 
  410         while (m && m->m_nextpkt)
  411                 m = m->m_nextpkt;
  412 
  413         while (m && m->m_next)
  414                 m = m->m_next;
  415 
  416         if (m != sb->sb_mbtail) {
  417                 printf("%s: sb_mb %p sb_mbtail %p last %p\n",
  418                         __func__, sb->sb_mb, sb->sb_mbtail, m);
  419                 printf("packet tree:\n");
  420                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
  421                         printf("\t");
  422                         for (n = m; n != NULL; n = n->m_next)
  423                                 printf("%p ", n);
  424                         printf("\n");
  425                 }
  426                 panic("%s from %s:%u", __func__, file, line);
  427         }
  428 }
  429 #endif /* SOCKBUF_DEBUG */
  430 
  431 #define SBLINKRECORD(sb, m0) do {                                       \
  432         SOCKBUF_LOCK_ASSERT(sb);                                        \
  433         if ((sb)->sb_lastrecord != NULL)                                \
  434                 (sb)->sb_lastrecord->m_nextpkt = (m0);                  \
  435         else                                                            \
  436                 (sb)->sb_mb = (m0);                                     \
  437         (sb)->sb_lastrecord = (m0);                                     \
  438 } while (/*CONSTCOND*/0)
  439 
  440 /*
  441  * Append mbuf chain m to the last record in the socket buffer sb.  The
  442  * additional space associated the mbuf chain is recorded in sb.  Empty mbufs
  443  * are discarded and mbufs are compacted where possible.
  444  */
  445 void
  446 sbappend_locked(struct sockbuf *sb, struct mbuf *m)
  447 {
  448         struct mbuf *n;
  449 
  450         SOCKBUF_LOCK_ASSERT(sb);
  451 
  452         if (m == 0)
  453                 return;
  454 
  455         SBLASTRECORDCHK(sb);
  456         n = sb->sb_mb;
  457         if (n) {
  458                 while (n->m_nextpkt)
  459                         n = n->m_nextpkt;
  460                 do {
  461                         if (n->m_flags & M_EOR) {
  462                                 sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
  463                                 return;
  464                         }
  465                 } while (n->m_next && (n = n->m_next));
  466         } else {
  467                 /*
  468                  * XXX Would like to simply use sb_mbtail here, but
  469                  * XXX I need to verify that I won't miss an EOR that
  470                  * XXX way.
  471                  */
  472                 if ((n = sb->sb_lastrecord) != NULL) {
  473                         do {
  474                                 if (n->m_flags & M_EOR) {
  475                                         sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
  476                                         return;
  477                                 }
  478                         } while (n->m_next && (n = n->m_next));
  479                 } else {
  480                         /*
  481                          * If this is the first record in the socket buffer,
  482                          * it's also the last record.
  483                          */
  484                         sb->sb_lastrecord = m;
  485                 }
  486         }
  487         sbcompress(sb, m, n);
  488         SBLASTRECORDCHK(sb);
  489 }
  490 
  491 /*
  492  * Append mbuf chain m to the last record in the socket buffer sb.  The
  493  * additional space associated the mbuf chain is recorded in sb.  Empty mbufs
  494  * are discarded and mbufs are compacted where possible.
  495  */
  496 void
  497 sbappend(struct sockbuf *sb, struct mbuf *m)
  498 {
  499 
  500         SOCKBUF_LOCK(sb);
  501         sbappend_locked(sb, m);
  502         SOCKBUF_UNLOCK(sb);
  503 }
  504 
  505 /*
  506  * This version of sbappend() should only be used when the caller absolutely
  507  * knows that there will never be more than one record in the socket buffer,
  508  * that is, a stream protocol (such as TCP).
  509  */
  510 void
  511 sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
  512 {
  513         SOCKBUF_LOCK_ASSERT(sb);
  514 
  515         KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
  516         KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
  517 
  518         SBLASTMBUFCHK(sb);
  519 
  520         sbcompress(sb, m, sb->sb_mbtail);
  521 
  522         sb->sb_lastrecord = sb->sb_mb;
  523         SBLASTRECORDCHK(sb);
  524 }
  525 
  526 /*
  527  * This version of sbappend() should only be used when the caller absolutely
  528  * knows that there will never be more than one record in the socket buffer,
  529  * that is, a stream protocol (such as TCP).
  530  */
  531 void
  532 sbappendstream(struct sockbuf *sb, struct mbuf *m)
  533 {
  534 
  535         SOCKBUF_LOCK(sb);
  536         sbappendstream_locked(sb, m);
  537         SOCKBUF_UNLOCK(sb);
  538 }
  539 
  540 #ifdef SOCKBUF_DEBUG
  541 void
  542 sbcheck(struct sockbuf *sb)
  543 {
  544         struct mbuf *m;
  545         struct mbuf *n = 0;
  546         u_long len = 0, mbcnt = 0;
  547 
  548         SOCKBUF_LOCK_ASSERT(sb);
  549 
  550         for (m = sb->sb_mb; m; m = n) {
  551             n = m->m_nextpkt;
  552             for (; m; m = m->m_next) {
  553                 len += m->m_len;
  554                 mbcnt += MSIZE;
  555                 if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
  556                         mbcnt += m->m_ext.ext_size;
  557             }
  558         }
  559         if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
  560                 printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
  561                     mbcnt, sb->sb_mbcnt);
  562                 panic("sbcheck");
  563         }
  564 }
  565 #endif
  566 
  567 /*
  568  * As above, except the mbuf chain begins a new record.
  569  */
  570 void
  571 sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
  572 {
  573         struct mbuf *m;
  574 
  575         SOCKBUF_LOCK_ASSERT(sb);
  576 
  577         if (m0 == 0)
  578                 return;
  579         m = sb->sb_mb;
  580         if (m)
  581                 while (m->m_nextpkt)
  582                         m = m->m_nextpkt;
  583         /*
  584          * Put the first mbuf on the queue.  Note this permits zero length
  585          * records.
  586          */
  587         sballoc(sb, m0);
  588         SBLASTRECORDCHK(sb);
  589         SBLINKRECORD(sb, m0);
  590         if (m)
  591                 m->m_nextpkt = m0;
  592         else
  593                 sb->sb_mb = m0;
  594         m = m0->m_next;
  595         m0->m_next = 0;
  596         if (m && (m0->m_flags & M_EOR)) {
  597                 m0->m_flags &= ~M_EOR;
  598                 m->m_flags |= M_EOR;
  599         }
  600         sbcompress(sb, m, m0);
  601 }
  602 
  603 /*
  604  * As above, except the mbuf chain begins a new record.
  605  */
  606 void
  607 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
  608 {
  609 
  610         SOCKBUF_LOCK(sb);
  611         sbappendrecord_locked(sb, m0);
  612         SOCKBUF_UNLOCK(sb);
  613 }
  614 
  615 /*
  616  * Append address and data, and optionally, control (ancillary) data to the
  617  * receive queue of a socket.  If present, m0 must include a packet header
  618  * with total length.  Returns 0 if no space in sockbuf or insufficient
  619  * mbufs.
  620  */
  621 int
  622 sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
  623     struct mbuf *m0, struct mbuf *control)
  624 {
  625         struct mbuf *m, *n, *nlast;
  626         int space = asa->sa_len;
  627 
  628         SOCKBUF_LOCK_ASSERT(sb);
  629 
  630         if (m0 && (m0->m_flags & M_PKTHDR) == 0)
  631                 panic("sbappendaddr_locked");
  632         if (m0)
  633                 space += m0->m_pkthdr.len;
  634         space += m_length(control, &n);
  635 
  636         if (space > sbspace(sb))
  637                 return (0);
  638 #if MSIZE <= 256
  639         if (asa->sa_len > MLEN)
  640                 return (0);
  641 #endif
  642         MGET(m, M_DONTWAIT, MT_SONAME);
  643         if (m == 0)
  644                 return (0);
  645         m->m_len = asa->sa_len;
  646         bcopy(asa, mtod(m, caddr_t), asa->sa_len);
  647         if (n)
  648                 n->m_next = m0;         /* concatenate data to control */
  649         else
  650                 control = m0;
  651         m->m_next = control;
  652         for (n = m; n->m_next != NULL; n = n->m_next)
  653                 sballoc(sb, n);
  654         sballoc(sb, n);
  655         nlast = n;
  656         SBLINKRECORD(sb, m);
  657 
  658         sb->sb_mbtail = nlast;
  659         SBLASTMBUFCHK(sb);
  660 
  661         SBLASTRECORDCHK(sb);
  662         return (1);
  663 }
  664 
  665 /*
  666  * Append address and data, and optionally, control (ancillary) data to the
  667  * receive queue of a socket.  If present, m0 must include a packet header
  668  * with total length.  Returns 0 if no space in sockbuf or insufficient
  669  * mbufs.
  670  */
  671 int
  672 sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
  673     struct mbuf *m0, struct mbuf *control)
  674 {
  675         int retval;
  676 
  677         SOCKBUF_LOCK(sb);
  678         retval = sbappendaddr_locked(sb, asa, m0, control);
  679         SOCKBUF_UNLOCK(sb);
  680         return (retval);
  681 }
  682 
  683 int
  684 sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
  685     struct mbuf *control)
  686 {
  687         struct mbuf *m, *n, *mlast;
  688         int space;
  689 
  690         SOCKBUF_LOCK_ASSERT(sb);
  691 
  692         if (control == 0)
  693                 panic("sbappendcontrol_locked");
  694         space = m_length(control, &n) + m_length(m0, NULL);
  695 
  696         if (space > sbspace(sb))
  697                 return (0);
  698         n->m_next = m0;                 /* concatenate data to control */
  699 
  700         SBLASTRECORDCHK(sb);
  701 
  702         for (m = control; m->m_next; m = m->m_next)
  703                 sballoc(sb, m);
  704         sballoc(sb, m);
  705         mlast = m;
  706         SBLINKRECORD(sb, control);
  707 
  708         sb->sb_mbtail = mlast;
  709         SBLASTMBUFCHK(sb);
  710 
  711         SBLASTRECORDCHK(sb);
  712         return (1);
  713 }
  714 
  715 int
  716 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
  717 {
  718         int retval;
  719 
  720         SOCKBUF_LOCK(sb);
  721         retval = sbappendcontrol_locked(sb, m0, control);
  722         SOCKBUF_UNLOCK(sb);
  723         return (retval);
  724 }
  725 
/*
 * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
 * (n).  If (n) is NULL, the buffer is presumed empty.
 *
 * When the data is compressed, mbufs in the chain may be handled in one of
 * three ways:
 *
 * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
 *     record boundary, and no change in data type).
 *
 * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
 *     an mbuf already in the socket buffer.  This can occur if an
 *     appropriate mbuf exists, there is room, and no merging of data types
 *     will occur.
 *
 * (3) The mbuf may be appended to the end of the existing mbuf chain.
 *
 * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
 * end-of-record.
 *
 * Must be called with the socket buffer lock held.  Note that sb_mbtail is
 * updated only in case (3); callers that need it set when nothing is
 * appended must set it themselves.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
        int eor = 0;            /* accumulates M_EOR seen on incoming mbufs */
        struct mbuf *o;         /* neighbour used for the type comparison */

        SOCKBUF_LOCK_ASSERT(sb);

        while (m) {
                eor |= m->m_flags & M_EOR;
                /*
                 * Case (1): an empty mbuf is dropped when no EOR has been
                 * seen so far, or when a neighbour (the next incoming mbuf,
                 * else n) exists and has the same type.
                 */
                if (m->m_len == 0 &&
                    (eor == 0 ||
                     (((o = m->m_next) || (o = n)) &&
                      o->m_type == m->m_type))) {
                        /* Keep sb_lastrecord off the mbuf being freed. */
                        if (sb->sb_lastrecord == m)
                                sb->sb_lastrecord = m->m_next;
                        m = m_free(m);
                        continue;
                }
                /*
                 * Case (2): copy m's data into the trailing space of n and
                 * free m.  Only sb_cc (and sb_ctl for non-data types) are
                 * adjusted here; sballoc() is not called for m.
                 */
                if (n && (n->m_flags & M_EOR) == 0 &&
                    M_WRITABLE(n) &&
                    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
                    m->m_len <= M_TRAILINGSPACE(n) &&
                    n->m_type == m->m_type) {
                        bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
                            (unsigned)m->m_len);
                        n->m_len += m->m_len;
                        sb->sb_cc += m->m_len;
                        if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
                                /* XXX: Probably don't need.*/
                                sb->sb_ctl += m->m_len;
                        m = m_free(m);
                        continue;
                }
                /*
                 * Case (3): link m behind n (or make it the head of the
                 * buffer when n is NULL) and make it the new tail.
                 */
                if (n)
                        n->m_next = m;
                else
                        sb->sb_mb = m;
                sb->sb_mbtail = m;
                sballoc(sb, m);
                n = m;
                /* EOR is re-applied to the final mbuf after the loop. */
                m->m_flags &= ~M_EOR;
                m = m->m_next;
                n->m_next = 0;
        }
        if (eor) {
                KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
                n->m_flags |= eor;
        }
        SBLASTMBUFCHK(sb);
}
  797 
  798 /*
  799  * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
  800  */
  801 static void
  802 sbflush_internal(struct sockbuf *sb)
  803 {
  804 
  805         while (sb->sb_mbcnt) {
  806                 /*
  807                  * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
  808                  * we would loop forever. Panic instead.
  809                  */
  810                 if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
  811                         break;
  812                 sbdrop_internal(sb, (int)sb->sb_cc);
  813         }
  814         if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
  815                 panic("sbflush_internal: cc %u || mb %p || mbcnt %u",
  816                     sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
  817 }
  818 
  819 void
  820 sbflush_locked(struct sockbuf *sb)
  821 {
  822 
  823         SOCKBUF_LOCK_ASSERT(sb);
  824         sbflush_internal(sb);
  825 }
  826 
  827 void
  828 sbflush(struct sockbuf *sb)
  829 {
  830 
  831         SOCKBUF_LOCK(sb);
  832         sbflush_locked(sb);
  833         SOCKBUF_UNLOCK(sb);
  834 }
  835 
/*
 * Drop data from (the front of) a sockbuf.
 *
 * Removes len bytes starting at sb_mb, freeing every mbuf that is consumed
 * completely and trimming the front of the mbuf in which the drop ends.
 * Panics ("sbdrop") if the buffer runs out of mbufs before len bytes have
 * been dropped.  Any zero-length mbufs left at the front are freed as well.
 * No lock assertion is made here.
 */
static void
sbdrop_internal(struct sockbuf *sb, int len)
{
        struct mbuf *m;
        struct mbuf *next;

        /* m walks the current record; next is the record that follows it. */
        next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
        while (len > 0) {
                if (m == 0) {
                        /* Record exhausted: continue with the next one. */
                        if (next == 0)
                                panic("sbdrop");
                        m = next;
                        next = m->m_nextpkt;
                        continue;
                }
                if (m->m_len > len) {
                        /*
                         * The drop ends inside this mbuf: trim its front
                         * and adjust the byte counters by hand.
                         */
                        m->m_len -= len;
                        m->m_data += len;
                        sb->sb_cc -= len;
                        if (sb->sb_sndptroff != 0)
                                sb->sb_sndptroff -= len;
                        if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
                                sb->sb_ctl -= len;
                        break;
                }
                /* The whole mbuf is consumed: unaccount and free it. */
                len -= m->m_len;
                sbfree(sb, m);
                m = m_free(m);
        }
        /* Also discard any empty mbufs now at the front. */
        while (m && m->m_len == 0) {
                sbfree(sb, m);
                m = m_free(m);
        }
        /*
         * Re-establish the head of the buffer: the surviving mbuf heads its
         * record and is linked to the following record, or, if nothing
         * survived, the following record becomes the head.
         */
        if (m) {
                sb->sb_mb = m;
                m->m_nextpkt = next;
        } else
                sb->sb_mb = next;
        /*
         * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
         * sb_lastrecord is up-to-date if we dropped part of the last record.
         */
        m = sb->sb_mb;
        if (m == NULL) {
                sb->sb_mbtail = NULL;
                sb->sb_lastrecord = NULL;
        } else if (m->m_nextpkt == NULL) {
                sb->sb_lastrecord = m;
        }
}
  889 
  890 /*
  891  * Drop data from (the front of) a sockbuf.
  892  */
  893 void
  894 sbdrop_locked(struct sockbuf *sb, int len)
  895 {
  896 
  897         SOCKBUF_LOCK_ASSERT(sb);
  898 
  899         sbdrop_internal(sb, len);
  900 }
  901 
  902 void
  903 sbdrop(struct sockbuf *sb, int len)
  904 {
  905 
  906         SOCKBUF_LOCK(sb);
  907         sbdrop_locked(sb, len);
  908         SOCKBUF_UNLOCK(sb);
  909 }
  910 
  911 /*
  912  * Maintain a pointer and offset pair into the socket buffer mbuf chain to
  913  * avoid traversal of the entire socket buffer for larger offsets.
  914  */
  915 struct mbuf *
  916 sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
  917 {
  918         struct mbuf *m, *ret;
  919 
  920         KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
  921         KASSERT(off + len <= sb->sb_cc, ("%s: beyond sb", __func__));
  922         KASSERT(sb->sb_sndptroff <= sb->sb_cc, ("%s: sndptroff broken", __func__));
  923 
  924         /*
  925          * Is off below stored offset? Happens on retransmits.
  926          * Just return, we can't help here.
  927          */
  928         if (sb->sb_sndptroff > off) {
  929                 *moff = off;
  930                 return (sb->sb_mb);
  931         }
  932 
  933         /* Return closest mbuf in chain for current offset. */
  934         *moff = off - sb->sb_sndptroff;
  935         m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb;
  936 
  937         /* Advance by len to be as close as possible for the next transmit. */
  938         for (off = off - sb->sb_sndptroff + len - 1;
  939              off > 0 && off >= m->m_len;
  940              m = m->m_next) {
  941                 sb->sb_sndptroff += m->m_len;
  942                 off -= m->m_len;
  943         }
  944         sb->sb_sndptr = m;
  945 
  946         return (ret);
  947 }
  948 
  949 /*
  950  * Drop a record off the front of a sockbuf and move the next record to the
  951  * front.
  952  */
  953 void
  954 sbdroprecord_locked(struct sockbuf *sb)
  955 {
  956         struct mbuf *m;
  957 
  958         SOCKBUF_LOCK_ASSERT(sb);
  959 
  960         m = sb->sb_mb;
  961         if (m) {
  962                 sb->sb_mb = m->m_nextpkt;
  963                 do {
  964                         sbfree(sb, m);
  965                         m = m_free(m);
  966                 } while (m);
  967         }
  968         SB_EMPTY_FIXUP(sb);
  969 }
  970 
  971 /*
  972  * Drop a record off the front of a sockbuf and move the next record to the
  973  * front.
  974  */
  975 void
  976 sbdroprecord(struct sockbuf *sb)
  977 {
  978 
  979         SOCKBUF_LOCK(sb);
  980         sbdroprecord_locked(sb);
  981         SOCKBUF_UNLOCK(sb);
  982 }
  983 
  984 /*
  985  * Create a "control" mbuf containing the specified data with the specified
  986  * type for presentation on a socket buffer.
  987  */
  988 struct mbuf *
  989 sbcreatecontrol(caddr_t p, int size, int type, int level)
  990 {
  991         struct cmsghdr *cp;
  992         struct mbuf *m;
  993 
  994         if (CMSG_SPACE((u_int)size) > MCLBYTES)
  995                 return ((struct mbuf *) NULL);
  996         if (CMSG_SPACE((u_int)size) > MLEN)
  997                 m = m_getcl(M_DONTWAIT, MT_CONTROL, 0);
  998         else
  999                 m = m_get(M_DONTWAIT, MT_CONTROL);
 1000         if (m == NULL)
 1001                 return ((struct mbuf *) NULL);
 1002         cp = mtod(m, struct cmsghdr *);
 1003         m->m_len = 0;
 1004         KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
 1005             ("sbcreatecontrol: short mbuf"));
 1006         if (p != NULL)
 1007                 (void)memcpy(CMSG_DATA(cp), p, size);
 1008         m->m_len = CMSG_SPACE(size);
 1009         cp->cmsg_len = CMSG_LEN(size);
 1010         cp->cmsg_level = level;
 1011         cp->cmsg_type = type;
 1012         return (m);
 1013 }
 1014 
 1015 /*
 1016  * This does the same for socket buffers that sotoxsocket does for sockets:
 1017  * generate an user-format data structure describing the socket buffer.  Note
 1018  * that the xsockbuf structure, since it is always embedded in a socket, does
 1019  * not include a self pointer nor a length.  We make this entry point public
 1020  * in case some other mechanism needs it.
 1021  */
 1022 void
 1023 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
 1024 {
 1025 
 1026         xsb->sb_cc = sb->sb_cc;
 1027         xsb->sb_hiwat = sb->sb_hiwat;
 1028         xsb->sb_mbcnt = sb->sb_mbcnt;
 1029         xsb->sb_mbmax = sb->sb_mbmax;
 1030         xsb->sb_lowat = sb->sb_lowat;
 1031         xsb->sb_flags = sb->sb_flags;
 1032         xsb->sb_timeo = sb->sb_timeo;
 1033 }
 1034 
 1035 /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
 1036 static int dummy;
 1037 SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
 1038 SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW,
 1039     &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
 1040 SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
 1041     &sb_efficiency, 0, "");
Cache object: 9207b797731b95d538db6c0e8df4c085
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/uipc_sockbuf.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_sockbuf.c