FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_sockbuf.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)uipc_socket2.c      8.1 (Berkeley) 6/10/93
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include "opt_kern_tls.h"
   38 #include "opt_param.h"
   39 
   40 #include <sys/param.h>
   41 #include <sys/aio.h> /* for aio_swake proto */
   42 #include <sys/kernel.h>
   43 #include <sys/ktls.h>
   44 #include <sys/lock.h>
   45 #include <sys/malloc.h>
   46 #include <sys/mbuf.h>
   47 #include <sys/mutex.h>
   48 #include <sys/proc.h>
   49 #include <sys/protosw.h>
   50 #include <sys/resourcevar.h>
   51 #include <sys/signalvar.h>
   52 #include <sys/socket.h>
   53 #include <sys/socketvar.h>
   54 #include <sys/sx.h>
   55 #include <sys/sysctl.h>
   56 
   57 #include <netinet/in.h>
   58 
   59 /*
   60  * Function pointer set by the AIO routines so that the socket buffer code
   61  * can call back into the AIO module if it is loaded.
   62  */
   63 void    (*aio_swake)(struct socket *, struct sockbuf *);
   64 
   65 /*
   66  * Primitive routines for operating on socket buffers
   67  */
   68 
   69 #define BUF_MAX_ADJ(_sz)        (((u_quad_t)(_sz)) * MCLBYTES / (MSIZE + MCLBYTES))
   70 
   71 u_long  sb_max = SB_MAX;
   72 u_long sb_max_adj = BUF_MAX_ADJ(SB_MAX);
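
/*
 * Worked example (illustrative annotation, not part of the original file):
 * BUF_MAX_ADJ() scales a byte limit down by the best-case ratio of payload
 * to total allocation, MCLBYTES / (MSIZE + MCLBYTES), because each cluster
 * of data also costs one mbuf header.  Assuming the usual MSIZE = 256 and
 * MCLBYTES = 2048:
 *
 *      BUF_MAX_ADJ(sz) = sz * 2048 / 2304      (about 8/9 of sz)
 *
 * so the default sb_max of 2 MB + 8 KB yields an sb_max_adj of roughly
 * 1.87 MB of actual socket data.
 */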
   73 
   74 static  u_long sb_efficiency = 8;       /* parameter for sbreserve() */
   75 
   76 #ifdef KERN_TLS
   77 static void     sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m,
   78     struct mbuf *n);
   79 #endif
   80 static struct mbuf      *sbcut_internal(struct sockbuf *sb, int len);
   81 static void     sbflush_internal(struct sockbuf *sb);
   82 
   83 /*
   84  * Our own version of m_clrprotoflags() that can preserve M_NOTREADY.
   85  */
   86 static void
   87 sbm_clrprotoflags(struct mbuf *m, int flags)
   88 {
   89         int mask;
   90 
   91         mask = ~M_PROTOFLAGS;
   92         if (flags & PRUS_NOTREADY)
   93                 mask |= M_NOTREADY;
   94         while (m) {
   95                 m->m_flags &= mask;
   96                 m = m->m_next;
   97         }
   98 }
   99 
  100 /*
  101  * Compress M_NOTREADY mbufs after they have been readied by sbready().
  102  *
  103  * sbcompress() skips M_NOTREADY mbufs since the data is not available to
  104  * be copied at the time of sbcompress().  This function combines small
  105  * mbufs similar to sbcompress() once mbufs are ready.  'm0' is the first
  106  * mbuf sbready() marked ready, and 'end' is the first mbuf still not
  107  * ready.
  108  */
  109 static void
  110 sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end)
  111 {
  112         struct mbuf *m, *n;
  113         int ext_size;
  114 
  115         SOCKBUF_LOCK_ASSERT(sb);
  116 
  117         if ((sb->sb_flags & SB_NOCOALESCE) != 0)
  118                 return;
  119 
  120         for (m = m0; m != end; m = m->m_next) {
  121                 MPASS((m->m_flags & M_NOTREADY) == 0);
  122                 /*
  123                  * NB: In sbcompress(), 'n' is the last mbuf in the
  124                  * socket buffer and 'm' is the new mbuf being copied
  125                  * into the trailing space of 'n'.  Here, the roles
  126                  * are reversed and 'n' is the next mbuf after 'm'
  127                  * that is being copied into the trailing space of
  128                  * 'm'.
  129                  */
  130                 n = m->m_next;
  131 #ifdef KERN_TLS
  132                 /* Try to coalesce adjacent ktls mbuf hdr/trailers. */
  133                 if ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
  134                     (m->m_flags & M_EXTPG) &&
  135                     (n->m_flags & M_EXTPG) &&
  136                     !mbuf_has_tls_session(m) &&
  137                     !mbuf_has_tls_session(n)) {
  138                         int hdr_len, trail_len;
  139 
  140                         hdr_len = n->m_epg_hdrlen;
  141                         trail_len = m->m_epg_trllen;
  142                         if (trail_len != 0 && hdr_len != 0 &&
  143                             trail_len + hdr_len <= MBUF_PEXT_TRAIL_LEN) {
  144                                 /* copy n's header to m's trailer */
  145                                 memcpy(&m->m_epg_trail[trail_len],
  146                                     n->m_epg_hdr, hdr_len);
  147                                 m->m_epg_trllen += hdr_len;
  148                                 m->m_len += hdr_len;
  149                                 n->m_epg_hdrlen = 0;
  150                                 n->m_len -= hdr_len;
  151                         }
  152                 }
  153 #endif
  154 
  155                 /* Compress small unmapped mbufs into plain mbufs. */
  156                 if ((m->m_flags & M_EXTPG) && m->m_len <= MLEN &&
  157                     !mbuf_has_tls_session(m)) {
  158                         ext_size = m->m_ext.ext_size;
  159                         if (mb_unmapped_compress(m) == 0)
  160                                 sb->sb_mbcnt -= ext_size;
  161                 }
  162 
  163                 while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
  164                     M_WRITABLE(m) &&
  165                     (m->m_flags & M_EXTPG) == 0 &&
  166                     !mbuf_has_tls_session(n) &&
  167                     !mbuf_has_tls_session(m) &&
  168                     n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
  169                     n->m_len <= M_TRAILINGSPACE(m) &&
  170                     m->m_type == n->m_type) {
  171                         KASSERT(sb->sb_lastrecord != n,
  172                     ("%s: merging start of record (%p) into previous mbuf (%p)",
  173                             __func__, n, m));
  174                         m_copydata(n, 0, n->m_len, mtodo(m, m->m_len));
  175                         m->m_len += n->m_len;
  176                         m->m_next = n->m_next;
  177                         m->m_flags |= n->m_flags & M_EOR;
  178                         if (sb->sb_mbtail == n)
  179                                 sb->sb_mbtail = m;
  180 
  181                         sb->sb_mbcnt -= MSIZE;
  182                         if (n->m_flags & M_EXT)
  183                                 sb->sb_mbcnt -= n->m_ext.ext_size;
  184                         m_free(n);
  185                         n = m->m_next;
  186                 }
  187         }
  188         SBLASTRECORDCHK(sb);
  189         SBLASTMBUFCHK(sb);
  190 }
  191 
  192 /*
  193  * Mark ready "count" units of I/O starting with "m".  Most mbufs
  194  * count as a single unit of I/O except for M_EXTPG mbufs which
  195  * are backed by multiple pages.
  196  */
  197 int
  198 sbready(struct sockbuf *sb, struct mbuf *m0, int count)
  199 {
  200         struct mbuf *m;
  201         u_int blocker;
  202 
  203         SOCKBUF_LOCK_ASSERT(sb);
  204         KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));
  205         KASSERT(count > 0, ("%s: invalid count %d", __func__, count));
  206 
  207         m = m0;
  208         blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;
  209 
  210         while (count > 0) {
  211                 KASSERT(m->m_flags & M_NOTREADY,
  212                     ("%s: m %p !M_NOTREADY", __func__, m));
  213                 if ((m->m_flags & M_EXTPG) != 0 && m->m_epg_npgs != 0) {
  214                         if (count < m->m_epg_nrdy) {
  215                                 m->m_epg_nrdy -= count;
  216                                 count = 0;
  217                                 break;
  218                         }
  219                         count -= m->m_epg_nrdy;
  220                         m->m_epg_nrdy = 0;
  221                 } else
  222                         count--;
  223 
  224                 m->m_flags &= ~(M_NOTREADY | blocker);
  225                 if (blocker)
  226                         sb->sb_acc += m->m_len;
  227                 m = m->m_next;
  228         }
  229 
  230         /*
  231          * If the first mbuf is still not fully ready because only
  232          * some of its backing pages were readied, no further progress
  233          * can be made.
  234          */
  235         if (m0 == m) {
  236                 MPASS(m->m_flags & M_NOTREADY);
  237                 return (EINPROGRESS);
  238         }
  239 
  240         if (!blocker) {
  241                 sbready_compress(sb, m0, m);
  242                 return (EINPROGRESS);
  243         }
  244 
  245         /* This one was blocking all the queue. */
  246         for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
  247                 KASSERT(m->m_flags & M_BLOCKED,
  248                     ("%s: m %p !M_BLOCKED", __func__, m));
  249                 m->m_flags &= ~M_BLOCKED;
  250                 sb->sb_acc += m->m_len;
  251         }
  252 
  253         sb->sb_fnrdy = m;
  254         sbready_compress(sb, m0, m);
  255 
  256         return (0);
  257 }
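
/*
 * Illustrative sketch (annotation, not part of the original file): a
 * typical sbready() caller is an I/O completion handler readying pages it
 * appended earlier as M_NOTREADY.  The name example_iodone() and the
 * "kick the output path" step are assumptions; the locking and the
 * 0-vs-EINPROGRESS contract follow sbready() above.
 */
#if 0
static void
example_iodone(struct socket *so, struct mbuf *m, int npages)
{
        int error;

        SOCK_SENDBUF_LOCK(so);
        error = sbready(&so->so_snd, m, npages);
        SOCK_SENDBUF_UNLOCK(so);
        if (error == 0) {
                /* The queue head unblocked; have the protocol transmit. */
        }
        /* error == EINPROGRESS: earlier mbufs are still not ready. */
}
#endif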
  258 
  259 /*
  260  * Adjust sockbuf state reflecting allocation of m.
  261  */
  262 void
  263 sballoc(struct sockbuf *sb, struct mbuf *m)
  264 {
  265 
  266         SOCKBUF_LOCK_ASSERT(sb);
  267 
  268         sb->sb_ccc += m->m_len;
  269 
  270         if (sb->sb_fnrdy == NULL) {
  271                 if (m->m_flags & M_NOTREADY)
  272                         sb->sb_fnrdy = m;
  273                 else
  274                         sb->sb_acc += m->m_len;
  275         } else
  276                 m->m_flags |= M_BLOCKED;
  277 
  278         if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
  279                 sb->sb_ctl += m->m_len;
  280 
  281         sb->sb_mbcnt += MSIZE;
  282 
  283         if (m->m_flags & M_EXT)
  284                 sb->sb_mbcnt += m->m_ext.ext_size;
  285 }
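
/*
 * Worked example (illustrative annotation, not part of the original file):
 * appending three 100-byte mbufs where only the second is M_NOTREADY
 * gives, after three sballoc() calls:
 *
 *      sb_ccc = 300    all data, ready or not
 *      sb_acc = 100    only the first mbuf may be read
 *      sb_fnrdy        points at the second mbuf
 *
 * The third mbuf is marked M_BLOCKED even though its data is present,
 * because it sits behind the first not-ready mbuf and must not be exposed
 * to readers out of order.
 */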
  286 
  287 /*
  288  * Adjust sockbuf state reflecting freeing of m.
  289  */
  290 void
  291 sbfree(struct sockbuf *sb, struct mbuf *m)
  292 {
  293 
  294 #if 0   /* XXX: not yet: soclose() call path comes here w/o lock. */
  295         SOCKBUF_LOCK_ASSERT(sb);
  296 #endif
  297 
  298         sb->sb_ccc -= m->m_len;
  299 
  300         if (!(m->m_flags & M_NOTAVAIL))
  301                 sb->sb_acc -= m->m_len;
  302 
  303         if (m == sb->sb_fnrdy) {
  304                 struct mbuf *n;
  305 
  306                 KASSERT(m->m_flags & M_NOTREADY,
  307                     ("%s: m %p !M_NOTREADY", __func__, m));
  308 
  309                 n = m->m_next;
  310                 while (n != NULL && !(n->m_flags & M_NOTREADY)) {
  311                         n->m_flags &= ~M_BLOCKED;
  312                         sb->sb_acc += n->m_len;
  313                         n = n->m_next;
  314                 }
  315                 sb->sb_fnrdy = n;
  316         }
  317 
  318         if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
  319                 sb->sb_ctl -= m->m_len;
  320 
  321         sb->sb_mbcnt -= MSIZE;
  322         if (m->m_flags & M_EXT)
  323                 sb->sb_mbcnt -= m->m_ext.ext_size;
  324 
  325         if (sb->sb_sndptr == m) {
  326                 sb->sb_sndptr = NULL;
  327                 sb->sb_sndptroff = 0;
  328         }
  329         if (sb->sb_sndptroff != 0)
  330                 sb->sb_sndptroff -= m->m_len;
  331 }
  332 
  333 #ifdef KERN_TLS
  334 /*
  335  * Similar to sballoc/sbfree but does not adjust state associated with
  336  * the sb_mb chain such as sb_fnrdy or sb_sndptr*.  Also assumes mbufs
  337  * are not ready.
  338  */
  339 void
  340 sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m)
  341 {
  342 
  343         SOCKBUF_LOCK_ASSERT(sb);
  344 
  345         sb->sb_ccc += m->m_len;
  346         sb->sb_tlscc += m->m_len;
  347 
  348         sb->sb_mbcnt += MSIZE;
  349 
  350         if (m->m_flags & M_EXT)
  351                 sb->sb_mbcnt += m->m_ext.ext_size;
  352 }
  353 
  354 void
  355 sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m)
  356 {
  357 
  358 #if 0   /* XXX: not yet: soclose() call path comes here w/o lock. */
  359         SOCKBUF_LOCK_ASSERT(sb);
  360 #endif
  361 
  362         sb->sb_ccc -= m->m_len;
  363         sb->sb_tlscc -= m->m_len;
  364 
  365         sb->sb_mbcnt -= MSIZE;
  366 
  367         if (m->m_flags & M_EXT)
  368                 sb->sb_mbcnt -= m->m_ext.ext_size;
  369 }
  370 #endif
  371 
  372 /*
  373  * Socantsendmore indicates that no more data will be sent on the socket;
  374  * it is normally applied to a socket by the protocol code (in the case of
  375  * PRU_SHUTDOWN) when the user informs the system that no more data is to
  376  * be sent.  Socantrcvmore indicates that no more data will be received,
  377  * and will normally be applied to the socket by a protocol when it detects
  378  * that the peer will send no more data.  Data queued for reading in the
  379  * socket may yet be read.
  380  */
  381 void
  382 socantsendmore_locked(struct socket *so)
  383 {
  384 
  385         SOCK_SENDBUF_LOCK_ASSERT(so);
  386 
  387         so->so_snd.sb_state |= SBS_CANTSENDMORE;
  388         sowwakeup_locked(so);
  389         SOCK_SENDBUF_UNLOCK_ASSERT(so);
  390 }
  391 
  392 void
  393 socantsendmore(struct socket *so)
  394 {
  395 
  396         SOCK_SENDBUF_LOCK(so);
  397         socantsendmore_locked(so);
  398         SOCK_SENDBUF_UNLOCK_ASSERT(so);
  399 }
  400 
  401 void
  402 socantrcvmore_locked(struct socket *so)
  403 {
  404 
  405         SOCK_RECVBUF_LOCK_ASSERT(so);
  406 
  407         so->so_rcv.sb_state |= SBS_CANTRCVMORE;
  408 #ifdef KERN_TLS
  409         if (so->so_rcv.sb_flags & SB_TLS_RX)
  410                 ktls_check_rx(&so->so_rcv);
  411 #endif
  412         sorwakeup_locked(so);
  413         SOCK_RECVBUF_UNLOCK_ASSERT(so);
  414 }
  415 
  416 void
  417 socantrcvmore(struct socket *so)
  418 {
  419 
  420         SOCK_RECVBUF_LOCK(so);
  421         socantrcvmore_locked(so);
  422         SOCK_RECVBUF_UNLOCK_ASSERT(so);
  423 }
  424 
  425 void
  426 soroverflow_locked(struct socket *so)
  427 {
  428 
  429         SOCK_RECVBUF_LOCK_ASSERT(so);
  430 
  431         if (so->so_options & SO_RERROR) {
  432                 so->so_rerror = ENOBUFS;
  433                 sorwakeup_locked(so);
  434         } else
  435                 SOCK_RECVBUF_UNLOCK(so);
  436 
  437         SOCK_RECVBUF_UNLOCK_ASSERT(so);
  438 }
  439 
  440 void
  441 soroverflow(struct socket *so)
  442 {
  443 
  444         SOCK_RECVBUF_LOCK(so);
  445         soroverflow_locked(so);
  446         SOCK_RECVBUF_UNLOCK_ASSERT(so);
  447 }
  448 
  449 /*
  450  * Wait for data to arrive at/drain from a socket buffer.
  451  */
  452 int
  453 sbwait(struct socket *so, sb_which which)
  454 {
  455         struct sockbuf *sb;
  456 
  457         SOCK_BUF_LOCK_ASSERT(so, which);
  458 
  459         sb = sobuf(so, which);
  460         sb->sb_flags |= SB_WAIT;
  461         return (msleep_sbt(&sb->sb_acc, soeventmtx(so, which),
  462             (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
  463             sb->sb_timeo, 0, 0));
  464 }
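
/*
 * Illustrative sketch (annotation, not part of the original file): callers
 * use sbwait() in a loop and re-check their condition after each wakeup,
 * since a wakeup only means the buffer changed, not that enough data
 * arrived.  A minimal receive-side wait, under the conventions above:
 */
#if 0
        int error = 0;

        SOCK_RECVBUF_LOCK(so);
        while (sbavail(&so->so_rcv) < so->so_rcv.sb_lowat) {
                error = sbwait(so, SO_RCV);
                if (error != 0)         /* EINTR, ERESTART or EWOULDBLOCK */
                        break;
        }
        SOCK_RECVBUF_UNLOCK(so);
#endif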
  465 
  466 /*
  467  * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
  468  * via SIGIO if the socket has the SS_ASYNC flag set.
  469  *
  470  * Called with the socket buffer lock held; will release the lock by the end
  471  * of the function.  This allows the caller to acquire the socket buffer lock
  472  * while testing for the need for various sorts of wakeup and hold it through
  473  * to the point where it's no longer required.  We currently hold the lock
  474  * through calls out to other subsystems (with the exception of kqueue), and
  475  * then release it to avoid lock order issues.  It's not clear that's
  476  * correct.
  477  */
  478 static __always_inline void
  479 sowakeup(struct socket *so, const sb_which which)
  480 {
  481         struct sockbuf *sb;
  482         int ret;
  483 
  484         SOCK_BUF_LOCK_ASSERT(so, which);
  485 
  486         sb = sobuf(so, which);
  487         selwakeuppri(sb->sb_sel, PSOCK);
  488         if (!SEL_WAITING(sb->sb_sel))
  489                 sb->sb_flags &= ~SB_SEL;
  490         if (sb->sb_flags & SB_WAIT) {
  491                 sb->sb_flags &= ~SB_WAIT;
  492                 wakeup(&sb->sb_acc);
  493         }
  494         KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
  495         if (sb->sb_upcall != NULL) {
  496                 ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
  497                 if (ret == SU_ISCONNECTED) {
  498                         KASSERT(sb == &so->so_rcv,
  499                             ("SO_SND upcall returned SU_ISCONNECTED"));
  500                         soupcall_clear(so, SO_RCV);
  501                 }
  502         } else
  503                 ret = SU_OK;
  504         if (sb->sb_flags & SB_AIO)
  505                 sowakeup_aio(so, which);
  506         SOCK_BUF_UNLOCK(so, which);
  507         if (ret == SU_ISCONNECTED)
  508                 soisconnected(so);
  509         if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
  510                 pgsigio(&so->so_sigio, SIGIO, 0);
  511         SOCK_BUF_UNLOCK_ASSERT(so, which);
  512 }
  513 
  514 /*
  515  * Do we need to notify anyone waiting on this socket buffer when I/O is possible?
  516  */
  517 static __always_inline bool
  518 sb_notify(const struct sockbuf *sb)
  519 {
  520         return ((sb->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC |
  521             SB_UPCALL | SB_AIO | SB_KNOTE)) != 0);
  522 }
  523 
  524 void
  525 sorwakeup_locked(struct socket *so)
  526 {
  527         SOCK_RECVBUF_LOCK_ASSERT(so);
  528         if (sb_notify(&so->so_rcv))
  529                 sowakeup(so, SO_RCV);
  530         else
  531                 SOCK_RECVBUF_UNLOCK(so);
  532 }
  533 
  534 void
  535 sowwakeup_locked(struct socket *so)
  536 {
  537         SOCK_SENDBUF_LOCK_ASSERT(so);
  538         if (sb_notify(&so->so_snd))
  539                 sowakeup(so, SO_SND);
  540         else
  541                 SOCK_SENDBUF_UNLOCK(so);
  542 }
  543 
  544 /*
  545  * Socket buffer (struct sockbuf) utility routines.
  546  *
  547  * Each socket contains two socket buffers: one for sending data and one for
  548  * receiving data.  Each buffer contains a queue of mbufs, information about
  549  * the number of mbufs and amount of data in the queue, and other fields
  550  * allowing select() statements and notification on data availability to be
  551  * implemented.
  552  *
  553  * Data stored in a socket buffer is maintained as a list of records.  Each
  554  * record is a list of mbufs chained together with the m_next field.  Records
  555  * are chained together with the m_nextpkt field. The upper level routine
  556  * soreceive() expects the following conventions to be observed when placing
  557  * information in the receive buffer:
  558  *
  559  * 1. If the protocol requires each message be preceded by the sender's name,
  560  *    then a record containing that name must be present before any
  561  *    associated data (mbufs must be of type MT_SONAME).
  562  * 2. If the protocol supports the exchange of ``access rights'' (really just
  563  *    additional data associated with the message), and there are ``rights''
  564  *    to be received, then a record containing this data should be present
  565  *    (mbufs must be of type MT_RIGHTS).
  566  * 3. If a name or rights record exists, then it must be followed by a data
  567  *    record, perhaps of zero length.
  568  *
  569  * Before using a new socket structure it is first necessary to reserve
  570  * buffer space to the socket, by calling sbreserve().  This should commit
  571  * some of the available buffer space in the system buffer pool for the
  572  * socket (currently, it does nothing but enforce limits).  The space should
  573  * be released by calling sbrelease() when the socket is destroyed.
  574  */
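
/*
 * Illustrative layout (annotation, not part of the original file): a
 * datagram receive buffer holding two records built per the conventions
 * above.  Within a record, mbufs are linked by m_next; records are linked
 * by m_nextpkt from the first mbuf of each record:
 *
 *      sb_mb -> [MT_SONAME] -> [MT_CONTROL] -> [MT_DATA]     record 1
 *                   |
 *               m_nextpkt
 *                   v
 *               [MT_SONAME] -> [MT_DATA]                     record 2
 */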
  575 int
  576 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
  577 {
  578         struct thread *td = curthread;
  579 
  580         SOCK_SENDBUF_LOCK(so);
  581         SOCK_RECVBUF_LOCK(so);
  582         if (sbreserve_locked(so, SO_SND, sndcc, td) == 0)
  583                 goto bad;
  584         if (sbreserve_locked(so, SO_RCV, rcvcc, td) == 0)
  585                 goto bad2;
  586         if (so->so_rcv.sb_lowat == 0)
  587                 so->so_rcv.sb_lowat = 1;
  588         if (so->so_snd.sb_lowat == 0)
  589                 so->so_snd.sb_lowat = MCLBYTES;
  590         if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
  591                 so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
  592         SOCK_RECVBUF_UNLOCK(so);
  593         SOCK_SENDBUF_UNLOCK(so);
  594         return (0);
  595 bad2:
  596         sbrelease_locked(so, SO_SND);
  597 bad:
  598         SOCK_RECVBUF_UNLOCK(so);
  599         SOCK_SENDBUF_UNLOCK(so);
  600         return (ENOBUFS);
  601 }
  602 
  603 static int
  604 sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
  605 {
  606         int error = 0;
  607         u_long tmp_sb_max = sb_max;
  608 
  609         error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
  610         if (error || !req->newptr)
  611                 return (error);
  612         if (tmp_sb_max < MSIZE + MCLBYTES)
  613                 return (EINVAL);
  614         sb_max = tmp_sb_max;
  615         sb_max_adj = BUF_MAX_ADJ(sb_max);
  616         return (0);
  617 }
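
/*
 * Usage note (annotation, not part of the original file): this handler
 * backs the kern.ipc.maxsockbuf sysctl, so the cap can be raised at
 * runtime, e.g. "sysctl kern.ipc.maxsockbuf=8388608".  Values below
 * MSIZE + MCLBYTES are rejected with EINVAL, and sb_max_adj is recomputed
 * to match the new sb_max.
 */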
  618 
  619 /*
  620  * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
  621  * become limiting if buffering efficiency is near the normal case.
  622  */
  623 bool
  624 sbreserve_locked_limit(struct socket *so, sb_which which, u_long cc,
  625     u_long buf_max, struct thread *td)
  626 {
  627         struct sockbuf *sb = sobuf(so, which);
  628         rlim_t sbsize_limit;
  629 
  630         SOCK_BUF_LOCK_ASSERT(so, which);
  631 
  632         /*
  633          * When a thread is passed, we take into account the thread's socket
  634          * buffer size limit.  The caller will generally pass curthread, but
  635          * in the TCP input path, NULL will be passed to indicate that no
  636          * appropriate thread resource limits are available.  In that case,
  637          * we don't apply a process limit.
  638          */
  639         if (cc > BUF_MAX_ADJ(buf_max))
  640                 return (false);
  641         if (td != NULL) {
  642                 sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
  643         } else
  644                 sbsize_limit = RLIM_INFINITY;
  645         if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
  646             sbsize_limit))
  647                 return (false);
  648         sb->sb_mbmax = min(cc * sb_efficiency, buf_max);
  649         if (sb->sb_lowat > sb->sb_hiwat)
  650                 sb->sb_lowat = sb->sb_hiwat;
  651         return (true);
  652 }
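
/*
 * Worked example (illustrative annotation, not part of the original file):
 * reserving cc = 64 KB with the default sb_efficiency of 8 sets
 *
 *      sb_mbmax = min(64 KB * 8, sb_max) = 512 KB
 *
 * of mbuf storage.  The 8x headroom keeps allocation overhead (mbuf
 * headers, slack in partially filled clusters) from becoming the limiting
 * factor before the data limit sb_hiwat does.
 */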
  653 
  654 bool
  655 sbreserve_locked(struct socket *so, sb_which which, u_long cc,
  656     struct thread *td)
  657 {
  658         return (sbreserve_locked_limit(so, which, cc, sb_max, td));
  659 }
  660 
  661 int
  662 sbsetopt(struct socket *so, struct sockopt *sopt)
  663 {
  664         struct sockbuf *sb;
  665         sb_which wh;
  666         short *flags;
  667         u_int cc, *hiwat, *lowat;
  668         int error, optval;
  669 
  670         error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
  671         if (error != 0)
  672                 return (error);
  673 
  674         /*
  675          * Values < 1 make no sense for any of these options,
  676          * so disallow them.
  677          */
  678         if (optval < 1)
  679                 return (EINVAL);
  680         cc = optval;
  681 
  682         sb = NULL;
  683         SOCK_LOCK(so);
  684         if (SOLISTENING(so)) {
  685                 switch (sopt->sopt_name) {
  686                         case SO_SNDLOWAT:
  687                         case SO_SNDBUF:
  688                                 lowat = &so->sol_sbsnd_lowat;
  689                                 hiwat = &so->sol_sbsnd_hiwat;
  690                                 flags = &so->sol_sbsnd_flags;
  691                                 break;
  692                         case SO_RCVLOWAT:
  693                         case SO_RCVBUF:
  694                                 lowat = &so->sol_sbrcv_lowat;
  695                                 hiwat = &so->sol_sbrcv_hiwat;
  696                                 flags = &so->sol_sbrcv_flags;
  697                                 break;
  698                 }
  699         } else {
  700                 switch (sopt->sopt_name) {
  701                         case SO_SNDLOWAT:
  702                         case SO_SNDBUF:
  703                                 sb = &so->so_snd;
  704                                 wh = SO_SND;
  705                                 break;
  706                         case SO_RCVLOWAT:
  707                         case SO_RCVBUF:
  708                                 sb = &so->so_rcv;
  709                                 wh = SO_RCV;
  710                                 break;
  711                 }
  712                 flags = &sb->sb_flags;
  713                 hiwat = &sb->sb_hiwat;
  714                 lowat = &sb->sb_lowat;
  715                 SOCK_BUF_LOCK(so, wh);
  716         }
  717 
  718         error = 0;
  719         switch (sopt->sopt_name) {
  720         case SO_SNDBUF:
  721         case SO_RCVBUF:
  722                 if (SOLISTENING(so)) {
  723                         if (cc > sb_max_adj) {
  724                                 error = ENOBUFS;
  725                                 break;
  726                         }
  727                         *hiwat = cc;
  728                         if (*lowat > *hiwat)
  729                                 *lowat = *hiwat;
  730                 } else {
  731                         if (!sbreserve_locked(so, wh, cc, curthread))
  732                                 error = ENOBUFS;
  733                 }
  734                 if (error == 0)
  735                         *flags &= ~SB_AUTOSIZE;
  736                 break;
  737         case SO_SNDLOWAT:
  738         case SO_RCVLOWAT:
  739                 /*
  740                  * Make sure the low-water is never greater than the
  741                  * high-water.
  742                  */
  743                 *lowat = (cc > *hiwat) ? *hiwat : cc;
  744                 break;
  745         }
  746 
  747         if (!SOLISTENING(so))
  748                 SOCK_BUF_UNLOCK(so, wh);
  749         SOCK_UNLOCK(so);
  750         return (error);
  751 }
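
/*
 * Illustrative userland counterpart (annotation, not part of the original
 * file): sbsetopt() is reached via setsockopt(2).  Setting an explicit
 * size also clears SB_AUTOSIZE, disabling automatic growth.  "s" is
 * assumed to be a socket descriptor; <sys/socket.h> and <err.h> provide
 * the declarations.
 */
#if 0
        int sz = 256 * 1024;

        if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &sz, sizeof(sz)) == -1)
                err(1, "setsockopt");   /* e.g. ENOBUFS if sz exceeds kern.ipc.maxsockbuf */
#endif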
  752 
  753 /*
  754  * Free mbufs held by a socket, and reserved mbuf space.
  755  */
  756 static void
  757 sbrelease_internal(struct socket *so, sb_which which)
  758 {
  759         struct sockbuf *sb = sobuf(so, which);
  760 
  761         sbflush_internal(sb);
  762         (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
  763             RLIM_INFINITY);
  764         sb->sb_mbmax = 0;
  765 }
  766 
  767 void
  768 sbrelease_locked(struct socket *so, sb_which which)
  769 {
  770 
  771         SOCK_BUF_LOCK_ASSERT(so, which);
  772 
  773         sbrelease_internal(so, which);
  774 }
  775 
  776 void
  777 sbrelease(struct socket *so, sb_which which)
  778 {
  779 
  780         SOCK_BUF_LOCK(so, which);
  781         sbrelease_locked(so, which);
  782         SOCK_BUF_UNLOCK(so, which);
  783 }
  784 
  785 void
  786 sbdestroy(struct socket *so, sb_which which)
  787 {
  788 #ifdef KERN_TLS
  789         struct sockbuf *sb = sobuf(so, which);
  790 
  791         if (sb->sb_tls_info != NULL)
  792                 ktls_free(sb->sb_tls_info);
  793         sb->sb_tls_info = NULL;
  794 #endif
  795         sbrelease_internal(so, which);
  796 }
  797 
  798 /*
  799  * Routines to add and remove data from an mbuf queue.
  800  *
  801  * The routines sbappend() or sbappendrecord() are normally called to append
  802  * new mbufs to a socket buffer, after checking that adequate space is
  803  * available by comparing the value returned by sbspace() with the amount
  804  * of data to be added.  sbappendrecord() differs from sbappend() in that
  805  * the data supplied is treated as the beginning of a new record.  To place
  806  * a sender's address, optional access rights, and data in a socket receive
  807  * buffer, sbappendaddr() should be used.  To place access rights and data
  808  * in a socket receive buffer, sbappendrights() should be used.  In either
  809  * case, the new data begins a new record.  Note that unlike sbappend() and
  810  * sbappendrecord(), these routines check on behalf of the caller that there
  811  * will be enough space to store the data.  Each fails if there is not
  812  * enough space, or if it cannot find mbufs to store additional information.
  813  *
  814  * Reliable protocols may use the socket send buffer to hold data awaiting
  815  * acknowledgement.  Data is normally copied from a socket send buffer in a
  816  * protocol with m_copy for output to a peer, and then removed from
  817  * the socket buffer with sbdrop() or sbdroprecord() when the data is
  818  * acknowledged by the peer.
  819  */
  820 #ifdef SOCKBUF_DEBUG
  821 void
  822 sblastrecordchk(struct sockbuf *sb, const char *file, int line)
  823 {
  824         struct mbuf *m = sb->sb_mb;
  825 
  826         SOCKBUF_LOCK_ASSERT(sb);
  827 
  828         while (m && m->m_nextpkt)
  829                 m = m->m_nextpkt;
  830 
  831         if (m != sb->sb_lastrecord) {
  832                 printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
  833                         __func__, sb->sb_mb, sb->sb_lastrecord, m);
  834                 printf("packet chain:\n");
  835                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
  836                         printf("\t%p\n", m);
  837                 panic("%s from %s:%u", __func__, file, line);
  838         }
  839 }
  840 
  841 void
  842 sblastmbufchk(struct sockbuf *sb, const char *file, int line)
  843 {
  844         struct mbuf *m = sb->sb_mb;
  845         struct mbuf *n;
  846 
  847         SOCKBUF_LOCK_ASSERT(sb);
  848 
  849         while (m && m->m_nextpkt)
  850                 m = m->m_nextpkt;
  851 
  852         while (m && m->m_next)
  853                 m = m->m_next;
  854 
  855         if (m != sb->sb_mbtail) {
  856                 printf("%s: sb_mb %p sb_mbtail %p last %p\n",
  857                         __func__, sb->sb_mb, sb->sb_mbtail, m);
  858                 printf("packet tree:\n");
  859                 for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
  860                         printf("\t");
  861                         for (n = m; n != NULL; n = n->m_next)
  862                                 printf("%p ", n);
  863                         printf("\n");
  864                 }
  865                 panic("%s from %s:%u", __func__, file, line);
  866         }
  867 
  868 #ifdef KERN_TLS
  869         m = sb->sb_mtls;
  870         while (m && m->m_next)
  871                 m = m->m_next;
  872 
  873         if (m != sb->sb_mtlstail) {
  874                 printf("%s: sb_mtls %p sb_mtlstail %p last %p\n",
  875                         __func__, sb->sb_mtls, sb->sb_mtlstail, m);
  876                 printf("TLS packet tree:\n");
  877                 printf("\t");
  878                 for (m = sb->sb_mtls; m != NULL; m = m->m_next) {
  879                         printf("%p ", m);
  880                 }
  881                 printf("\n");
  882                 panic("%s from %s:%u", __func__, file, line);
  883         }
  884 #endif
  885 }
  886 #endif /* SOCKBUF_DEBUG */
  887 
  888 #define SBLINKRECORD(sb, m0) do {                                       \
  889         SOCKBUF_LOCK_ASSERT(sb);                                        \
  890         if ((sb)->sb_lastrecord != NULL)                                \
  891                 (sb)->sb_lastrecord->m_nextpkt = (m0);                  \
  892         else                                                            \
  893                 (sb)->sb_mb = (m0);                                     \
  894         (sb)->sb_lastrecord = (m0);                                     \
  895 } while (/*CONSTCOND*/0)
  896 
  897 /*
  898  * Append mbuf chain m to the last record in the socket buffer sb.  The
  899  * additional space associated with the mbuf chain is recorded in sb.
  900  * Empty mbufs are discarded and mbufs are compacted where possible.
  901  */
  902 void
  903 sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags)
  904 {
  905         struct mbuf *n;
  906 
  907         SOCKBUF_LOCK_ASSERT(sb);
  908 
  909         if (m == NULL)
  910                 return;
  911         sbm_clrprotoflags(m, flags);
  912         SBLASTRECORDCHK(sb);
  913         n = sb->sb_mb;
  914         if (n) {
  915                 while (n->m_nextpkt)
  916                         n = n->m_nextpkt;
  917                 do {
  918                         if (n->m_flags & M_EOR) {
  919                                 sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
  920                                 return;
  921                         }
  922                 } while (n->m_next && (n = n->m_next));
  923         } else {
  924                 /*
  925                  * XXX Would like to simply use sb_mbtail here, but
  926                  * XXX I need to verify that I won't miss an EOR that
  927                  * XXX way.
  928                  */
  929                 if ((n = sb->sb_lastrecord) != NULL) {
  930                         do {
  931                                 if (n->m_flags & M_EOR) {
  932                                         sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
  933                                         return;
  934                                 }
  935                         } while (n->m_next && (n = n->m_next));
  936                 } else {
  937                         /*
  938                          * If this is the first record in the socket buffer,
  939                          * it's also the last record.
  940                          */
  941                         sb->sb_lastrecord = m;
  942                 }
  943         }
  944         sbcompress(sb, m, n);
  945         SBLASTRECORDCHK(sb);
  946 }
  947 
  948 /*
  949  * Append mbuf chain m to the last record in the socket buffer sb.  The
  950  * additional space associated with the mbuf chain is recorded in sb.
  951  * Empty mbufs are discarded and mbufs are compacted where possible.
  952  */
  953 void
  954 sbappend(struct sockbuf *sb, struct mbuf *m, int flags)
  955 {
  956 
  957         SOCKBUF_LOCK(sb);
  958         sbappend_locked(sb, m, flags);
  959         SOCKBUF_UNLOCK(sb);
  960 }
  961 
  962 #ifdef KERN_TLS
  963 /*
  964  * Append an mbuf containing encrypted TLS data.  The data
  965  * is marked M_NOTREADY until it has been decrypted and
  966  * stored as a TLS record.
  967  */
  968 static void
  969 sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m)
  970 {
  971         struct ifnet *ifp;
  972         struct mbuf *n;
  973         int flags;
  974 
  975         ifp = NULL;
  976         flags = M_NOTREADY;
  977 
  978         SBLASTMBUFCHK(sb);
  979 
  980         /* Mbuf chain must start with a packet header. */
  981         MPASS((m->m_flags & M_PKTHDR) != 0);
  982 
  983         /* Remove all packet headers and mbuf tags to get a pure data chain. */
  984         for (n = m; n != NULL; n = n->m_next) {
  985                 if (n->m_flags & M_PKTHDR) {
  986                         ifp = m->m_pkthdr.leaf_rcvif;
  987                         if ((n->m_pkthdr.csum_flags & CSUM_TLS_MASK) ==
  988                             CSUM_TLS_DECRYPTED) {
  989                                 /* Mark all mbufs in this packet decrypted. */
  990                                 flags = M_NOTREADY | M_DECRYPTED;
  991                         } else {
  992                                 flags = M_NOTREADY;
  993                         }
  994                         m_demote_pkthdr(n);
  995                 }
  996 
  997                 n->m_flags &= M_DEMOTEFLAGS;
  998                 n->m_flags |= flags;
  999 
 1000                 MPASS((n->m_flags & M_NOTREADY) != 0);
 1001         }
 1002 
 1003         sbcompress_ktls_rx(sb, m, sb->sb_mtlstail);
 1004         ktls_check_rx(sb);
 1005 
 1006         /* Check for incoming packet route changes: */
 1007         if (ifp != NULL && sb->sb_tls_info->rx_ifp != NULL &&
 1008             sb->sb_tls_info->rx_ifp != ifp)
 1009                 ktls_input_ifp_mismatch(sb, ifp);
 1010 }
 1011 #endif
 1012 
 1013 /*
 1014  * This version of sbappend() should only be used when the caller absolutely
 1015  * knows that there will never be more than one record in the socket buffer,
 1016  * that is, a stream protocol (such as TCP).
 1017  */
 1018 void
 1019 sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
 1020 {
 1021         SOCKBUF_LOCK_ASSERT(sb);
 1022 
 1023         KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
 1024 
 1025 #ifdef KERN_TLS
 1026         /*
 1027          * Decrypted TLS records are appended as records via
 1028          * sbappendrecord().  TCP passes encrypted TLS records to this
 1029          * function which must be scheduled for decryption.
 1030          */
 1031         if (sb->sb_flags & SB_TLS_RX) {
 1032                 sbappend_ktls_rx(sb, m);
 1033                 return;
 1034         }
 1035 #endif
 1036 
 1037         KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
 1038 
 1039         SBLASTMBUFCHK(sb);
 1040 
 1041 #ifdef KERN_TLS
 1042         if (sb->sb_tls_info != NULL)
 1043                 ktls_seq(sb, m);
 1044 #endif
 1045 
 1046         /* Remove all packet headers and mbuf tags to get a pure data chain. */
 1047         m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);
 1048 
 1049         sbcompress(sb, m, sb->sb_mbtail);
 1050 
 1051         sb->sb_lastrecord = sb->sb_mb;
 1052         SBLASTRECORDCHK(sb);
 1053 }
 1054 
 1055 /*
 1056  * This version of sbappend() should only be used when the caller absolutely
 1057  * knows that there will never be more than one record in the socket buffer,
 1058  * that is, a stream protocol (such as TCP).
 1059  */
 1060 void
 1061 sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
 1062 {
 1063 
 1064         SOCKBUF_LOCK(sb);
 1065         sbappendstream_locked(sb, m, flags);
 1066         SOCKBUF_UNLOCK(sb);
 1067 }
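
/*
 * Illustrative sketch (annotation, not part of the original file): a
 * stream protocol's send path appends under the send-buffer lock and then
 * hands the new data to its output routine; tcp_usr_send() follows this
 * general shape.
 */
#if 0
        SOCKBUF_LOCK(&so->so_snd);
        sbappendstream_locked(&so->so_snd, m, flags);
        SOCKBUF_UNLOCK(&so->so_snd);
        /* ... then call the protocol's output routine, e.g. tcp_output(). */
#endif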
 1068 
 1069 #ifdef SOCKBUF_DEBUG
 1070 void
 1071 sbcheck(struct sockbuf *sb, const char *file, int line)
 1072 {
 1073         struct mbuf *m, *n, *fnrdy;
 1074         u_long acc, ccc, mbcnt;
 1075 #ifdef KERN_TLS
 1076         u_long tlscc;
 1077 #endif
 1078 
 1079         SOCKBUF_LOCK_ASSERT(sb);
 1080 
 1081         acc = ccc = mbcnt = 0;
 1082         fnrdy = NULL;
 1083 
 1084         for (m = sb->sb_mb; m; m = n) {
 1085             n = m->m_nextpkt;
 1086             for (; m; m = m->m_next) {
 1087                 if (m->m_len == 0) {
 1088                         printf("sb %p empty mbuf %p\n", sb, m);
 1089                         goto fail;
 1090                 }
 1091                 if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) {
 1092                         if (m != sb->sb_fnrdy) {
 1093                                 printf("sb %p: fnrdy %p != m %p\n",
 1094                                     sb, sb->sb_fnrdy, m);
 1095                                 goto fail;
 1096                         }
 1097                         fnrdy = m;
 1098                 }
 1099                 if (fnrdy) {
 1100                         if (!(m->m_flags & M_NOTAVAIL)) {
 1101                                 printf("sb %p: fnrdy %p, m %p is avail\n",
 1102                                     sb, sb->sb_fnrdy, m);
 1103                                 goto fail;
 1104                         }
 1105                 } else
 1106                         acc += m->m_len;
 1107                 ccc += m->m_len;
 1108                 mbcnt += MSIZE;
 1109                 if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
 1110                         mbcnt += m->m_ext.ext_size;
 1111             }
 1112         }
 1113 #ifdef KERN_TLS
 1114         /*
 1115          * Account for mbufs "detached" by ktls_detach_record() while
 1116          * they are decrypted by ktls_decrypt().  tlsdcc gives a count
 1117          * of the detached bytes that are included in ccc.  The mbufs
 1118          * and clusters are not included in the socket buffer
 1119          * accounting.
 1120          */
 1121         ccc += sb->sb_tlsdcc;
 1122 
 1123         tlscc = 0;
 1124         for (m = sb->sb_mtls; m; m = m->m_next) {
 1125                 if (m->m_nextpkt != NULL) {
 1126                         printf("sb %p TLS mbuf %p with nextpkt\n", sb, m);
 1127                         goto fail;
 1128                 }
 1129                 if ((m->m_flags & M_NOTREADY) == 0) {
 1130                         printf("sb %p TLS mbuf %p ready\n", sb, m);
 1131                         goto fail;
 1132                 }
 1133                 tlscc += m->m_len;
 1134                 ccc += m->m_len;
 1135                 mbcnt += MSIZE;
 1136                 if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
 1137                         mbcnt += m->m_ext.ext_size;
 1138         }
 1139 
 1140         if (sb->sb_tlscc != tlscc) {
 1141                 printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
 1142                     sb->sb_tlsdcc);
 1143                 goto fail;
 1144         }
 1145 #endif
 1146         if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
 1147                 printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
 1148                     acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
 1149 #ifdef KERN_TLS
 1150                 printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
 1151                     sb->sb_tlsdcc);
 1152 #endif
 1153                 goto fail;
 1154         }
 1155         return;
 1156 fail:
 1157         panic("%s from %s:%u", __func__, file, line);
 1158 }
 1159 #endif
 1160 
 1161 /*
 1162  * As above, except the mbuf chain begins a new record.
 1163  */
 1164 void
 1165 sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
 1166 {
 1167         struct mbuf *m;
 1168 
 1169         SOCKBUF_LOCK_ASSERT(sb);
 1170 
 1171         if (m0 == NULL)
 1172                 return;
 1173         m_clrprotoflags(m0);
 1174         /*
 1175          * Put the first mbuf on the queue.  Note this permits zero length
 1176          * records.
 1177          */
 1178         sballoc(sb, m0);
 1179         SBLASTRECORDCHK(sb);
 1180         SBLINKRECORD(sb, m0);
 1181         sb->sb_mbtail = m0;
 1182         m = m0->m_next;
 1183         m0->m_next = 0;
 1184         if (m && (m0->m_flags & M_EOR)) {
 1185                 m0->m_flags &= ~M_EOR;
 1186                 m->m_flags |= M_EOR;
 1187         }
 1188         /* always call sbcompress() so it can do SBLASTMBUFCHK() */
 1189         sbcompress(sb, m, m0);
 1190 }
 1191 
 1192 /*
 1193  * As above, except the mbuf chain begins a new record.
 1194  */
 1195 void
 1196 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
 1197 {
 1198 
 1199         SOCKBUF_LOCK(sb);
 1200         sbappendrecord_locked(sb, m0);
 1201         SOCKBUF_UNLOCK(sb);
 1202 }
 1203 
 1204 /* Helper routine that appends data, control, and address to a sockbuf. */
 1205 static int
 1206 sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa,
 1207     struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last)
 1208 {
 1209         struct mbuf *m, *n, *nlast;
 1210 #if MSIZE <= 256
 1211         if (asa->sa_len > MLEN)
 1212                 return (0);
 1213 #endif
 1214         m = m_get(M_NOWAIT, MT_SONAME);
 1215         if (m == NULL)
 1216                 return (0);
 1217         m->m_len = asa->sa_len;
 1218         bcopy(asa, mtod(m, caddr_t), asa->sa_len);
 1219         if (m0) {
 1220                 M_ASSERT_NO_SND_TAG(m0);
 1221                 m_clrprotoflags(m0);
 1222                 m_tag_delete_chain(m0, NULL);
 1223                 /*
 1224                  * Clear some persistent info from pkthdr.
 1225                  * We don't use m_demote(), because some netgraph consumers
 1226                  * expect M_PKTHDR presence.
 1227                  */
 1228                 m0->m_pkthdr.rcvif = NULL;
 1229                 m0->m_pkthdr.flowid = 0;
 1230                 m0->m_pkthdr.csum_flags = 0;
 1231                 m0->m_pkthdr.fibnum = 0;
 1232                 m0->m_pkthdr.rsstype = 0;
 1233         }
 1234         if (ctrl_last)
 1235                 ctrl_last->m_next = m0; /* concatenate data to control */
 1236         else
 1237                 control = m0;
 1238         m->m_next = control;
 1239         for (n = m; n->m_next != NULL; n = n->m_next)
 1240                 sballoc(sb, n);
 1241         sballoc(sb, n);
 1242         nlast = n;
 1243         SBLINKRECORD(sb, m);
 1244 
 1245         sb->sb_mbtail = nlast;
 1246         SBLASTMBUFCHK(sb);
 1247 
 1248         SBLASTRECORDCHK(sb);
 1249         return (1);
 1250 }
 1251 
 1252 /*
 1253  * Append address and data, and optionally, control (ancillary) data to the
 1254  * receive queue of a socket.  If present, m0 must include a packet header
 1255  * with total length.  Returns 0 if no space in sockbuf or insufficient
 1256  * mbufs.
 1257  */
 1258 int
 1259 sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
 1260     struct mbuf *m0, struct mbuf *control)
 1261 {
 1262         struct mbuf *ctrl_last;
 1263         int space = asa->sa_len;
 1264 
 1265         SOCKBUF_LOCK_ASSERT(sb);
 1266 
 1267         if (m0 && (m0->m_flags & M_PKTHDR) == 0)
 1268                 panic("sbappendaddr_locked");
 1269         if (m0)
 1270                 space += m0->m_pkthdr.len;
 1271         space += m_length(control, &ctrl_last);
 1272 
 1273         if (space > sbspace(sb))
 1274                 return (0);
 1275         return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
 1276 }
 1277 
 1278 /*
 1279  * Append address and data, and optionally, control (ancillary) data to the
 1280  * receive queue of a socket.  If present, m0 must include a packet header
 1281  * with total length.  Returns 0 if insufficient mbufs.  Does not validate space
 1282  * on the receiving sockbuf.
 1283  */
 1284 int
 1285 sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa,
 1286     struct mbuf *m0, struct mbuf *control)
 1287 {
 1288         struct mbuf *ctrl_last;
 1289 
 1290         SOCKBUF_LOCK_ASSERT(sb);
 1291 
 1292         ctrl_last = (control == NULL) ? NULL : m_last(control);
 1293         return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
 1294 }
 1295 
 1296 /*
 1297  * Append address and data, and optionally, control (ancillary) data to the
 1298  * receive queue of a socket.  If present, m0 must include a packet header
 1299  * with total length.  Returns 0 if no space in sockbuf or insufficient
 1300  * mbufs.
 1301  */
 1302 int
 1303 sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
 1304     struct mbuf *m0, struct mbuf *control)
 1305 {
 1306         int retval;
 1307 
 1308         SOCKBUF_LOCK(sb);
 1309         retval = sbappendaddr_locked(sb, asa, m0, control);
 1310         SOCKBUF_UNLOCK(sb);
 1311         return (retval);
 1312 }
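
/*
 * Illustrative sketch (annotation, not part of the original file): a
 * datagram protocol's input path typically appends a packet together with
 * the sender's address, then reports overflow if the buffer is full.  On
 * failure the caller still owns the mbufs; "from" and "opts" are assumed
 * to hold the source address and any control data.
 */
#if 0
        SOCK_RECVBUF_LOCK(so);
        if (sbappendaddr_locked(&so->so_rcv,
            (struct sockaddr *)&from, m, opts) == 0) {
                soroverflow_locked(so);         /* consumes the lock */
                m_freem(m);
                m_freem(opts);
        } else
                sorwakeup_locked(so);           /* consumes the lock */
#endif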
 1313 
 1314 void
 1315 sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
 1316     struct mbuf *control, int flags)
 1317 {
 1318         struct mbuf *m, *mlast;
 1319 
 1320         sbm_clrprotoflags(m0, flags);
 1321         m_last(control)->m_next = m0;
 1322 
 1323         SBLASTRECORDCHK(sb);
 1324 
 1325         for (m = control; m->m_next; m = m->m_next)
 1326                 sballoc(sb, m);
 1327         sballoc(sb, m);
 1328         mlast = m;
 1329         SBLINKRECORD(sb, control);
 1330 
 1331         sb->sb_mbtail = mlast;
 1332         SBLASTMBUFCHK(sb);
 1333 
 1334         SBLASTRECORDCHK(sb);
 1335 }
 1336 
 1337 void
 1338 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
 1339     int flags)
 1340 {
 1341 
 1342         SOCKBUF_LOCK(sb);
 1343         sbappendcontrol_locked(sb, m0, control, flags);
 1344         SOCKBUF_UNLOCK(sb);
 1345 }
 1346 
 1347 /*
 1348  * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
 1349  * (n).  If (n) is NULL, the buffer is presumed empty.
 1350  *
 1351  * When the data is compressed, mbufs in the chain may be handled in one of
 1352  * three ways:
 1353  *
 1354  * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
 1355  *     record boundary, and no change in data type).
 1356  *
 1357  * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
 1358  *     an mbuf already in the socket buffer.  This can occur if an
 1359  *     appropriate mbuf exists, there is room, neither mbuf is marked
 1360  *     not-ready, and no merging of data types will occur.
 1361  *
 1362  * (3) The mbuf may be appended to the end of the existing mbuf chain.
 1363  *
 1364  * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
 1365  * end-of-record.
 1366  */
 1367 void
 1368 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
 1369 {
 1370         int eor = 0;
 1371         struct mbuf *o;
 1372 
 1373         SOCKBUF_LOCK_ASSERT(sb);
 1374 
 1375         while (m) {
 1376                 eor |= m->m_flags & M_EOR;
 1377                 if (m->m_len == 0 &&
 1378                     (eor == 0 ||
 1379                      (((o = m->m_next) || (o = n)) &&
 1380                       o->m_type == m->m_type))) {
 1381                         if (sb->sb_lastrecord == m)
 1382                                 sb->sb_lastrecord = m->m_next;
 1383                         m = m_free(m);
 1384                         continue;
 1385                 }
 1386                 if (n && (n->m_flags & M_EOR) == 0 &&
 1387                     M_WRITABLE(n) &&
 1388                     ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
 1389                     !(m->m_flags & M_NOTREADY) &&
 1390                     !(n->m_flags & (M_NOTREADY | M_EXTPG)) &&
 1391                     !mbuf_has_tls_session(m) &&
 1392                     !mbuf_has_tls_session(n) &&
 1393                     m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
 1394                     m->m_len <= M_TRAILINGSPACE(n) &&
 1395                     n->m_type == m->m_type) {
 1396                         m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
 1397                         n->m_len += m->m_len;
 1398                         sb->sb_ccc += m->m_len;
 1399                         if (sb->sb_fnrdy == NULL)
 1400                                 sb->sb_acc += m->m_len;
 1401                         if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 1402                                 /* XXX: Probably don't need.*/
 1403                                 sb->sb_ctl += m->m_len;
 1404                         m = m_free(m);
 1405                         continue;
 1406                 }
 1407                 if (m->m_len <= MLEN && (m->m_flags & M_EXTPG) &&
 1408                     (m->m_flags & M_NOTREADY) == 0 &&
 1409                     !mbuf_has_tls_session(m))
 1410                         (void)mb_unmapped_compress(m);
 1411                 if (n)
 1412                         n->m_next = m;
 1413                 else
 1414                         sb->sb_mb = m;
 1415                 sb->sb_mbtail = m;
 1416                 sballoc(sb, m);
 1417                 n = m;
 1418                 m->m_flags &= ~M_EOR;
 1419                 m = m->m_next;
 1420                 n->m_next = 0;
 1421         }
 1422         if (eor) {
 1423                 KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
 1424                 n->m_flags |= eor;
 1425         }
 1426         SBLASTMBUFCHK(sb);
 1427 }
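
/*
 * Worked example (illustrative annotation, not part of the original file):
 * a stream socket receiving eight 100-byte segments.  Case (2) applies to
 * each: the 100 bytes fit in the trailing space of the cluster already at
 * the tail, so the chain stays one mbuf long and sb_mbcnt does not grow,
 * instead of accumulating eight mostly empty allocations.  Mbufs larger
 * than MCLBYTES / 4 are linked instead (case (3)) to bound the copy cost.
 */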
 1428 
 1429 #ifdef KERN_TLS
 1430 /*
 1431  * A version of sbcompress() for encrypted TLS RX mbufs.  These mbufs
 1432  * are appended to the 'sb_mtls' chain instead of 'sb_mb'; the logic is
 1433  * also a bit simpler (no EOR markers, always MT_DATA, etc.).
 1434  */
 1435 static void
 1436 sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
 1437 {
 1438 
 1439         SOCKBUF_LOCK_ASSERT(sb);
 1440 
 1441         while (m) {
 1442                 KASSERT((m->m_flags & M_EOR) == 0,
 1443                     ("TLS RX mbuf %p with EOR", m));
 1444                 KASSERT(m->m_type == MT_DATA,
 1445                     ("TLS RX mbuf %p is not MT_DATA", m));
 1446                 KASSERT((m->m_flags & M_NOTREADY) != 0,
 1447                     ("TLS RX mbuf %p ready", m));
 1448                 KASSERT((m->m_flags & M_EXTPG) == 0,
 1449                     ("TLS RX mbuf %p unmapped", m));
 1450 
 1451                 if (m->m_len == 0) {
 1452                         m = m_free(m);
 1453                         continue;
 1454                 }
 1455 
 1456                 /*
 1457                  * Even though both 'n' and 'm' are NOTREADY, it's ok
 1458                  * to coalesce the data.
 1459                  */
 1460                 if (n &&
 1461                     M_WRITABLE(n) &&
 1462                     ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
 1463                     !((m->m_flags ^ n->m_flags) & M_DECRYPTED) &&
 1464                     !(n->m_flags & M_EXTPG) &&
 1465                     m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
 1466                     m->m_len <= M_TRAILINGSPACE(n)) {
 1467                         m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
 1468                         n->m_len += m->m_len;
 1469                         sb->sb_ccc += m->m_len;
 1470                         sb->sb_tlscc += m->m_len;
 1471                         m = m_free(m);
 1472                         continue;
 1473                 }
 1474                 if (n)
 1475                         n->m_next = m;
 1476                 else
 1477                         sb->sb_mtls = m;
 1478                 sb->sb_mtlstail = m;
 1479                 sballoc_ktls_rx(sb, m);
 1480                 n = m;
 1481                 m = m->m_next;
 1482                 n->m_next = NULL;
 1483         }
 1484         SBLASTMBUFCHK(sb);
 1485 }
 1486 #endif
 1487 
 1488 /*
 1489  * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
 1490  */
 1491 static void
 1492 sbflush_internal(struct sockbuf *sb)
 1493 {
 1494 
 1495         while (sb->sb_mbcnt || sb->sb_tlsdcc) {
 1496                 /*
 1497                  * Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
 1498                  * we would loop forever. Panic instead.
 1499                  */
 1500                 if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len))
 1501                         break;
 1502                 m_freem(sbcut_internal(sb, (int)sb->sb_ccc));
 1503         }
 1504         KASSERT(sb->sb_ccc == 0 && sb->sb_mb == NULL && sb->sb_mbcnt == 0,
 1505             ("%s: ccc %u mb %p mbcnt %u", __func__,
 1506             sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
 1507 }
 1508 
 1509 void
 1510 sbflush_locked(struct sockbuf *sb)
 1511 {
 1512 
 1513         SOCKBUF_LOCK_ASSERT(sb);
 1514         sbflush_internal(sb);
 1515 }
 1516 
 1517 void
 1518 sbflush(struct sockbuf *sb)
 1519 {
 1520 
 1521         SOCKBUF_LOCK(sb);
 1522         sbflush_locked(sb);
 1523         SOCKBUF_UNLOCK(sb);
 1524 }
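
      /*
       * Usage sketch (illustrative, not from this file): a protocol that is
       * tearing down a socket typically discards any unread data with:
       *
       *	SOCKBUF_LOCK(&so->so_rcv);
       *	sbflush_locked(&so->so_rcv);
       *	SOCKBUF_UNLOCK(&so->so_rcv);
       *
       * or with sbflush(&so->so_rcv) when the lock is not already held.
       */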
 1525 
 1526 /*
 1527  * Cut data from (the front of) a sockbuf.
 1528  */
 1529 static struct mbuf *
 1530 sbcut_internal(struct sockbuf *sb, int len)
 1531 {
 1532         struct mbuf *m, *next, *mfree;
 1533         bool is_tls;
 1534 
 1535         KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0",
 1536             __func__, len));
 1537         KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u",
 1538             __func__, len, sb->sb_ccc));
 1539 
 1540         next = (m = sb->sb_mb) ? m->m_nextpkt : NULL;
 1541         is_tls = false;
 1542         mfree = NULL;
 1543 
 1544         while (len > 0) {
 1545                 if (m == NULL) {
 1546 #ifdef KERN_TLS
 1547                         if (next == NULL && !is_tls) {
 1548                                 if (sb->sb_tlsdcc != 0) {
 1549                                         MPASS(len >= sb->sb_tlsdcc);
 1550                                         len -= sb->sb_tlsdcc;
 1551                                         sb->sb_ccc -= sb->sb_tlsdcc;
 1552                                         sb->sb_tlsdcc = 0;
 1553                                         if (len == 0)
 1554                                                 break;
 1555                                 }
 1556                                 next = sb->sb_mtls;
 1557                                 is_tls = true;
 1558                         }
 1559 #endif
 1560                         KASSERT(next, ("%s: no next, len %d", __func__, len));
 1561                         m = next;
 1562                         next = m->m_nextpkt;
 1563                 }
 1564                 if (m->m_len > len) {
 1565                         KASSERT(!(m->m_flags & M_NOTAVAIL),
 1566                             ("%s: m %p M_NOTAVAIL", __func__, m));
 1567                         m->m_len -= len;
 1568                         m->m_data += len;
 1569                         sb->sb_ccc -= len;
 1570                         sb->sb_acc -= len;
 1571                         if (sb->sb_sndptroff != 0)
 1572                                 sb->sb_sndptroff -= len;
 1573                         if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 1574                                 sb->sb_ctl -= len;
 1575                         break;
 1576                 }
 1577                 len -= m->m_len;
 1578 #ifdef KERN_TLS
 1579                 if (is_tls)
 1580                         sbfree_ktls_rx(sb, m);
 1581                 else
 1582 #endif
 1583                         sbfree(sb, m);
 1584                 /*
 1585                  * Do not put M_NOTREADY buffers to the free list, they
 1586                  * are referenced from outside.
 1587                  */
 1588                 if ((m->m_flags & M_NOTREADY) != 0 && !is_tls)
 1589                         m = m->m_next;
 1590                 else {
 1591                         struct mbuf *n;
 1592 
 1593                         n = m->m_next;
 1594                         m->m_next = mfree;
 1595                         mfree = m;
 1596                         m = n;
 1597                 }
 1598         }
 1599         /*
 1600          * Free any zero-length mbufs from the buffer.
 1601          * For SOCK_DGRAM sockets such mbufs represent empty records.
 1602          * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer
 1603          * when sosend_generic() needs to send only control data.
 1604          */
 1605         while (m && m->m_len == 0) {
 1606                 struct mbuf *n;
 1607 
 1608                 sbfree(sb, m);
 1609                 n = m->m_next;
 1610                 m->m_next = mfree;
 1611                 mfree = m;
 1612                 m = n;
 1613         }
 1614 #ifdef KERN_TLS
 1615         if (is_tls) {
 1616                 sb->sb_mb = NULL;
 1617                 sb->sb_mtls = m;
 1618                 if (m == NULL)
 1619                         sb->sb_mtlstail = NULL;
 1620         } else
 1621 #endif
 1622         if (m) {
 1623                 sb->sb_mb = m;
 1624                 m->m_nextpkt = next;
 1625         } else
 1626                 sb->sb_mb = next;
 1627         /*
 1628          * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
 1629          * sb_lastrecord is up-to-date if we dropped part of the last record.
 1630          */
 1631         m = sb->sb_mb;
 1632         if (m == NULL) {
 1633                 sb->sb_mbtail = NULL;
 1634                 sb->sb_lastrecord = NULL;
 1635         } else if (m->m_nextpkt == NULL) {
 1636                 sb->sb_lastrecord = m;
 1637         }
 1638 
 1639         return (mfree);
 1640 }
 1641 
 1642 /*
 1643  * Drop data from (the front of) a sockbuf.
 1644  */
 1645 void
 1646 sbdrop_locked(struct sockbuf *sb, int len)
 1647 {
 1648 
 1649         SOCKBUF_LOCK_ASSERT(sb);
 1650         m_freem(sbcut_internal(sb, len));
 1651 }
 1652 
 1653 /*
 1654  * Drop data from (the front of) a sockbuf,
 1655  * and return it to caller.
 1656  */
 1657 struct mbuf *
 1658 sbcut_locked(struct sockbuf *sb, int len)
 1659 {
 1660 
 1661         SOCKBUF_LOCK_ASSERT(sb);
 1662         return (sbcut_internal(sb, len));
 1663 }
 1664 
 1665 void
 1666 sbdrop(struct sockbuf *sb, int len)
 1667 {
 1668         struct mbuf *mfree;
 1669 
 1670         SOCKBUF_LOCK(sb);
 1671         mfree = sbcut_internal(sb, len);
 1672         SOCKBUF_UNLOCK(sb);
 1673 
 1674         m_freem(mfree);
 1675 }
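
      /*
       * Usage sketch (illustrative, not from this file): a reliable
       * protocol drops data from the front of the send buffer once the
       * peer acknowledges it, roughly:
       *
       *	SOCKBUF_LOCK(&so->so_snd);
       *	sbdrop_locked(&so->so_snd, acked);
       *	SOCKBUF_UNLOCK(&so->so_snd);
       *
       * where 'acked' is the newly acknowledged byte count.  Callers that
       * want the detached mbufs back, e.g. to free them after releasing
       * the lock as sbdrop() does above, use sbcut_locked() instead.
       */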
 1676 
 1677 struct mbuf *
 1678 sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff)
 1679 {
 1680         struct mbuf *m;
 1681 
 1682         KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
 1683         if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
 1684                 *moff = off;
 1685                 if (sb->sb_sndptr == NULL) {
 1686                         sb->sb_sndptr = sb->sb_mb;
 1687                         sb->sb_sndptroff = 0;
 1688                 }
 1689                 return (sb->sb_mb);
 1690         } else {
 1691                 m = sb->sb_sndptr;
 1692                 off -= sb->sb_sndptroff;
 1693         }
 1694         *moff = off;
 1695         return (m);
 1696 }
 1697 
 1698 void
 1699 sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len)
 1700 {
 1701         /*
 1702          * A small copy was done; advance the sb_sndptr forward to
 1703          * cover it.
 1704          */
 1705         struct mbuf *m;
 1706 
 1707         if (mb != sb->sb_sndptr) {
 1708                 /* The copy did not start at the cached sndptr mbuf. */
 1709                 return;
 1710         }
 1711         m = mb;
 1712         while (m && (len > 0)) {
 1713                 if (len >= m->m_len) {
 1714                         len -= m->m_len;
 1715                         if (m->m_next) {
 1716                                 sb->sb_sndptroff += m->m_len;
 1717                                 sb->sb_sndptr = m->m_next;
 1718                         }
 1719                         m = m->m_next;
 1720                 } else {
 1721                         len = 0;
 1722                 }
 1723         }
 1724 }
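
      /*
       * Usage sketch (illustrative, not from this file): the two routines
       * above are used as a pair so that successive sends at increasing
       * offsets need not rescan the chain from sb_mb each time:
       *
       *	uint32_t moff;
       *	struct mbuf *m;
       *
       *	m = sbsndptr_noadv(&so->so_snd, off, &moff);
       *	(copy 'len' bytes starting 'moff' bytes into 'm')
       *	sbsndptr_adv(&so->so_snd, m, len);
       */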
 1725 
 1726 /*
 1727  * Return the first mbuf and the mbuf data offset for the provided
 1728  * send offset without changing the "sb_sndptroff" field.
 1729  */
 1730 struct mbuf *
 1731 sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff)
 1732 {
 1733         struct mbuf *m;
 1734 
 1735         KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
 1736 
 1737         /*
 1738          * If the "off" is below the stored offset, which happens on
 1739          * retransmits, just use "sb_mb":
 1740          */
 1741         if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
 1742                 m = sb->sb_mb;
 1743         } else {
 1744                 m = sb->sb_sndptr;
 1745                 off -= sb->sb_sndptroff;
 1746         }
 1747         while (off > 0 && m != NULL) {
 1748                 if (off < m->m_len)
 1749                         break;
 1750                 off -= m->m_len;
 1751                 m = m->m_next;
 1752         }
 1753         *moff = off;
 1754         return (m);
 1755 }
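
      /*
       * Usage sketch (illustrative, not from this file): locate the mbuf
       * and intra-mbuf offset at which a retransmission starting 'off'
       * bytes into the send buffer should begin, without disturbing the
       * cached send pointer:
       *
       *	u_int moff;
       *	struct mbuf *m;
       *
       *	m = sbsndmbuf(&so->so_snd, off, &moff);
       */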
 1756 
 1757 /*
 1758  * Drop a record off the front of a sockbuf and move the next record to the
 1759  * front.
 1760  */
 1761 void
 1762 sbdroprecord_locked(struct sockbuf *sb)
 1763 {
 1764         struct mbuf *m;
 1765 
 1766         SOCKBUF_LOCK_ASSERT(sb);
 1767 
 1768         m = sb->sb_mb;
 1769         if (m) {
 1770                 sb->sb_mb = m->m_nextpkt;
 1771                 do {
 1772                         sbfree(sb, m);
 1773                         m = m_free(m);
 1774                 } while (m);
 1775         }
 1776         SB_EMPTY_FIXUP(sb);
 1777 }
 1778 
 1779 /*
 1780  * Drop a record off the front of a sockbuf and move the next record to the
 1781  * front.
 1782  */
 1783 void
 1784 sbdroprecord(struct sockbuf *sb)
 1785 {
 1786 
 1787         SOCKBUF_LOCK(sb);
 1788         sbdroprecord_locked(sb);
 1789         SOCKBUF_UNLOCK(sb);
 1790 }
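
      /*
       * Usage sketch (illustrative, not from this file): record-oriented
       * protocols deliver the leading record to the caller and then
       * discard it:
       *
       *	SOCKBUF_LOCK(&so->so_rcv);
       *	(copy out the leading record from sb_mb)
       *	sbdroprecord_locked(&so->so_rcv);
       *	SOCKBUF_UNLOCK(&so->so_rcv);
       */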
 1791 
 1792 /*
 1793  * Create a "control" mbuf containing the specified data with the specified
 1794  * type for presentation on a socket buffer.
 1795  */
 1796 struct mbuf *
 1797 sbcreatecontrol(const void *p, u_int size, int type, int level, int wait)
 1798 {
 1799         struct cmsghdr *cp;
 1800         struct mbuf *m;
 1801 
 1802         MBUF_CHECKSLEEP(wait);
 1803 
 1804         if (wait == M_NOWAIT) {
 1805                 if (CMSG_SPACE(size) > MCLBYTES)
 1806                         return (NULL);
 1807         } else
 1808                 KASSERT(CMSG_SPACE(size) <= MCLBYTES,
 1809                     ("%s: passed CMSG_SPACE(%u) > MCLBYTES", __func__, size));
 1810 
 1811         if (CMSG_SPACE(size) > MLEN)
 1812                 m = m_getcl(wait, MT_CONTROL, 0);
 1813         else
 1814                 m = m_get(wait, MT_CONTROL);
 1815         if (m == NULL)
 1816                 return (NULL);
 1817 
 1818         KASSERT(CMSG_SPACE(size) <= M_TRAILINGSPACE(m),
 1819             ("sbcreatecontrol: short mbuf"));
 1820         /*
 1821          * Don't leave the padding between the msg header and the cmsg
 1822          * data, or the padding after the cmsg data, uninitialized.
 1823          */
 1824         cp = mtod(m, struct cmsghdr *);
 1825         bzero(cp, CMSG_SPACE(size));
 1826         if (p != NULL)
 1827                 (void)memcpy(CMSG_DATA(cp), p, size);
 1828         m->m_len = CMSG_SPACE(size);
 1829         cp->cmsg_len = CMSG_LEN(size);
 1830         cp->cmsg_level = level;
 1831         cp->cmsg_type = type;
 1832         return (m);
 1833 }
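
      /*
       * Usage sketch (illustrative, not from this file): build an
       * SCM_TIMESTAMP control mbuf for a received datagram, tolerating
       * allocation failure:
       *
       *	struct timeval tv;
       *	struct mbuf *control;
       *
       *	microtime(&tv);
       *	control = sbcreatecontrol(&tv, sizeof(tv), SCM_TIMESTAMP,
       *	    SOL_SOCKET, M_NOWAIT);
       *	if (control == NULL)
       *		(deliver the datagram without a timestamp)
       */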
 1834 
 1835 /*
 1836  * This does the same for socket buffers that sotoxsocket does for sockets:
 1837  * generate a user-format data structure describing the socket buffer.  Note
 1838  * that the xsockbuf structure, since it is always embedded in a socket, does
 1839  * not include a self pointer or a length.  We make this entry point public
 1840  * in case some other mechanism needs it.
 1841  */
 1842 void
 1843 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
 1844 {
 1845 
 1846         xsb->sb_cc = sb->sb_ccc;
 1847         xsb->sb_hiwat = sb->sb_hiwat;
 1848         xsb->sb_mbcnt = sb->sb_mbcnt;
 1849         xsb->sb_mbmax = sb->sb_mbmax;
 1850         xsb->sb_lowat = sb->sb_lowat;
 1851         xsb->sb_flags = sb->sb_flags;
 1852         xsb->sb_timeo = sb->sb_timeo;
 1853 }
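
      /*
       * Usage sketch (illustrative, not from this file): sotoxsocket()
       * style exporters fill one xsockbuf per direction when reporting
       * socket state to userland:
       *
       *	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
       *	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
       */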
 1854 
 1855 /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
 1856 static int dummy;
 1857 SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW | CTLFLAG_SKIP, &dummy, 0, "");
 1858 SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
 1859     CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, &sb_max, 0,
 1860     sysctl_handle_sb_max, "LU",
 1861     "Maximum socket buffer size");
 1862 SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
 1863     &sb_efficiency, 0, "Socket buffer size waste factor");
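
      /*
       * Both knobs are adjustable at runtime with sysctl(8), e.g.
       * (illustrative): sysctl kern.ipc.maxsockbuf=16777216
       */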
