FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_mbuf.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2004, 2005,
    5  *      Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice unmodified, this list of conditions and the following
   12  *    disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD$");
   32 
   33 #include "opt_param.h"
   34 #include "opt_kern_tls.h"
   35 
   36 #include <sys/param.h>
   37 #include <sys/conf.h>
   38 #include <sys/domainset.h>
   39 #include <sys/malloc.h>
   40 #include <sys/systm.h>
   41 #include <sys/mbuf.h>
   42 #include <sys/eventhandler.h>
   43 #include <sys/kernel.h>
   44 #include <sys/ktls.h>
   45 #include <sys/limits.h>
   46 #include <sys/lock.h>
   47 #include <sys/mutex.h>
   48 #include <sys/refcount.h>
   49 #include <sys/sf_buf.h>
   50 #include <sys/smp.h>
   51 #include <sys/socket.h>
   52 #include <sys/sysctl.h>
   53 
   54 #include <net/if.h>
   55 #include <net/if_var.h>
   56 
   57 #include <vm/vm.h>
   58 #include <vm/vm_extern.h>
   59 #include <vm/vm_kern.h>
   60 #include <vm/vm_page.h>
   61 #include <vm/vm_pageout.h>
   62 #include <vm/vm_map.h>
   63 #include <vm/uma.h>
   64 #include <vm/uma_dbg.h>
   65 
   66 _Static_assert(MJUMPAGESIZE > MCLBYTES,
   67     "Cluster must be smaller than a jumbo page");
   68 
   69 /*
   70  * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
   71  * Zones.
   72  *
   73  * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
   74  * Zone.  The Zone can be capped at kern.ipc.nmbclusters, if the
   75  * administrator so desires.
   76  *
   77  * Mbufs are allocated from a UMA Primary Zone called the Mbuf
   78  * Zone.
   79  *
   80  * Additionally, FreeBSD provides a Packet Zone, which it
   81  * configures as a Secondary Zone to the Mbuf Primary Zone,
   82  * thus sharing backend Slab kegs with the Mbuf Primary Zone.
   83  *
   84  * Thus common-case allocations and locking are simplified:
   85  *
   86  *  m_clget()                m_getcl()
   87  *    |                         |
   88  *    |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
   89  *    |   |             [     Packet   ]            |
   90  *  [(Cluster Cache)]   [    Secondary ]   [ (Mbuf Cache)     ]
   91  *  [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Primary Zone ]
   92  *        |                       \________         |
   93  *  [ Cluster Keg   ]                      \       /
   94  *        |                              [ Mbuf Keg   ]
   95  *  [ Cluster Slabs ]                         |
   96  *        |                              [ Mbuf Slabs ]
   97  *         \____________(VM)_________________/
   98  *
   99  *
  100  * Whenever an object is allocated with uma_zalloc() out of
  101  * one of the Zones its _ctor_ function is executed.  The same
  102  * for any deallocation through uma_zfree() the _dtor_ function
  103  * is executed.
  104  *
  105  * Caches are per-CPU and are filled from the Primary Zone.
  106  *
  107  * Whenever an object is allocated from the underlying global
  108  * memory pool it gets pre-initialized with the _zinit_ functions.
  109  * When the Kegs are overfull, objects are decommissioned with the
  110  * _zfini_ functions and freed back to the global memory pool.
  111  *
  112  */
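/*
 * Example: a minimal sketch of how a consumer might exercise the two
 * common-case paths in the diagram above; the function name is a
 * placeholder and error handling is reduced to NULL checks.
 */
#if 0	/* illustrative sketch, not compiled */
static struct mbuf *
example_common_paths(void)
{
        struct mbuf *m;

        /* Mbuf + 2k cluster in one call, from the Packet Secondary Zone. */
        m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
        if (m != NULL)
                return (m);

        /* Plain mbuf from the Mbuf Primary Zone, cluster attached after. */
        m = m_gethdr(M_NOWAIT, MT_DATA);
        if (m != NULL && !m_clget(m, M_NOWAIT)) {
                m_freem(m);
                m = NULL;
        }
        return (m);
}
#endif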
  113 
  114 int nmbufs;                     /* limits number of mbufs */
  115 int nmbclusters;                /* limits number of mbuf clusters */
  116 int nmbjumbop;                  /* limits number of page size jumbo clusters */
  117 int nmbjumbo9;                  /* limits number of 9k jumbo clusters */
  118 int nmbjumbo16;                 /* limits number of 16k jumbo clusters */
  119 
  120 bool mb_use_ext_pgs = false;    /* use M_EXTPG mbufs for sendfile & TLS */
  121 
  122 static int
  123 sysctl_mb_use_ext_pgs(SYSCTL_HANDLER_ARGS)
  124 {
  125         int error, extpg;
  126 
  127         extpg = mb_use_ext_pgs;
  128         error = sysctl_handle_int(oidp, &extpg, 0, req);
  129         if (error == 0 && req->newptr != NULL) {
  130                 if (extpg != 0 && !PMAP_HAS_DMAP)
  131                         error = EOPNOTSUPP;
  132                 else
  133                         mb_use_ext_pgs = extpg != 0;
  134         }
  135         return (error);
  136 }
  137 SYSCTL_PROC(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLTYPE_INT | CTLFLAG_RW,
  138     &mb_use_ext_pgs, 0,
  139     sysctl_mb_use_ext_pgs, "IU",
  140     "Use unmapped mbufs for sendfile(2) and TLS offload");
  141 
  142 static quad_t maxmbufmem;       /* overall real memory limit for all mbufs */
  143 
  144 SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
  145     &maxmbufmem, 0, "Maximum real memory allocatable to various mbuf types");
  146 
  147 static counter_u64_t snd_tag_count;
  148 SYSCTL_COUNTER_U64(_kern_ipc, OID_AUTO, num_snd_tags, CTLFLAG_RW,
  149     &snd_tag_count, "# of active mbuf send tags");
  150 
  151 /*
  152  * tunable_mbinit() has to be run before any mbuf allocations are done.
  153  */
  154 static void
  155 tunable_mbinit(void *dummy)
  156 {
  157         quad_t realmem;
  158         int extpg;
  159 
  160         /*
  161          * The default limit for all mbuf related memory is 1/2 of all
  162          * available kernel memory (physical or kmem).
  163          * At most it can be 3/4 of available kernel memory.
  164          */
  165         realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size);
  166         maxmbufmem = realmem / 2;
  167         TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
  168         if (maxmbufmem > realmem / 4 * 3)
  169                 maxmbufmem = realmem / 4 * 3;
  170 
  171         TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
  172         if (nmbclusters == 0)
  173                 nmbclusters = maxmbufmem / MCLBYTES / 4;
  174 
  175         TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
  176         if (nmbjumbop == 0)
  177                 nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;
  178 
  179         TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
  180         if (nmbjumbo9 == 0)
  181                 nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;
  182 
  183         TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
  184         if (nmbjumbo16 == 0)
  185                 nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;
  186 
  187         /*
  188          * We need at least as many mbufs as we have clusters of
  189          * the various types added together.
  190          */
  191         TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
  192         if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
  193                 nmbufs = lmax(maxmbufmem / MSIZE / 5,
  194                     nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
  195 
  196         /*
  197          * Unmapped mbufs can only safely be used on platforms with a direct
  198          * map.
  199          */
  200         if (PMAP_HAS_DMAP) {
  201                 extpg = 1;
  202                 TUNABLE_INT_FETCH("kern.ipc.mb_use_ext_pgs", &extpg);
  203                 mb_use_ext_pgs = extpg != 0;
  204         }
  205 }
  206 SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
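/*
 * Worked example of the defaults above, assuming MSIZE == 256,
 * PAGE_SIZE == 4096 and no loader tunables set: with 16 GB of usable
 * kernel memory, maxmbufmem = 8 GB, nmbclusters = 8G/2048/4 = 1048576,
 * nmbjumbop = 8G/4096/4 = 524288, nmbjumbo9 = 8G/9216/6 ~= 155344,
 * nmbjumbo16 = 8G/16384/6 ~= 87381, and nmbufs = 8G/256/5 = 6710886
 * (well above the sum of the cluster limits, so the lmax() keeps it).
 */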
  207 
  208 static int
  209 sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
  210 {
  211         int error, newnmbclusters;
  212 
  213         newnmbclusters = nmbclusters;
  214         error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
  215         if (error == 0 && req->newptr && newnmbclusters != nmbclusters) {
  216                 if (newnmbclusters > nmbclusters &&
  217                     nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
  218                         nmbclusters = newnmbclusters;
  219                         nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
  220                         EVENTHANDLER_INVOKE(nmbclusters_change);
  221                 } else
  222                         error = EINVAL;
  223         }
  224         return (error);
  225 }
  226 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters,
  227     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &nmbclusters, 0,
  228     sysctl_nmbclusters, "IU",
  229     "Maximum number of mbuf clusters allowed");
  230 
  231 static int
  232 sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
  233 {
  234         int error, newnmbjumbop;
  235 
  236         newnmbjumbop = nmbjumbop;
  237         error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
  238         if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) {
  239                 if (newnmbjumbop > nmbjumbop &&
  240                     nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
  241                         nmbjumbop = newnmbjumbop;
  242                         nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
  243                 } else
  244                         error = EINVAL;
  245         }
  246         return (error);
  247 }
  248 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop,
  249     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &nmbjumbop, 0,
  250     sysctl_nmbjumbop, "IU",
  251     "Maximum number of mbuf page size jumbo clusters allowed");
  252 
  253 static int
  254 sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
  255 {
  256         int error, newnmbjumbo9;
  257 
  258         newnmbjumbo9 = nmbjumbo9;
  259         error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
  260         if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) {
  261                 if (newnmbjumbo9 > nmbjumbo9 &&
  262                     nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
  263                         nmbjumbo9 = newnmbjumbo9;
  264                         nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
  265                 } else
  266                         error = EINVAL;
  267         }
  268         return (error);
  269 }
  270 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9,
  271     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &nmbjumbo9, 0,
  272     sysctl_nmbjumbo9, "IU",
  273     "Maximum number of mbuf 9k jumbo clusters allowed");
  274 
  275 static int
  276 sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
  277 {
  278         int error, newnmbjumbo16;
  279 
  280         newnmbjumbo16 = nmbjumbo16;
  281         error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
  282         if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) {
  283                 if (newnmbjumbo16 > nmbjumbo16 &&
  284                     nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
  285                         nmbjumbo16 = newnmbjumbo16;
  286                         nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
  287                 } else
  288                         error = EINVAL;
  289         }
  290         return (error);
  291 }
  292 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16,
  293     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &nmbjumbo16, 0,
  294     sysctl_nmbjumbo16, "IU",
  295     "Maximum number of mbuf 16k jumbo clusters allowed");
  296 
  297 static int
  298 sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
  299 {
  300         int error, newnmbufs;
  301 
  302         newnmbufs = nmbufs;
  303         error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
  304         if (error == 0 && req->newptr && newnmbufs != nmbufs) {
  305                 if (newnmbufs > nmbufs) {
  306                         nmbufs = newnmbufs;
  307                         nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
  308                         EVENTHANDLER_INVOKE(nmbufs_change);
  309                 } else
  310                         error = EINVAL;
  311         }
  312         return (error);
  313 }
  314 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs,
  315     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
  316     &nmbufs, 0, sysctl_nmbufs, "IU",
  317     "Maximum number of mbufs allowed");
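/*
 * Example: the limits above may be raised (but never lowered) at
 * runtime.  A minimal userland sketch using sysctlbyname(3); the new
 * value chosen here is arbitrary.
 */
#if 0	/* illustrative userland sketch, not part of the kernel build */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>

int
main(void)
{
        int newval = 2097152, oldval;
        size_t oldlen = sizeof(oldval);

        /* Fails with EINVAL if newval is not an increase. */
        if (sysctlbyname("kern.ipc.nmbclusters", &oldval, &oldlen,
            &newval, sizeof(newval)) == -1)
                err(1, "sysctlbyname");
        return (0);
}
#endif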
  318 
  319 /*
  320  * Zones from which we allocate.
  321  */
  322 uma_zone_t      zone_mbuf;
  323 uma_zone_t      zone_clust;
  324 uma_zone_t      zone_pack;
  325 uma_zone_t      zone_jumbop;
  326 uma_zone_t      zone_jumbo9;
  327 uma_zone_t      zone_jumbo16;
  328 
  329 /*
  330  * Local prototypes.
  331  */
  332 static int      mb_ctor_mbuf(void *, int, void *, int);
  333 static int      mb_ctor_clust(void *, int, void *, int);
  334 static int      mb_ctor_pack(void *, int, void *, int);
  335 static void     mb_dtor_mbuf(void *, int, void *);
  336 static void     mb_dtor_pack(void *, int, void *);
  337 static int      mb_zinit_pack(void *, int, int);
  338 static void     mb_zfini_pack(void *, int);
  339 static void     mb_reclaim(uma_zone_t, int);
  340 
  341 /* Ensure that MSIZE is a power of 2. */
  342 CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
  343 
  344 _Static_assert(sizeof(struct mbuf) <= MSIZE,
  345     "size of mbuf exceeds MSIZE");
  346 /*
  347  * Initialize FreeBSD Network buffer allocation.
  348  */
  349 static void
  350 mbuf_init(void *dummy)
  351 {
  352 
  353         /*
  354          * Configure UMA zones for Mbufs, Clusters, and Packets.
  355          */
  356         zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
  357             mb_ctor_mbuf, mb_dtor_mbuf, NULL, NULL,
  358             MSIZE - 1, UMA_ZONE_CONTIG | UMA_ZONE_MAXBUCKET);
  359         if (nmbufs > 0)
  360                 nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
  361         uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");
  362         uma_zone_set_maxaction(zone_mbuf, mb_reclaim);
  363 
  364         zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
  365             mb_ctor_clust, NULL, NULL, NULL,
  366             UMA_ALIGN_PTR, UMA_ZONE_CONTIG);
  367         if (nmbclusters > 0)
  368                 nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
  369         uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
  370         uma_zone_set_maxaction(zone_clust, mb_reclaim);
  371 
  372         zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
  373             mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
  374 
  375         /* Make jumbo frame zone too. Page size, 9k and 16k. */
  376         zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
  377             mb_ctor_clust, NULL, NULL, NULL,
  378             UMA_ALIGN_PTR, UMA_ZONE_CONTIG);
  379         if (nmbjumbop > 0)
  380                 nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
  381         uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
  382         uma_zone_set_maxaction(zone_jumbop, mb_reclaim);
  383 
  384         zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
  385             mb_ctor_clust, NULL, NULL, NULL,
  386             UMA_ALIGN_PTR, UMA_ZONE_CONTIG);
  387         if (nmbjumbo9 > 0)
  388                 nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
  389         uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");
  390         uma_zone_set_maxaction(zone_jumbo9, mb_reclaim);
  391 
  392         zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
  393             mb_ctor_clust, NULL, NULL, NULL,
  394             UMA_ALIGN_PTR, UMA_ZONE_CONTIG);
  395         if (nmbjumbo16 > 0)
  396                 nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
  397         uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
  398         uma_zone_set_maxaction(zone_jumbo16, mb_reclaim);
  399 
  400         snd_tag_count = counter_u64_alloc(M_WAITOK);
  401 }
  402 SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
  403 
  404 #ifdef DEBUGNET
  405 /*
  406  * debugnet makes use of a pre-allocated pool of mbufs and clusters.  When
  407  * debugnet is configured, we initialize a set of UMA cache zones which return
  408  * items from this pool.  At panic-time, the regular UMA zone pointers are
  409  * overwritten with those of the cache zones so that drivers may allocate and
  410  * free mbufs and clusters without attempting to allocate physical memory.
  411  *
  412  * We keep mbufs and clusters in a pair of mbuf queues.  In particular, for
  413  * the purpose of caching clusters, we treat them as mbufs.
  414  */
  415 static struct mbufq dn_mbufq =
  416     { STAILQ_HEAD_INITIALIZER(dn_mbufq.mq_head), 0, INT_MAX };
  417 static struct mbufq dn_clustq =
  418     { STAILQ_HEAD_INITIALIZER(dn_clustq.mq_head), 0, INT_MAX };
  419 
  420 static int dn_clsize;
  421 static uma_zone_t dn_zone_mbuf;
  422 static uma_zone_t dn_zone_clust;
  423 static uma_zone_t dn_zone_pack;
  424 
  425 static struct debugnet_saved_zones {
  426         uma_zone_t dsz_mbuf;
  427         uma_zone_t dsz_clust;
  428         uma_zone_t dsz_pack;
  429         uma_zone_t dsz_jumbop;
  430         uma_zone_t dsz_jumbo9;
  431         uma_zone_t dsz_jumbo16;
  432         bool dsz_debugnet_zones_enabled;
  433 } dn_saved_zones;
  434 
  435 static int
  436 dn_buf_import(void *arg, void **store, int count, int domain __unused,
  437     int flags)
  438 {
  439         struct mbufq *q;
  440         struct mbuf *m;
  441         int i;
  442 
  443         q = arg;
  444 
  445         for (i = 0; i < count; i++) {
  446                 m = mbufq_dequeue(q);
  447                 if (m == NULL)
  448                         break;
  449                 trash_init(m, q == &dn_mbufq ? MSIZE : dn_clsize, flags);
  450                 store[i] = m;
  451         }
  452         KASSERT((flags & M_WAITOK) == 0 || i == count,
  453             ("%s: ran out of pre-allocated mbufs", __func__));
  454         return (i);
  455 }
  456 
  457 static void
  458 dn_buf_release(void *arg, void **store, int count)
  459 {
  460         struct mbufq *q;
  461         struct mbuf *m;
  462         int i;
  463 
  464         q = arg;
  465 
  466         for (i = 0; i < count; i++) {
  467                 m = store[i];
  468                 (void)mbufq_enqueue(q, m);
  469         }
  470 }
  471 
  472 static int
  473 dn_pack_import(void *arg __unused, void **store, int count, int domain __unused,
  474     int flags __unused)
  475 {
  476         struct mbuf *m;
  477         void *clust;
  478         int i;
  479 
  480         for (i = 0; i < count; i++) {
  481                 m = m_get(M_NOWAIT, MT_DATA);
  482                 if (m == NULL)
  483                         break;
  484                 clust = uma_zalloc(dn_zone_clust, M_NOWAIT);
  485                 if (clust == NULL) {
  486                         m_free(m);
  487                         break;
  488                 }
  489                 mb_ctor_clust(clust, dn_clsize, m, 0);
  490                 store[i] = m;
  491         }
  492         KASSERT((flags & M_WAITOK) == 0 || i == count,
  493             ("%s: ran out of pre-allocated mbufs", __func__));
  494         return (i);
  495 }
  496 
  497 static void
  498 dn_pack_release(void *arg __unused, void **store, int count)
  499 {
  500         struct mbuf *m;
  501         void *clust;
  502         int i;
  503 
  504         for (i = 0; i < count; i++) {
  505                 m = store[i];
  506                 clust = m->m_ext.ext_buf;
  507                 uma_zfree(dn_zone_clust, clust);
  508                 uma_zfree(dn_zone_mbuf, m);
  509         }
  510 }
  511 
  512 /*
  513  * Free the pre-allocated mbufs and clusters reserved for debugnet, and destroy
  514  * the corresponding UMA cache zones.
  515  */
  516 void
  517 debugnet_mbuf_drain(void)
  518 {
  519         struct mbuf *m;
  520         void *item;
  521 
  522         if (dn_zone_mbuf != NULL) {
  523                 uma_zdestroy(dn_zone_mbuf);
  524                 dn_zone_mbuf = NULL;
  525         }
  526         if (dn_zone_clust != NULL) {
  527                 uma_zdestroy(dn_zone_clust);
  528                 dn_zone_clust = NULL;
  529         }
  530         if (dn_zone_pack != NULL) {
  531                 uma_zdestroy(dn_zone_pack);
  532                 dn_zone_pack = NULL;
  533         }
  534 
  535         while ((m = mbufq_dequeue(&dn_mbufq)) != NULL)
  536                 m_free(m);
  537         while ((item = mbufq_dequeue(&dn_clustq)) != NULL)
  538                 uma_zfree(m_getzone(dn_clsize), item);
  539 }
  540 
  541 /*
  542  * Callback invoked immediately prior to starting a debugnet connection.
  543  */
  544 void
  545 debugnet_mbuf_start(void)
  546 {
  547 
  548         MPASS(!dn_saved_zones.dsz_debugnet_zones_enabled);
  549 
  550         /* Save the old zone pointers to restore when debugnet is closed. */
  551         dn_saved_zones = (struct debugnet_saved_zones) {
  552                 .dsz_debugnet_zones_enabled = true,
  553                 .dsz_mbuf = zone_mbuf,
  554                 .dsz_clust = zone_clust,
  555                 .dsz_pack = zone_pack,
  556                 .dsz_jumbop = zone_jumbop,
  557                 .dsz_jumbo9 = zone_jumbo9,
  558                 .dsz_jumbo16 = zone_jumbo16,
  559         };
  560 
  561         /*
  562          * All cluster zones return buffers of the size requested by the
  563          * drivers.  It's up to the driver to reinitialize the zones if the
  564          * MTU of a debugnet-enabled interface changes.
  565          */
  566         printf("debugnet: overwriting mbuf zone pointers\n");
  567         zone_mbuf = dn_zone_mbuf;
  568         zone_clust = dn_zone_clust;
  569         zone_pack = dn_zone_pack;
  570         zone_jumbop = dn_zone_clust;
  571         zone_jumbo9 = dn_zone_clust;
  572         zone_jumbo16 = dn_zone_clust;
  573 }
  574 
  575 /*
  576  * Callback invoked when a debugnet connection is closed/finished.
  577  */
  578 void
  579 debugnet_mbuf_finish(void)
  580 {
  581 
  582         MPASS(dn_saved_zones.dsz_debugnet_zones_enabled);
  583 
  584         printf("debugnet: restoring mbuf zone pointers\n");
  585         zone_mbuf = dn_saved_zones.dsz_mbuf;
  586         zone_clust = dn_saved_zones.dsz_clust;
  587         zone_pack = dn_saved_zones.dsz_pack;
  588         zone_jumbop = dn_saved_zones.dsz_jumbop;
  589         zone_jumbo9 = dn_saved_zones.dsz_jumbo9;
  590         zone_jumbo16 = dn_saved_zones.dsz_jumbo16;
  591 
  592         memset(&dn_saved_zones, 0, sizeof(dn_saved_zones));
  593 }
  594 
  595 /*
  596  * Reinitialize the debugnet mbuf+cluster pool and cache zones.
  597  */
  598 void
  599 debugnet_mbuf_reinit(int nmbuf, int nclust, int clsize)
  600 {
  601         struct mbuf *m;
  602         void *item;
  603 
  604         debugnet_mbuf_drain();
  605 
  606         dn_clsize = clsize;
  607 
  608         dn_zone_mbuf = uma_zcache_create("debugnet_" MBUF_MEM_NAME,
  609             MSIZE, mb_ctor_mbuf, mb_dtor_mbuf, NULL, NULL,
  610             dn_buf_import, dn_buf_release,
  611             &dn_mbufq, UMA_ZONE_NOBUCKET);
  612 
  613         dn_zone_clust = uma_zcache_create("debugnet_" MBUF_CLUSTER_MEM_NAME,
  614             clsize, mb_ctor_clust, NULL, NULL, NULL,
  615             dn_buf_import, dn_buf_release,
  616             &dn_clustq, UMA_ZONE_NOBUCKET);
  617 
  618         dn_zone_pack = uma_zcache_create("debugnet_" MBUF_PACKET_MEM_NAME,
  619             MCLBYTES, mb_ctor_pack, mb_dtor_pack, NULL, NULL,
  620             dn_pack_import, dn_pack_release,
  621             NULL, UMA_ZONE_NOBUCKET);
  622 
  623         while (nmbuf-- > 0) {
  624                 m = m_get(M_WAITOK, MT_DATA);
  625                 uma_zfree(dn_zone_mbuf, m);
  626         }
  627         while (nclust-- > 0) {
  628                 item = uma_zalloc(m_getzone(dn_clsize), M_WAITOK);
  629                 uma_zfree(dn_zone_clust, item);
  630         }
  631 }
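/*
 * Example: roughly how the debugnet(4) framework is expected to drive
 * the hooks above.  A sketch only; the function name is a placeholder
 * and the pool sizes are arbitrary.
 */
#if 0	/* illustrative sketch, not compiled */
static void
example_debugnet_lifecycle(void)
{

        /* At configuration time: reserve 512 mbufs and 128 2k clusters. */
        debugnet_mbuf_reinit(512, 128, MCLBYTES);

        /* At panic time, before transmitting: switch to the cache zones. */
        debugnet_mbuf_start();
        /* ... drivers allocate and free from the pre-allocated pool ... */
        debugnet_mbuf_finish();

        /* On teardown: release the reserved mbufs and clusters. */
        debugnet_mbuf_drain();
}
#endif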
  632 #endif /* DEBUGNET */
  633 
  634 /*
  635  * Constructor for Mbuf primary zone.
  636  *
  637  * The 'arg' pointer points to a mb_args structure which
  638  * contains call-specific information required to support the
  639  * mbuf allocation API.  See mbuf.h.
  640  */
  641 static int
  642 mb_ctor_mbuf(void *mem, int size, void *arg, int how)
  643 {
  644         struct mbuf *m;
  645         struct mb_args *args;
  646         int error;
  647         int flags;
  648         short type;
  649 
  650         args = (struct mb_args *)arg;
  651         type = args->type;
  652 
  653         /*
  654          * The mbuf is initialized later.  The caller has the
  655          * responsibility to set up any MAC labels too.
  656          */
  657         if (type == MT_NOINIT)
  658                 return (0);
  659 
  660         m = (struct mbuf *)mem;
  661         flags = args->flags;
  662         MPASS((flags & M_NOFREE) == 0);
  663 
  664         error = m_init(m, how, type, flags);
  665 
  666         return (error);
  667 }
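/*
 * Example: roughly how the allocation KPI reaches this constructor.
 * m_get() in sys/mbuf.h fills in an mb_args and passes it through
 * uma_zalloc_arg(); UMA then calls mb_ctor_mbuf() with that pointer as
 * 'arg'.  A simplified sketch of the caller side:
 */
#if 0	/* illustrative sketch, not compiled */
static struct mbuf *
example_m_get(int how, short type)
{
        struct mb_args args;

        args.flags = 0;
        args.type = type;
        return (uma_zalloc_arg(zone_mbuf, &args, how));
}
#endif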
  668 
  669 /*
  670  * The Mbuf primary zone destructor.
  671  */
  672 static void
  673 mb_dtor_mbuf(void *mem, int size, void *arg)
  674 {
  675         struct mbuf *m;
  676         unsigned long flags __diagused;
  677 
  678         m = (struct mbuf *)mem;
  679         flags = (unsigned long)arg;
  680 
  681         KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
  682         KASSERT((flags & 0x1) == 0, ("%s: obsolete MB_DTOR_SKIP passed", __func__));
  683         if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags))
  684                 m_tag_delete_chain(m, NULL);
  685 }
  686 
  687 /*
  688  * The Mbuf Packet zone destructor.
  689  */
  690 static void
  691 mb_dtor_pack(void *mem, int size, void *arg)
  692 {
  693         struct mbuf *m;
  694 
  695         m = (struct mbuf *)mem;
  696         if ((m->m_flags & M_PKTHDR) != 0)
  697                 m_tag_delete_chain(m, NULL);
  698 
  699         /* Make sure we've got a clean cluster back. */
  700         KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
  701         KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
  702         KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
  703         KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__));
  704         KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
  705         KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
  706         KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__));
  707 #if defined(INVARIANTS) && !defined(KMSAN)
  708         trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
  709 #endif
  710         /*
  711          * If there are processes blocked on zone_clust, waiting for pages
  712          * to be freed up, cause them to be woken up by draining the
  713          * packet zone.  We are exposed to a race here (in the check for
  714          * the UMA_ZFLAG_FULL) where we might miss the flag set, but that
  715          * is deliberate. We don't want to acquire the zone lock for every
  716          * mbuf free.
  717          */
  718         if (uma_zone_exhausted(zone_clust))
  719                 uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN);
  720 }
  721 
  722 /*
  723  * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor.
  724  *
  725  * Here the 'arg' pointer points to the Mbuf for which we
  726  * are configuring cluster storage.  If 'arg' is
  727  * NULL, we allocate just the cluster without attaching
  728  * it to an mbuf.  See mbuf.h.
  729  */
  730 static int
  731 mb_ctor_clust(void *mem, int size, void *arg, int how)
  732 {
  733         struct mbuf *m;
  734 
  735         m = (struct mbuf *)arg;
  736         if (m != NULL) {
  737                 m->m_ext.ext_buf = (char *)mem;
  738                 m->m_data = m->m_ext.ext_buf;
  739                 m->m_flags |= M_EXT;
  740                 m->m_ext.ext_free = NULL;
  741                 m->m_ext.ext_arg1 = NULL;
  742                 m->m_ext.ext_arg2 = NULL;
  743                 m->m_ext.ext_size = size;
  744                 m->m_ext.ext_type = m_gettype(size);
  745                 m->m_ext.ext_flags = EXT_FLAG_EMBREF;
  746                 m->m_ext.ext_count = 1;
  747         }
  748 
  749         return (0);
  750 }
  751 
  752 /*
  753  * The Packet secondary zone's init routine, executed on the
  754  * object's transition from mbuf keg slab to zone cache.
  755  */
  756 static int
  757 mb_zinit_pack(void *mem, int size, int how)
  758 {
  759         struct mbuf *m;
  760 
  761         m = (struct mbuf *)mem;         /* m is virgin. */
  762         if (uma_zalloc_arg(zone_clust, m, how) == NULL ||
  763             m->m_ext.ext_buf == NULL)
  764                 return (ENOMEM);
  765         m->m_ext.ext_type = EXT_PACKET; /* Override. */
  766 #if defined(INVARIANTS) && !defined(KMSAN)
  767         trash_init(m->m_ext.ext_buf, MCLBYTES, how);
  768 #endif
  769         return (0);
  770 }
  771 
  772 /*
  773  * The Packet secondary zone's fini routine, executed on the
  774  * object's transition from zone cache to keg slab.
  775  */
  776 static void
  777 mb_zfini_pack(void *mem, int size)
  778 {
  779         struct mbuf *m;
  780 
  781         m = (struct mbuf *)mem;
  782 #if defined(INVARIANTS) && !defined(KMSAN)
  783         trash_fini(m->m_ext.ext_buf, MCLBYTES);
  784 #endif
  785         uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
  786 #if defined(INVARIANTS) && !defined(KMSAN)
  787         trash_dtor(mem, size, NULL);
  788 #endif
  789 }
  790 
  791 /*
  792  * The "packet" keg constructor.
  793  */
  794 static int
  795 mb_ctor_pack(void *mem, int size, void *arg, int how)
  796 {
  797         struct mbuf *m;
  798         struct mb_args *args;
  799         int error, flags;
  800         short type;
  801 
  802         m = (struct mbuf *)mem;
  803         args = (struct mb_args *)arg;
  804         flags = args->flags;
  805         type = args->type;
  806         MPASS((flags & M_NOFREE) == 0);
  807 
  808 #if defined(INVARIANTS) && !defined(KMSAN)
  809         trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
  810 #endif
  811 
  812         error = m_init(m, how, type, flags);
  813 
  814         /* m_ext is already initialized. */
  815         m->m_data = m->m_ext.ext_buf;
  816         m->m_flags = (flags | M_EXT);
  817 
  818         return (error);
  819 }
  820 
  821 /*
  822  * This is the protocol drain routine.  Called by UMA whenever any of the
  823  * mbuf zones is closed to its limit.
  824  */
  825 static void
  826 mb_reclaim(uma_zone_t zone __unused, int pending __unused)
  827 {
  828 
  829         EVENTHANDLER_INVOKE(mbuf_lowmem, VM_LOW_MBUFS);
  830 }
  831 
  832 /*
  833  * Free "count" units of I/O from an mbuf chain.  They could be held
  834  * in M_EXTPG mbufs or in ordinary mbufs.  This code is intended to be
  835  * called in an error path (I/O error, closed connection, etc).
  836  */
  837 void
  838 mb_free_notready(struct mbuf *m, int count)
  839 {
  840         int i;
  841 
  842         for (i = 0; i < count && m != NULL; i++) {
  843                 if ((m->m_flags & M_EXTPG) != 0) {
  844                         m->m_epg_nrdy--;
  845                         if (m->m_epg_nrdy != 0)
  846                                 continue;
  847                 }
  848                 m = m_free(m);
  849         }
  850         KASSERT(i == count, ("Removed only %d items from %p", i, m));
  851 }
  852 
  853 /*
  854  * Compress an unmapped mbuf into a simple mbuf when it holds a small
  855  * amount of data.  This is used as a DoS defense to avoid having
  856  * small packets tie up wired pages, an ext_pgs structure, and an
  857  * mbuf.  Since this converts the existing mbuf in place, it can only
  858  * be used if there are no other references to 'm'.
  859  */
  860 int
  861 mb_unmapped_compress(struct mbuf *m)
  862 {
  863         volatile u_int *refcnt;
  864         char buf[MLEN];
  865 
  866         /*
  867          * Assert that 'm' does not have a packet header.  If 'm' had
  868          * a packet header, it would only be able to hold MHLEN bytes
  869          * and m_data would have to be initialized differently.
  870          */
  871         KASSERT((m->m_flags & M_PKTHDR) == 0 && (m->m_flags & M_EXTPG),
  872             ("%s: m %p !M_EXTPG or M_PKTHDR", __func__, m));
  873         KASSERT(m->m_len <= MLEN, ("m_len too large %p", m));
  874 
  875         if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
  876                 refcnt = &m->m_ext.ext_count;
  877         } else {
  878                 KASSERT(m->m_ext.ext_cnt != NULL,
  879                     ("%s: no refcounting pointer on %p", __func__, m));
  880                 refcnt = m->m_ext.ext_cnt;
  881         }
  882 
  883         if (*refcnt != 1)
  884                 return (EBUSY);
  885 
  886         m_copydata(m, 0, m->m_len, buf);
  887 
  888         /* Free the backing pages. */
  889         m->m_ext.ext_free(m);
  890 
  891         /* Turn 'm' into a "normal" mbuf. */
  892         m->m_flags &= ~(M_EXT | M_RDONLY | M_EXTPG);
  893         m->m_data = m->m_dat;
  894 
  895         /* Copy data back into m. */
  896         bcopy(buf, mtod(m, char *), m->m_len);
  897 
  898         return (0);
  899 }
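/*
 * Example: a sketch of a caller that opportunistically folds a small,
 * header-less, singly-referenced M_EXTPG mbuf back into a plain mbuf.
 * The function name is a placeholder; an EBUSY return (shared
 * reference) simply leaves the mbuf unchanged.
 */
#if 0	/* illustrative sketch, not compiled */
static void
example_compress_small(struct mbuf *m)
{

        if ((m->m_flags & (M_EXTPG | M_PKTHDR)) == M_EXTPG &&
            m->m_len <= MLEN)
                (void)mb_unmapped_compress(m);
}
#endif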
  900 
  901 /*
  902  * These next few routines are used to permit downgrading an unmapped
  903  * mbuf to a chain of mapped mbufs.  This is used when an interface
  904  * doesn't support unmapped mbufs or if checksums need to be
  905  * computed in software.
  906  *
  907  * Each unmapped mbuf is converted to a chain of mbufs.  First, any
  908  * TLS header data is stored in a regular mbuf.  Second, each page of
  909  * unmapped data is stored in an mbuf with an EXT_SFBUF external
  910  * cluster.  These mbufs use an sf_buf to provide a valid KVA for the
  911  * associated physical page.  They also hold a reference on the
  912  * original M_EXTPG mbuf to ensure the physical page doesn't go away.
  913  * Finally, any TLS trailer data is stored in a regular mbuf.
  914  *
  915  * mb_unmapped_free_mext() is the ext_free handler for the EXT_SFBUF
  916  * mbufs.  It frees the associated sf_buf and releases its reference
  917  * on the original M_EXTPG mbuf.
  918  *
  919  * _mb_unmapped_to_ext() is a helper function that converts a single
  920  * unmapped mbuf into a chain of mbufs.
  921  *
  922  * mb_unmapped_to_ext() is the public function that walks an mbuf
  923  * chain converting any unmapped mbufs to mapped mbufs.  It returns
  924  * the new chain of mapped mbufs on success.  On failure it frees
  925  * the original mbuf chain and returns NULL.
  926  */
  927 static void
  928 mb_unmapped_free_mext(struct mbuf *m)
  929 {
  930         struct sf_buf *sf;
  931         struct mbuf *old_m;
  932 
  933         sf = m->m_ext.ext_arg1;
  934         sf_buf_free(sf);
  935 
  936         /* Drop the reference on the backing M_EXTPG mbuf. */
  937         old_m = m->m_ext.ext_arg2;
  938         mb_free_extpg(old_m);
  939 }
  940 
  941 static struct mbuf *
  942 _mb_unmapped_to_ext(struct mbuf *m)
  943 {
  944         struct mbuf *m_new, *top, *prev, *mref;
  945         struct sf_buf *sf;
  946         vm_page_t pg;
  947         int i, len, off, pglen, pgoff, seglen, segoff;
  948         volatile u_int *refcnt;
  949         u_int ref_inc = 0;
  950 
  951         M_ASSERTEXTPG(m);
  952         len = m->m_len;
  953         KASSERT(m->m_epg_tls == NULL, ("%s: can't convert TLS mbuf %p",
  954             __func__, m));
  955 
  956         /* See if this is the mbuf that holds the embedded refcount. */
  957         if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
  958                 refcnt = &m->m_ext.ext_count;
  959                 mref = m;
  960         } else {
  961                 KASSERT(m->m_ext.ext_cnt != NULL,
  962                     ("%s: no refcounting pointer on %p", __func__, m));
  963                 refcnt = m->m_ext.ext_cnt;
  964                 mref = __containerof(refcnt, struct mbuf, m_ext.ext_count);
  965         }
  966 
  967         /* Skip over any data removed from the front. */
  968         off = mtod(m, vm_offset_t);
  969 
  970         top = NULL;
  971         if (m->m_epg_hdrlen != 0) {
  972                 if (off >= m->m_epg_hdrlen) {
  973                         off -= m->m_epg_hdrlen;
  974                 } else {
  975                         seglen = m->m_epg_hdrlen - off;
  976                         segoff = off;
  977                         seglen = min(seglen, len);
  978                         off = 0;
  979                         len -= seglen;
  980                         m_new = m_get(M_NOWAIT, MT_DATA);
  981                         if (m_new == NULL)
  982                                 goto fail;
  983                         m_new->m_len = seglen;
  984                         prev = top = m_new;
  985                         memcpy(mtod(m_new, void *), &m->m_epg_hdr[segoff],
  986                             seglen);
  987                 }
  988         }
  989         pgoff = m->m_epg_1st_off;
  990         for (i = 0; i < m->m_epg_npgs && len > 0; i++) {
  991                 pglen = m_epg_pagelen(m, i, pgoff);
  992                 if (off >= pglen) {
  993                         off -= pglen;
  994                         pgoff = 0;
  995                         continue;
  996                 }
  997                 seglen = pglen - off;
  998                 segoff = pgoff + off;
  999                 off = 0;
 1000                 seglen = min(seglen, len);
 1001                 len -= seglen;
 1002 
 1003                 pg = PHYS_TO_VM_PAGE(m->m_epg_pa[i]);
 1004                 m_new = m_get(M_NOWAIT, MT_DATA);
 1005                 if (m_new == NULL)
 1006                         goto fail;
 1007                 if (top == NULL) {
 1008                         top = prev = m_new;
 1009                 } else {
 1010                         prev->m_next = m_new;
 1011                         prev = m_new;
 1012                 }
 1013                 sf = sf_buf_alloc(pg, SFB_NOWAIT);
 1014                 if (sf == NULL)
 1015                         goto fail;
 1016 
 1017                 ref_inc++;
 1018                 m_extadd(m_new, (char *)sf_buf_kva(sf), PAGE_SIZE,
 1019                     mb_unmapped_free_mext, sf, mref, M_RDONLY, EXT_SFBUF);
 1020                 m_new->m_data += segoff;
 1021                 m_new->m_len = seglen;
 1022 
 1023                 pgoff = 0;
 1024         }
 1025         if (len != 0) {
 1026                 KASSERT((off + len) <= m->m_epg_trllen,
 1027                     ("off + len > trail (%d + %d > %d)", off, len,
 1028                     m->m_epg_trllen));
 1029                 m_new = m_get(M_NOWAIT, MT_DATA);
 1030                 if (m_new == NULL)
 1031                         goto fail;
 1032                 if (top == NULL)
 1033                         top = m_new;
 1034                 else
 1035                         prev->m_next = m_new;
 1036                 m_new->m_len = len;
 1037                 memcpy(mtod(m_new, void *), &m->m_epg_trail[off], len);
 1038         }
 1039 
 1040         if (ref_inc != 0) {
 1041                 /*
 1042                  * Obtain an additional reference on the old mbuf for
 1043                  * each created EXT_SFBUF mbuf.  They will be dropped
 1044                  * in mb_unmapped_free_mext().
 1045                  */
 1046                 if (*refcnt == 1)
 1047                         *refcnt += ref_inc;
 1048                 else
 1049                         atomic_add_int(refcnt, ref_inc);
 1050         }
 1051         m_free(m);
 1052         return (top);
 1053 
 1054 fail:
 1055         if (ref_inc != 0) {
 1056                 /*
 1057                  * Obtain an additional reference on the old mbuf for
 1058                  * each created EXT_SFBUF mbuf.  They will be
 1059                  * immediately dropped when these mbufs are freed
 1060                  * below.
 1061                  */
 1062                 if (*refcnt == 1)
 1063                         *refcnt += ref_inc;
 1064                 else
 1065                         atomic_add_int(refcnt, ref_inc);
 1066         }
 1067         m_free(m);
 1068         m_freem(top);
 1069         return (NULL);
 1070 }
 1071 
 1072 struct mbuf *
 1073 mb_unmapped_to_ext(struct mbuf *top)
 1074 {
 1075         struct mbuf *m, *next, *prev = NULL;
 1076 
 1077         prev = NULL;
 1078         for (m = top; m != NULL; m = next) {
 1079                 /* m might be freed, so cache the next pointer. */
 1080                 next = m->m_next;
 1081                 if (m->m_flags & M_EXTPG) {
 1082                         if (prev != NULL) {
 1083                                 /*
 1084                                  * Remove 'm' from the new chain so
 1085                                  * that the 'top' chain terminates
 1086                                  * before 'm' in case 'top' is freed
 1087                                  * due to an error.
 1088                                  */
 1089                                 prev->m_next = NULL;
 1090                         }
 1091                         m = _mb_unmapped_to_ext(m);
 1092                         if (m == NULL) {
 1093                                 m_freem(top);
 1094                                 m_freem(next);
 1095                                 return (NULL);
 1096                         }
 1097                         if (prev == NULL) {
 1098                                 top = m;
 1099                         } else {
 1100                                 prev->m_next = m;
 1101                         }
 1102 
 1103                         /*
 1104                          * Replaced one mbuf with a chain, so we must
 1105                          * find the end of chain.
 1106                          */
 1107                         prev = m_last(m);
 1108                 } else {
 1109                         if (prev != NULL) {
 1110                                 prev->m_next = m;
 1111                         }
 1112                         prev = m;
 1113                 }
 1114         }
 1115         return (top);
 1116 }
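/*
 * Example: a sketch of a transmit path for hardware that cannot handle
 * unmapped mbufs.  The helper name is hypothetical; the pattern is to
 * convert the chain before DMA mapping and treat NULL as "chain freed".
 */
#if 0	/* illustrative sketch, not compiled */
static int
example_xmit_prepare(struct mbuf **mp)
{
        struct mbuf *m;

        m = mb_unmapped_to_ext(*mp);
        if (m == NULL) {
                *mp = NULL;     /* the original chain was already freed */
                return (ENOMEM);
        }
        *mp = m;
        /* ... continue with bus_dma mapping of a fully mapped chain ... */
        return (0);
}
#endif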
 1117 
 1118 /*
 1119  * Allocate an empty M_EXTPG mbuf.  The ext_free routine is
 1120  * responsible for freeing any pages backing this mbuf when it is
 1121  * freed.
 1122  */
 1123 struct mbuf *
 1124 mb_alloc_ext_pgs(int how, m_ext_free_t ext_free)
 1125 {
 1126         struct mbuf *m;
 1127 
 1128         m = m_get(how, MT_DATA);
 1129         if (m == NULL)
 1130                 return (NULL);
 1131 
 1132         m->m_epg_npgs = 0;
 1133         m->m_epg_nrdy = 0;
 1134         m->m_epg_1st_off = 0;
 1135         m->m_epg_last_len = 0;
 1136         m->m_epg_flags = 0;
 1137         m->m_epg_hdrlen = 0;
 1138         m->m_epg_trllen = 0;
 1139         m->m_epg_tls = NULL;
 1140         m->m_epg_so = NULL;
 1141         m->m_data = NULL;
 1142         m->m_flags |= (M_EXT | M_RDONLY | M_EXTPG);
 1143         m->m_ext.ext_flags = EXT_FLAG_EMBREF;
 1144         m->m_ext.ext_count = 1;
 1145         m->m_ext.ext_size = 0;
 1146         m->m_ext.ext_free = ext_free;
 1147         return (m);
 1148 }
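/*
 * Example: a sketch of allocating an M_EXTPG mbuf.  Both function names
 * are hypothetical placeholders; a real consumer (sendfile, KTLS)
 * releases its page references in the ext_free callback and goes on to
 * fill in m_epg_pa[], m_epg_npgs and the length fields.
 */
#if 0	/* illustrative sketch, not compiled */
static void
example_extpg_free(struct mbuf *m)
{

        /* Release whatever backs m->m_epg_pa[] here. */
}

static struct mbuf *
example_alloc_extpg(void)
{

        return (mb_alloc_ext_pgs(M_NOWAIT, example_extpg_free));
}
#endif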
 1149 
 1150 /*
 1151  * Clean up after mbufs with M_EXT storage attached to them if the
 1152  * reference count hits 1.
 1153  */
 1154 void
 1155 mb_free_ext(struct mbuf *m)
 1156 {
 1157         volatile u_int *refcnt;
 1158         struct mbuf *mref;
 1159         int freembuf;
 1160 
 1161         KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
 1162 
 1163         /* See if this is the mbuf that holds the embedded refcount. */
 1164         if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
 1165                 refcnt = &m->m_ext.ext_count;
 1166                 mref = m;
 1167         } else {
 1168                 KASSERT(m->m_ext.ext_cnt != NULL,
 1169                     ("%s: no refcounting pointer on %p", __func__, m));
 1170                 refcnt = m->m_ext.ext_cnt;
 1171                 mref = __containerof(refcnt, struct mbuf, m_ext.ext_count);
 1172         }
 1173 
 1174         /*
 1175          * Check if the header is embedded in the cluster.  It is
 1176  * important that we do not touch any of the mbuf fields
 1177  * after we have freed the external storage, since the mbuf
 1178          * could have been embedded in it.  For now, the mbufs
 1179          * embedded into the cluster are always of type EXT_EXTREF,
 1180          * and for this type we won't free the mref.
 1181          */
 1182         if (m->m_flags & M_NOFREE) {
 1183                 freembuf = 0;
 1184                 KASSERT(m->m_ext.ext_type == EXT_EXTREF ||
 1185                     m->m_ext.ext_type == EXT_RXRING,
 1186                     ("%s: no-free mbuf %p has wrong type", __func__, m));
 1187         } else
 1188                 freembuf = 1;
 1189 
 1190         /* Free attached storage if this mbuf is the only reference to it. */
 1191         if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) {
 1192                 switch (m->m_ext.ext_type) {
 1193                 case EXT_PACKET:
 1194                         /* The packet zone is special. */
 1195                         if (*refcnt == 0)
 1196                                 *refcnt = 1;
 1197                         uma_zfree(zone_pack, mref);
 1198                         break;
 1199                 case EXT_CLUSTER:
 1200                         uma_zfree(zone_clust, m->m_ext.ext_buf);
 1201                         m_free_raw(mref);
 1202                         break;
 1203                 case EXT_JUMBOP:
 1204                         uma_zfree(zone_jumbop, m->m_ext.ext_buf);
 1205                         m_free_raw(mref);
 1206                         break;
 1207                 case EXT_JUMBO9:
 1208                         uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
 1209                         m_free_raw(mref);
 1210                         break;
 1211                 case EXT_JUMBO16:
 1212                         uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
 1213                         m_free_raw(mref);
 1214                         break;
 1215                 case EXT_SFBUF:
 1216                 case EXT_NET_DRV:
 1217                 case EXT_MOD_TYPE:
 1218                 case EXT_DISPOSABLE:
 1219                         KASSERT(mref->m_ext.ext_free != NULL,
 1220                             ("%s: ext_free not set", __func__));
 1221                         mref->m_ext.ext_free(mref);
 1222                         m_free_raw(mref);
 1223                         break;
 1224                 case EXT_EXTREF:
 1225                         KASSERT(m->m_ext.ext_free != NULL,
 1226                             ("%s: ext_free not set", __func__));
 1227                         m->m_ext.ext_free(m);
 1228                         break;
 1229                 case EXT_RXRING:
 1230                         KASSERT(m->m_ext.ext_free == NULL,
 1231                             ("%s: ext_free is set", __func__));
 1232                         break;
 1233                 default:
 1234                         KASSERT(m->m_ext.ext_type == 0,
 1235                             ("%s: unknown ext_type", __func__));
 1236                 }
 1237         }
 1238 
 1239         if (freembuf && m != mref)
 1240                 m_free_raw(m);
 1241 }
 1242 
 1243 /*
 1244  * Clean up after mbufs with M_EXTPG storage attached to them if the
 1245  * reference count hits 1.
 1246  */
 1247 void
 1248 mb_free_extpg(struct mbuf *m)
 1249 {
 1250         volatile u_int *refcnt;
 1251         struct mbuf *mref;
 1252 
 1253         M_ASSERTEXTPG(m);
 1254 
 1255         /* See if this is the mbuf that holds the embedded refcount. */
 1256         if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
 1257                 refcnt = &m->m_ext.ext_count;
 1258                 mref = m;
 1259         } else {
 1260                 KASSERT(m->m_ext.ext_cnt != NULL,
 1261                     ("%s: no refcounting pointer on %p", __func__, m));
 1262                 refcnt = m->m_ext.ext_cnt;
 1263                 mref = __containerof(refcnt, struct mbuf, m_ext.ext_count);
 1264         }
 1265 
 1266         /* Free attached storage if this mbuf is the only reference to it. */
 1267         if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) {
 1268                 KASSERT(mref->m_ext.ext_free != NULL,
 1269                     ("%s: ext_free not set", __func__));
 1270 
 1271                 mref->m_ext.ext_free(mref);
 1272 #ifdef KERN_TLS
 1273                 if (mref->m_epg_tls != NULL &&
 1274                     !refcount_release_if_not_last(&mref->m_epg_tls->refcount))
 1275                         ktls_enqueue_to_free(mref);
 1276                 else
 1277 #endif
 1278                         m_free_raw(mref);
 1279         }
 1280 
 1281         if (m != mref)
 1282                 m_free_raw(m);
 1283 }
 1284 
 1285 /*
 1286  * Official mbuf(9) allocation KPI for stack and drivers:
 1287  *
 1288  * m_get()      - a single mbuf without any attachments, sys/mbuf.h.
 1289  * m_gethdr()   - a single mbuf initialized as M_PKTHDR, sys/mbuf.h.
 1290  * m_getcl()    - an mbuf + 2k cluster, sys/mbuf.h.
 1291  * m_clget()    - attach cluster to already allocated mbuf.
 1292  * m_cljget()   - attach jumbo cluster to already allocated mbuf.
 1293  * m_get2()     - allocate minimum mbuf that would fit size argument.
 1294  * m_getm2()    - allocate a chain of mbufs/clusters.
 1295  * m_extadd()   - attach external cluster to mbuf.
 1296  *
 1297  * m_free()     - free single mbuf with its tags and ext, sys/mbuf.h.
 1298  * m_freem()    - free chain of mbufs.
 1299  */
 1300 
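/*
 * Example: typical consumer use of the KPI above.  A minimal sketch
 * that builds a single-mbuf packet from a caller-supplied buffer; the
 * function name and its inputs are hypothetical.
 */
#if 0	/* illustrative sketch, not compiled */
static struct mbuf *
example_build_packet(const void *buf, int len)
{
        struct mbuf *m;

        if (len < 0 || len > MCLBYTES)
                return (NULL);

        /* Packet header mbuf; attach a 2k cluster when MHLEN is too small. */
        if (len > MHLEN)
                m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
        else
                m = m_gethdr(M_NOWAIT, MT_DATA);
        if (m == NULL)
                return (NULL);

        memcpy(mtod(m, void *), buf, len);
        m->m_len = m->m_pkthdr.len = len;
        return (m);
}
#endif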
 1301 int
 1302 m_clget(struct mbuf *m, int how)
 1303 {
 1304 
 1305         KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT",
 1306             __func__, m));
 1307         m->m_ext.ext_buf = (char *)NULL;
 1308         uma_zalloc_arg(zone_clust, m, how);
 1309         /*
 1310          * On a cluster allocation failure, drain the packet zone and retry,
 1311          * we might be able to loosen a few clusters up on the drain.
 1312          */
 1313         if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
 1314                 uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN);
 1315                 uma_zalloc_arg(zone_clust, m, how);
 1316         }
 1317         MBUF_PROBE2(m__clget, m, how);
 1318         return (m->m_flags & M_EXT);
 1319 }
 1320 
 1321 /*
 1322  * m_cljget() is different from m_clget() as it can allocate clusters without
 1323  * attaching them to an mbuf.  In that case the return value is the pointer
 1324  * to the cluster of the requested size.  If an mbuf was specified, it gets
 1325  * the cluster attached to it and the return value can be safely ignored.
 1326  * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
 1327  */
 1328 void *
 1329 m_cljget(struct mbuf *m, int how, int size)
 1330 {
 1331         uma_zone_t zone;
 1332         void *retval;
 1333 
 1334         if (m != NULL) {
 1335                 KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT",
 1336                     __func__, m));
 1337                 m->m_ext.ext_buf = NULL;
 1338         }
 1339 
 1340         zone = m_getzone(size);
 1341         retval = uma_zalloc_arg(zone, m, how);
 1342 
 1343         MBUF_PROBE4(m__cljget, m, how, size, retval);
 1344 
 1345         return (retval);
 1346 }
 1347 
 1348 /*
 1349  * m_get2() allocates minimum mbuf that would fit "size" argument.
 1350  */
 1351 struct mbuf *
 1352 m_get2(int size, int how, short type, int flags)
 1353 {
 1354         struct mb_args args;
 1355         struct mbuf *m, *n;
 1356 
 1357         args.flags = flags;
 1358         args.type = type;
 1359 
 1360         if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0))
 1361                 return (uma_zalloc_arg(zone_mbuf, &args, how));
 1362         if (size <= MCLBYTES)
 1363                 return (uma_zalloc_arg(zone_pack, &args, how));
 1364 
 1365         if (size > MJUMPAGESIZE)
 1366                 return (NULL);
 1367 
 1368         m = uma_zalloc_arg(zone_mbuf, &args, how);
 1369         if (m == NULL)
 1370                 return (NULL);
 1371 
 1372         n = uma_zalloc_arg(zone_jumbop, m, how);
 1373         if (n == NULL) {
 1374                 m_free_raw(m);
 1375                 return (NULL);
 1376         }
 1377 
 1378         return (m);
 1379 }
 1380 
 1381 /*
 1382  * m_get3() allocates minimum mbuf that would fit "size" argument.
 1383  * Unlike m_get2() it can allocate clusters up to MJUM16BYTES.
 1384  */
 1385 struct mbuf *
 1386 m_get3(int size, int how, short type, int flags)
 1387 {
 1388         struct mb_args args;
 1389         struct mbuf *m, *n;
 1390         uma_zone_t zone;
 1391 
 1392         if (size <= MJUMPAGESIZE)
 1393                 return (m_get2(size, how, type, flags));
 1394 
 1395         if (size > MJUM16BYTES)
 1396                 return (NULL);
 1397 
 1398         args.flags = flags;
 1399         args.type = type;
 1400 
 1401         m = uma_zalloc_arg(zone_mbuf, &args, how);
 1402         if (m == NULL)
 1403                 return (NULL);
 1404 
 1405         if (size <= MJUM9BYTES)
 1406                 zone = zone_jumbo9;
 1407         else
 1408                 zone = zone_jumbo16;
 1409 
 1410         n = uma_zalloc_arg(zone, m, how);
 1411         if (n == NULL) {
 1412                 m_free_raw(m);
 1413                 return (NULL);
 1414         }
 1415 
 1416         return (m);
 1417 }
 1418 
 1419 /*
 1420  * m_getjcl() returns an mbuf with a cluster of the specified size attached.
 1421  * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
 1422  */
 1423 struct mbuf *
 1424 m_getjcl(int how, short type, int flags, int size)
 1425 {
 1426         struct mb_args args;
 1427         struct mbuf *m, *n;
 1428         uma_zone_t zone;
 1429 
 1430         if (size == MCLBYTES)
 1431                 return (m_getcl(how, type, flags));
 1432 
 1433         args.flags = flags;
 1434         args.type = type;
 1435 
 1436         m = uma_zalloc_arg(zone_mbuf, &args, how);
 1437         if (m == NULL)
 1438                 return (NULL);
 1439 
 1440         zone = m_getzone(size);
 1441         n = uma_zalloc_arg(zone, m, how);
 1442         if (n == NULL) {
 1443                 m_free_raw(m);
 1444                 return (NULL);
 1445         }
 1446         MBUF_PROBE5(m__getjcl, how, type, flags, size, m);
 1447         return (m);
 1448 }
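
/*
 * Editor's example (not part of kern_mbuf.c): a receive-ring refill helper
 * of the kind a driver for a large-MTU interface might use; the name is
 * hypothetical.  M_NOWAIT is typical in rx paths, so a NULL return must be
 * tolerated.
 */
static struct mbuf *
example_rx_refill(void)
{

        return (m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES));
}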
 1449 
 1450 /*
 1451  * Allocate a given length worth of mbufs and/or clusters (whatever fits
 1452  * best) and return a pointer to the top of the allocated chain.  If an
 1453  * existing mbuf chain is provided, then we will append the new chain
 1454  * to the existing one and return a pointer to the provided mbuf.
 1455  */
 1456 struct mbuf *
 1457 m_getm2(struct mbuf *m, int len, int how, short type, int flags)
 1458 {
 1459         struct mbuf *mb, *nm = NULL, *mtail = NULL;
 1460 
 1461         KASSERT(len >= 0, ("%s: len is < 0", __func__));
 1462 
 1463         /* Validate flags. */
 1464         flags &= (M_PKTHDR | M_EOR);
 1465 
 1466         /* Packet header mbuf must be first in chain. */
 1467         if ((flags & M_PKTHDR) && m != NULL)
 1468                 flags &= ~M_PKTHDR;
 1469 
 1470         /* Loop and append maximum sized mbufs to the chain tail. */
 1471         while (len > 0) {
 1472                 mb = NULL;
 1473                 if (len > MCLBYTES) {
 1474                         mb = m_getjcl(M_NOWAIT, type, (flags & M_PKTHDR),
 1475                             MJUMPAGESIZE);
 1476                 }
 1477                 if (mb == NULL) {
 1478                         if (len >= MINCLSIZE)
 1479                                 mb = m_getcl(how, type, (flags & M_PKTHDR));
 1480                         else if (flags & M_PKTHDR)
 1481                                 mb = m_gethdr(how, type);
 1482                         else
 1483                                 mb = m_get(how, type);
 1484 
 1485                         /*
 1486                          * Fail the whole operation if one mbuf can't be
 1487                          * allocated.
 1488                          */
 1489                         if (mb == NULL) {
 1490                                 m_freem(nm);
 1491                                 return (NULL);
 1492                         }
 1493                 }
 1494 
 1495                 /* Bookkeeping. */
 1496                 len -= M_SIZE(mb);
 1497                 if (mtail != NULL)
 1498                         mtail->m_next = mb;
 1499                 else
 1500                         nm = mb;
 1501                 mtail = mb;
 1502                 flags &= ~M_PKTHDR;     /* Only valid on the first mbuf. */
 1503         }
 1504         if (flags & M_EOR)
 1505                 mtail->m_flags |= M_EOR;  /* Only valid on the last mbuf. */
 1506 
 1507         /* If mbuf was supplied, append new chain to the end of it. */
 1508         if (m != NULL) {
 1509                 for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next)
 1510                         ;
 1511                 mtail->m_next = nm;
 1512                 mtail->m_flags &= ~M_EOR;
 1513         } else
 1514                 m = nm;
 1515 
 1516         return (m);
 1517 }
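
/*
 * Editor's example (not part of kern_mbuf.c): building a chain sized for
 * "resid" bytes in one call.  The helper name is hypothetical; the caller
 * is still responsible for copying data in, setting m_len on each mbuf and
 * m_pkthdr.len on the head.
 */
static struct mbuf *
example_chain_for(int resid, int how)
{

        return (m_getm2(NULL, resid, how, MT_DATA, M_PKTHDR));
}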
 1518 
 1519 /*-
 1520  * Configure a provided mbuf to refer to the provided external storage
 1521  * buffer and set up a reference count for said buffer.
 1522  *
 1523  * Arguments:
 1524  *    mb     The existing mbuf to which to attach the provided buffer.
 1525  *    buf    The address of the provided external storage buffer.
 1526  *    size   The size of the provided buffer.
 1527  *    freef  A pointer to a routine that is responsible for freeing the
 1528  *           provided external storage buffer.
 1529  *    args   A pointer to an argument structure (of any type) to be passed
 1530  *           to the provided freef routine (may be NULL).
 1531  *    flags  Any other flags to be passed to the provided mbuf.
 1532  *    type   The type that the external storage buffer should be
 1533  *           labeled with.
 1534  *
 1535  * Returns:
 1536  *    Nothing.
 1537  */
 1538 void
 1539 m_extadd(struct mbuf *mb, char *buf, u_int size, m_ext_free_t freef,
 1540     void *arg1, void *arg2, int flags, int type)
 1541 {
 1542 
 1543         KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));
 1544 
 1545         mb->m_flags |= (M_EXT | flags);
 1546         mb->m_ext.ext_buf = buf;
 1547         mb->m_data = mb->m_ext.ext_buf;
 1548         mb->m_ext.ext_size = size;
 1549         mb->m_ext.ext_free = freef;
 1550         mb->m_ext.ext_arg1 = arg1;
 1551         mb->m_ext.ext_arg2 = arg2;
 1552         mb->m_ext.ext_type = type;
 1553 
 1554         if (type != EXT_EXTREF) {
 1555                 mb->m_ext.ext_count = 1;
 1556                 mb->m_ext.ext_flags = EXT_FLAG_EMBREF;
 1557         } else
 1558                 mb->m_ext.ext_flags = 0;
 1559 }
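
/*
 * Editor's example (not part of kern_mbuf.c): wrapping a driver-owned
 * buffer with m_extadd().  "example_dma_free" and the pool pointer are
 * hypothetical; the free routine is invoked when the last reference to the
 * external storage is dropped.
 */
static void
example_dma_free(struct mbuf *m)
{

        /* Return m->m_ext.ext_buf (and m->m_ext.ext_arg1) to the driver. */
}

static struct mbuf *
example_wrap_buf(char *buf, u_int size, void *pool, int how)
{
        struct mbuf *m;

        m = m_gethdr(how, MT_DATA);
        if (m == NULL)
                return (NULL);
        m_extadd(m, buf, size, example_dma_free, pool, NULL, 0, EXT_NET_DRV);
        return (m);
}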
 1560 
 1561 /*
 1562  * Free an entire chain of mbufs and associated external buffers, if
 1563  * applicable.
 1564  */
 1565 void
 1566 m_freem(struct mbuf *mb)
 1567 {
 1568 
 1569         MBUF_PROBE1(m__freem, mb);
 1570         while (mb != NULL)
 1571                 mb = m_free(mb);
 1572 }
 1573 
 1574 /*
 1575  * Temporary primitive to allow freeing without going through m_free.
 1576  */
 1577 void
 1578 m_free_raw(struct mbuf *mb)
 1579 {
 1580 
 1581         uma_zfree(zone_mbuf, mb);
 1582 }
 1583 
 1584 int
 1585 m_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
 1586     struct m_snd_tag **mstp)
 1587 {
 1588 
 1589         if (ifp->if_snd_tag_alloc == NULL)
 1590                 return (EOPNOTSUPP);
 1591         return (ifp->if_snd_tag_alloc(ifp, params, mstp));
 1592 }
 1593 
 1594 void
 1595 m_snd_tag_init(struct m_snd_tag *mst, struct ifnet *ifp,
 1596     const struct if_snd_tag_sw *sw)
 1597 {
 1598 
 1599         if_ref(ifp);
 1600         mst->ifp = ifp;
 1601         refcount_init(&mst->refcount, 1);
 1602         mst->sw = sw;
 1603         counter_u64_add(snd_tag_count, 1);
 1604 }
 1605 
 1606 void
 1607 m_snd_tag_destroy(struct m_snd_tag *mst)
 1608 {
 1609         struct ifnet *ifp;
 1610 
 1611         ifp = mst->ifp;
 1612         mst->sw->snd_tag_free(mst);
 1613         if_rele(ifp);
 1614         counter_u64_add(snd_tag_count, -1);
 1615 }
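
/*
 * Editor's example (not part of kern_mbuf.c): the usual send-tag life
 * cycle.  Filling in the allocation parameters is driver-specific and
 * elided here; m_snd_tag_rele() (from sys/mbuf.h) drops the reference
 * taken at init time and ends up in m_snd_tag_destroy() for the last one.
 */
static int
example_snd_tag_cycle(struct ifnet *ifp, union if_snd_tag_alloc_params *p)
{
        struct m_snd_tag *mst;
        int error;

        error = m_snd_tag_alloc(ifp, p, &mst);
        if (error != 0)
                return (error);
        /* ... stash mst in outgoing packets via m->m_pkthdr.snd_tag ... */
        m_snd_tag_rele(mst);
        return (0);
}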
 1616 
 1617 void
 1618 m_rcvif_serialize(struct mbuf *m)
 1619 {
 1620         u_short idx, gen;
 1621 
 1622         M_ASSERTPKTHDR(m);
 1623         idx = m->m_pkthdr.rcvif->if_index;
 1624         gen = m->m_pkthdr.rcvif->if_idxgen;
 1625         m->m_pkthdr.rcvidx = idx;
 1626         m->m_pkthdr.rcvgen = gen;
 1627         if (__predict_false(m->m_pkthdr.leaf_rcvif != NULL)) {
 1628                 idx = m->m_pkthdr.leaf_rcvif->if_index;
 1629                 gen = m->m_pkthdr.leaf_rcvif->if_idxgen;
 1630         } else {
 1631                 idx = -1;
 1632                 gen = 0;
 1633         }
 1634         m->m_pkthdr.leaf_rcvidx = idx;
 1635         m->m_pkthdr.leaf_rcvgen = gen;
 1636 }
 1637 
 1638 struct ifnet *
 1639 m_rcvif_restore(struct mbuf *m)
 1640 {
 1641         struct ifnet *ifp, *leaf_ifp;
 1642 
 1643         M_ASSERTPKTHDR(m);
 1644         NET_EPOCH_ASSERT();
 1645 
 1646         ifp = ifnet_byindexgen(m->m_pkthdr.rcvidx, m->m_pkthdr.rcvgen);
 1647         if (ifp == NULL || (ifp->if_flags & IFF_DYING))
 1648                 return (NULL);
 1649 
 1650         if (__predict_true(m->m_pkthdr.leaf_rcvidx == (u_short)-1)) {
 1651                 leaf_ifp = NULL;
 1652         } else {
 1653                 leaf_ifp = ifnet_byindexgen(m->m_pkthdr.leaf_rcvidx,
 1654                     m->m_pkthdr.leaf_rcvgen);
 1655                 if (__predict_false(leaf_ifp != NULL && (leaf_ifp->if_flags & IFF_DYING)))
 1656                         leaf_ifp = NULL;
 1657         }
 1658 
 1659         m->m_pkthdr.leaf_rcvif = leaf_ifp;
 1660         m->m_pkthdr.rcvif = ifp;
 1661 
 1662         return (ifp);
 1663 }
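
/*
 * Editor's example (not part of kern_mbuf.c): a sketch of the intended
 * pairing.  A packet that must sit in a queue across an epoch exit stores
 * the interface by (index, generation) instead of by pointer, and the
 * pointer is re-validated when processing resumes inside the net epoch.
 */
static void
example_defer(struct mbuf *m)
{

        m_rcvif_serialize(m);           /* still inside the net epoch */
        /* ... enqueue m; the epoch may be exited after this point ... */
}

static void
example_resume(struct mbuf *m)
{

        /* Caller has re-entered the net epoch. */
        if (m_rcvif_restore(m) == NULL)
                m_freem(m);             /* interface went away; drop it */
}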
 1664 
 1665 /*
 1666  * Allocate an mbuf with anonymous external pages.
 1667  */
 1668 struct mbuf *
 1669 mb_alloc_ext_plus_pages(int len, int how)
 1670 {
 1671         struct mbuf *m;
 1672         vm_page_t pg;
 1673         int i, npgs;
 1674 
 1675         m = mb_alloc_ext_pgs(how, mb_free_mext_pgs);
 1676         if (m == NULL)
 1677                 return (NULL);
 1678         m->m_epg_flags |= EPG_FLAG_ANON;
 1679         npgs = howmany(len, PAGE_SIZE);
 1680         for (i = 0; i < npgs; i++) {
 1681                 do {
 1682                         pg = vm_page_alloc_noobj(VM_ALLOC_NODUMP |
 1683                             VM_ALLOC_WIRED);
 1684                         if (pg == NULL) {
 1685                                 if (how == M_NOWAIT) {
 1686                                         m->m_epg_npgs = i;
 1687                                         m_free(m);
 1688                                         return (NULL);
 1689                                 }
 1690                                 vm_wait(NULL);
 1691                         }
 1692                 } while (pg == NULL);
 1693                 m->m_epg_pa[i] = VM_PAGE_TO_PHYS(pg);
 1694         }
 1695         m->m_epg_npgs = npgs;
 1696         return (m);
 1697 }
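
/*
 * Editor's example (not part of kern_mbuf.c): with M_WAITOK the page loop
 * above sleeps in vm_wait() until memory is available, so only an M_NOWAIT
 * caller, as here, has to handle a NULL return.
 */
static struct mbuf *
example_alloc_unmapped(int len)
{

        return (mb_alloc_ext_plus_pages(len, M_NOWAIT));
}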
 1698 
 1699 /*
 1700  * Copy the data in the mbuf chain to a chain of mbufs with anonymous external
 1701  * unmapped pages.
 1702  * len is the length of data in the input mbuf chain.
 1703  * mlen is the maximum number of bytes placed in each unmapped output mbuf.
 1704  */
 1705 struct mbuf *
 1706 mb_mapped_to_unmapped(struct mbuf *mp, int len, int mlen, int how,
 1707     struct mbuf **mlast)
 1708 {
 1709         struct mbuf *m, *mout;
 1710         char *pgpos, *mbpos;
 1711         int i, mblen, mbufsiz, pglen, xfer;
 1712 
 1713         if (len == 0)
 1714                 return (NULL);
 1715         mbufsiz = min(mlen, len);
 1716         m = mout = mb_alloc_ext_plus_pages(mbufsiz, how);
 1717         if (m == NULL)
 1718                 return (m);
 1719         pgpos = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[0]);
 1720         pglen = PAGE_SIZE;
 1721         mblen = 0;
 1722         i = 0;
 1723         do {
 1724                 if (pglen == 0) {
 1725                         if (++i == m->m_epg_npgs) {
 1726                                 m->m_epg_last_len = PAGE_SIZE;
 1727                                 mbufsiz = min(mlen, len);
 1728                                 m->m_next = mb_alloc_ext_plus_pages(mbufsiz,
 1729                                     how);
 1730                                 m = m->m_next;
 1731                                 if (m == NULL) {
 1732                                         m_freem(mout);
 1733                                         return (m);
 1734                                 }
 1735                                 i = 0;
 1736                         }
 1737                         pgpos = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[i]);
 1738                         pglen = PAGE_SIZE;
 1739                 }
 1740                 while (mblen == 0) {
 1741                         if (mp == NULL) {
 1742                                 m_freem(mout);
 1743                                 return (NULL);
 1744                         }
 1745                         KASSERT((mp->m_flags & M_EXTPG) == 0,
 1746                             ("%s: ext_pgs input mbuf", __func__));
 1747                         mbpos = mtod(mp, char *);
 1748                         mblen = mp->m_len;
 1749                         mp = mp->m_next;
 1750                 }
 1751                 xfer = min(mblen, pglen);
 1752                 memcpy(pgpos, mbpos, xfer);
 1753                 pgpos += xfer;
 1754                 mbpos += xfer;
 1755                 pglen -= xfer;
 1756                 mblen -= xfer;
 1757                 len -= xfer;
 1758                 m->m_len += xfer;
 1759         } while (len > 0);
 1760         m->m_epg_last_len = PAGE_SIZE - pglen;
 1761         if (mlast != NULL)
 1762                 *mlast = m;
 1763         return (mout);
 1764 }
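
/*
 * Editor's example (not part of kern_mbuf.c): converting a conventional
 * (mapped) chain into unmapped M_EXTPG mbufs.  The 4-page cap per output
 * mbuf is an arbitrary choice for illustration; "chain" must hold at least
 * "len" bytes or the conversion fails and returns NULL.
 */
static struct mbuf *
example_to_unmapped(struct mbuf *chain, int len, int how)
{
        struct mbuf *last;

        return (mb_mapped_to_unmapped(chain, len, 4 * PAGE_SIZE, how, &last));
}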
