FreeBSD/Linux Kernel Cross Reference
sys/kern/uipc_mbuf.c

    1 /*
    2  * Copyright (c) 1982, 1986, 1988, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94
   34  * $FreeBSD$
   35  */
   36 
   37 #include "opt_param.h"
   38 #include "opt_mbuf_stress_test.h"
   39 #include <sys/param.h>
   40 #include <sys/systm.h>
   41 #include <sys/malloc.h>
   42 #include <sys/mbuf.h>
   43 #include <sys/kernel.h>
   44 #include <sys/sysctl.h>
   45 #include <sys/domain.h>
   46 #include <sys/protosw.h>
   47 
   48 #include <vm/vm.h>
   49 #include <vm/vm_kern.h>
   50 #include <vm/vm_extern.h>
   51 
   52 #ifdef INVARIANTS
   53 #include <machine/cpu.h>
   54 #endif
   55 
   56 static void mbinit __P((void *));
   57 SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)
   58 
   59 struct mbuf *mbutl;
   60 struct mbuf *mbutltop;
   61 char    *mclrefcnt;
   62 struct mbstat mbstat;
   63 u_long  mbtypes[MT_NTYPES];
   64 struct mbuf *mmbfree;
   65 union mcluster *mclfree;
   66 int     max_linkhdr;
   67 int     max_protohdr;
   68 int     max_hdr;
   69 int     max_datalen;
   70 #ifdef MBUF_STRESS_TEST
   71 int     m_defragpackets;
   72 int     m_defragbytes;
   73 int     m_defraguseless;
   74 int     m_defragfailure;
   75 int     m_defragrandomfailures;
   76 #endif
   77 int     m_clreflimithits;
   78 
   79 int     nmbclusters;
   80 int     nmbufs;
   81 int     nsfbufspeak;
   82 int     nsfbufsused;
   83 u_int   m_mballoc_wid = 0;
   84 u_int   m_clalloc_wid = 0;
   85 
   86 SYSCTL_DECL(_kern_ipc);
   87 SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
   88            &max_linkhdr, 0, "");
   89 SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
   90            &max_protohdr, 0, "");
   91 SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
   92 SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
   93            &max_datalen, 0, "");
   94 SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
   95            &mbuf_wait, 0, "");
   96 SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RW, &mbstat, mbstat, "");
   97 SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
   98            sizeof(mbtypes), "LU", "");
   99 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, 
  100            &nmbclusters, 0, "Maximum number of mbuf clusters available");
  101 SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
  102            "Maximum number of mbufs available"); 
  103 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RD, &nsfbufs, 0,
  104            "Maximum number of sendfile(2) sf_bufs available");
  105 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
  106            "Number of sendfile(2) sf_bufs at peak usage");
  107 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
  108            "Number of sendfile(2) sf_bufs in use");
  109 #ifdef MBUF_STRESS_TEST
  110 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
  111            &m_defragpackets, 0, "");
  112 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
  113            &m_defragbytes, 0, "");
  114 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
  115            &m_defraguseless, 0, "");
  116 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
  117            &m_defragfailure, 0, "");
  118 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
  119            &m_defragrandomfailures, 0, "");
  120 #endif
  121 SYSCTL_INT(_kern_ipc, OID_AUTO, m_clreflimithits, CTLFLAG_RD,
  122            &m_clreflimithits, 0, "");
  123 
  124 static void     m_reclaim __P((void));
  125 static struct mbuf *m_clreflimit(struct mbuf *m0, int how);
  126 
  127 #ifndef NMBCLUSTERS
  128 #define NMBCLUSTERS     (512 + maxusers * 16)
  129 #endif
  130 #ifndef NMBUFS
  131 #define NMBUFS          (nmbclusters * 4)
  132 #endif
  133 
  134 /*
  135  * Perform sanity checks of tunables declared above.
  136  */
  137 static void
  138 tunable_mbinit(void *dummy)
  139 {
  140 
  141         /*
  142          * This has to be done before VM init.
  143          */
  144         nmbclusters = NMBCLUSTERS;
  145         TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
  146         nmbufs = NMBUFS;
  147         TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
  148         /* Sanity checks */
  149         if (nmbufs < nmbclusters * 2)
  150                 nmbufs = nmbclusters * 2;
  151 
  152         return;
  153 }
  154 SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);
  155 
  156 /* "number of clusters of pages" */
  157 #define NCL_INIT        1
  158 
  159 #define NMB_INIT        16
  160 
  161 /* ARGSUSED*/
  162 static void
  163 mbinit(dummy)
  164         void *dummy;
  165 {
  166         int s;
  167 
  168         mmbfree = NULL; mclfree = NULL;
  169         mbstat.m_msize = MSIZE;
  170         mbstat.m_mclbytes = MCLBYTES;
  171         mbstat.m_minclsize = MINCLSIZE;
  172         mbstat.m_mlen = MLEN;
  173         mbstat.m_mhlen = MHLEN;
  174 
  175         s = splimp();
  176         if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
  177                 goto bad;
  178 #if MCLBYTES <= PAGE_SIZE
  179         if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
  180                 goto bad;
  181 #else
  182         /* It's OK to call contigmalloc in this context. */
  183         if (m_clalloc(16, M_WAIT) == 0)
  184                 goto bad;
  185 #endif
  186         splx(s);
  187         return;
  188 bad:
  189         panic("mbinit");
  190 }
  191 
  192 /*
  193  * Allocate at least nmb mbufs and place on mbuf free list.
  194  * Must be called at splimp.
  195  */
  196 /* ARGSUSED */
  197 int
  198 m_mballoc(nmb, how)
  199         register int nmb;
  200         int how;
  201 {
  202         register caddr_t p;
  203         register int i;
  204         int nbytes;
  205 
  206         /*
  207          * If we've hit the mbuf limit, stop allocating from mb_map,
  208          * (or trying to) in order to avoid dipping into the section of
  209          * mb_map which we've "reserved" for clusters.
  210          */
  211         if ((nmb + mbstat.m_mbufs) > nmbufs)
  212                 return (0);
  213 
  214         /*
  215          * Once we run out of map space, it will be impossible to get
  216          * any more (nothing is ever freed back to the map)
  217          * -- however you are not dead as m_reclaim might
  218          * still be able to free a substantial amount of space.
  219          *
  220          * XXX Furthermore, we can also work with "recycled" mbufs (when
   221          * called with M_WAIT, the sleeping process will be woken up
   222          * when an mbuf is freed; see m_mballoc_wait()).
  223          */
  224         if (mb_map_full)
  225                 return (0);
  226 
  227         nbytes = round_page(nmb * MSIZE);
  228         p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT);
  229         if (p == 0 && how == M_WAIT) {
  230                 mbstat.m_wait++;
  231                 p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK);
  232         }
  233 
  234         /*
  235          * Either the map is now full, or `how' is M_NOWAIT and there
  236          * are no pages left.
  237          */
  238         if (p == NULL)
  239                 return (0);
  240 
  241         mbutltop = (struct mbuf *)((char *)mbutltop + nbytes);
  242         nmb = nbytes / MSIZE;
  243         for (i = 0; i < nmb; i++) {
  244                 ((struct mbuf *)p)->m_next = mmbfree;
  245                 mmbfree = (struct mbuf *)p;
  246                 p += MSIZE;
  247         }
  248         mbstat.m_mbufs += nmb;
  249         mbtypes[MT_FREE] += nmb;
  250         return (1);
  251 }
  252 
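
/*
 * Illustrative sketch (hypothetical caller, not from the original file):
 * pre-populating the free list.  m_mballoc() manipulates mmbfree and
 * mbstat, so the splimp() convention noted above must be honored, just
 * as mbinit() does.
 */
static int
example_prime_mbufs(void)
{
        int s, ok;

        s = splimp();                   /* block network interrupts */
        ok = m_mballoc(32, M_DONTWAIT); /* 1 on success, 0 on failure */
        splx(s);                        /* restore previous level */
        return (ok);
}
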
  253 /*
   254  * Once mb_map has been exhausted, if the call to the allocation macros
   255  * (or, in some cases, functions) is made with M_WAIT, then it is necessary
   256  * to rely solely on reclaimed mbufs. Here we wait for an mbuf to be freed
   257  * for a designated (mbuf_wait) time.
  258  */
  259 struct mbuf *
  260 m_mballoc_wait(int caller, int type)
  261 {
  262         struct mbuf *p;
  263         int s;
  264 
  265         s = splimp();
  266         m_mballoc_wid++;
  267         if ((tsleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait)) == EWOULDBLOCK)
  268                 m_mballoc_wid--;
  269         splx(s);
  270 
  271         /*
   272          * Now that we (think) we've got something, we will redo an
   273          * MGET, but avoid getting into another instance of m_mballoc_wait().
   274          * XXX: We retry the fetch _even_ if the sleep timed out. This is
   275          *      done purposely, for the [unlikely] case that an mbuf was
   276          *      freed but the sleep was not awakened in time.
  277          */
  278         p = NULL;
  279         switch (caller) {
  280         case MGET_C:
  281                 MGET(p, M_DONTWAIT, type);
  282                 break;
  283         case MGETHDR_C:
  284                 MGETHDR(p, M_DONTWAIT, type);
  285                 break;
  286         default:
  287                 panic("m_mballoc_wait: invalid caller (%d)", caller);
  288         }
  289 
  290         s = splimp();
  291         if (p != NULL) {                /* We waited and got something... */
  292                 mbstat.m_wait++;
  293                 /* Wake up another if we have more free. */
  294                 if (mmbfree != NULL)
  295                         MMBWAKEUP();
  296         }
  297         splx(s);
  298         return (p);
  299 }
  300 
  301 #if MCLBYTES > PAGE_SIZE
  302 static int i_want_my_mcl;
  303 
  304 static void
  305 kproc_mclalloc(void)
  306 {
  307         int status;
  308 
  309         while (1) {
  310                 tsleep(&i_want_my_mcl, PVM, "mclalloc", 0);
  311 
  312                 for (; i_want_my_mcl; i_want_my_mcl--) {
  313                         if (m_clalloc(1, M_WAIT) == 0)
  314                                 printf("m_clalloc failed even in process context!\n");
  315                 }
  316         }
  317 }
  318 
  319 static struct proc *mclallocproc;
  320 static struct kproc_desc mclalloc_kp = {
  321         "mclalloc",
  322         kproc_mclalloc,
  323         &mclallocproc
  324 };
  325 SYSINIT(mclallocproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start,
  326            &mclalloc_kp);
  327 #endif
  328 
  329 /*
  330  * Allocate some number of mbuf clusters
  331  * and place on cluster free list.
  332  * Must be called at splimp.
  333  */
  334 /* ARGSUSED */
  335 int
  336 m_clalloc(ncl, how)
  337         register int ncl;
  338         int how;
  339 {
  340         register caddr_t p;
  341         register int i;
  342         int npg;
  343 
  344         /*
  345          * If we've hit the mcluster number limit, stop allocating from
  346          * mb_map, (or trying to) in order to avoid dipping into the section
  347          * of mb_map which we've "reserved" for mbufs.
  348          */
  349         if ((ncl + mbstat.m_clusters) > nmbclusters)
  350                 goto m_clalloc_fail;
  351 
  352         /*
  353          * Once we run out of map space, it will be impossible
  354          * to get any more (nothing is ever freed back to the
  355          * map). From this point on, we solely rely on freed 
  356          * mclusters.
  357          */
  358         if (mb_map_full)
  359                 goto m_clalloc_fail;
  360 
  361 #if MCLBYTES > PAGE_SIZE
  362         if (how != M_WAIT) {
  363                 i_want_my_mcl += ncl;
  364                 wakeup(&i_want_my_mcl);
  365                 mbstat.m_wait++;
  366                 p = 0;
  367         } else {
  368                 p = contigmalloc1(MCLBYTES * ncl, M_DEVBUF, M_WAITOK, 0ul,
  369                                   ~0ul, PAGE_SIZE, 0, mb_map);
  370         }
  371 #else
  372         npg = ncl;
  373         p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
  374                                  how != M_WAIT ? M_NOWAIT : M_WAITOK);
  375         ncl = ncl * PAGE_SIZE / MCLBYTES;
  376 #endif
  377         /*
  378          * Either the map is now full, or `how' is M_NOWAIT and there
  379          * are no pages left.
  380          */
  381         if (p == NULL) {
   382                 static int last_report; /* when we did that (in ticks) */
  383 m_clalloc_fail:
  384                 mbstat.m_drops++;
  385                 if (ticks < last_report || (ticks - last_report) >= hz) {
  386                         last_report = ticks;
  387                         printf("All mbuf clusters exhausted, please see tuning(7).\n");
  388                 }
  389                 return (0);
  390         }
  391 
  392         mbutltop = (struct mbuf *)((char *)mbutltop + ctob(npg));
  393 
  394         for (i = 0; i < ncl; i++) {
  395                 ((union mcluster *)p)->mcl_next = mclfree;
  396                 mclfree = (union mcluster *)p;
  397                 p += MCLBYTES;
  398                 mbstat.m_clfree++;
  399         }
  400         mbstat.m_clusters += ncl;
  401         return (1);
  402 }
  403 
  404 /*
  405  * Once the mb_map submap has been exhausted and the allocation is called with
  406  * M_WAIT, we rely on the mclfree union pointers. If nothing is free, we will
  407  * sleep for a designated amount of time (mbuf_wait) or until we're woken up
  408  * due to sudden mcluster availability.
  409  */
  410 caddr_t
  411 m_clalloc_wait(void)
  412 {
  413         caddr_t p;
  414         int s;
  415 
  416 #ifdef __i386__
   417         /* If in interrupt context with INVARIANTS enabled, maintain sanity and die. */
  418         KASSERT(intr_nesting_level == 0, ("CLALLOC: CANNOT WAIT IN INTERRUPT"));
  419 #endif
  420 
  421         /* Sleep until something's available or until we expire. */
  422         m_clalloc_wid++;
  423         if ((tsleep(&m_clalloc_wid, PVM, "mclalc", mbuf_wait)) == EWOULDBLOCK)
  424                 m_clalloc_wid--;
  425 
  426         /*
   427          * Now that we (think) we've got something, we will redo an
   428          * MCLALLOC, but avoid getting into another instance of m_clalloc_wait().
  429          */
  430         p = NULL;
  431         MCLALLOC(p, M_DONTWAIT);
  432 
  433         s = splimp();
  434         if (p != NULL) {        /* We waited and got something... */
  435                 mbstat.m_wait++;
  436                 /* Wake up another if we have more free. */
  437                 if (mclfree != NULL)
  438                         MCLWAKEUP();
  439         }
  440 
  441         splx(s);
  442         return (p);
  443 }
  444 
  445 /*
  446  * When MGET fails, ask protocols to free space when short of memory,
  447  * then re-attempt to allocate an mbuf.
  448  */
  449 struct mbuf *
  450 m_retry(i, t)
  451         int i, t;
  452 {
  453         register struct mbuf *m;
  454 
  455         /*
  456          * Must only do the reclaim if not in an interrupt context.
  457          */
  458         if (i == M_WAIT) {
  459 #ifdef __i386__
  460                 KASSERT(intr_nesting_level == 0,
  461                     ("MBALLOC: CANNOT WAIT IN INTERRUPT"));
  462 #endif
  463                 m_reclaim();
  464         }
  465 
  466         /*
  467          * Both m_mballoc_wait and m_retry must be nulled because
   468          * when the MGET macro is run from here, we definitely do _not_
  469          * want to enter an instance of m_mballoc_wait() or m_retry() (again!)
  470          */
  471 #define m_mballoc_wait(caller,type)    (struct mbuf *)0
  472 #define m_retry(i, t)   (struct mbuf *)0
  473         MGET(m, i, t);
  474 #undef m_retry
  475 #undef m_mballoc_wait
  476 
  477         if (m != NULL)
  478                 mbstat.m_wait++;
  479         else {
   480                 static int last_report; /* when we did that (in ticks) */
  481                 mbstat.m_drops++;
  482                 if (ticks < last_report || (ticks - last_report) >= hz) {
  483                         last_report = ticks;
  484                         printf("All mbufs exhausted, please see tuning(7).\n");
  485                 }
  486         }
  487 
  488         return (m);
  489 }
  490 
  491 /*
  492  * As above; retry an MGETHDR.
  493  */
  494 struct mbuf *
  495 m_retryhdr(i, t)
  496         int i, t;
  497 {
  498         register struct mbuf *m;
  499 
  500         /*
  501          * Must only do the reclaim if not in an interrupt context.
  502          */
  503         if (i == M_WAIT) {
  504 #ifdef __i386__
  505                 KASSERT(intr_nesting_level == 0,
  506                     ("MBALLOC: CANNOT WAIT IN INTERRUPT"));
  507 #endif
  508                 m_reclaim();
  509         }
  510 
  511 #define m_mballoc_wait(caller,type)    (struct mbuf *)0
  512 #define m_retryhdr(i, t) (struct mbuf *)0
  513         MGETHDR(m, i, t);
  514 #undef m_retryhdr
  515 #undef m_mballoc_wait
  516 
   517         if (m != NULL)
   518                 mbstat.m_wait++;
   519         else {
   520                 static int last_report; /* when we did that (in ticks) */
  521                 mbstat.m_drops++;
  522                 if (ticks < last_report || (ticks - last_report) >= hz) {
  523                         last_report = ticks;
  524                         printf("All mbufs exhausted, please see tuning(7).\n");
  525                 }
  526         }
  527         
  528         return (m);
  529 }
  530 
  531 static void
  532 m_reclaim()
  533 {
  534         register struct domain *dp;
  535         register struct protosw *pr;
  536         int s = splimp();
  537 
  538         for (dp = domains; dp; dp = dp->dom_next)
  539                 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
  540                         if (pr->pr_drain)
  541                                 (*pr->pr_drain)();
  542         splx(s);
  543         mbstat.m_drain++;
  544 }
  545 
  546 /*
  547  * Space allocation routines.
  548  * These are also available as macros
  549  * for critical paths.
  550  */
  551 struct mbuf *
  552 m_get(how, type)
  553         int how, type;
  554 {
  555         register struct mbuf *m;
  556 
  557         MGET(m, how, type);
  558         return (m);
  559 }
  560 
  561 struct mbuf *
  562 m_gethdr(how, type)
  563         int how, type;
  564 {
  565         register struct mbuf *m;
  566 
  567         MGETHDR(m, how, type);
  568         return (m);
  569 }
  570 
  571 struct mbuf *
  572 m_getclr(how, type)
  573         int how, type;
  574 {
  575         register struct mbuf *m;
  576 
  577         MGET(m, how, type);
  578         if (m == 0)
  579                 return (0);
  580         bzero(mtod(m, caddr_t), MLEN);
  581         return (m);
  582 }
  583 
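
/*
 * Illustrative sketch (hypothetical helper): the usual pattern built on
 * these routines.  MCLGET() signals failure only through the absence of
 * M_EXT, so the flag must be checked explicitly.
 */
static struct mbuf *
example_alloc_with_cluster(void)
{
        struct mbuf *m;

        m = m_gethdr(M_DONTWAIT, MT_DATA);
        if (m == NULL)
                return (NULL);
        MCLGET(m, M_DONTWAIT);
        if ((m->m_flags & M_EXT) == 0) {        /* no cluster attached */
                m_free(m);
                return (NULL);
        }
        m->m_len = 0;                           /* no valid data yet */
        return (m);
}
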
  584 /*
  585  * m_getcl() returns an mbuf with an attached cluster.
   586  * Because many network drivers use this kind of buffer a lot, it is
  587  * convenient to keep a small pool of free buffers of this kind.
  588  * Even a small size such as 10 gives about 10% improvement in the
  589  * forwarding rate in a bridge or router.
  590  * The size of this free list is controlled by the sysctl variable
  591  * mcl_pool_max. The list is populated on m_freem(), and used in
  592  * m_getcl() if elements are available.
  593  */
  594 static struct mbuf *mcl_pool;
  595 static int mcl_pool_now;
  596 static int mcl_pool_max = 0;
  597  
  598 SYSCTL_INT(_kern_ipc, OID_AUTO, mcl_pool_max, CTLFLAG_RW, &mcl_pool_max, 0,
  599            "Maximum number of mbufs+cluster in free list");
  600 SYSCTL_INT(_kern_ipc, OID_AUTO, mcl_pool_now, CTLFLAG_RD, &mcl_pool_now, 0,
  601            "Current number of mbufs+cluster in free list");
  602 
  603 struct mbuf *
  604 m_getcl(int how, short type, int flags)
  605 {
  606         int s = splimp();
  607         struct mbuf *mp;
  608 
  609         if (flags & M_PKTHDR) {
  610                 if (type == MT_DATA && mcl_pool) {
  611                         mp = mcl_pool;
  612                         mcl_pool = mp->m_nextpkt;
  613                         mcl_pool_now--;
  614                         splx(s);
  615                         mp->m_nextpkt = NULL;
  616                         mp->m_data = mp->m_ext.ext_buf;
  617                         mp->m_flags = M_PKTHDR|M_EXT;
  618                         mp->m_pkthdr.rcvif = NULL;
  619                         mp->m_pkthdr.csum_flags = 0;
  620                         return mp;
  621                 } else
  622                         MGETHDR(mp, how, type);
  623         } else
  624                 MGET(mp, how, type);
  625         if (mp) {
  626                 MCLGET(mp, how);
   627                 if ((mp->m_flags & M_EXT) == 0) {
  628                         m_free(mp);
  629                         mp = NULL;
  630                 }
  631         }
  632         splx(s);
  633         return mp;
  634 }
  635 
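
/*
 * Illustrative sketch (hypothetical driver refill path, the intended
 * consumer of m_getcl()): one call replaces the m_gethdr()/MCLGET()
 * pair above and may be satisfied from mcl_pool.
 */
static int
example_rxbuf_alloc(struct mbuf **mp)
{
        struct mbuf *m;

        m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
        if (m == NULL)
                return (ENOBUFS);
        m->m_len = m->m_pkthdr.len = MCLBYTES;  /* expose the whole cluster */
        *mp = m;
        return (0);
}
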
  636 /*
  637  * struct mbuf *
  638  * m_getm(m, len, how, type)
  639  *
  640  * This will allocate len-worth of mbufs and/or mbuf clusters (whatever fits
  641  * best) and return a pointer to the top of the allocated chain. If m is
  642  * non-null, then we assume that it is a single mbuf or an mbuf chain to
  643  * which we want len bytes worth of mbufs and/or clusters attached, and so
  644  * if we succeed in allocating it, we will just return a pointer to m.
  645  *
  646  * If we happen to fail at any point during the allocation, we will free
  647  * up everything we have already allocated and return NULL.
  648  *
  649  */
  650 struct mbuf *
  651 m_getm(struct mbuf *m, int len, int how, int type)
  652 {
  653         struct mbuf *top, *tail, *mp, *mtail = NULL;
  654 
  655         KASSERT(len >= 0, ("len is < 0 in m_getm"));
  656 
  657         MGET(mp, how, type);
  658         if (mp == NULL)
  659                 return (NULL);
  660         else if (len > MINCLSIZE) {
  661                 MCLGET(mp, how);
  662                 if ((mp->m_flags & M_EXT) == 0) {
  663                         m_free(mp);
  664                         return (NULL);
  665                 }
  666         }
  667         mp->m_len = 0;
  668         len -= M_TRAILINGSPACE(mp);
  669 
  670         if (m != NULL)
  671                 for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
  672         else
  673                 m = mp;
  674 
  675         top = tail = mp;
  676         while (len > 0) {
  677                 MGET(mp, how, type);
  678                 if (mp == NULL)
  679                         goto failed;
  680 
  681                 tail->m_next = mp;
  682                 tail = mp;
  683                 if (len > MINCLSIZE) {
  684                         MCLGET(mp, how);
  685                         if ((mp->m_flags & M_EXT) == 0)
  686                                 goto failed;
  687                 }
  688 
  689                 mp->m_len = 0;
  690                 len -= M_TRAILINGSPACE(mp);
  691         }
  692 
  693         if (mtail != NULL)
  694                 mtail->m_next = top;
  695         return (m);
  696 
  697 failed:
  698         m_freem(top);
  699         return (NULL);
  700 }
  701 
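
/*
 * Illustrative sketch (hypothetical helper): appending len bytes worth
 * of buffer space to an existing chain.  On failure m_getm() frees only
 * what it allocated itself, so the caller still owns the original chain.
 */
static int
example_grow_chain(struct mbuf *m, int need)
{
        if (m_getm(m, need, M_DONTWAIT, MT_DATA) == NULL)
                return (ENOBUFS);       /* original chain left intact */
        return (0);
}
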
  702 /*
  703  * MFREE(struct mbuf *m, struct mbuf *n)
  704  * Free a single mbuf and associated external storage.
  705  * Place the successor, if any, in n.
  706  *
   707  * We do need to check non-first mbufs for m_aux, since some existing
   708  * code does not call M_PREPEND properly.
  709  * (example: call to bpf_mtap from drivers)
  710  */
  711 #define MFREE(m, n) MBUFLOCK(                                           \
  712         struct mbuf *_mm = (m);                                         \
  713                                                                         \
  714         KASSERT(_mm->m_type != MT_FREE, ("freeing free mbuf"));         \
  715         mbtypes[_mm->m_type]--;                                         \
  716         if ((_mm->m_flags & M_PKTHDR) != 0)                             \
  717                 m_tag_delete_chain(_mm, NULL);                          \
  718         if (_mm->m_flags & M_EXT)                                       \
  719                 MEXTFREE1(m);                                           \
  720         (n) = _mm->m_next;                                              \
  721         _mm->m_type = MT_FREE;                                          \
  722         mbtypes[MT_FREE]++;                                             \
  723         _mm->m_next = mmbfree;                                          \
  724         mmbfree = _mm;                                                  \
  725         MMBWAKEUP();                                                    \
  726 )
  727 
  728 struct mbuf *
  729 m_free(m)
  730         struct mbuf *m;
  731 {
  732         register struct mbuf *n;
  733 
  734         MFREE(m, n);
  735         return (n);
  736 }
  737 
  738 void
  739 m_freem(m)
  740         struct mbuf *m;
  741 {
  742         int s = splimp();
  743 
  744         /*
  745          * Try to keep a small pool of mbuf+cluster for quick use in
   746  * device drivers. A good candidate is an M_PKTHDR buffer with
  747          * only one cluster attached. Other mbufs, or those exceeding
  748          * the pool size, are just m_free'd in the usual way.
  749          * The following code makes sure that m_next, m_type,
  750          * m_pkthdr.aux and m_ext.* are properly initialized.
  751          * Other fields in the mbuf are initialized in m_getcl()
  752          * upon allocation.
  753          */
  754         if (mcl_pool_now < mcl_pool_max && m && m->m_next == NULL &&
  755             (m->m_flags & (M_PKTHDR|M_EXT)) == (M_PKTHDR|M_EXT) &&
   756             m->m_type == MT_DATA && M_EXT_WRITABLE(m)) {
  757                 m_tag_delete_chain(m, NULL);
  758                 m->m_nextpkt = mcl_pool;
  759                 mcl_pool = m;
  760                 mcl_pool_now++;
  761         } else {
  762                 while (m)
  763                         m = m_free(m);
  764         }
  765         splx(s);
  766 }
  767 
  768 /*
  769  * Mbuffer utility routines.
  770  */
  771 
  772 /*
  773  * Lesser-used path for M_PREPEND:
  774  * allocate new mbuf to prepend to chain,
  775  * copy junk along.
  776  */
  777 struct mbuf *
  778 m_prepend(m, len, how)
  779         register struct mbuf *m;
  780         int len, how;
  781 {
  782         struct mbuf *mn;
  783 
  784         if (m->m_flags & M_PKTHDR)
  785                 MGETHDR(mn, how, m->m_type);
  786         else
  787                 MGET(mn, how, m->m_type);
  788         if (mn == (struct mbuf *)NULL) {
  789                 m_freem(m);
  790                 return ((struct mbuf *)NULL);
  791         }
  792         if (m->m_flags & M_PKTHDR)
  793                 M_MOVE_PKTHDR(mn, m);
  794         mn->m_next = m;
  795         m = mn;
  796         if (len < MHLEN)
  797                 MH_ALIGN(m, len);
  798         m->m_len = len;
  799         return (m);
  800 }
  801 
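
/*
 * Illustrative sketch: callers normally reach m_prepend() through the
 * M_PREPEND() macro, which uses leading space when available and frees
 * the chain on failure.  struct foohdr and its field are hypothetical.
 */
struct foohdr {                         /* hypothetical protocol header */
        u_short foo_len;
};

static int
example_prepend_header(struct mbuf **mp)
{
        struct mbuf *m = *mp;
        struct foohdr *fh;

        M_PREPEND(m, sizeof(struct foohdr), M_DONTWAIT);
        if (m == NULL) {
                *mp = NULL;
                return (ENOBUFS);       /* chain was freed by the macro */
        }
        fh = mtod(m, struct foohdr *);  /* header is now contiguous */
        fh->foo_len = htons(m->m_pkthdr.len);
        *mp = m;
        return (0);
}
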
  802 /*
  803  * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
  804  * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
  805  * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
  806  * Note that the copy is read-only, because clusters are not copied,
  807  * only their reference counts are incremented.
  808  */
  809 #define MCFail (mbstat.m_mcfail)
  810 
  811 struct mbuf *
  812 m_copym(m, off0, len, wait)
  813         register struct mbuf *m;
  814         int off0, wait;
  815         register int len;
  816 {
  817         register struct mbuf *n, **np;
  818         register int off = off0;
  819         struct mbuf *top;
  820         int copyhdr = 0;
  821 
  822         KASSERT(off >= 0, ("m_copym, negative off %d", off));
  823         KASSERT(len >= 0, ("m_copym, negative len %d", len));
  824         if (off == 0 && m->m_flags & M_PKTHDR)
  825                 copyhdr = 1;
  826         while (off > 0) {
  827                 KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
  828                 if (off < m->m_len)
  829                         break;
  830                 off -= m->m_len;
  831                 m = m->m_next;
  832         }
  833         np = &top;
  834         top = 0;
  835         while (len > 0) {
  836                 if (m == 0) {
  837                         KASSERT(len == M_COPYALL, 
  838                             ("m_copym, length > size of mbuf chain"));
  839                         break;
  840                 }
  841                 if (copyhdr)
  842                         MGETHDR(n, wait, m->m_type);
  843                 else
  844                         MGET(n, wait, m->m_type);
  845                 *np = n;
  846                 if (n == 0)
  847                         goto nospace;
  848                 if (copyhdr) {
  849                         if (!m_dup_pkthdr(n, m, wait))
  850                                 goto nospace;
  851                         if (len == M_COPYALL)
  852                                 n->m_pkthdr.len -= off0;
  853                         else
  854                                 n->m_pkthdr.len = len;
  855                         copyhdr = 0;
  856                 }
  857                 n->m_len = min(len, m->m_len - off);
  858                 if (m->m_flags & M_EXT) {
  859                         n->m_data = m->m_data + off;
  860                         if (m->m_ext.ext_ref == NULL) {
  861                                 atomic_add_char(
  862                                     &mclrefcnt[mtocl(m->m_ext.ext_buf)], 1);
  863                         } else {
  864                                 int s = splimp();
  865 
  866                                 (*m->m_ext.ext_ref)(m->m_ext.ext_buf,
  867                                     m->m_ext.ext_size);
  868                                 splx(s);
  869                         }
  870                         n->m_ext = m->m_ext;
  871                         n->m_flags |= M_EXT;
  872                 } else
  873                         bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
  874                             (unsigned)n->m_len);
  875                 if (len != M_COPYALL)
  876                         len -= n->m_len;
  877                 off = 0;
  878                 m = m->m_next;
  879                 np = &n->m_next;
  880         }
  881         top = m_clreflimit(top, wait);
  882         if (top == 0)
  883                 MCFail++;
  884         return (top);
  885 nospace:
  886         m_freem(top);
  887         MCFail++;
  888         return (0);
  889 }
  890 
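
/*
 * Illustrative sketch (hypothetical helper): taking a read-only
 * snapshot of a chain, e.g. for retransmission.  The copy shares
 * clusters with the original, so a writable copy requires m_dup()
 * instead.
 */
static struct mbuf *
example_snapshot(struct mbuf *m)
{
        struct mbuf *n;

        n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
        if (n == NULL)
                return (NULL);
        /* n references m's clusters; check M_WRITABLE() before writing. */
        return (n);
}
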
  891 /*
  892  * Copy an entire packet, including header (which must be present).
  893  * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
  894  * Note that the copy is read-only, because clusters are not copied,
  895  * only their reference counts are incremented.
  896  * Preserve alignment of the first mbuf so if the creator has left
  897  * some room at the beginning (e.g. for inserting protocol headers)
  898  * the copies also have the room available.
  899  */
  900 struct mbuf *
  901 m_copypacket(m, how)
  902         struct mbuf *m;
  903         int how;
  904 {
  905         struct mbuf *top, *n, *o;
  906 
  907         MGET(n, how, m->m_type);
  908         top = n;
  909         if (!n)
  910                 goto nospace;
  911 
  912         if (!m_dup_pkthdr(n, m, how))
  913                 goto nospace;
  914         n->m_len = m->m_len;
  915         if (m->m_flags & M_EXT) {
  916                 n->m_data = m->m_data;
  917                 if (m->m_ext.ext_ref == NULL)
  918                         atomic_add_char(&mclrefcnt[mtocl(m->m_ext.ext_buf)], 1);
  919                 else {
  920                         int s = splimp();
  921 
  922                         (*m->m_ext.ext_ref)(m->m_ext.ext_buf,
  923                             m->m_ext.ext_size);
  924                         splx(s);
  925                 }
  926                 n->m_ext = m->m_ext;
  927                 n->m_flags |= M_EXT;
  928         } else {
   929                 n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
  930                 bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
  931         }
  932 
  933         m = m->m_next;
  934         while (m) {
  935                 MGET(o, how, m->m_type);
  936                 if (!o)
  937                         goto nospace;
  938 
  939                 n->m_next = o;
  940                 n = n->m_next;
  941 
  942                 n->m_len = m->m_len;
  943                 if (m->m_flags & M_EXT) {
  944                         n->m_data = m->m_data;
  945                         if (m->m_ext.ext_ref == NULL) {
  946                                 atomic_add_char(
  947                                     &mclrefcnt[mtocl(m->m_ext.ext_buf)], 1);
  948                         } else {
  949                                 int s = splimp();
  950 
  951                                 (*m->m_ext.ext_ref)(m->m_ext.ext_buf,
  952                                     m->m_ext.ext_size);
  953                                 splx(s);
  954                         }
  955                         n->m_ext = m->m_ext;
  956                         n->m_flags |= M_EXT;
  957                 } else {
  958                         bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
  959                 }
  960 
  961                 m = m->m_next;
  962         }
  963         top = m_clreflimit(top, how);
  964         return top;
  965 nospace:
  966         m_freem(top);
  967         MCFail++;
  968         return 0;
  969 }
  970 
  971 /*
  972  * Copy data from an mbuf chain starting "off" bytes from the beginning,
  973  * continuing for "len" bytes, into the indicated buffer.
  974  */
  975 void
  976 m_copydata(m, off, len, cp)
  977         register struct mbuf *m;
  978         register int off;
  979         register int len;
  980         caddr_t cp;
  981 {
  982         register unsigned count;
  983 
  984         KASSERT(off >= 0, ("m_copydata, negative off %d", off));
  985         KASSERT(len >= 0, ("m_copydata, negative len %d", len));
  986         while (off > 0) {
  987                 KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
  988                 if (off < m->m_len)
  989                         break;
  990                 off -= m->m_len;
  991                 m = m->m_next;
  992         }
  993         while (len > 0) {
  994                 KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
  995                 count = min(m->m_len - off, len);
  996                 bcopy(mtod(m, caddr_t) + off, cp, count);
  997                 len -= count;
  998                 cp += count;
  999                 off = 0;
 1000                 m = m->m_next;
 1001         }
 1002 }
 1003 
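
/*
 * Illustrative sketch (hypothetical helper and header type): copying a
 * header that may straddle mbufs into a local structure instead of
 * forcing contiguity with m_pullup().
 */
struct barhdr {                         /* hypothetical wire header */
        u_short bar_type;
        u_short bar_len;
};

static int
example_peek_header(struct mbuf *m, int off, struct barhdr *bh)
{
        if (m->m_pkthdr.len < off + (int)sizeof(*bh))
                return (EINVAL);        /* chain too short */
        m_copydata(m, off, sizeof(*bh), (caddr_t)bh);
        return (0);
}
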
 1004 /*
 1005  * Copy a packet header mbuf chain into a completely new chain, including
 1006  * copying any mbuf clusters.  Use this instead of m_copypacket() when
 1007  * you need a writable copy of an mbuf chain.
 1008  */
 1009 struct mbuf *
 1010 m_dup(m, how)
 1011         struct mbuf *m;
 1012         int how;
 1013 {
 1014         struct mbuf **p, *top = NULL;
 1015         int remain, moff, nsize;
 1016 
 1017         /* Sanity check */
 1018         if (m == NULL)
 1019                 return (0);
 1020         KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));
 1021 
 1022         /* While there's more data, get a new mbuf, tack it on, and fill it */
 1023         remain = m->m_pkthdr.len;
 1024         moff = 0;
 1025         p = &top;
 1026         while (remain > 0 || top == NULL) {     /* allow m->m_pkthdr.len == 0 */
 1027                 struct mbuf *n;
 1028 
 1029                 /* Get the next new mbuf */
 1030                 MGET(n, how, m->m_type);
 1031                 if (n == NULL)
 1032                         goto nospace;
 1033                 if (top == NULL) {              /* first one, must be PKTHDR */
 1034                         if (!m_dup_pkthdr(n, m, how))
 1035                                 goto nospace;
 1036                         nsize = MHLEN;
 1037                 } else                          /* not the first one */
 1038                         nsize = MLEN;
 1039                 if (remain >= MINCLSIZE) {
 1040                         MCLGET(n, how);
 1041                         if ((n->m_flags & M_EXT) == 0) {
 1042                                 (void)m_free(n);
 1043                                 goto nospace;
 1044                         }
 1045                         nsize = MCLBYTES;
 1046                 }
 1047                 n->m_len = 0;
 1048 
 1049                 /* Link it into the new chain */
 1050                 *p = n;
 1051                 p = &n->m_next;
 1052 
 1053                 /* Copy data from original mbuf(s) into new mbuf */
 1054                 while (n->m_len < nsize && m != NULL) {
 1055                         int chunk = min(nsize - n->m_len, m->m_len - moff);
 1056 
 1057                         bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
 1058                         moff += chunk;
 1059                         n->m_len += chunk;
 1060                         remain -= chunk;
 1061                         if (moff == m->m_len) {
 1062                                 m = m->m_next;
 1063                                 moff = 0;
 1064                         }
 1065                 }
 1066 
 1067                 /* Check correct total mbuf length */
 1068                 KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
 1069                         ("%s: bogus m_pkthdr.len", __FUNCTION__));
 1070         }
 1071         return (top);
 1072 
 1073 nospace:
 1074         m_freem(top);
 1075         MCFail++;
 1076         return (0);
 1077 }
 1078 
 1079 /*
 1080  * Concatenate mbuf chain n to m.
 1081  * Both chains must be of the same type (e.g. MT_DATA).
 1082  * Any m_pkthdr is not updated.
 1083  */
 1084 void
 1085 m_cat(m, n)
 1086         register struct mbuf *m, *n;
 1087 {
 1088         while (m->m_next)
 1089                 m = m->m_next;
 1090         while (n) {
 1091                 if (m->m_flags & M_EXT ||
 1092                     m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
 1093                         /* just join the two chains */
 1094                         m->m_next = n;
 1095                         return;
 1096                 }
 1097                 /* splat the data from one into the other */
 1098                 bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
 1099                     (u_int)n->m_len);
 1100                 m->m_len += n->m_len;
 1101                 n = m_free(n);
 1102         }
 1103 }
 1104 
 1105 void
 1106 m_adj(mp, req_len)
 1107         struct mbuf *mp;
 1108         int req_len;
 1109 {
 1110         register int len = req_len;
 1111         register struct mbuf *m;
 1112         register int count;
 1113 
 1114         if ((m = mp) == NULL)
 1115                 return;
 1116         if (len >= 0) {
 1117                 /*
 1118                  * Trim from head.
 1119                  */
 1120                 while (m != NULL && len > 0) {
 1121                         if (m->m_len <= len) {
 1122                                 len -= m->m_len;
 1123                                 m->m_len = 0;
 1124                                 m = m->m_next;
 1125                         } else {
 1126                                 m->m_len -= len;
 1127                                 m->m_data += len;
 1128                                 len = 0;
 1129                         }
 1130                 }
 1131                 m = mp;
 1132                 if (mp->m_flags & M_PKTHDR)
 1133                         m->m_pkthdr.len -= (req_len - len);
 1134         } else {
 1135                 /*
 1136                  * Trim from tail.  Scan the mbuf chain,
 1137                  * calculating its length and finding the last mbuf.
 1138                  * If the adjustment only affects this mbuf, then just
 1139                  * adjust and return.  Otherwise, rescan and truncate
 1140                  * after the remaining size.
 1141                  */
 1142                 len = -len;
 1143                 count = 0;
 1144                 for (;;) {
 1145                         count += m->m_len;
 1146                         if (m->m_next == (struct mbuf *)0)
 1147                                 break;
 1148                         m = m->m_next;
 1149                 }
 1150                 if (m->m_len >= len) {
 1151                         m->m_len -= len;
 1152                         if (mp->m_flags & M_PKTHDR)
 1153                                 mp->m_pkthdr.len -= len;
 1154                         return;
 1155                 }
 1156                 count -= len;
 1157                 if (count < 0)
 1158                         count = 0;
 1159                 /*
 1160                  * Correct length for chain is "count".
 1161                  * Find the mbuf with last data, adjust its length,
 1162                  * and toss data from remaining mbufs on chain.
 1163                  */
 1164                 m = mp;
 1165                 if (m->m_flags & M_PKTHDR)
 1166                         m->m_pkthdr.len = count;
 1167                 for (; m; m = m->m_next) {
 1168                         if (m->m_len >= count) {
 1169                                 m->m_len = count;
 1170                                 break;
 1171                         }
 1172                         count -= m->m_len;
 1173                 }
 1174                 while (m->m_next)
  1175                         (m = m->m_next)->m_len = 0;
 1176         }
 1177 }
 1178 
 1179 /*
  1180  * Rearrange an mbuf chain so that len bytes are contiguous
 1181  * and in the data area of an mbuf (so that mtod and dtom
 1182  * will work for a structure of size len).  Returns the resulting
 1183  * mbuf chain on success, frees it and returns null on failure.
 1184  * If there is room, it will add up to max_protohdr-len extra bytes to the
 1185  * contiguous region in an attempt to avoid being called next time.
 1186  */
 1187 #define MPFail (mbstat.m_mpfail)
 1188 
 1189 struct mbuf *
 1190 m_pullup(n, len)
 1191         register struct mbuf *n;
 1192         int len;
 1193 {
 1194         register struct mbuf *m;
 1195         register int count;
 1196         int space;
 1197 
 1198         /*
 1199          * If first mbuf has no cluster, and has room for len bytes
 1200          * without shifting current data, pullup into it,
 1201          * otherwise allocate a new mbuf to prepend to the chain.
 1202          */
 1203         if ((n->m_flags & M_EXT) == 0 &&
 1204             n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
 1205                 if (n->m_len >= len)
 1206                         return (n);
 1207                 m = n;
 1208                 n = n->m_next;
 1209                 len -= m->m_len;
 1210         } else {
 1211                 if (len > MHLEN)
 1212                         goto bad;
 1213                 MGET(m, M_DONTWAIT, n->m_type);
 1214                 if (m == 0)
 1215                         goto bad;
 1216                 m->m_len = 0;
 1217                 if (n->m_flags & M_PKTHDR)
 1218                         M_MOVE_PKTHDR(m, n);
 1219         }
 1220         space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
 1221         do {
 1222                 count = min(min(max(len, max_protohdr), space), n->m_len);
 1223                 bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
 1224                   (unsigned)count);
 1225                 len -= count;
 1226                 m->m_len += count;
 1227                 n->m_len -= count;
 1228                 space -= count;
 1229                 if (n->m_len)
 1230                         n->m_data += count;
 1231                 else
 1232                         n = m_free(n);
 1233         } while (len > 0 && n);
 1234         if (len > 0) {
 1235                 (void) m_free(m);
 1236                 goto bad;
 1237         }
 1238         m->m_next = n;
 1239         return (m);
 1240 bad:
 1241         m_freem(n);
 1242         MPFail++;
 1243         return (0);
 1244 }
 1245 
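
/*
 * Illustrative sketch (hypothetical input routine, assuming
 * <netinet/ip.h> is available): the classic input-path idiom.  The
 * header must be contiguous before mtod() may be applied; on failure
 * m_pullup() has already freed the chain.
 */
static void
example_input(struct mbuf *m)
{
        struct ip *ip;

        if (m->m_len < sizeof(struct ip) &&
            (m = m_pullup(m, sizeof(struct ip))) == NULL)
                return;                 /* chain already freed */
        ip = mtod(m, struct ip *);
        /* ... header fields may now be accessed through ip ... */
}
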
 1246 /*
 1247  * Partition an mbuf chain in two pieces, returning the tail --
 1248  * all but the first len0 bytes.  In case of failure, it returns NULL and
 1249  * attempts to restore the chain to its original state.
 1250  *
 1251  * Note that the resulting mbufs might be read-only, because the new
 1252  * mbuf can end up sharing an mbuf cluster with the original mbuf if
 1253  * the "breaking point" happens to lie within a cluster mbuf. Use the
 1254  * M_WRITABLE() macro to check for this case.
 1255  */
 1256 struct mbuf *
 1257 m_split(m0, len0, wait)
 1258         register struct mbuf *m0;
 1259         int len0, wait;
 1260 {
 1261         register struct mbuf *m, *n;
 1262         unsigned len = len0, remain;
 1263 
 1264         for (m = m0; m && len > m->m_len; m = m->m_next)
 1265                 len -= m->m_len;
 1266         if (m == 0)
 1267                 return (0);
 1268         remain = m->m_len - len;
 1269         if (m0->m_flags & M_PKTHDR) {
 1270                 MGETHDR(n, wait, m0->m_type);
 1271                 if (n == 0)
 1272                         return (0);
 1273                 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
 1274                 n->m_pkthdr.len = m0->m_pkthdr.len - len0;
 1275                 m0->m_pkthdr.len = len0;
 1276                 if (m->m_flags & M_EXT)
 1277                         goto extpacket;
 1278                 if (remain > MHLEN) {
 1279                         /* m can't be the lead packet */
 1280                         MH_ALIGN(n, 0);
 1281                         n->m_next = m_split(m, len, wait);
 1282                         if (n->m_next == 0) {
 1283                                 (void) m_free(n);
 1284                                 return (0);
 1285                         } else {
 1286                                 n->m_len = 0;
 1287                                 return (n);
 1288                         }
 1289                 } else
 1290                         MH_ALIGN(n, remain);
 1291         } else if (remain == 0) {
 1292                 n = m->m_next;
 1293                 m->m_next = 0;
 1294                 return (n);
 1295         } else {
 1296                 MGET(n, wait, m->m_type);
 1297                 if (n == 0)
 1298                         return (0);
 1299                 M_ALIGN(n, remain);
 1300         }
 1301 extpacket:
 1302         if (m->m_flags & M_EXT) {
 1303                 n->m_flags |= M_EXT;
 1304                 n->m_ext = m->m_ext;
 1305                 if (m->m_ext.ext_ref == NULL)
 1306                         atomic_add_char(&mclrefcnt[mtocl(m->m_ext.ext_buf)], 1);
 1307                 else {
 1308                         int s = splimp();
 1309 
 1310                         (*m->m_ext.ext_ref)(m->m_ext.ext_buf,
 1311                             m->m_ext.ext_size);
 1312                         splx(s);
 1313                 }
 1314                 n->m_data = m->m_data + len;
 1315         } else {
 1316                 bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
 1317         }
 1318         n->m_len = remain;
 1319         m->m_len = len;
 1320         n->m_next = m->m_next;
 1321         m->m_next = 0;
 1322         n = m_clreflimit(n, wait);
 1323         return (n);
 1324 }
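
/*
 * Illustrative sketch (hypothetical helper): fragmentation-style use of
 * m_split().  As the comment above warns, both halves may share a
 * cluster afterward, so M_WRITABLE() must be checked before modifying
 * data in place.
 */
static struct mbuf *
example_split_at(struct mbuf *m, int boundary)
{
        struct mbuf *tail;

        tail = m_split(m, boundary, M_DONTWAIT);
        if (tail == NULL)
                return (NULL);          /* m was left in its original state */
        return (tail);                  /* all bytes from `boundary' on */
}
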
 1325 /*
 1326  * Routine to copy from device local memory into mbufs.
 1327  */
 1328 struct mbuf *
 1329 m_devget(buf, totlen, off0, ifp, copy)
 1330         char *buf;
 1331         int totlen, off0;
 1332         struct ifnet *ifp;
 1333         void (*copy) __P((char *from, caddr_t to, u_int len));
 1334 {
 1335         register struct mbuf *m;
 1336         struct mbuf *top = 0, **mp = &top;
 1337         register int off = off0, len;
 1338         register char *cp;
 1339         char *epkt;
 1340 
 1341         cp = buf;
 1342         epkt = cp + totlen;
 1343         if (off) {
 1344                 cp += off + 2 * sizeof(u_short);
 1345                 totlen -= 2 * sizeof(u_short);
 1346         }
 1347         MGETHDR(m, M_DONTWAIT, MT_DATA);
 1348         if (m == 0)
 1349                 return (0);
 1350         m->m_pkthdr.rcvif = ifp;
 1351         m->m_pkthdr.len = totlen;
 1352         m->m_len = MHLEN;
 1353 
 1354         while (totlen > 0) {
 1355                 if (top) {
 1356                         MGET(m, M_DONTWAIT, MT_DATA);
 1357                         if (m == 0) {
 1358                                 m_freem(top);
 1359                                 return (0);
 1360                         }
 1361                         m->m_len = MLEN;
 1362                 }
 1363                 len = min(totlen, epkt - cp);
 1364                 if (len >= MINCLSIZE) {
 1365                         MCLGET(m, M_DONTWAIT);
 1366                         if (m->m_flags & M_EXT)
 1367                                 m->m_len = len = min(len, MCLBYTES);
 1368                         else
 1369                                 len = m->m_len;
 1370                 } else {
 1371                         /*
 1372                          * Place initial small packet/header at end of mbuf.
 1373                          */
 1374                         if (len < m->m_len) {
 1375                                 if (top == 0 && len + max_linkhdr <= m->m_len)
 1376                                         m->m_data += max_linkhdr;
 1377                                 m->m_len = len;
 1378                         } else
 1379                                 len = m->m_len;
 1380                 }
 1381                 if (copy)
 1382                         copy(cp, mtod(m, caddr_t), (unsigned)len);
 1383                 else
 1384                         bcopy(cp, mtod(m, caddr_t), (unsigned)len);
 1385                 cp += len;
 1386                 *mp = m;
 1387                 mp = &m->m_next;
 1388                 totlen -= len;
 1389                 if (cp == epkt)
 1390                         cp = buf;
 1391         }
 1392         return (top);
 1393 }
 1394 
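
/*
 * Illustrative sketch (hypothetical driver use): copying a received
 * frame out of board memory.  A NULL copy function selects plain
 * bcopy().
 */
static struct mbuf *
example_devget(char *dev_buf, int frame_len, struct ifnet *ifp)
{
        struct mbuf *m;

        m = m_devget(dev_buf, frame_len, 0, ifp, NULL);
        if (m == NULL)
                return (NULL);          /* out of mbufs: frame is dropped */
        return (m);
}
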
 1395 /*
 1396  * Copy data from a buffer back into the indicated mbuf chain,
 1397  * starting "off" bytes from the beginning, extending the mbuf
 1398  * chain if necessary.
 1399  */
 1400 void
 1401 m_copyback(m0, off, len, cp)
 1402         struct  mbuf *m0;
 1403         register int off;
 1404         register int len;
 1405         caddr_t cp;
 1406 {
 1407         register int mlen;
 1408         register struct mbuf *m = m0, *n;
 1409         int totlen = 0;
 1410 
 1411         if (m0 == 0)
 1412                 return;
 1413         while (off > (mlen = m->m_len)) {
 1414                 off -= mlen;
 1415                 totlen += mlen;
 1416                 if (m->m_next == 0) {
 1417                         n = m_getclr(M_DONTWAIT, m->m_type);
 1418                         if (n == 0)
 1419                                 goto out;
 1420                         n->m_len = min(MLEN, len + off);
 1421                         m->m_next = n;
 1422                 }
 1423                 m = m->m_next;
 1424         }
 1425         while (len > 0) {
  1426                 mlen = min(m->m_len - off, len);
 1427                 bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
 1428                 cp += mlen;
 1429                 len -= mlen;
 1430                 mlen += off;
 1431                 off = 0;
 1432                 totlen += mlen;
 1433                 if (len == 0)
 1434                         break;
 1435                 if (m->m_next == 0) {
 1436                         n = m_get(M_DONTWAIT, m->m_type);
 1437                         if (n == 0)
 1438                                 break;
 1439                         n->m_len = min(MLEN, len);
 1440                         m->m_next = n;
 1441                 }
 1442                 m = m->m_next;
 1443         }
 1444 out:    if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
 1445                 m->m_pkthdr.len = totlen;
 1446 }
 1447 
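
/*
 * Illustrative sketch (hypothetical helper): patching a 16-bit field at
 * a given offset.  m_copyback() extends the chain if needed,
 * zero-filling any skipped region, so the offset need not already be
 * covered.
 */
static void
example_patch16(struct mbuf *m, int off, u_short val)
{
        val = htons(val);
        m_copyback(m, off, sizeof(val), (caddr_t)&val);
}
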
 1448 /*
 1449  * Apply function f to the data in an mbuf chain starting "off" bytes from
 1450  * the beginning, continuing for "len" bytes.
 1451  */
 1452 int
 1453 m_apply(struct mbuf *m, int off, int len,
 1454     int (*f)(void *, void *, u_int), void *arg)
 1455 {
 1456         u_int count;
 1457         int rval;
 1458 
 1459         KASSERT(off >= 0, ("m_apply, negative off %d", off));
 1460         KASSERT(len >= 0, ("m_apply, negative len %d", len));
 1461         while (off > 0) {
 1462                 KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
 1463                 if (off < m->m_len)
 1464                         break;
 1465                 off -= m->m_len;
 1466                 m = m->m_next;
 1467         }
 1468         while (len > 0) {
 1469                 KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
 1470                 count = min(m->m_len - off, len);
 1471                 rval = (*f)(arg, mtod(m, caddr_t) + off, count);
 1472                 if (rval)
 1473                         return (rval);
 1474                 len -= count;
 1475                 off = 0;
 1476                 m = m->m_next;
 1477         }
 1478         return (0);
 1479 }
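
/*
 * Example (illustrative sketch): an m_apply() callback that sums data
 * bytes, visiting each mbuf in place instead of linearizing the chain.
 * The helper names are hypothetical.
 */
#if 0
static int
example_sum_cb(void *arg, void *data, u_int len)
{
        u_int32_t *sum = arg;
        u_char *p = data;
        u_int i;

        for (i = 0; i < len; i++)
                *sum += p[i];
        return (0);             /* non-zero would abort the traversal */
}

static u_int32_t
example_sum(struct mbuf *m, int off, int len)
{
        u_int32_t sum = 0;

        (void)m_apply(m, off, len, example_sum_cb, &sum);
        return (sum);
}
#endif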
 1480 
 1481 /*
 1482  * Return a pointer to mbuf/offset of location in mbuf chain.
 1483  */
 1484 struct mbuf *
 1485 m_getptr(struct mbuf *m, int loc, int *off)
 1486 {
 1487 
 1488         while (loc >= 0) {
 1489                 /* Normal end of search. */
 1490                 if (m->m_len > loc) {
 1491                         *off = loc;
 1492                         return (m);
 1493                 } else {
 1494                         loc -= m->m_len;
 1495                         if (m->m_next == NULL) {
 1496                                 if (loc == 0) {
 1497                                         /* Point at the end of valid data. */
 1498                                         *off = m->m_len;
 1499                                         return (m);
 1500                                 }
 1501                                 return (NULL);
 1502                         }
 1503                         m = m->m_next;
 1504                 }
 1505         }
 1506         return (NULL);
 1507 }
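
/*
 * Example (illustrative sketch): reading one byte at position "loc"
 * through the mbuf/offset pair that m_getptr() returns.
 */
#if 0
static int
example_peek_byte(struct mbuf *m0, int loc, u_char *out)
{
        struct mbuf *m;
        int off;

        m = m_getptr(m0, loc, &off);
        if (m == NULL || off >= m->m_len)
                return (-1);    /* loc lies beyond the valid data */
        *out = mtod(m, u_char *)[off];
        return (0);
}
#endif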
 1508 
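/*
 * Debugging aid: print the address and contents of each mbuf in a
 * packet chain.
 */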
 1509 void
 1510 m_print(const struct mbuf *m)
 1511 {
 1512         int len;
 1513         const struct mbuf *m2;
 1514 
 1515         len = m->m_pkthdr.len;
 1516         m2 = m;
 1517         while (len) {
 1518                 printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
 1519                 len -= m2->m_len;
 1520                 m2 = m2->m_next;
 1521         }
 1522         return;
 1523 }
 1524 
 1525 /*
 1526  * "Move" mbuf pkthdr from "from" to "to".
 1527  * "from" must have M_PKTHDR set, and "to" must be empty.
 1528  */
 1529 void
 1530 m_move_pkthdr(struct mbuf *to, struct mbuf *from)
 1531 {
 1532         KASSERT((to->m_flags & M_EXT) == 0, ("m_move_pkthdr: to has cluster"));
 1533 
 1534         to->m_flags = from->m_flags & M_COPYFLAGS;
 1535         to->m_data = to->m_pktdat;
 1536         to->m_pkthdr = from->m_pkthdr;          /* especially tags */
 1537         SLIST_INIT(&from->m_pkthdr.tags);       /* purge tags from src */
 1538         from->m_flags &= ~M_PKTHDR;
 1539 }
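
/*
 * Example (illustrative sketch): prepending a fresh header mbuf and
 * moving the packet header (tags included) onto it, as a caller might
 * when it needs writable space in front of the data.
 */
#if 0
static struct mbuf *
example_new_head(struct mbuf *m0, int how)
{
        struct mbuf *n;

        n = m_gethdr(how, MT_DATA);
        if (n == NULL)
                return (NULL);
        m_move_pkthdr(n, m0);   /* m0 loses M_PKTHDR and its tags */
        n->m_len = 0;
        n->m_next = m0;
        return (n);
}
#endif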
 1540 
 1541 /*
 1542  * Duplicate "from"'s mbuf pkthdr in "to".
 1543  * "from" must have M_PKTHDR set, and "to" must be empty.
 1544  * In particular, this does a deep copy of the packet tags.
 1545  */
 1546 int
 1547 m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
 1548 {
 1549         to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
 1550         if ((to->m_flags & M_EXT) == 0)
 1551                 to->m_data = to->m_pktdat;
 1552         to->m_pkthdr = from->m_pkthdr;
 1553         SLIST_INIT(&to->m_pkthdr.tags);
 1554         return (m_tag_copy_chain(to, from, how));
 1555 }
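
/*
 * Example (illustrative sketch): giving a copy its own packet header
 * with a deep-copied tag chain.  Only the header is duplicated here;
 * the data would still have to be copied separately.
 */
#if 0
static struct mbuf *
example_dup_hdr(struct mbuf *m0, int how)
{
        struct mbuf *n;

        n = m_gethdr(how, MT_DATA);
        if (n == NULL)
                return (NULL);
        if (m_dup_pkthdr(n, m0, how) == 0) {    /* 0 means tag copy failed */
                m_free(n);
                return (NULL);
        }
        return (n);
}
#endif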
 1556 
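/*
 * Recompute m_pkthdr.len by summing the lengths of the chain, and
 * return the result.
 */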
 1557 u_int
 1558 m_fixhdr(struct mbuf *m0)
 1559 {
 1560         u_int len;
 1561 
 1562         len = m_length(m0, NULL);
 1563         m0->m_pkthdr.len = len;
 1564         return (len);
 1565 }
 1566 
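/*
 * Return the number of data bytes in the chain; if "last" is non-NULL,
 * also return a pointer to the final mbuf through it.
 */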
 1567 u_int
 1568 m_length(struct mbuf *m0, struct mbuf **last)
 1569 {
 1570         struct mbuf *m;
 1571         u_int len;
 1572 
 1573         len = 0;
 1574         for (m = m0; m != NULL; m = m->m_next) {
 1575                 len += m->m_len;
 1576                 if (m->m_next == NULL)
 1577                         break;
 1578         }
 1579         if (last != NULL)
 1580                 *last = m;
 1581         return (len);
 1582 }
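
/*
 * Example (illustrative sketch): using m_length() to learn both the
 * total byte count and the tail mbuf of a chain in a single pass.
 */
#if 0
static void
example_chain_info(struct mbuf *m0)
{
        struct mbuf *last;
        u_int len;

        len = m_length(m0, &last);
        printf("chain holds %u bytes; tail mbuf %p holds %d bytes\n",
            len, last, last->m_len);
}
#endif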
 1583 
 1584 /*
 1585  * Defragment an mbuf chain, returning the shortest possible
 1586  * chain of mbufs and clusters.  If allocation fails and
 1587  * this cannot be completed, NULL will be returned, but
 1588  * the passed-in chain will be unchanged.  Upon success,
 1589  * the original chain will be freed, and the new chain
 1590  * will be returned.
 1591  *
 1592  * If an mbuf without a packet header is passed in, the
 1593  * original chain is returned unharmed.
 1594  */
 1595 struct mbuf *
 1596 m_defrag(struct mbuf *m0, int how)
 1597 {
 1598         struct mbuf     *m_new = NULL, *m_final = NULL;
 1599         int             progress = 0, length;
 1600 
 1601         if (!(m0->m_flags & M_PKTHDR))
 1602                 return (m0);
 1603 
 1604         m_fixhdr(m0); /* Needed sanity check */
 1605 
 1606 #ifdef MBUF_STRESS_TEST
 1607         if (m_defragrandomfailures) {
 1608                 int temp = arc4random() & 0xff;
 1609                 if (temp == 0xba)
 1610                         goto nospace;
 1611         }
 1612 #endif
 1613         
 1614         if (m0->m_pkthdr.len > MHLEN)
 1615                 m_final = m_getcl(how, MT_DATA, M_PKTHDR);
 1616         else
 1617                 m_final = m_gethdr(how, MT_DATA);
 1618 
 1619         if (m_final == NULL)
 1620                 goto nospace;
 1621 
 1622         if (m_dup_pkthdr(m_final, m0, how) == 0)
 1623                 goto nospace;
 1624 
 1625         m_new = m_final;
 1626 
 1627         while (progress < m0->m_pkthdr.len) {
 1628                 length = m0->m_pkthdr.len - progress;
 1629                 if (length > MCLBYTES)
 1630                         length = MCLBYTES;
 1631 
 1632                 if (m_new == NULL) {
 1633                         if (length > MLEN)
 1634                                 m_new = m_getcl(how, MT_DATA, 0);
 1635                         else
 1636                                 m_new = m_get(how, MT_DATA);
 1637                         if (m_new == NULL)
 1638                                 goto nospace;
 1639                 }
 1640 
 1641                 m_copydata(m0, progress, length, mtod(m_new, caddr_t));
 1642                 progress += length;
 1643                 m_new->m_len = length;
 1644                 if (m_new != m_final)
 1645                         m_cat(m_final, m_new);
 1646                 m_new = NULL;
 1647         }
 1648 #ifdef MBUF_STRESS_TEST
 1649         if (m0->m_next == NULL)
 1650                 m_defraguseless++;
 1651 #endif
 1652         m_freem(m0);
 1653         m0 = m_final;
 1654 #ifdef MBUF_STRESS_TEST
 1655         m_defragpackets++;
 1656         m_defragbytes += m0->m_pkthdr.len;
 1657 #endif
 1658         return (m0);
 1659 nospace:
 1660 #ifdef MBUF_STRESS_TEST
 1661         m_defragfailure++;
 1662 #endif
 1663         if (m_new)
 1664                 m_free(m_new);
 1665         if (m_final)
 1666                 m_freem(m_final);
 1667         return (NULL);
 1668 }
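
/*
 * Example (illustrative sketch): a transmit path compacting a chain
 * that exceeds a device's scatter/gather limit.  The segment limit
 * and helper name are hypothetical.
 */
#if 0
#define EXAMPLE_MAX_SEGS        8       /* hypothetical DMA segment limit */

static struct mbuf *
example_tx_prepare(struct mbuf *m0)
{
        struct mbuf *m;
        int segs = 0;

        for (m = m0; m != NULL; m = m->m_next)
                segs++;
        if (segs <= EXAMPLE_MAX_SEGS)
                return (m0);
        m = m_defrag(m0, M_DONTWAIT);
        /* On failure m0 is untouched, so fall back to the original. */
        return (m != NULL ? m : m0);
}
#endif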
 1669 
 1670 #ifdef MBUF_STRESS_TEST
 1671 
 1672 /*
 1673  * Fragment an mbuf chain.  There's no reason you'd ever want to do
 1674  * this in normal usage, but it's great for stress testing various
 1675  * mbuf consumers.
 1676  *
 1677  * If fragmentation is not possible, the original chain will be
 1678  * returned.
 1679  *
 1680  * Possible length values:
 1681  *  0    no fragmentation will occur
 1682  * >0    every fragment will have the specified length
 1683  * -1    all fragments will share one randomly chosen length
 1684  * -2    each fragment's length will be chosen independently at random
 1685  * (Random lengths range from 1 to 256 bytes)
 1686  */
 1687 struct mbuf *
 1688 m_fragment(struct mbuf *m0, int how, int length)
 1689 {
 1690         struct mbuf     *m_new = NULL, *m_final = NULL;
 1691         int             progress = 0;
 1692 
 1693         if (!(m0->m_flags & M_PKTHDR))
 1694                 return (m0);
 1695         
 1696         if ((length == 0) || (length < -2))
 1697                 return (m0);
 1698 
 1699         m_fixhdr(m0); /* Needed sanity check */
 1700 
 1701         m_final = m_getcl(how, MT_DATA, M_PKTHDR);
 1702 
 1703         if (m_final == NULL)
 1704                 goto nospace;
 1705 
 1706         if (m_dup_pkthdr(m_final, m0, how) == 0)
 1707                 goto nospace;
 1708 
 1709         m_new = m_final;
 1710 
 1711         if (length == -1)
 1712                 length = 1 + (arc4random() & 255);
 1713 
 1714         while (progress < m0->m_pkthdr.len) {
 1715                 int fraglen;
 1716 
 1717                 if (length > 0)
 1718                         fraglen = length;
 1719                 else
 1720                         fraglen = 1 + (arc4random() & 255);
 1721                 if (fraglen > m0->m_pkthdr.len - progress)
 1722                         fraglen = m0->m_pkthdr.len - progress;
 1723 
 1724                 if (fraglen > MCLBYTES)
 1725                         fraglen = MCLBYTES;
 1726 
 1727                 if (m_new == NULL) {
 1728                         m_new = m_getcl(how, MT_DATA, 0);
 1729                         if (m_new == NULL)
 1730                                 goto nospace;
 1731                 }
 1732 
 1733                 m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t));
 1734                 progress += fraglen;
 1735                 m_new->m_len = fraglen;
 1736                 if (m_new != m_final)
 1737                         m_cat(m_final, m_new);
 1738                 m_new = NULL;
 1739         }
 1740         m_freem(m0);
 1741         m0 = m_final;
 1742         return (m0);
 1743 nospace:
 1744         if (m_new)
 1745                 m_free(m_new);
 1746         if (m_final)
 1747                 m_freem(m_final);
 1748         /* Return the original chain on failure */
 1749         return (m0);
 1750 }
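
/*
 * Example (illustrative sketch): exercising an mbuf consumer by
 * refragmenting each packet into independently random-sized pieces.
 */
#if 0
static struct mbuf *
example_stress(struct mbuf *m0)
{
        /* -2: pick a fresh random length for every fragment. */
        return (m_fragment(m0, M_DONTWAIT, -2));
}
#endif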
 1751 
 1752 #endif
 1753 
 1754 #define MAX_CLREFCOUNT  32
 1755 
 1756 /*
 1757  * Ensure that the highest cluster reference count in the chain stays
 1758  * below MAX_CLREFCOUNT by making a fresh copy of the entire chain.
 1759  *
 1760  * A reference count that has already reached zero or gone negative panics.
 1761  */
 1762 static struct mbuf *
 1763 m_clreflimit(struct mbuf *m0, int how)
 1764 {
 1765         struct mbuf *m;
 1766         int maxrefs = 0;
 1767 
 1768         for (m = m0; m != NULL; m = m->m_next) {
 1769                 if ((m->m_flags & M_EXT) && (m->m_ext.ext_ref == NULL)) {
 1770                         maxrefs = max(maxrefs,
 1771                                 mclrefcnt[mtocl(m->m_ext.ext_buf)]);
 1772                         KASSERT(mclrefcnt[mtocl(m->m_ext.ext_buf)] > 0,
 1773                         ("m_clreflimit: bad reference count: %d",
 1774                          mclrefcnt[mtocl(m->m_ext.ext_buf)]));
 1775                 }
 1776         }
 1777 
 1778         if (maxrefs < MAX_CLREFCOUNT)
 1779                 return (m0);
 1780 
 1781         m_clreflimithits++;
 1782         m = m_defrag(m0, how);
 1783         /* Avoid returning NULL at all costs, m_split won't like it. */
 1784         if (m == NULL)
 1785                 return (m0);
 1786         else
 1787                 return (m);
 1788 }
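
/*
 * Example (illustrative sketch): how a caller might bound cluster
 * sharing before an operation, such as m_split(), that would add more
 * references.  The calling context is hypothetical.
 */
#if 0
static struct mbuf *
example_presplit(struct mbuf **m0p, int len, int how)
{
        /* Copy the chain first if any cluster is heavily referenced. */
        *m0p = m_clreflimit(*m0p, how);
        /* m_split() returns the tail; *m0p keeps the first len bytes. */
        return (m_split(*m0p, len, how));
}
#endif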
