uma_core.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson <jeff@FreeBSD.org>
    3  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
    4  * Copyright (c) 2004-2006 Robert N. M. Watson
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice unmodified, this list of conditions, and the following
   12  *    disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * uma_core.c  Implementation of the Universal Memory allocator
   31  *
   32  * This allocator is intended to replace the multitude of similar object caches
   33  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
   34  * efficient.  A primary design goal is to return unused memory to the rest of
   35  * the system.  This will make the system as a whole more flexible due to the
   36  * ability to move memory to subsystems which most need it instead of leaving
   37  * pools of reserved memory unused.
   38  *
   39  * The basic ideas stem from similar slab/zone based allocators whose algorithms
   40  * are well known.
   41  *
   42  */
   43 
   44 /*
   45  * TODO:
   46  *      - Improve memory usage for large allocations
   47  *      - Investigate cache size adjustments
   48  */
   49 
   50 #include <sys/cdefs.h>
   51 __FBSDID("$FreeBSD: releng/7.4/sys/vm/uma_core.c 214888 2010-11-06 15:21:46Z lstewart $");
   52 
   53 /* I should really use ktr.. */
   54 /*
   55 #define UMA_DEBUG 1
   56 #define UMA_DEBUG_ALLOC 1
   57 #define UMA_DEBUG_ALLOC_1 1
   58 */
   59 
   60 #include "opt_ddb.h"
   61 #include "opt_param.h"
   62 
   63 #include <sys/param.h>
   64 #include <sys/systm.h>
   65 #include <sys/kernel.h>
   66 #include <sys/types.h>
   67 #include <sys/queue.h>
   68 #include <sys/malloc.h>
   69 #include <sys/ktr.h>
   70 #include <sys/lock.h>
   71 #include <sys/sysctl.h>
   72 #include <sys/mutex.h>
   73 #include <sys/proc.h>
   74 #include <sys/sbuf.h>
   75 #include <sys/smp.h>
   76 #include <sys/vmmeter.h>
   77 
   78 #include <vm/vm.h>
   79 #include <vm/vm_object.h>
   80 #include <vm/vm_page.h>
   81 #include <vm/vm_param.h>
   82 #include <vm/vm_map.h>
   83 #include <vm/vm_kern.h>
   84 #include <vm/vm_extern.h>
   85 #include <vm/uma.h>
   86 #include <vm/uma_int.h>
   87 #include <vm/uma_dbg.h>
   88 
   89 #include <machine/vmparam.h>
   90 
   91 #include <ddb/ddb.h>
   92 
   93 /*
   94  * This is the zone and keg from which all zones are spawned.  The idea is that
   95  * even the zone & keg heads are allocated from the allocator, so we use the
   96  * bss section to bootstrap us.
   97  */
   98 static struct uma_keg masterkeg;
   99 static struct uma_zone masterzone_k;
  100 static struct uma_zone masterzone_z;
  101 static uma_zone_t kegs = &masterzone_k;
  102 static uma_zone_t zones = &masterzone_z;
  103 
  104 /* This is the zone from which all of uma_slab_t's are allocated. */
  105 static uma_zone_t slabzone;
  106 static uma_zone_t slabrefzone;  /* With refcounters (for UMA_ZONE_REFCNT) */
  107 
  108 /*
  109  * The initial hash tables come out of this zone so they can be allocated
  110  * prior to malloc coming up.
  111  */
  112 static uma_zone_t hashzone;
  113 
  114 /* The boot-time adjusted value for cache line alignment. */
  115 static int uma_align_cache = 16 - 1;
  116 
  117 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
  118 
  119 /*
  120  * Are we allowed to allocate buckets?
  121  */
  122 static int bucketdisable = 1;
  123 
  124 /* Linked list of all kegs in the system */
  125 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
  126 
  127 /* This mutex protects the keg list */
  128 static struct mtx uma_mtx;
  129 
  130 /* Linked list of boot time pages */
  131 static LIST_HEAD(,uma_slab) uma_boot_pages =
  132     LIST_HEAD_INITIALIZER(&uma_boot_pages);
  133 
  134 /* This mutex protects the boot time pages list */
  135 static struct mtx uma_boot_pages_mtx;
  136 
  137 /* Is the VM done starting up? */
  138 static int booted = 0;
  139 
  140 /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
  141 static u_int uma_max_ipers;
  142 static u_int uma_max_ipers_ref;
  143 
  144 /*
  145  * This is the handle used to schedule events that need to happen
  146  * outside of the allocation fast path.
  147  */
  148 static struct callout uma_callout;
  149 #define UMA_TIMEOUT     20              /* Seconds for callout interval. */
  150 
  151 /*
  152  * This structure is passed as the zone ctor arg so that I don't have to create
  153  * a special allocation function just for zones.
  154  */
  155 struct uma_zctor_args {
  156         char *name;
  157         size_t size;
  158         uma_ctor ctor;
  159         uma_dtor dtor;
  160         uma_init uminit;
  161         uma_fini fini;
  162         uma_keg_t keg;
  163         int align;
  164         u_int32_t flags;
  165 };
  166 
  167 struct uma_kctor_args {
  168         uma_zone_t zone;
  169         size_t size;
  170         uma_init uminit;
  171         uma_fini fini;
  172         int align;
  173         u_int32_t flags;
  174 };
  175 
  176 struct uma_bucket_zone {
  177         uma_zone_t      ubz_zone;
  178         char            *ubz_name;
  179         int             ubz_entries;
  180 };
  181 
  182 #define BUCKET_MAX      128
  183 
  184 struct uma_bucket_zone bucket_zones[] = {
  185         { NULL, "16 Bucket", 16 },
  186         { NULL, "32 Bucket", 32 },
  187         { NULL, "64 Bucket", 64 },
  188         { NULL, "128 Bucket", 128 },
  189         { NULL, NULL, 0}
  190 };
  191 
  192 #define BUCKET_SHIFT    4
  193 #define BUCKET_ZONES    ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
  194 
  195 /*
  196  * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
  197  * of approximately the right size.
  198  */
  199 static uint8_t bucket_size[BUCKET_ZONES];
  200 
  201 /*
  202  * Flags and enumerations to be passed to internal functions.
  203  */
  204 enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
  205 
  206 #define ZFREE_STATFAIL  0x00000001      /* Update zone failure statistic. */
  207 #define ZFREE_STATFREE  0x00000002      /* Update zone free statistic. */
  208 
  209 /* Prototypes.. */
  210 
  211 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
  212 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
  213 static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
  214 static void page_free(void *, int, u_int8_t);
  215 static uma_slab_t slab_zalloc(uma_zone_t, int);
  216 static void cache_drain(uma_zone_t);
  217 static void bucket_drain(uma_zone_t, uma_bucket_t);
  218 static void bucket_cache_drain(uma_zone_t zone);
  219 static int keg_ctor(void *, int, void *, int);
  220 static void keg_dtor(void *, int, void *);
  221 static int zone_ctor(void *, int, void *, int);
  222 static void zone_dtor(void *, int, void *);
  223 static int zero_init(void *, int, int);
  224 static void zone_small_init(uma_zone_t zone);
  225 static void zone_large_init(uma_zone_t zone);
  226 static void zone_foreach(void (*zfunc)(uma_zone_t));
  227 static void zone_timeout(uma_zone_t zone);
  228 static int hash_alloc(struct uma_hash *);
  229 static int hash_expand(struct uma_hash *, struct uma_hash *);
  230 static void hash_free(struct uma_hash *hash);
  231 static void uma_timeout(void *);
  232 static void uma_startup3(void);
  233 static void *uma_zalloc_internal(uma_zone_t, void *, int);
  234 static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip,
  235     int);
  236 static void bucket_enable(void);
  237 static void bucket_init(void);
  238 static uma_bucket_t bucket_alloc(int, int);
  239 static void bucket_free(uma_bucket_t);
  240 static void bucket_zone_drain(void);
  241 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
  242 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
  243 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
  244 static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
  245     uma_fini fini, int align, u_int32_t flags);
  246 
  247 void uma_print_zone(uma_zone_t);
  248 void uma_print_stats(void);
  249 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
  250 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
  251 
  252 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
  253 
  254 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
  255     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
  256 
  257 SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
  258     0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
  259 
  260 /*
  261  * This routine checks to see whether or not it's safe to enable buckets.
  262  */
  263 
  264 static void
  265 bucket_enable(void)
  266 {
  267         if (cnt.v_free_count < cnt.v_free_min)
  268                 bucketdisable = 1;
  269         else
  270                 bucketdisable = 0;
  271 }
  272 
  273 /*
  274  * Initialize bucket_zones, the array of zones of buckets of various sizes.
  275  *
  276  * For each zone, calculate the memory required for each bucket, consisting
  277  * of the header and an array of pointers.  Initialize bucket_size[] to point
  278  * the range of appropriate bucket sizes at the zone.
  279  */
  280 static void
  281 bucket_init(void)
  282 {
  283         struct uma_bucket_zone *ubz;
  284         int i;
  285         int j;
  286 
  287         for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
  288                 int size;
  289 
  290                 ubz = &bucket_zones[j];
  291                 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
  292                 size += sizeof(void *) * ubz->ubz_entries;
  293                 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
  294                     NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
  295                 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
  296                         bucket_size[i >> BUCKET_SHIFT] = j;
  297         }
  298 }
  299 
  300 /*
  301  * Given a desired number of entries for a bucket, return the zone from which
  302  * to allocate the bucket.
  303  */
  304 static struct uma_bucket_zone *
  305 bucket_zone_lookup(int entries)
  306 {
  307         int idx;
  308 
  309         idx = howmany(entries, 1 << BUCKET_SHIFT);
  310         return (&bucket_zones[bucket_size[idx]]);
  311 }
  312 
  313 static uma_bucket_t
  314 bucket_alloc(int entries, int bflags)
  315 {
  316         struct uma_bucket_zone *ubz;
  317         uma_bucket_t bucket;
  318 
  319         /*
  320          * This is to stop us from allocating per cpu buckets while we're
  321          * running out of vm.boot_pages.  Otherwise, we would exhaust the
  322          * boot pages.  This also prevents us from allocating buckets in
  323          * low memory situations.
  324          */
  325         if (bucketdisable)
  326                 return (NULL);
  327 
  328         ubz = bucket_zone_lookup(entries);
  329         bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
  330         if (bucket) {
  331 #ifdef INVARIANTS
  332                 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
  333 #endif
  334                 bucket->ub_cnt = 0;
  335                 bucket->ub_entries = ubz->ubz_entries;
  336         }
  337 
  338         return (bucket);
  339 }
  340 
  341 static void
  342 bucket_free(uma_bucket_t bucket)
  343 {
  344         struct uma_bucket_zone *ubz;
  345 
  346         ubz = bucket_zone_lookup(bucket->ub_entries);
  347         uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
  348             ZFREE_STATFREE);
  349 }
  350 
  351 static void
  352 bucket_zone_drain(void)
  353 {
  354         struct uma_bucket_zone *ubz;
  355 
  356         for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
  357                 zone_drain(ubz->ubz_zone);
  358 }
  359 
  360 
  361 /*
  362  * Routine called by timeout which is used to fire off some time interval
  363  * based calculations.  (stats, hash size, etc.)
  364  *
  365  * Arguments:
  366  *      arg   Unused
  367  *
  368  * Returns:
  369  *      Nothing
  370  */
  371 static void
  372 uma_timeout(void *unused)
  373 {
  374         bucket_enable();
  375         zone_foreach(zone_timeout);
  376 
  377         /* Reschedule this event */
  378         callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
  379 }
  380 
  381 /*
  382  * Routine to perform timeout driven calculations.  This expands the
  383  * hashes and does per cpu statistics aggregation.
  384  *
  385  *  Arguments:
  386  *      zone  The zone to operate on
  387  *
  388  *  Returns:
  389  *      Nothing
  390  */
  391 static void
  392 zone_timeout(uma_zone_t zone)
  393 {
  394         uma_keg_t keg;
  395         u_int64_t alloc;
  396 
  397         keg = zone->uz_keg;
  398         alloc = 0;
  399 
  400         /*
  401          * Expand the zone hash table.
  402          *
  403          * This is done if the number of slabs is larger than the hash size.
  404          * What I'm trying to do here is completely reduce collisions.  This
  405          * may be a little aggressive.  Should I allow for two collisions max?
  406          */
  407         ZONE_LOCK(zone);
  408         if (keg->uk_flags & UMA_ZONE_HASH &&
  409             keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
  410                 struct uma_hash newhash;
  411                 struct uma_hash oldhash;
  412                 int ret;
  413 
  414                 /*
  415                  * This is so involved because allocating and freeing
  416                  * while the zone lock is held will lead to deadlock.
  417                  * I have to do everything in stages and check for
  418                  * races.
  419                  */
  420                 newhash = keg->uk_hash;
  421                 ZONE_UNLOCK(zone);
  422                 ret = hash_alloc(&newhash);
  423                 ZONE_LOCK(zone);
  424                 if (ret) {
  425                         if (hash_expand(&keg->uk_hash, &newhash)) {
  426                                 oldhash = keg->uk_hash;
  427                                 keg->uk_hash = newhash;
  428                         } else
  429                                 oldhash = newhash;
  430 
  431                         ZONE_UNLOCK(zone);
  432                         hash_free(&oldhash);
  433                         ZONE_LOCK(zone);
  434                 }
  435         }
  436         ZONE_UNLOCK(zone);
  437 }
  438 
  439 /*
  440  * Allocate and zero fill the next sized hash table from the appropriate
  441  * backing store.
  442  *
  443  * Arguments:
  444  *      hash  A new hash structure with the old hash size in uh_hashsize
  445  *
  446  * Returns:
  447  *      1 on sucess and 0 on failure.
  448  */
  449 static int
  450 hash_alloc(struct uma_hash *hash)
  451 {
  452         int oldsize;
  453         int alloc;
  454 
  455         oldsize = hash->uh_hashsize;
  456 
  457         /* We're just going to go to a power of two greater */
  458         if (oldsize)  {
  459                 hash->uh_hashsize = oldsize * 2;
  460                 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
  461                 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
  462                     M_UMAHASH, M_NOWAIT);
  463         } else {
  464                 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
  465                 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
  466                     M_WAITOK);
  467                 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
  468         }
  469         if (hash->uh_slab_hash) {
  470                 bzero(hash->uh_slab_hash, alloc);
  471                 hash->uh_hashmask = hash->uh_hashsize - 1;
  472                 return (1);
  473         }
  474 
  475         return (0);
  476 }
  477 
  478 /*
  479  * Expands the hash table for HASH zones.  This is done from zone_timeout
  480  * to reduce collisions.  This must not be done in the regular allocation
  481  * path, otherwise, we can recurse on the vm while allocating pages.
  482  *
  483  * Arguments:
  484  *      oldhash  The hash you want to expand
  485  *      newhash  The hash structure for the new table
  486  *
  487  * Returns:
  488  *      Nothing
  489  *
  490  * Discussion:
  491  */
  492 static int
  493 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
  494 {
  495         uma_slab_t slab;
  496         int hval;
  497         int i;
  498 
  499         if (!newhash->uh_slab_hash)
  500                 return (0);
  501 
  502         if (oldhash->uh_hashsize >= newhash->uh_hashsize)
  503                 return (0);
  504 
  505         /*
  506          * I need to investigate hash algorithms for resizing without a
  507          * full rehash.
  508          */
  509 
  510         for (i = 0; i < oldhash->uh_hashsize; i++)
  511                 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
  512                         slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
  513                         SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
  514                         hval = UMA_HASH(newhash, slab->us_data);
  515                         SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
  516                             slab, us_hlink);
  517                 }
  518 
  519         return (1);
  520 }
  521 
  522 /*
  523  * Free the hash bucket to the appropriate backing store.
  524  *
  525  * Arguments:
  526  *      slab_hash  The hash bucket we're freeing
  527  *      hashsize   The number of entries in that hash bucket
  528  *
  529  * Returns:
  530  *      Nothing
  531  */
  532 static void
  533 hash_free(struct uma_hash *hash)
  534 {
  535         if (hash->uh_slab_hash == NULL)
  536                 return;
  537         if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
  538                 uma_zfree_internal(hashzone,
  539                     hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
  540         else
  541                 free(hash->uh_slab_hash, M_UMAHASH);
  542 }
  543 
  544 /*
  545  * Frees all outstanding items in a bucket
  546  *
  547  * Arguments:
  548  *      zone   The zone to free to, must be unlocked.
  549  *      bucket The free/alloc bucket with items, cpu queue must be locked.
  550  *
  551  * Returns:
  552  *      Nothing
  553  */
  554 
  555 static void
  556 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
  557 {
  558         uma_slab_t slab;
  559         int mzone;
  560         void *item;
  561 
  562         if (bucket == NULL)
  563                 return;
  564 
  565         slab = NULL;
  566         mzone = 0;
  567 
  568         /* We have to lookup the slab again for malloc.. */
  569         if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
  570                 mzone = 1;
  571 
  572         while (bucket->ub_cnt > 0)  {
  573                 bucket->ub_cnt--;
  574                 item = bucket->ub_bucket[bucket->ub_cnt];
  575 #ifdef INVARIANTS
  576                 bucket->ub_bucket[bucket->ub_cnt] = NULL;
  577                 KASSERT(item != NULL,
  578                     ("bucket_drain: botched ptr, item is NULL"));
  579 #endif
  580                 /*
  581                  * This is extremely inefficient.  The slab pointer was passed
  582                  * to uma_zfree_arg, but we lost it because the buckets don't
  583                  * hold them.  This will go away when free() gets a size passed
  584                  * to it.
  585                  */
  586                 if (mzone)
  587                         slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
  588                 uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0);
  589         }
  590 }
  591 
  592 /*
  593  * Drains the per cpu caches for a zone.
  594  *
  595  * NOTE: This may only be called while the zone is being turn down, and not
  596  * during normal operation.  This is necessary in order that we do not have
  597  * to migrate CPUs to drain the per-CPU caches.
  598  *
  599  * Arguments:
  600  *      zone     The zone to drain, must be unlocked.
  601  *
  602  * Returns:
  603  *      Nothing
  604  */
  605 static void
  606 cache_drain(uma_zone_t zone)
  607 {
  608         uma_cache_t cache;
  609         int cpu;
  610 
  611         /*
  612          * XXX: It is safe to not lock the per-CPU caches, because we're
  613          * tearing down the zone anyway.  I.e., there will be no further use
  614          * of the caches at this point.
  615          *
  616          * XXX: It would good to be able to assert that the zone is being
  617          * torn down to prevent improper use of cache_drain().
  618          *
  619          * XXX: We lock the zone before passing into bucket_cache_drain() as
  620          * it is used elsewhere.  Should the tear-down path be made special
  621          * there in some form?
  622          */
  623         for (cpu = 0; cpu <= mp_maxid; cpu++) {
  624                 if (CPU_ABSENT(cpu))
  625                         continue;
  626                 cache = &zone->uz_cpu[cpu];
  627                 bucket_drain(zone, cache->uc_allocbucket);
  628                 bucket_drain(zone, cache->uc_freebucket);
  629                 if (cache->uc_allocbucket != NULL)
  630                         bucket_free(cache->uc_allocbucket);
  631                 if (cache->uc_freebucket != NULL)
  632                         bucket_free(cache->uc_freebucket);
  633                 cache->uc_allocbucket = cache->uc_freebucket = NULL;
  634         }
  635         ZONE_LOCK(zone);
  636         bucket_cache_drain(zone);
  637         ZONE_UNLOCK(zone);
  638 }
  639 
  640 /*
  641  * Drain the cached buckets from a zone.  Expects a locked zone on entry.
  642  */
  643 static void
  644 bucket_cache_drain(uma_zone_t zone)
  645 {
  646         uma_bucket_t bucket;
  647 
  648         /*
  649          * Drain the bucket queues and free the buckets, we just keep two per
  650          * cpu (alloc/free).
  651          */
  652         while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
  653                 LIST_REMOVE(bucket, ub_link);
  654                 ZONE_UNLOCK(zone);
  655                 bucket_drain(zone, bucket);
  656                 bucket_free(bucket);
  657                 ZONE_LOCK(zone);
  658         }
  659 
  660         /* Now we do the free queue.. */
  661         while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
  662                 LIST_REMOVE(bucket, ub_link);
  663                 bucket_free(bucket);
  664         }
  665 }
  666 
  667 /*
  668  * Frees pages from a zone back to the system.  This is done on demand from
  669  * the pageout daemon.
  670  *
  671  * Arguments:
  672  *      zone  The zone to free pages from
  673  *       all  Should we drain all items?
  674  *
  675  * Returns:
  676  *      Nothing.
  677  */
  678 void
  679 zone_drain(uma_zone_t zone)
  680 {
  681         struct slabhead freeslabs = { 0 };
  682         uma_keg_t keg;
  683         uma_slab_t slab;
  684         uma_slab_t n;
  685         u_int8_t flags;
  686         u_int8_t *mem;
  687         int i;
  688 
  689         keg = zone->uz_keg;
  690 
  691         /*
  692          * We don't want to take pages from statically allocated zones at this
  693          * time
  694          */
  695         if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
  696                 return;
  697 
  698         ZONE_LOCK(zone);
  699 
  700 #ifdef UMA_DEBUG
  701         printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
  702 #endif
  703         bucket_cache_drain(zone);
  704         if (keg->uk_free == 0)
  705                 goto finished;
  706 
  707         slab = LIST_FIRST(&keg->uk_free_slab);
  708         while (slab) {
  709                 n = LIST_NEXT(slab, us_link);
  710 
  711                 /* We have no where to free these to */
  712                 if (slab->us_flags & UMA_SLAB_BOOT) {
  713                         slab = n;
  714                         continue;
  715                 }
  716 
  717                 LIST_REMOVE(slab, us_link);
  718                 keg->uk_pages -= keg->uk_ppera;
  719                 keg->uk_free -= keg->uk_ipers;
  720 
  721                 if (keg->uk_flags & UMA_ZONE_HASH)
  722                         UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
  723 
  724                 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
  725 
  726                 slab = n;
  727         }
  728 finished:
  729         ZONE_UNLOCK(zone);
  730 
  731         while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
  732                 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
  733                 if (keg->uk_fini)
  734                         for (i = 0; i < keg->uk_ipers; i++)
  735                                 keg->uk_fini(
  736                                     slab->us_data + (keg->uk_rsize * i),
  737                                     keg->uk_size);
  738                 flags = slab->us_flags;
  739                 mem = slab->us_data;
  740 
  741                 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
  742                     (keg->uk_flags & UMA_ZONE_REFCNT)) {
  743                         vm_object_t obj;
  744 
  745                         if (flags & UMA_SLAB_KMEM)
  746                                 obj = kmem_object;
  747                         else if (flags & UMA_SLAB_KERNEL)
  748                                 obj = kernel_object;
  749                         else
  750                                 obj = NULL;
  751                         for (i = 0; i < keg->uk_ppera; i++)
  752                                 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
  753                                     obj);
  754                 }
  755                 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
  756                         uma_zfree_internal(keg->uk_slabzone, slab, NULL,
  757                             SKIP_NONE, ZFREE_STATFREE);
  758 #ifdef UMA_DEBUG
  759                 printf("%s: Returning %d bytes.\n",
  760                     zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
  761 #endif
  762                 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
  763         }
  764 }
  765 
  766 /*
  767  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
  768  *
  769  * Arguments:
  770  *      zone  The zone to allocate slabs for
  771  *      wait  Shall we wait?
  772  *
  773  * Returns:
  774  *      The slab that was allocated or NULL if there is no memory and the
  775  *      caller specified M_NOWAIT.
  776  */
  777 static uma_slab_t
  778 slab_zalloc(uma_zone_t zone, int wait)
  779 {
  780         uma_slabrefcnt_t slabref;
  781         uma_slab_t slab;
  782         uma_keg_t keg;
  783         u_int8_t *mem;
  784         u_int8_t flags;
  785         int i;
  786 
  787         slab = NULL;
  788         keg = zone->uz_keg;
  789 
  790 #ifdef UMA_DEBUG
  791         printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
  792 #endif
  793         ZONE_UNLOCK(zone);
  794 
  795         if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
  796                 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
  797                 if (slab == NULL) {
  798                         ZONE_LOCK(zone);
  799                         return NULL;
  800                 }
  801         }
  802 
  803         /*
  804          * This reproduces the old vm_zone behavior of zero filling pages the
  805          * first time they are added to a zone.
  806          *
  807          * Malloced items are zeroed in uma_zalloc.
  808          */
  809 
  810         if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
  811                 wait |= M_ZERO;
  812         else
  813                 wait &= ~M_ZERO;
  814 
  815         mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
  816             &flags, wait);
  817         if (mem == NULL) {
  818                 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
  819                         uma_zfree_internal(keg->uk_slabzone, slab, NULL,
  820                             SKIP_NONE, ZFREE_STATFREE);
  821                 ZONE_LOCK(zone);
  822                 return (NULL);
  823         }
  824 
  825         /* Point the slab into the allocated memory */
  826         if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
  827                 slab = (uma_slab_t )(mem + keg->uk_pgoff);
  828 
  829         if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
  830             (keg->uk_flags & UMA_ZONE_REFCNT))
  831                 for (i = 0; i < keg->uk_ppera; i++)
  832                         vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
  833 
  834         slab->us_keg = keg;
  835         slab->us_data = mem;
  836         slab->us_freecount = keg->uk_ipers;
  837         slab->us_firstfree = 0;
  838         slab->us_flags = flags;
  839 
  840         if (keg->uk_flags & UMA_ZONE_REFCNT) {
  841                 slabref = (uma_slabrefcnt_t)slab;
  842                 for (i = 0; i < keg->uk_ipers; i++) {
  843                         slabref->us_freelist[i].us_refcnt = 0;
  844                         slabref->us_freelist[i].us_item = i+1;
  845                 }
  846         } else {
  847                 for (i = 0; i < keg->uk_ipers; i++)
  848                         slab->us_freelist[i].us_item = i+1;
  849         }
  850 
  851         if (keg->uk_init != NULL) {
  852                 for (i = 0; i < keg->uk_ipers; i++)
  853                         if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
  854                             keg->uk_size, wait) != 0)
  855                                 break;
  856                 if (i != keg->uk_ipers) {
  857                         if (keg->uk_fini != NULL) {
  858                                 for (i--; i > -1; i--)
  859                                         keg->uk_fini(slab->us_data +
  860                                             (keg->uk_rsize * i),
  861                                             keg->uk_size);
  862                         }
  863                         if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
  864                             (keg->uk_flags & UMA_ZONE_REFCNT)) {
  865                                 vm_object_t obj;
  866 
  867                                 if (flags & UMA_SLAB_KMEM)
  868                                         obj = kmem_object;
  869                                 else if (flags & UMA_SLAB_KERNEL)
  870                                         obj = kernel_object;
  871                                 else
  872                                         obj = NULL;
  873                                 for (i = 0; i < keg->uk_ppera; i++)
  874                                         vsetobj((vm_offset_t)mem +
  875                                             (i * PAGE_SIZE), obj);
  876                         }
  877                         if (keg->uk_flags & UMA_ZONE_OFFPAGE)
  878                                 uma_zfree_internal(keg->uk_slabzone, slab,
  879                                     NULL, SKIP_NONE, ZFREE_STATFREE);
  880                         keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
  881                             flags);
  882                         ZONE_LOCK(zone);
  883                         return (NULL);
  884                 }
  885         }
  886         ZONE_LOCK(zone);
  887 
  888         if (keg->uk_flags & UMA_ZONE_HASH)
  889                 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
  890 
  891         keg->uk_pages += keg->uk_ppera;
  892         keg->uk_free += keg->uk_ipers;
  893 
  894         return (slab);
  895 }
  896 
  897 /*
  898  * This function is intended to be used early on in place of page_alloc() so
  899  * that we may use the boot time page cache to satisfy allocations before
  900  * the VM is ready.
  901  */
  902 static void *
  903 startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
  904 {
  905         uma_keg_t keg;
  906         uma_slab_t tmps;
  907 
  908         keg = zone->uz_keg;
  909 
  910         /*
  911          * Check our small startup cache to see if it has pages remaining.
  912          */
  913         mtx_lock(&uma_boot_pages_mtx);
  914         if ((tmps = LIST_FIRST(&uma_boot_pages)) != NULL) {
  915                 LIST_REMOVE(tmps, us_link);
  916                 mtx_unlock(&uma_boot_pages_mtx);
  917                 *pflag = tmps->us_flags;
  918                 return (tmps->us_data);
  919         }
  920         mtx_unlock(&uma_boot_pages_mtx);
  921         if (booted == 0)
  922                 panic("UMA: Increase vm.boot_pages");
  923         /*
  924          * Now that we've booted reset these users to their real allocator.
  925          */
  926 #ifdef UMA_MD_SMALL_ALLOC
  927         keg->uk_allocf = uma_small_alloc;
  928 #else
  929         keg->uk_allocf = page_alloc;
  930 #endif
  931         return keg->uk_allocf(zone, bytes, pflag, wait);
  932 }
  933 
  934 /*
  935  * Allocates a number of pages from the system
  936  *
  937  * Arguments:
  938  *      zone  Unused
  939  *      bytes  The number of bytes requested
  940  *      wait  Shall we wait?
  941  *
  942  * Returns:
  943  *      A pointer to the alloced memory or possibly
  944  *      NULL if M_NOWAIT is set.
  945  */
  946 static void *
  947 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
  948 {
  949         void *p;        /* Returned page */
  950 
  951         *pflag = UMA_SLAB_KMEM;
  952         p = (void *) kmem_malloc(kmem_map, bytes, wait);
  953 
  954         return (p);
  955 }
  956 
  957 /*
  958  * Allocates a number of pages from within an object
  959  *
  960  * Arguments:
  961  *      zone   Unused
  962  *      bytes  The number of bytes requested
  963  *      wait   Shall we wait?
  964  *
  965  * Returns:
  966  *      A pointer to the alloced memory or possibly
  967  *      NULL if M_NOWAIT is set.
  968  */
  969 static void *
  970 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
  971 {
  972         vm_object_t object;
  973         vm_offset_t retkva, zkva;
  974         vm_page_t p;
  975         int pages, startpages;
  976 
  977         object = zone->uz_keg->uk_obj;
  978         retkva = 0;
  979 
  980         /*
  981          * This looks a little weird since we're getting one page at a time.
  982          */
  983         VM_OBJECT_LOCK(object);
  984         p = TAILQ_LAST(&object->memq, pglist);
  985         pages = p != NULL ? p->pindex + 1 : 0;
  986         startpages = pages;
  987         zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
  988         for (; bytes > 0; bytes -= PAGE_SIZE) {
  989                 p = vm_page_alloc(object, pages,
  990                     VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
  991                 if (p == NULL) {
  992                         if (pages != startpages)
  993                                 pmap_qremove(retkva, pages - startpages);
  994                         while (pages != startpages) {
  995                                 pages--;
  996                                 p = TAILQ_LAST(&object->memq, pglist);
  997                                 vm_page_lock_queues();
  998                                 vm_page_unwire(p, 0);
  999                                 vm_page_free(p);
 1000                                 vm_page_unlock_queues();
 1001                         }
 1002                         retkva = 0;
 1003                         goto done;
 1004                 }
 1005                 pmap_qenter(zkva, &p, 1);
 1006                 if (retkva == 0)
 1007                         retkva = zkva;
 1008                 zkva += PAGE_SIZE;
 1009                 pages += 1;
 1010         }
 1011 done:
 1012         VM_OBJECT_UNLOCK(object);
 1013         *flags = UMA_SLAB_PRIV;
 1014 
 1015         return ((void *)retkva);
 1016 }
 1017 
 1018 /*
 1019  * Frees a number of pages to the system
 1020  *
 1021  * Arguments:
 1022  *      mem   A pointer to the memory to be freed
 1023  *      size  The size of the memory being freed
 1024  *      flags The original p->us_flags field
 1025  *
 1026  * Returns:
 1027  *      Nothing
 1028  */
 1029 static void
 1030 page_free(void *mem, int size, u_int8_t flags)
 1031 {
 1032         vm_map_t map;
 1033 
 1034         if (flags & UMA_SLAB_KMEM)
 1035                 map = kmem_map;
 1036         else if (flags & UMA_SLAB_KERNEL)
 1037                 map = kernel_map;
 1038         else
 1039                 panic("UMA: page_free used with invalid flags %d", flags);
 1040 
 1041         kmem_free(map, (vm_offset_t)mem, size);
 1042 }
 1043 
 1044 /*
 1045  * Zero fill initializer
 1046  *
 1047  * Arguments/Returns follow uma_init specifications
 1048  */
 1049 static int
 1050 zero_init(void *mem, int size, int flags)
 1051 {
 1052         bzero(mem, size);
 1053         return (0);
 1054 }
 1055 
 1056 /*
 1057  * Finish creating a small uma zone.  This calculates ipers, and the zone size.
 1058  *
 1059  * Arguments
 1060  *      zone  The zone we should initialize
 1061  *
 1062  * Returns
 1063  *      Nothing
 1064  */
 1065 static void
 1066 zone_small_init(uma_zone_t zone)
 1067 {
 1068         uma_keg_t keg;
 1069         u_int rsize;
 1070         u_int memused;
 1071         u_int wastedspace;
 1072         u_int shsize;
 1073 
 1074         keg = zone->uz_keg;
 1075         KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
 1076         rsize = keg->uk_size;
 1077 
 1078         if (rsize < UMA_SMALLEST_UNIT)
 1079                 rsize = UMA_SMALLEST_UNIT;
 1080         if (rsize & keg->uk_align)
 1081                 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
 1082 
 1083         keg->uk_rsize = rsize;
 1084         keg->uk_ppera = 1;
 1085 
 1086         if (keg->uk_flags & UMA_ZONE_REFCNT) {
 1087                 rsize += UMA_FRITMREF_SZ;       /* linkage & refcnt */
 1088                 shsize = sizeof(struct uma_slab_refcnt);
 1089         } else {
 1090                 rsize += UMA_FRITM_SZ;  /* Account for linkage */
 1091                 shsize = sizeof(struct uma_slab);
 1092         }
 1093 
 1094         keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
 1095         KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
 1096         memused = keg->uk_ipers * rsize + shsize;
 1097         wastedspace = UMA_SLAB_SIZE - memused;
 1098 
 1099         /*
 1100          * We can't do OFFPAGE if we're internal or if we've been
 1101          * asked to not go to the VM for buckets.  If we do this we
 1102          * may end up going to the VM (kmem_map) for slabs which we
 1103          * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
 1104          * result of UMA_ZONE_VM, which clearly forbids it.
 1105          */
 1106         if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
 1107             (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
 1108                 return;
 1109 
 1110         if ((wastedspace >= UMA_MAX_WASTE) &&
 1111             (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
 1112                 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
 1113                 KASSERT(keg->uk_ipers <= 255,
 1114                     ("zone_small_init: keg->uk_ipers too high!"));
 1115 #ifdef UMA_DEBUG
 1116                 printf("UMA decided we need offpage slab headers for "
 1117                     "zone: %s, calculated wastedspace = %d, "
 1118                     "maximum wasted space allowed = %d, "
 1119                     "calculated ipers = %d, "
 1120                     "new wasted space = %d\n", zone->uz_name, wastedspace,
 1121                     UMA_MAX_WASTE, keg->uk_ipers,
 1122                     UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
 1123 #endif
 1124                 keg->uk_flags |= UMA_ZONE_OFFPAGE;
 1125                 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
 1126                         keg->uk_flags |= UMA_ZONE_HASH;
 1127         }
 1128 }
 1129 
 1130 /*
 1131  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
 1132  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 1133  * more complicated.
 1134  *
 1135  * Arguments
 1136  *      zone  The zone we should initialize
 1137  *
 1138  * Returns
 1139  *      Nothing
 1140  */
 1141 static void
 1142 zone_large_init(uma_zone_t zone)
 1143 {
 1144         uma_keg_t keg;
 1145         int pages;
 1146 
 1147         keg = zone->uz_keg;
 1148 
 1149         KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
 1150         KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
 1151             ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
 1152 
 1153         pages = keg->uk_size / UMA_SLAB_SIZE;
 1154 
 1155         /* Account for remainder */
 1156         if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
 1157                 pages++;
 1158 
 1159         keg->uk_ppera = pages;
 1160         keg->uk_ipers = 1;
 1161 
 1162         keg->uk_flags |= UMA_ZONE_OFFPAGE;
 1163         if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
 1164                 keg->uk_flags |= UMA_ZONE_HASH;
 1165 
 1166         keg->uk_rsize = keg->uk_size;
 1167 }
 1168 
 1169 /*
 1170  * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
 1171  * the keg onto the global keg list.
 1172  *
 1173  * Arguments/Returns follow uma_ctor specifications
 1174  *      udata  Actually uma_kctor_args
 1175  */
 1176 static int
 1177 keg_ctor(void *mem, int size, void *udata, int flags)
 1178 {
 1179         struct uma_kctor_args *arg = udata;
 1180         uma_keg_t keg = mem;
 1181         uma_zone_t zone;
 1182 
 1183         bzero(keg, size);
 1184         keg->uk_size = arg->size;
 1185         keg->uk_init = arg->uminit;
 1186         keg->uk_fini = arg->fini;
 1187         keg->uk_align = arg->align;
 1188         keg->uk_free = 0;
 1189         keg->uk_pages = 0;
 1190         keg->uk_flags = arg->flags;
 1191         keg->uk_allocf = page_alloc;
 1192         keg->uk_freef = page_free;
 1193         keg->uk_recurse = 0;
 1194         keg->uk_slabzone = NULL;
 1195 
 1196         /*
 1197          * The master zone is passed to us at keg-creation time.
 1198          */
 1199         zone = arg->zone;
 1200         zone->uz_keg = keg;
 1201 
 1202         if (arg->flags & UMA_ZONE_VM)
 1203                 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
 1204 
 1205         if (arg->flags & UMA_ZONE_ZINIT)
 1206                 keg->uk_init = zero_init;
 1207 
 1208         /*
 1209          * The +UMA_FRITM_SZ added to uk_size is to account for the
 1210          * linkage that is added to the size in zone_small_init().  If
 1211          * we don't account for this here then we may end up in
 1212          * zone_small_init() with a calculated 'ipers' of 0.
 1213          */
 1214         if (keg->uk_flags & UMA_ZONE_REFCNT) {
 1215                 if ((keg->uk_size+UMA_FRITMREF_SZ) >
 1216                     (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
 1217                         zone_large_init(zone);
 1218                 else
 1219                         zone_small_init(zone);
 1220         } else {
 1221                 if ((keg->uk_size+UMA_FRITM_SZ) >
 1222                     (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
 1223                         zone_large_init(zone);
 1224                 else
 1225                         zone_small_init(zone);
 1226         }
 1227 
 1228         if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 1229                 if (keg->uk_flags & UMA_ZONE_REFCNT)
 1230                         keg->uk_slabzone = slabrefzone;
 1231                 else
 1232                         keg->uk_slabzone = slabzone;
 1233         }
 1234 
 1235         /*
 1236          * If we haven't booted yet we need allocations to go through the
 1237          * startup cache until the vm is ready.
 1238          */
 1239         if (keg->uk_ppera == 1) {
 1240 #ifdef UMA_MD_SMALL_ALLOC
 1241                 keg->uk_allocf = uma_small_alloc;
 1242                 keg->uk_freef = uma_small_free;
 1243 #endif
 1244                 if (booted == 0)
 1245                         keg->uk_allocf = startup_alloc;
 1246         }
 1247 
 1248         /*
 1249          * Initialize keg's lock (shared among zones) through
 1250          * Master zone
 1251          */
 1252         zone->uz_lock = &keg->uk_lock;
 1253         if (arg->flags & UMA_ZONE_MTXCLASS)
 1254                 ZONE_LOCK_INIT(zone, 1);
 1255         else
 1256                 ZONE_LOCK_INIT(zone, 0);
 1257 
 1258         /*
 1259          * If we're putting the slab header in the actual page we need to
 1260          * figure out where in each page it goes.  This calculates a right
 1261          * justified offset into the memory on an ALIGN_PTR boundary.
 1262          */
 1263         if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
 1264                 u_int totsize;
 1265 
 1266                 /* Size of the slab struct and free list */
 1267                 if (keg->uk_flags & UMA_ZONE_REFCNT)
 1268                         totsize = sizeof(struct uma_slab_refcnt) +
 1269                             keg->uk_ipers * UMA_FRITMREF_SZ;
 1270                 else
 1271                         totsize = sizeof(struct uma_slab) +
 1272                             keg->uk_ipers * UMA_FRITM_SZ;
 1273 
 1274                 if (totsize & UMA_ALIGN_PTR)
 1275                         totsize = (totsize & ~UMA_ALIGN_PTR) +
 1276                             (UMA_ALIGN_PTR + 1);
 1277                 keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
 1278 
 1279                 if (keg->uk_flags & UMA_ZONE_REFCNT)
 1280                         totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
 1281                             + keg->uk_ipers * UMA_FRITMREF_SZ;
 1282                 else
 1283                         totsize = keg->uk_pgoff + sizeof(struct uma_slab)
 1284                             + keg->uk_ipers * UMA_FRITM_SZ;
 1285 
 1286                 /*
 1287                  * The only way the following is possible is if with our
 1288                  * UMA_ALIGN_PTR adjustments we are now bigger than
 1289                  * UMA_SLAB_SIZE.  I haven't checked whether this is
 1290                  * mathematically possible for all cases, so we make
 1291                  * sure here anyway.
 1292                  */
 1293                 if (totsize > UMA_SLAB_SIZE) {
 1294                         printf("zone %s ipers %d rsize %d size %d\n",
 1295                             zone->uz_name, keg->uk_ipers, keg->uk_rsize,
 1296                             keg->uk_size);
 1297                         panic("UMA slab won't fit.");
 1298                 }
 1299         }
 1300 
 1301         if (keg->uk_flags & UMA_ZONE_HASH)
 1302                 hash_alloc(&keg->uk_hash);
 1303 
 1304 #ifdef UMA_DEBUG
 1305         printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
 1306             zone->uz_name, zone,
 1307             keg->uk_size, keg->uk_ipers,
 1308             keg->uk_ppera, keg->uk_pgoff);
 1309 #endif
 1310 
 1311         LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
 1312 
 1313         mtx_lock(&uma_mtx);
 1314         LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
 1315         mtx_unlock(&uma_mtx);
 1316         return (0);
 1317 }
 1318 
 1319 /*
 1320  * Zone header ctor.  This initializes all fields, locks, etc.
 1321  *
 1322  * Arguments/Returns follow uma_ctor specifications
 1323  *      udata  Actually uma_zctor_args
 1324  */
 1325 
 1326 static int
 1327 zone_ctor(void *mem, int size, void *udata, int flags)
 1328 {
 1329         struct uma_zctor_args *arg = udata;
 1330         uma_zone_t zone = mem;
 1331         uma_zone_t z;
 1332         uma_keg_t keg;
 1333 
 1334         bzero(zone, size);
 1335         zone->uz_name = arg->name;
 1336         zone->uz_ctor = arg->ctor;
 1337         zone->uz_dtor = arg->dtor;
 1338         zone->uz_init = NULL;
 1339         zone->uz_fini = NULL;
 1340         zone->uz_allocs = 0;
 1341         zone->uz_frees = 0;
 1342         zone->uz_fails = 0;
 1343         zone->uz_fills = zone->uz_count = 0;
 1344 
 1345         if (arg->flags & UMA_ZONE_SECONDARY) {
 1346                 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
 1347                 keg = arg->keg;
 1348                 zone->uz_keg = keg;
 1349                 zone->uz_init = arg->uminit;
 1350                 zone->uz_fini = arg->fini;
 1351                 zone->uz_lock = &keg->uk_lock;
 1352                 mtx_lock(&uma_mtx);
 1353                 ZONE_LOCK(zone);
 1354                 keg->uk_flags |= UMA_ZONE_SECONDARY;
 1355                 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
 1356                         if (LIST_NEXT(z, uz_link) == NULL) {
 1357                                 LIST_INSERT_AFTER(z, zone, uz_link);
 1358                                 break;
 1359                         }
 1360                 }
 1361                 ZONE_UNLOCK(zone);
 1362                 mtx_unlock(&uma_mtx);
 1363         } else if (arg->keg == NULL) {
 1364                 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
 1365                     arg->align, arg->flags) == NULL)
 1366                         return (ENOMEM);
 1367         } else {
 1368                 struct uma_kctor_args karg;
 1369                 int error;
 1370 
 1371                 /* We should only be here from uma_startup() */
 1372                 karg.size = arg->size;
 1373                 karg.uminit = arg->uminit;
 1374                 karg.fini = arg->fini;
 1375                 karg.align = arg->align;
 1376                 karg.flags = arg->flags;
 1377                 karg.zone = zone;
 1378                 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
 1379                     flags);
 1380                 if (error)
 1381                         return (error);
 1382         }
 1383         keg = zone->uz_keg;
 1384         zone->uz_lock = &keg->uk_lock;
 1385 
 1386         /*
 1387          * Some internal zones don't have room allocated for the per cpu
 1388          * caches.  If we're internal, bail out here.
 1389          */
 1390         if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
 1391                 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
 1392                     ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
 1393                 return (0);
 1394         }
 1395 
 1396         if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
 1397                 zone->uz_count = BUCKET_MAX;
 1398         else if (keg->uk_ipers <= BUCKET_MAX)
 1399                 zone->uz_count = keg->uk_ipers;
 1400         else
 1401                 zone->uz_count = BUCKET_MAX;
 1402         return (0);
 1403 }
 1404 
 1405 /*
 1406  * Keg header dtor.  This frees all data, destroys locks, frees the hash
 1407  * table and removes the keg from the global list.
 1408  *
 1409  * Arguments/Returns follow uma_dtor specifications
 1410  *      udata  unused
 1411  */
 1412 static void
 1413 keg_dtor(void *arg, int size, void *udata)
 1414 {
 1415         uma_keg_t keg;
 1416 
 1417         keg = (uma_keg_t)arg;
 1418         mtx_lock(&keg->uk_lock);
 1419         if (keg->uk_free != 0) {
 1420                 printf("Freed UMA keg was not empty (%d items). "
 1421                     " Lost %d pages of memory.\n",
 1422                     keg->uk_free, keg->uk_pages);
 1423         }
 1424         mtx_unlock(&keg->uk_lock);
 1425 
 1426         if (keg->uk_flags & UMA_ZONE_HASH)
 1427                 hash_free(&keg->uk_hash);
 1428 
 1429         mtx_destroy(&keg->uk_lock);
 1430 }
 1431 
 1432 /*
 1433  * Zone header dtor.
 1434  *
 1435  * Arguments/Returns follow uma_dtor specifications
 1436  *      udata  unused
 1437  */
 1438 static void
 1439 zone_dtor(void *arg, int size, void *udata)
 1440 {
 1441         uma_zone_t zone;
 1442         uma_keg_t keg;
 1443 
 1444         zone = (uma_zone_t)arg;
 1445         keg = zone->uz_keg;
 1446 
 1447         if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
 1448                 cache_drain(zone);
 1449 
 1450         mtx_lock(&uma_mtx);
 1451         zone_drain(zone);
 1452         if (keg->uk_flags & UMA_ZONE_SECONDARY) {
 1453                 LIST_REMOVE(zone, uz_link);
 1454                 /*
 1455                  * XXX there are some races here where
 1456                  * the zone can be drained but zone lock
 1457                  * released and then refilled before we
 1458                  * remove it... we dont care for now
 1459                  */
 1460                 ZONE_LOCK(zone);
 1461                 if (LIST_EMPTY(&keg->uk_zones))
 1462                         keg->uk_flags &= ~UMA_ZONE_SECONDARY;
 1463                 ZONE_UNLOCK(zone);
 1464                 mtx_unlock(&uma_mtx);
 1465         } else {
 1466                 LIST_REMOVE(keg, uk_link);
 1467                 LIST_REMOVE(zone, uz_link);
 1468                 mtx_unlock(&uma_mtx);
 1469                 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE,
 1470                     ZFREE_STATFREE);
 1471         }
 1472         zone->uz_keg = NULL;
 1473 }
 1474 
 1475 /*
 1476  * Traverses every zone in the system and calls a callback
 1477  *
 1478  * Arguments:
 1479  *      zfunc  A pointer to a function which accepts a zone
 1480  *              as an argument.
 1481  *
 1482  * Returns:
 1483  *      Nothing
 1484  */
 1485 static void
 1486 zone_foreach(void (*zfunc)(uma_zone_t))
 1487 {
 1488         uma_keg_t keg;
 1489         uma_zone_t zone;
 1490 
 1491         mtx_lock(&uma_mtx);
 1492         LIST_FOREACH(keg, &uma_kegs, uk_link) {
 1493                 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
 1494                         zfunc(zone);
 1495         }
 1496         mtx_unlock(&uma_mtx);
 1497 }
 1498 
 1499 /* Public functions */
 1500 /* See uma.h */
 1501 void
 1502 uma_startup(void *bootmem, int boot_pages)
 1503 {
 1504         struct uma_zctor_args args;
 1505         uma_slab_t slab;
 1506         u_int slabsize;
 1507         u_int objsize, totsize, wsize;
 1508         int i;
 1509 
 1510 #ifdef UMA_DEBUG
 1511         printf("Creating uma keg headers zone and keg.\n");
 1512 #endif
 1513         mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
 1514 
 1515         /*
 1516          * Figure out the maximum number of items-per-slab we'll have if
 1517          * we're using the OFFPAGE slab header to track free items, given
 1518          * all possible object sizes and the maximum desired wastage
 1519          * (UMA_MAX_WASTE).
 1520          *
 1521          * We iterate until we find an object size for
 1522          * which the calculated wastage in zone_small_init() will be
 1523          * enough to warrant OFFPAGE.  Since wastedspace versus objsize
 1524          * is an overall increasing see-saw function, we find the smallest
 1525          * objsize such that the wastage is always acceptable for objects
 1526          * with that objsize or smaller.  Since a smaller objsize always
 1527          * generates a larger possible uma_max_ipers, we use this computed
 1528          * objsize to calculate the largest ipers possible.  Since the
 1529          * ipers calculated for OFFPAGE slab headers is always larger than
 1530          * the ipers initially calculated in zone_small_init(), we use
 1531          * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
 1532          * obtain the maximum ipers possible for offpage slab headers.
 1533          *
 1534          * It should be noted that ipers versus objsize is an inversly
 1535          * proportional function which drops off rather quickly so as
 1536          * long as our UMA_MAX_WASTE is such that the objsize we calculate
 1537          * falls into the portion of the inverse relation AFTER the steep
 1538          * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
 1539          *
 1540          * Note that we have 8-bits (1 byte) to use as a freelist index
 1541          * inside the actual slab header itself and this is enough to
 1542          * accomodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
 1543          * object with offpage slab header would have ipers =
 1544          * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
 1545          * 1 greater than what our byte-integer freelist index can
 1546          * accomodate, but we know that this situation never occurs as
 1547          * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
 1548          * that we need to go to offpage slab headers.  Or, if we do,
 1549          * then we trap that condition below and panic in the INVARIANTS case.
 1550          */
 1551         wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
 1552         totsize = wsize;
 1553         objsize = UMA_SMALLEST_UNIT;
 1554         while (totsize >= wsize) {
 1555                 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
 1556                     (objsize + UMA_FRITM_SZ);
 1557                 totsize *= (UMA_FRITM_SZ + objsize);
 1558                 objsize++;
 1559         }
 1560         if (objsize > UMA_SMALLEST_UNIT)
 1561                 objsize--;
 1562         uma_max_ipers = UMA_SLAB_SIZE / objsize;
 1563 
 1564         wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
 1565         totsize = wsize;
 1566         objsize = UMA_SMALLEST_UNIT;
 1567         while (totsize >= wsize) {
 1568                 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
 1569                     (objsize + UMA_FRITMREF_SZ);
 1570                 totsize *= (UMA_FRITMREF_SZ + objsize);
 1571                 objsize++;
 1572         }
 1573         if (objsize > UMA_SMALLEST_UNIT)
 1574                 objsize--;
 1575         uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;
 1576 
 1577         KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
 1578             ("uma_startup: calculated uma_max_ipers values too large!"));
 1579 
 1580 #ifdef UMA_DEBUG
 1581         printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
 1582         printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
 1583             uma_max_ipers_ref);
 1584 #endif
 1585 
 1586         /* "manually" create the initial zone */
 1587         args.name = "UMA Kegs";
 1588         args.size = sizeof(struct uma_keg);
 1589         args.ctor = keg_ctor;
 1590         args.dtor = keg_dtor;
 1591         args.uminit = zero_init;
 1592         args.fini = NULL;
 1593         args.keg = &masterkeg;
 1594         args.align = 32 - 1;
 1595         args.flags = UMA_ZFLAG_INTERNAL;
 1596         /* The initial zone has no Per cpu queues so it's smaller */
 1597         zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
 1598 
 1599 #ifdef UMA_DEBUG
 1600         printf("Filling boot free list.\n");
 1601 #endif
 1602         for (i = 0; i < boot_pages; i++) {
 1603                 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
 1604                 slab->us_data = (u_int8_t *)slab;
 1605                 slab->us_flags = UMA_SLAB_BOOT;
 1606                 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
 1607         }
 1608         mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
 1609 
 1610 #ifdef UMA_DEBUG
 1611         printf("Creating uma zone headers zone and keg.\n");
 1612 #endif
 1613         args.name = "UMA Zones";
 1614         args.size = sizeof(struct uma_zone) +
 1615             (sizeof(struct uma_cache) * (mp_maxid + 1));
 1616         args.ctor = zone_ctor;
 1617         args.dtor = zone_dtor;
 1618         args.uminit = zero_init;
 1619         args.fini = NULL;
 1620         args.keg = NULL;
 1621         args.align = 32 - 1;
 1622         args.flags = UMA_ZFLAG_INTERNAL;
 1623         /* The initial zone has no Per cpu queues so it's smaller */
 1624         zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
 1625 
 1626 #ifdef UMA_DEBUG
 1627         printf("Initializing pcpu cache locks.\n");
 1628 #endif
 1629 #ifdef UMA_DEBUG
 1630         printf("Creating slab and hash zones.\n");
 1631 #endif
 1632 
 1633         /*
 1634          * This is the max number of free list items we'll have with
 1635          * offpage slabs.
 1636          */
 1637         slabsize = uma_max_ipers * UMA_FRITM_SZ;
 1638         slabsize += sizeof(struct uma_slab);
 1639 
 1640         /* Now make a zone for slab headers */
 1641         slabzone = uma_zcreate("UMA Slabs",
 1642                                 slabsize,
 1643                                 NULL, NULL, NULL, NULL,
 1644                                 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 1645 
 1646         /*
 1647          * We also create a zone for the bigger slabs with reference
 1648          * counts in them, to accomodate UMA_ZONE_REFCNT zones.
 1649          */
 1650         slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
 1651         slabsize += sizeof(struct uma_slab_refcnt);
 1652         slabrefzone = uma_zcreate("UMA RCntSlabs",
 1653                                   slabsize,
 1654                                   NULL, NULL, NULL, NULL,
 1655                                   UMA_ALIGN_PTR,
 1656                                   UMA_ZFLAG_INTERNAL);
 1657 
 1658         hashzone = uma_zcreate("UMA Hash",
 1659             sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
 1660             NULL, NULL, NULL, NULL,
 1661             UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 1662 
 1663         bucket_init();
 1664 
 1665 #ifdef UMA_MD_SMALL_ALLOC
 1666         booted = 1;
 1667 #endif
 1668 
 1669 #ifdef UMA_DEBUG
 1670         printf("UMA startup complete.\n");
 1671 #endif
 1672 }
 1673 
 1674 /* see uma.h */
 1675 void
 1676 uma_startup2(void)
 1677 {
 1678         booted = 1;
 1679         bucket_enable();
 1680 #ifdef UMA_DEBUG
 1681         printf("UMA startup2 complete.\n");
 1682 #endif
 1683 }
 1684 
 1685 /*
 1686  * Initialize our callout handle
 1687  *
 1688  */
 1689 
 1690 static void
 1691 uma_startup3(void)
 1692 {
 1693 #ifdef UMA_DEBUG
 1694         printf("Starting callout.\n");
 1695 #endif
 1696         callout_init(&uma_callout, CALLOUT_MPSAFE);
 1697         callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 1698 #ifdef UMA_DEBUG
 1699         printf("UMA startup3 complete.\n");
 1700 #endif
 1701 }
 1702 
 1703 static uma_zone_t
 1704 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
 1705                 int align, u_int32_t flags)
 1706 {
 1707         struct uma_kctor_args args;
 1708 
 1709         args.size = size;
 1710         args.uminit = uminit;
 1711         args.fini = fini;
 1712         args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
 1713         args.flags = flags;
 1714         args.zone = zone;
 1715         return (uma_zalloc_internal(kegs, &args, M_WAITOK));
 1716 }
 1717 
 1718 /* See uma.h */
 1719 void
 1720 uma_set_align(int align)
 1721 {
 1722 
 1723         if (align != UMA_ALIGN_CACHE)
 1724                 uma_align_cache = align;
 1725 }
 1726 
 1727 /* See uma.h */
 1728 uma_zone_t
 1729 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
 1730                 uma_init uminit, uma_fini fini, int align, u_int32_t flags)
 1731 
 1732 {
 1733         struct uma_zctor_args args;
 1734 
 1735         /* This stuff is essential for the zone ctor */
 1736         args.name = name;
 1737         args.size = size;
 1738         args.ctor = ctor;
 1739         args.dtor = dtor;
 1740         args.uminit = uminit;
 1741         args.fini = fini;
 1742         args.align = align;
 1743         args.flags = flags;
 1744         args.keg = NULL;
 1745 
 1746         return (uma_zalloc_internal(zones, &args, M_WAITOK));
 1747 }
 1748 
 1749 /* See uma.h */
 1750 uma_zone_t
 1751 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
 1752                     uma_init zinit, uma_fini zfini, uma_zone_t master)
 1753 {
 1754         struct uma_zctor_args args;
 1755 
 1756         args.name = name;
 1757         args.size = master->uz_keg->uk_size;
 1758         args.ctor = ctor;
 1759         args.dtor = dtor;
 1760         args.uminit = zinit;
 1761         args.fini = zfini;
 1762         args.align = master->uz_keg->uk_align;
 1763         args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
 1764         args.keg = master->uz_keg;
 1765 
 1766         return (uma_zalloc_internal(zones, &args, M_WAITOK));
 1767 }
 1768 
 1769 /* See uma.h */
 1770 void
 1771 uma_zdestroy(uma_zone_t zone)
 1772 {
 1773 
 1774         uma_zfree_internal(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
 1775 }
 1776 
 1777 /* See uma.h */
      /*
       * Allocate one item from a zone, preferring the per-CPU cache.
       *
       * The current CPU's alloc bucket is tried first; if it is empty it is
       * swapped with the free bucket when that one holds items.  When both are
       * exhausted the zone lock is taken, the per-CPU statistics are folded
       * into the zone, and a bucket is taken from the zone's full-bucket list.
       * If that list is empty, uma_zalloc_bucket() is asked to fill a bucket
       * and the whole allocation is restarted; if no bucket could be filled the
       * item is obtained directly through uma_zalloc_internal().
       *
       * Arguments:
       *      zone   The zone to allocate from.
       *      udata  Passed through to the zone's ctor.
       *      flags  Allocation flags.  M_WAITOK and M_ZERO are examined here;
       *             the full set is passed to the ctor and the slower paths.
       *
       * Returns:
       *      The item, or NULL if the zone's ctor fails on the cached item or
       *      the uma_zalloc_internal() fallback returns NULL.
       */
 1778 void *
 1779 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 1780 {
 1781         void *item;
 1782         uma_cache_t cache;
 1783         uma_bucket_t bucket;
 1784         int cpu;
 1785 
 1786         /* This is the fast path allocation */
 1787 #ifdef UMA_DEBUG_ALLOC_1
 1788         printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
 1789 #endif
 1790         CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
 1791             zone->uz_name, flags);
 1792 
 1793         if (flags & M_WAITOK) {
 1794                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 1795                     "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
 1796         }
 1797 
 1798         /*
 1799          * If possible, allocate from the per-CPU cache.  There are two
 1800          * requirements for safe access to the per-CPU cache: (1) the thread
 1801          * accessing the cache must not be preempted or yield during access,
 1802          * and (2) the thread must not migrate CPUs without switching which
 1803          * cache it accesses.  We rely on a critical section to prevent
 1804          * preemption and migration.  We release the critical section in
 1805          * order to acquire the zone mutex if we are unable to allocate from
 1806          * the current cache; when we re-acquire the critical section, we
 1807          * must detect and handle migration if it has occurred.
 1808          */
 1809 zalloc_restart:
 1810         critical_enter();
 1811         cpu = curcpu;
 1812         cache = &zone->uz_cpu[cpu];
 1813 
              /*
               * Every jump to zalloc_start is made inside a critical section
               * with "cache" referring to the current CPU's cache.
               */
 1814 zalloc_start:
 1815         bucket = cache->uc_allocbucket;
 1816 
 1817         if (bucket) {
 1818                 if (bucket->ub_cnt > 0) {
 1819                         bucket->ub_cnt--;
 1820                         item = bucket->ub_bucket[bucket->ub_cnt];
 1821 #ifdef INVARIANTS
                              /*
                               * Clear the slot we just emptied; uma_zfree_arg()
                               * asserts that a slot is NULL before storing
                               * into it.
                               */
 1822                         bucket->ub_bucket[bucket->ub_cnt] = NULL;
 1823 #endif
 1824                         KASSERT(item != NULL,
 1825                             ("uma_zalloc: Bucket pointer mangled."));
 1826                         cache->uc_allocs++;
 1827                         critical_exit();
 1828 #ifdef INVARIANTS
 1829                         ZONE_LOCK(zone);
 1830                         uma_dbg_alloc(zone, NULL, item);
 1831                         ZONE_UNLOCK(zone);
 1832 #endif
 1833                         if (zone->uz_ctor != NULL) {
 1834                                 if (zone->uz_ctor(item, zone->uz_keg->uk_size,
 1835                                     udata, flags) != 0) {
                                              /*
                                               * The ctor failed: hand the item
                                               * back to the zone without
                                               * running the dtor, and count
                                               * both a failure and a free.
                                               */
 1836                                         uma_zfree_internal(zone, item, udata,
 1837                                             SKIP_DTOR, ZFREE_STATFAIL |
 1838                                             ZFREE_STATFREE);
 1839                                         return (NULL);
 1840                                 }
 1841                         }
                              /* Zeroing happens after the ctor has run. */
 1842                         if (flags & M_ZERO)
 1843                                 bzero(item, zone->uz_keg->uk_size);
 1844                         return (item);
 1845                 } else if (cache->uc_freebucket) {
 1846                         /*
 1847                          * We have run out of items in our allocbucket.
 1848                          * See if we can switch with our free bucket.
 1849                          */
 1850                         if (cache->uc_freebucket->ub_cnt > 0) {
 1851 #ifdef UMA_DEBUG_ALLOC
 1852                                 printf("uma_zalloc: Swapping empty with"
 1853                                     " alloc.\n");
 1854 #endif
 1855                                 bucket = cache->uc_freebucket;
 1856                                 cache->uc_freebucket = cache->uc_allocbucket;
 1857                                 cache->uc_allocbucket = bucket;
 1858 
 1859                                 goto zalloc_start;
 1860                         }
 1861                 }
 1862         }
 1863         /*
 1864          * Attempt to retrieve the item from the per-CPU cache has failed, so
 1865          * we must go back to the zone.  This requires the zone lock, so we
 1866          * must drop the critical section, then re-acquire it when we go back
 1867          * to the cache.  Since the critical section is released, we may be
 1868          * preempted or migrate.  As such, make sure not to maintain any
 1869          * thread-local state specific to the cache from prior to releasing
 1870          * the critical section.
 1871          */
 1872         critical_exit();
 1873         ZONE_LOCK(zone);
 1874         critical_enter();
 1875         cpu = curcpu;
 1876         cache = &zone->uz_cpu[cpu];
              /*
               * Re-check the (possibly different) per-CPU cache: if it holds
               * items now, drop the zone lock and retry the fast path.
               */
 1877         bucket = cache->uc_allocbucket;
 1878         if (bucket != NULL) {
 1879                 if (bucket->ub_cnt > 0) {
 1880                         ZONE_UNLOCK(zone);
 1881                         goto zalloc_start;
 1882                 }
 1883                 bucket = cache->uc_freebucket;
 1884                 if (bucket != NULL && bucket->ub_cnt > 0) {
 1885                         ZONE_UNLOCK(zone);
 1886                         goto zalloc_start;
 1887                 }
 1888         }
 1889 
 1890         /* Since we have locked the zone we may as well send back our stats */
 1891         zone->uz_allocs += cache->uc_allocs;
 1892         cache->uc_allocs = 0;
 1893         zone->uz_frees += cache->uc_frees;
 1894         cache->uc_frees = 0;
 1895 
 1896         /* Our old one is now a free bucket */
 1897         if (cache->uc_allocbucket) {
 1898                 KASSERT(cache->uc_allocbucket->ub_cnt == 0,
 1899                     ("uma_zalloc_arg: Freeing a non free bucket."));
 1900                 LIST_INSERT_HEAD(&zone->uz_free_bucket,
 1901                     cache->uc_allocbucket, ub_link);
 1902                 cache->uc_allocbucket = NULL;
 1903         }
 1904 
 1905         /* Check the free list for a new alloc bucket */
 1906         if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
 1907                 KASSERT(bucket->ub_cnt != 0,
 1908                     ("uma_zalloc_arg: Returning an empty bucket."));
 1909 
 1910                 LIST_REMOVE(bucket, ub_link);
 1911                 cache->uc_allocbucket = bucket;
 1912                 ZONE_UNLOCK(zone);
 1913                 goto zalloc_start;
 1914         }
 1915         /* We are no longer associated with this CPU. */
 1916         critical_exit();
 1917 
 1918         /* Bump up our uz_count so we get here less */
              /* (The zone lock taken above is still held here.) */
 1919         if (zone->uz_count < BUCKET_MAX)
 1920                 zone->uz_count++;
 1921 
 1922         /*
 1923          * Now lets just fill a bucket and put it on the free list.  If that
 1924          * works we'll restart the allocation from the beginning.
 1925          */
 1926         if (uma_zalloc_bucket(zone, flags)) {
 1927                 ZONE_UNLOCK(zone);
 1928                 goto zalloc_restart;
 1929         }
 1930         ZONE_UNLOCK(zone);
 1931         /*
 1932          * We may not be able to get a bucket so return an actual item.
 1933          */
 1934 #ifdef UMA_DEBUG
 1935         printf("uma_zalloc_arg: Bucketzone returned NULL\n");
 1936 #endif
 1937 
 1938         return (uma_zalloc_internal(zone, udata, flags));
 1939 }
 1940 
 1941 static uma_slab_t
 1942 uma_zone_slab(uma_zone_t zone, int flags)
 1943 {
 1944         uma_slab_t slab;
 1945         uma_keg_t keg;
 1946 
 1947         keg = zone->uz_keg;
 1948 
 1949         /*
 1950          * This is to prevent us from recursively trying to allocate
 1951          * buckets.  The problem is that if an allocation forces us to
 1952          * grab a new bucket we will call page_alloc, which will go off
 1953          * and cause the vm to allocate vm_map_entries.  If we need new
 1954          * buckets there too we will recurse in kmem_alloc and bad
 1955          * things happen.  So instead we return a NULL bucket, and make
 1956          * the code that allocates buckets smart enough to deal with it
 1957          *
 1958          * XXX: While we want this protection for the bucket zones so that
 1959          * recursion from the VM is handled (and the calling code that
 1960          * allocates buckets knows how to deal with it), we do not want
 1961          * to prevent allocation from the slab header zones (slabzone
 1962          * and slabrefzone) if uk_recurse is not zero for them.  The
 1963          * reason is that it could lead to NULL being returned for
 1964          * slab header allocations even in the M_WAITOK case, and the
 1965          * caller can't handle that. 
 1966          */
 1967         if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
 1968                 if (zone != slabzone && zone != slabrefzone && zone != zones)
 1969                         return (NULL);
 1970 
 1971         slab = NULL;
 1972 
 1973         for (;;) {
 1974                 /*
 1975                  * Find a slab with some space.  Prefer slabs that are partially
 1976                  * used over those that are totally full.  This helps to reduce
 1977                  * fragmentation.
 1978                  */
 1979                 if (keg->uk_free != 0) {
 1980                         if (!LIST_EMPTY(&keg->uk_part_slab)) {
 1981                                 slab = LIST_FIRST(&keg->uk_part_slab);
 1982                         } else {
 1983                                 slab = LIST_FIRST(&keg->uk_free_slab);
 1984                                 LIST_REMOVE(slab, us_link);
 1985                                 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
 1986                                     us_link);
 1987                         }
 1988                         return (slab);
 1989                 }
 1990 
 1991                 /*
 1992                  * M_NOVM means don't ask at all!
 1993                  */
 1994                 if (flags & M_NOVM)
 1995                         break;
 1996 
 1997                 if (keg->uk_maxpages &&
 1998                     keg->uk_pages >= keg->uk_maxpages) {
 1999                         keg->uk_flags |= UMA_ZFLAG_FULL;
 2000 
 2001                         if (flags & M_NOWAIT)
 2002                                 break;
 2003                         else
 2004                                 msleep(keg, &keg->uk_lock, PVM,
 2005                                     "zonelimit", 0);
 2006                         continue;
 2007                 }
 2008                 keg->uk_recurse++;
 2009                 slab = slab_zalloc(zone, flags);
 2010                 keg->uk_recurse--;
 2011 
 2012                 /*
 2013                  * If we got a slab here it's safe to mark it partially used
 2014                  * and return.  We assume that the caller is going to remove
 2015                  * at least one item.
 2016                  */
 2017                 if (slab) {
 2018                         LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
 2019                         return (slab);
 2020                 }
 2021                 /*
 2022                  * We might not have been able to get a slab but another cpu
 2023                  * could have while we were unlocked.  Check again before we
 2024                  * fail.
 2025                  */
 2026                 if (flags & M_NOWAIT)
 2027                         flags |= M_NOVM;
 2028         }
 2029         return (slab);
 2030 }
 2031 
 2032 static void *
 2033 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
 2034 {
 2035         uma_keg_t keg;
 2036         uma_slabrefcnt_t slabref;
 2037         void *item;
 2038         u_int8_t freei;
 2039 
 2040         keg = zone->uz_keg;
 2041 
 2042         freei = slab->us_firstfree;
 2043         if (keg->uk_flags & UMA_ZONE_REFCNT) {
 2044                 slabref = (uma_slabrefcnt_t)slab;
 2045                 slab->us_firstfree = slabref->us_freelist[freei].us_item;
 2046         } else {
 2047                 slab->us_firstfree = slab->us_freelist[freei].us_item;
 2048         }
 2049         item = slab->us_data + (keg->uk_rsize * freei);
 2050 
 2051         slab->us_freecount--;
 2052         keg->uk_free--;
 2053 #ifdef INVARIANTS
 2054         uma_dbg_alloc(zone, slab, item);
 2055 #endif
 2056         /* Move this slab to the full list */
 2057         if (slab->us_freecount == 0) {
 2058                 LIST_REMOVE(slab, us_link);
 2059                 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
 2060         }
 2061 
 2062         return (item);
 2063 }
 2064 
      /*
       * Fill a bucket with items from the zone's keg and put it on the
       * zone's full-bucket list.
       *
       * The zone lock is released and re-acquired inside this function (around
       * the bucket allocation and around the uz_init calls), so it is expected
       * to be held on entry and it is held again on return.
       *
       * Arguments:
       *      zone   The zone to fill a bucket for.
       *      flags  Allocation flags from the caller.
       *
       * Returns:
       *      1 if a bucket holding at least one item was added to
       *      zone->uz_full_bucket, 0 otherwise.
       */
 2065 static int
 2066 uma_zalloc_bucket(uma_zone_t zone, int flags)
 2067 {
 2068         uma_bucket_t bucket;
 2069         uma_slab_t slab;
 2070         int16_t saved;
 2071         int max, origflags = flags;
 2072 
 2073         /*
 2074          * Try this zone's free list first so we don't allocate extra buckets.
 2075          */
 2076         if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
 2077                 KASSERT(bucket->ub_cnt == 0,
 2078                     ("uma_zalloc_bucket: Bucket on free list is not empty."));
 2079                 LIST_REMOVE(bucket, ub_link);
 2080         } else {
 2081                 int bflags;
 2082 
                      /*
                       * M_ZERO is not passed on to the bucket allocation
                       * (presumably because it is meant for the caller's item,
                       * not for the bucket).
                       */
 2083                 bflags = (flags & ~M_ZERO);
 2084                 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
 2085                         bflags |= M_NOVM;
 2086 
                      /* Drop the zone lock around the bucket allocation. */
 2087                 ZONE_UNLOCK(zone);
 2088                 bucket = bucket_alloc(zone->uz_count, bflags);
 2089                 ZONE_LOCK(zone);
 2090         }
 2091 
 2092         if (bucket == NULL)
 2093                 return (0);
 2094 
 2095 #ifdef SMP
 2096         /*
 2097          * This code is here to limit the number of simultaneous bucket fills
 2098          * for any given zone to the number of per cpu caches in this zone. This
 2099          * is done so that we don't allocate more memory than we really need.
 2100          */
              /* uz_fills has not been incremented yet, so "done" skips the decrement. */
 2101         if (zone->uz_fills >= mp_ncpus)
 2102                 goto done;
 2103 
 2104 #endif
 2105         zone->uz_fills++;
 2106 
 2107         max = MIN(bucket->ub_entries, zone->uz_count);
 2108         /* Try to keep the buckets totally full */
              /* Remember where the new items start; only those are passed to uz_init. */
 2109         saved = bucket->ub_cnt;
 2110         while (bucket->ub_cnt < max &&
 2111             (slab = uma_zone_slab(zone, flags)) != NULL) {
 2112                 while (slab->us_freecount && bucket->ub_cnt < max) {
 2113                         bucket->ub_bucket[bucket->ub_cnt++] =
 2114                             uma_slab_alloc(zone, slab);
 2115                 }
 2116 
 2117                 /* Don't block on the next fill */
 2118                 flags |= M_NOWAIT;
 2119         }
 2120 
 2121         /*
 2122          * We unlock here because we need to call the zone's init.
 2123          * It should be safe to unlock because the slab dealt with
 2124          * above is already on the appropriate list within the keg
 2125          * and the bucket we filled is not yet on any list, so we
 2126          * own it.
 2127          */
 2128         if (zone->uz_init != NULL) {
 2129                 int i;
 2130 
 2131                 ZONE_UNLOCK(zone);
                      /*
                       * uz_init sees the caller's original flags, without the
                       * M_NOWAIT added in the fill loop above.
                       */
 2132                 for (i = saved; i < bucket->ub_cnt; i++)
 2133                         if (zone->uz_init(bucket->ub_bucket[i],
 2134                             zone->uz_keg->uk_size, origflags) != 0)
 2135                                 break;
 2136                 /*
 2137                  * If we couldn't initialize the whole bucket, put the
 2138                  * rest back onto the freelist.
 2139                  */
 2140                 if (i != bucket->ub_cnt) {
 2141                         int j;
 2142 
 2143                         for (j = i; j < bucket->ub_cnt; j++) {
 2144                                 uma_zfree_internal(zone, bucket->ub_bucket[j],
 2145                                     NULL, SKIP_FINI, 0);
 2146 #ifdef INVARIANTS
 2147                                 bucket->ub_bucket[j] = NULL;
 2148 #endif
 2149                         }
                              /* Keep only the items that were initialized successfully. */
 2150                         bucket->ub_cnt = i;
 2151                 }
 2152                 ZONE_LOCK(zone);
 2153         }
 2154 
 2155         zone->uz_fills--;
 2156         if (bucket->ub_cnt != 0) {
 2157                 LIST_INSERT_HEAD(&zone->uz_full_bucket,
 2158                     bucket, ub_link);
 2159                 return (1);
 2160         }
              /* The bucket ended up empty (or the fill was refused): release it. */
 2161 #ifdef SMP
 2162 done:
 2163 #endif
 2164         bucket_free(bucket);
 2165 
 2166         return (0);
 2167 }
 2168 /*
 2169  * Allocates an item for an internal zone
 2170  *
 2171  * Arguments
 2172  *      zone   The zone to alloc for.
 2173  *      udata  The data to be passed to the constructor.
 2174  *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
 2175  *
 2176  * Returns
 2177  *      NULL if there is no memory and M_NOWAIT is set
 2178  *      An item if successful
 2179  */
 2180 
 2181 static void *
 2182 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
 2183 {
 2184         uma_keg_t keg;
 2185         uma_slab_t slab;
 2186         void *item;
 2187 
 2188         item = NULL;
 2189         keg = zone->uz_keg;
 2190 
 2191 #ifdef UMA_DEBUG_ALLOC
 2192         printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
 2193 #endif
 2194         ZONE_LOCK(zone);
 2195 
 2196         slab = uma_zone_slab(zone, flags);
 2197         if (slab == NULL) {
 2198                 zone->uz_fails++;
 2199                 ZONE_UNLOCK(zone);
 2200                 return (NULL);
 2201         }
 2202 
 2203         item = uma_slab_alloc(zone, slab);
 2204 
 2205         zone->uz_allocs++;
 2206 
 2207         ZONE_UNLOCK(zone);
 2208 
 2209         /*
 2210          * We have to call both the zone's init (not the keg's init)
 2211          * and the zone's ctor.  This is because the item is going from
 2212          * a keg slab directly to the user, and the user is expecting it
 2213          * to be both zone-init'd as well as zone-ctor'd.
 2214          */
 2215         if (zone->uz_init != NULL) {
 2216                 if (zone->uz_init(item, keg->uk_size, flags) != 0) {
 2217                         uma_zfree_internal(zone, item, udata, SKIP_FINI,
 2218                             ZFREE_STATFAIL | ZFREE_STATFREE);
 2219                         return (NULL);
 2220                 }
 2221         }
 2222         if (zone->uz_ctor != NULL) {
 2223                 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) {
 2224                         uma_zfree_internal(zone, item, udata, SKIP_DTOR,
 2225                             ZFREE_STATFAIL | ZFREE_STATFREE);
 2226                         return (NULL);
 2227                 }
 2228         }
 2229         if (flags & M_ZERO)
 2230                 bzero(item, keg->uk_size);
 2231 
 2232         return (item);
 2233 }
 2234 
 2235 /* See uma.h */
      /*
       * Free one item to a zone, preferring the per-CPU cache.
       *
       * The zone's dtor is run first.  The item is then stored in the current
       * CPU's free bucket if it has room, swapping with the alloc bucket when
       * that one holds fewer items.  When neither bucket can take the item the
       * zone lock is taken, the per-CPU statistics are folded into the zone,
       * the free bucket is moved to the zone's full-bucket list and an empty
       * bucket is taken from the zone's free-bucket list.  If no empty bucket
       * is available one is allocated with M_NOWAIT; if that fails too, or if
       * the keg is marked UMA_ZFLAG_FULL, the item is returned directly to its
       * slab through uma_zfree_internal().
       *
       * Arguments:
       *      zone   The zone to free to.
       *      item   The item to free; NULL is accepted and ignored.
       *      udata  Passed to the zone's dtor and on to uma_zfree_internal().
       */
 2236 void
 2237 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
 2238 {
 2239         uma_keg_t keg;
 2240         uma_cache_t cache;
 2241         uma_bucket_t bucket;
 2242         int bflags;
 2243         int cpu;
 2244 
 2245         keg = zone->uz_keg;
 2246 
 2247 #ifdef UMA_DEBUG_ALLOC_1
 2248         printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
 2249 #endif
 2250         CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
 2251             zone->uz_name);
 2252 
 2253         /* uma_zfree(..., NULL) does nothing, to match free(9). */
 2254         if (item == NULL)
 2255                 return;
 2256 
              /*
               * The dtor runs up front; the uma_zfree_internal() fallback at
               * the end of this function is called with SKIP_DTOR accordingly.
               */
 2257         if (zone->uz_dtor)
 2258                 zone->uz_dtor(item, keg->uk_size, udata);
 2259 #ifdef INVARIANTS
 2260         ZONE_LOCK(zone);
              /*
               * For UMA_ZONE_MALLOC kegs udata is passed as the slab argument;
               * uma_zfree_internal() likewise treats udata as the slab for
               * such kegs.
               */
 2261         if (keg->uk_flags & UMA_ZONE_MALLOC)
 2262                 uma_dbg_free(zone, udata, item);
 2263         else
 2264                 uma_dbg_free(zone, NULL, item);
 2265         ZONE_UNLOCK(zone);
 2266 #endif
 2267         /*
 2268          * The race here is acceptable.  If we miss it we'll just have to wait
 2269          * a little longer for the limits to be reset.
 2270          */
              /*
               * A keg marked full bypasses the per-CPU cache, so the item goes
               * back to its slab, where uma_zfree_internal() handles the full
               * flag and wakes up sleepers.
               */
 2271         if (keg->uk_flags & UMA_ZFLAG_FULL)
 2272                 goto zfree_internal;
 2273 
 2274         /*
 2275          * If possible, free to the per-CPU cache.  There are two
 2276          * requirements for safe access to the per-CPU cache: (1) the thread
 2277          * accessing the cache must not be preempted or yield during access,
 2278          * and (2) the thread must not migrate CPUs without switching which
 2279          * cache it accesses.  We rely on a critical section to prevent
 2280          * preemption and migration.  We release the critical section in
 2281          * order to acquire the zone mutex if we are unable to free to the
 2282          * current cache; when we re-acquire the critical section, we must
 2283          * detect and handle migration if it has occurred.
 2284          */
 2285 zfree_restart:
 2286         critical_enter();
 2287         cpu = curcpu;
 2288         cache = &zone->uz_cpu[cpu];
 2289 
              /*
               * Every jump to zfree_start is made inside a critical section
               * with "cache" referring to the current CPU's cache.
               */
 2290 zfree_start:
 2291         bucket = cache->uc_freebucket;
 2292 
 2293         if (bucket) {
 2294                 /*
 2295                  * Do we have room in our bucket? It is OK for this uz count
 2296                  * check to be slightly out of sync.
 2297                  */
 2298 
 2299                 if (bucket->ub_cnt < bucket->ub_entries) {
 2300                         KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
 2301                             ("uma_zfree: Freeing to non free bucket index."));
 2302                         bucket->ub_bucket[bucket->ub_cnt] = item;
 2303                         bucket->ub_cnt++;
 2304                         cache->uc_frees++;
 2305                         critical_exit();
 2306                         return;
 2307                 } else if (cache->uc_allocbucket) {
 2308 #ifdef UMA_DEBUG_ALLOC
 2309                         printf("uma_zfree: Swapping buckets.\n");
 2310 #endif
 2311                         /*
 2312                          * We have run out of space in our freebucket.
 2313                          * See if we can switch with our alloc bucket.
 2314                          */
                              /* Swap only if the alloc bucket holds fewer items. */
 2315                         if (cache->uc_allocbucket->ub_cnt <
 2316                             cache->uc_freebucket->ub_cnt) {
 2317                                 bucket = cache->uc_freebucket;
 2318                                 cache->uc_freebucket = cache->uc_allocbucket;
 2319                                 cache->uc_allocbucket = bucket;
 2320                                 goto zfree_start;
 2321                         }
 2322                 }
 2323         }
 2324         /*
 2325          * We can get here for two reasons:
 2326          *
 2327          * 1) The buckets are NULL
 2328          * 2) The alloc and free buckets are both somewhat full.
 2329          *
 2330          * We must go back the zone, which requires acquiring the zone lock,
 2331          * which in turn means we must release and re-acquire the critical
 2332          * section.  Since the critical section is released, we may be
 2333          * preempted or migrate.  As such, make sure not to maintain any
 2334          * thread-local state specific to the cache from prior to releasing
 2335          * the critical section.
 2336          */
 2337         critical_exit();
 2338         ZONE_LOCK(zone);
 2339         critical_enter();
 2340         cpu = curcpu;
 2341         cache = &zone->uz_cpu[cpu];
              /*
               * Re-check the (possibly different) per-CPU cache: if it can take
               * the item now, drop the zone lock and retry the fast path.
               */
 2342         if (cache->uc_freebucket != NULL) {
 2343                 if (cache->uc_freebucket->ub_cnt <
 2344                     cache->uc_freebucket->ub_entries) {
 2345                         ZONE_UNLOCK(zone);
 2346                         goto zfree_start;
 2347                 }
 2348                 if (cache->uc_allocbucket != NULL &&
 2349                     (cache->uc_allocbucket->ub_cnt <
 2350                     cache->uc_freebucket->ub_cnt)) {
 2351                         ZONE_UNLOCK(zone);
 2352                         goto zfree_start;
 2353                 }
 2354         }
 2355 
 2356         /* Since we have locked the zone we may as well send back our stats */
 2357         zone->uz_allocs += cache->uc_allocs;
 2358         cache->uc_allocs = 0;
 2359         zone->uz_frees += cache->uc_frees;
 2360         cache->uc_frees = 0;
 2361 
              /* Detach the current free bucket (if any) from the cache. */
 2362         bucket = cache->uc_freebucket;
 2363         cache->uc_freebucket = NULL;
 2364 
 2365         /* Can we throw this on the zone full list? */
 2366         if (bucket != NULL) {
 2367 #ifdef UMA_DEBUG_ALLOC
 2368                 printf("uma_zfree: Putting old bucket on the free list.\n");
 2369 #endif
 2370                 /* ub_cnt is pointing to the last free item */
 2371                 KASSERT(bucket->ub_cnt != 0,
 2372                     ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
 2373                 LIST_INSERT_HEAD(&zone->uz_full_bucket,
 2374                     bucket, ub_link);
 2375         }
              /* Reuse an empty bucket from the zone's free-bucket list if there is one. */
 2376         if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
 2377                 LIST_REMOVE(bucket, ub_link);
 2378                 ZONE_UNLOCK(zone);
                      /*
                       * The critical section entered above is still held, so
                       * "cache" is still the current CPU's cache.
                       */
 2379                 cache->uc_freebucket = bucket;
 2380                 goto zfree_start;
 2381         }
 2382         /* We are no longer associated with this CPU. */
 2383         critical_exit();
 2384 
 2385         /* And the zone.. */
 2386         ZONE_UNLOCK(zone);
 2387 
 2388 #ifdef UMA_DEBUG_ALLOC
 2389         printf("uma_zfree: Allocating new free bucket.\n");
 2390 #endif
              /* The bucket allocation on the free path is always M_NOWAIT. */
 2391         bflags = M_NOWAIT;
 2392 
 2393         if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
 2394                 bflags |= M_NOVM;
 2395         bucket = bucket_alloc(zone->uz_count, bflags);
 2396         if (bucket) {
                      /*
                       * Park the new bucket on the zone's free-bucket list and
                       * start over; the restart will pick it up from there.
                       */
 2397                 ZONE_LOCK(zone);
 2398                 LIST_INSERT_HEAD(&zone->uz_free_bucket,
 2399                     bucket, ub_link);
 2400                 ZONE_UNLOCK(zone);
 2401                 goto zfree_restart;
 2402         }
 2403 
 2404         /*
 2405          * If nothing else caught this, we'll just do an internal free.
 2406          */
 2407 zfree_internal:
 2408         uma_zfree_internal(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);
 2409 
 2410         return;
 2411 }
 2412 
 2413 /*
 2414  * Frees an item to an INTERNAL zone or allocates a free bucket
 2415  *
 2416  * Arguments:
 2417  *      zone   The zone to free to
 2418  *      item   The item we're freeing
 2419  *      udata  User supplied data for the dtor
 2420  *      skip   Skip dtors and finis
 2421  */
 2422 static void
 2423 uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
 2424     enum zfreeskip skip, int flags)
 2425 {
 2426         uma_slab_t slab;
 2427         uma_slabrefcnt_t slabref;
 2428         uma_keg_t keg;
 2429         u_int8_t *mem;
 2430         u_int8_t freei;
 2431 
 2432         keg = zone->uz_keg;
 2433 
 2434         if (skip < SKIP_DTOR && zone->uz_dtor)
 2435                 zone->uz_dtor(item, keg->uk_size, udata);
 2436         if (skip < SKIP_FINI && zone->uz_fini)
 2437                 zone->uz_fini(item, keg->uk_size);
 2438 
 2439         ZONE_LOCK(zone);
 2440 
 2441         if (flags & ZFREE_STATFAIL)
 2442                 zone->uz_fails++;
 2443         if (flags & ZFREE_STATFREE)
 2444                 zone->uz_frees++;
 2445 
 2446         if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
 2447                 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
 2448                 if (keg->uk_flags & UMA_ZONE_HASH)
 2449                         slab = hash_sfind(&keg->uk_hash, mem);
 2450                 else {
 2451                         mem += keg->uk_pgoff;
 2452                         slab = (uma_slab_t)mem;
 2453                 }
 2454         } else {
 2455                 slab = (uma_slab_t)udata;
 2456         }
 2457 
 2458         /* Do we need to remove from any lists? */
 2459         if (slab->us_freecount+1 == keg->uk_ipers) {
 2460                 LIST_REMOVE(slab, us_link);
 2461                 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
 2462         } else if (slab->us_freecount == 0) {
 2463                 LIST_REMOVE(slab, us_link);
 2464                 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
 2465         }
 2466 
 2467         /* Slab management stuff */
 2468         freei = ((unsigned long)item - (unsigned long)slab->us_data)
 2469                 / keg->uk_rsize;
 2470 
 2471 #ifdef INVARIANTS
 2472         if (!skip)
 2473                 uma_dbg_free(zone, slab, item);
 2474 #endif
 2475 
 2476         if (keg->uk_flags & UMA_ZONE_REFCNT) {
 2477                 slabref = (uma_slabrefcnt_t)slab;
 2478                 slabref->us_freelist[freei].us_item = slab->us_firstfree;
 2479         } else {
 2480                 slab->us_freelist[freei].us_item = slab->us_firstfree;
 2481         }
 2482         slab->us_firstfree = freei;
 2483         slab->us_freecount++;
 2484 
 2485         /* Zone statistics */
 2486         keg->uk_free++;
 2487 
 2488         if (keg->uk_flags & UMA_ZFLAG_FULL) {
 2489                 if (keg->uk_pages < keg->uk_maxpages)
 2490                         keg->uk_flags &= ~UMA_ZFLAG_FULL;
 2491 
 2492                 /* 
 2493                  * We can handle one more allocation. Since we're clearing ZFLAG_FULL,
 2494                  * wake up all procs blocked on pages. This should be uncommon, so 
 2495                  * keeping this simple for now (rather than adding count of blocked 
 2496                  * threads etc).
 2497                  */
 2498                 wakeup(keg);
 2499         }
 2500 
 2501         ZONE_UNLOCK(zone);
 2502 }
 2503 
 2504 /* See uma.h */
 2505 void
 2506 uma_zone_set_max(uma_zone_t zone, int nitems)
 2507 {
 2508         uma_keg_t keg;
 2509 
 2510         keg = zone->uz_keg;
 2511         ZONE_LOCK(zone);
 2512         if (keg->uk_ppera > 1)
 2513                 keg->uk_maxpages = nitems * keg->uk_ppera;
 2514         else
 2515                 keg->uk_maxpages = nitems / keg->uk_ipers;
 2516 
 2517         if (keg->uk_maxpages * keg->uk_ipers < nitems)
 2518                 keg->uk_maxpages++;
 2519 
 2520         ZONE_UNLOCK(zone);
 2521 }
 2522 
 2523 /* See uma.h */
 2524 int
 2525 uma_zone_get_max(uma_zone_t zone)
 2526 {
 2527         int nitems;
 2528         uma_keg_t keg;
 2529 
 2530         ZONE_LOCK(zone);
 2531         keg = zone->uz_keg;
 2532         nitems = keg->uk_maxpages * keg->uk_ipers;
 2533         ZONE_UNLOCK(zone);
 2534 
 2535         return (nitems);
 2536 }
 2537 
 2538 /* See uma.h */
 2539 int
 2540 uma_zone_get_cur(uma_zone_t zone)
 2541 {
 2542         int64_t nitems;
 2543         u_int i;
 2544 
 2545         ZONE_LOCK(zone);
 2546         nitems = zone->uz_allocs - zone->uz_frees;
 2547         CPU_FOREACH(i) {
 2548                 /*
 2549                  * See the comment in sysctl_vm_zone_stats() regarding the
 2550                  * safety of accessing the per-cpu caches. With the zone lock
 2551                  * held, it is safe, but can potentially result in stale data.
 2552                  */
 2553                 nitems += zone->uz_cpu[i].uc_allocs -
 2554                     zone->uz_cpu[i].uc_frees;
 2555         }
 2556         ZONE_UNLOCK(zone);
 2557 
 2558         return (nitems < 0 ? 0 : nitems);
 2559 }
 2560 
 2561 /* See uma.h */
 2562 void
 2563 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
 2564 {
 2565         ZONE_LOCK(zone);
 2566         KASSERT(zone->uz_keg->uk_pages == 0,
 2567             ("uma_zone_set_init on non-empty keg"));
 2568         zone->uz_keg->uk_init = uminit;
 2569         ZONE_UNLOCK(zone);
 2570 }
 2571 
 2572 /* See uma.h */
 2573 void
 2574 uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
 2575 {
 2576         ZONE_LOCK(zone);
 2577         KASSERT(zone->uz_keg->uk_pages == 0,
 2578             ("uma_zone_set_fini on non-empty keg"));
 2579         zone->uz_keg->uk_fini = fini;
 2580         ZONE_UNLOCK(zone);
 2581 }
 2582 
 2583 /* See uma.h */
 2584 void
 2585 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
 2586 {
 2587         ZONE_LOCK(zone);
 2588         KASSERT(zone->uz_keg->uk_pages == 0,
 2589             ("uma_zone_set_zinit on non-empty keg"));
 2590         zone->uz_init = zinit;
 2591         ZONE_UNLOCK(zone);
 2592 }
 2593 
 2594 /* See uma.h */
 2595 void
 2596 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
 2597 {
 2598         ZONE_LOCK(zone);
 2599         KASSERT(zone->uz_keg->uk_pages == 0,
 2600             ("uma_zone_set_zfini on non-empty keg"));
 2601         zone->uz_fini = zfini;
 2602         ZONE_UNLOCK(zone);
 2603 }
 2604 
 2605 /* See uma.h */
 2606 /* XXX uk_freef is not actually used with the zone locked */
 2607 void
 2608 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
 2609 {
 2610         ZONE_LOCK(zone);
 2611         zone->uz_keg->uk_freef = freef;
 2612         ZONE_UNLOCK(zone);
 2613 }
 2614 
 2615 /* See uma.h */
 2616 /* XXX uk_allocf is not actually used with the zone locked */
 2617 void
 2618 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
 2619 {
 2620         ZONE_LOCK(zone);
 2621         zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
 2622         zone->uz_keg->uk_allocf = allocf;
 2623         ZONE_UNLOCK(zone);
 2624 }
 2625 
 2626 /* See uma.h */
 2627 int
 2628 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
 2629 {
 2630         uma_keg_t keg;
 2631         vm_offset_t kva;
 2632         int pages;
 2633 
 2634         keg = zone->uz_keg;
 2635         pages = count / keg->uk_ipers;
 2636 
 2637         if (pages * keg->uk_ipers < count)
 2638                 pages++;
 2639 
 2640         kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
 2641 
 2642         if (kva == 0)
 2643                 return (0);
 2644         if (obj == NULL) {
 2645                 obj = vm_object_allocate(OBJT_DEFAULT,
 2646                     pages);
 2647         } else {
 2648                 VM_OBJECT_LOCK_INIT(obj, "uma object");
 2649                 _vm_object_allocate(OBJT_DEFAULT,
 2650                     pages, obj);
 2651         }
 2652         ZONE_LOCK(zone);
 2653         keg->uk_kva = kva;
 2654         keg->uk_obj = obj;
 2655         keg->uk_maxpages = pages;
 2656         keg->uk_allocf = obj_alloc;
 2657         keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
 2658         ZONE_UNLOCK(zone);
 2659         return (1);
 2660 }
 2661 
 2662 /* See uma.h */
 2663 void
 2664 uma_prealloc(uma_zone_t zone, int items)
 2665 {
 2666         int slabs;
 2667         uma_slab_t slab;
 2668         uma_keg_t keg;
 2669 
 2670         keg = zone->uz_keg;
 2671         ZONE_LOCK(zone);
 2672         slabs = items / keg->uk_ipers;
 2673         if (slabs * keg->uk_ipers < items)
 2674                 slabs++;
 2675         while (slabs > 0) {
 2676                 slab = slab_zalloc(zone, M_WAITOK);
 2677                 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
 2678                 slabs--;
 2679         }
 2680         ZONE_UNLOCK(zone);
 2681 }
 2682 
 2683 /* See uma.h */
 2684 u_int32_t *
 2685 uma_find_refcnt(uma_zone_t zone, void *item)
 2686 {
 2687         uma_slabrefcnt_t slabref;
 2688         uma_keg_t keg;
 2689         u_int32_t *refcnt;
 2690         int idx;
 2691 
 2692         keg = zone->uz_keg;
 2693         slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
 2694             (~UMA_SLAB_MASK));
 2695         KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
 2696             ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
 2697         idx = ((unsigned long)item - (unsigned long)slabref->us_data)
 2698             / keg->uk_rsize;
 2699         refcnt = &slabref->us_freelist[idx].us_refcnt;
 2700         return refcnt;
 2701 }
 2702 
 2703 /* See uma.h */
 2704 void
 2705 uma_reclaim(void)
 2706 {
 2707 #ifdef UMA_DEBUG
 2708         printf("UMA: vm asked us to release pages!\n");
 2709 #endif
 2710         bucket_enable();
 2711         zone_foreach(zone_drain);
 2712         /*
 2713          * Some slabs may have been freed but this zone will be visited early
 2714          * we visit again so that we can free pages that are empty once other
 2715          * zones are drained.  We have to do the same for buckets.
 2716          */
 2717         zone_drain(slabzone);
 2718         zone_drain(slabrefzone);
 2719         bucket_zone_drain();
 2720 }
 2721 
 2722 /* See uma.h */
 2723 int
 2724 uma_zone_exhausted(uma_zone_t zone)
 2725 {
 2726         int full;
 2727 
 2728         ZONE_LOCK(zone);
 2729         full = (zone->uz_keg->uk_flags & UMA_ZFLAG_FULL);
 2730         ZONE_UNLOCK(zone);
 2731         return (full);  
 2732 }
 2733 
 2734 int
 2735 uma_zone_exhausted_nolock(uma_zone_t zone)
 2736 {
 2737         return (zone->uz_keg->uk_flags & UMA_ZFLAG_FULL);
 2738 }
 2739 
 2740 void *
 2741 uma_large_malloc(int size, int wait)
 2742 {
 2743         void *mem;
 2744         uma_slab_t slab;
 2745         u_int8_t flags;
 2746 
 2747         slab = uma_zalloc_internal(slabzone, NULL, wait);
 2748         if (slab == NULL)
 2749                 return (NULL);
 2750         mem = page_alloc(NULL, size, &flags, wait);
 2751         if (mem) {
 2752                 vsetslab((vm_offset_t)mem, slab);
 2753                 slab->us_data = mem;
 2754                 slab->us_flags = flags | UMA_SLAB_MALLOC;
 2755                 slab->us_size = size;
 2756         } else {
 2757                 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE,
 2758                     ZFREE_STATFAIL | ZFREE_STATFREE);
 2759         }
 2760 
 2761         return (mem);
 2762 }
 2763 
 2764 void
 2765 uma_large_free(uma_slab_t slab)
 2766 {
 2767         vsetobj((vm_offset_t)slab->us_data, kmem_object);
 2768         page_free(slab->us_data, slab->us_size, slab->us_flags);
 2769         uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE);
 2770 }
 2771 
 2772 void
 2773 uma_print_stats(void)
 2774 {
 2775         zone_foreach(uma_print_zone);
 2776 }
 2777 
 2778 static void
 2779 slab_print(uma_slab_t slab)
 2780 {
 2781         printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
 2782                 slab->us_keg, slab->us_data, slab->us_freecount,
 2783                 slab->us_firstfree);
 2784 }
 2785 
 2786 static void
 2787 cache_print(uma_cache_t cache)
 2788 {
 2789         printf("alloc: %p(%d), free: %p(%d)\n",
 2790                 cache->uc_allocbucket,
 2791                 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
 2792                 cache->uc_freebucket,
 2793                 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
 2794 }
 2795 
 2796 void
 2797 uma_print_zone(uma_zone_t zone)
 2798 {
 2799         uma_cache_t cache;
 2800         uma_keg_t keg;
 2801         uma_slab_t slab;
 2802         int i;
 2803 
 2804         keg = zone->uz_keg;
 2805         printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
 2806             zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
 2807             keg->uk_ipers, keg->uk_ppera,
 2808             (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
 2809         printf("Part slabs:\n");
 2810         LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
 2811                 slab_print(slab);
 2812         printf("Free slabs:\n");
 2813         LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
 2814                 slab_print(slab);
 2815         printf("Full slabs:\n");
 2816         LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
 2817                 slab_print(slab);
 2818         for (i = 0; i <= mp_maxid; i++) {
 2819                 if (CPU_ABSENT(i))
 2820                         continue;
 2821                 cache = &zone->uz_cpu[i];
 2822                 printf("CPU %d Cache:\n", i);
 2823                 cache_print(cache);
 2824         }
 2825 }
 2826 
 2827 #ifdef DDB
 2828 /*
 2829  * Generate statistics across both the zone and its per-cpu cache's.  Return
 2830  * desired statistics if the pointer is non-NULL for that statistic.
 2831  *
 2832  * Note: does not update the zone statistics, as it can't safely clear the
 2833  * per-CPU cache statistic.
 2834  *
 2835  * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
 2836  * safe from off-CPU; we should modify the caches to track this information
 2837  * directly so that we don't have to.
 2838  */
 2839 static void
 2840 uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
 2841     u_int64_t *freesp)
 2842 {
 2843         uma_cache_t cache;
 2844         u_int64_t allocs, frees;
 2845         int cachefree, cpu;
 2846 
 2847         allocs = frees = 0;
 2848         cachefree = 0;
 2849         for (cpu = 0; cpu <= mp_maxid; cpu++) {
 2850                 if (CPU_ABSENT(cpu))
 2851                         continue;
 2852                 cache = &z->uz_cpu[cpu];
 2853                 if (cache->uc_allocbucket != NULL)
 2854                         cachefree += cache->uc_allocbucket->ub_cnt;
 2855                 if (cache->uc_freebucket != NULL)
 2856                         cachefree += cache->uc_freebucket->ub_cnt;
 2857                 allocs += cache->uc_allocs;
 2858                 frees += cache->uc_frees;
 2859         }
 2860         allocs += z->uz_allocs;
 2861         frees += z->uz_frees;
 2862         if (cachefreep != NULL)
 2863                 *cachefreep = cachefree;
 2864         if (allocsp != NULL)
 2865                 *allocsp = allocs;
 2866         if (freesp != NULL)
 2867                 *freesp = frees;
 2868 }
 2869 #endif /* DDB */
 2870 
 2871 static int
 2872 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
 2873 {
 2874         uma_keg_t kz;
 2875         uma_zone_t z;
 2876         int count;
 2877 
 2878         count = 0;
 2879         mtx_lock(&uma_mtx);
 2880         LIST_FOREACH(kz, &uma_kegs, uk_link) {
 2881                 LIST_FOREACH(z, &kz->uk_zones, uz_link)
 2882                         count++;
 2883         }
 2884         mtx_unlock(&uma_mtx);
 2885         return (sysctl_handle_int(oidp, &count, 0, req));
 2886 }
 2887 
 2888 static int
 2889 sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
 2890 {
 2891         struct uma_stream_header ush;
 2892         struct uma_type_header uth;
 2893         struct uma_percpu_stat ups;
 2894         uma_bucket_t bucket;
 2895         struct sbuf sbuf;
 2896         uma_cache_t cache;
 2897         uma_keg_t kz;
 2898         uma_zone_t z;
 2899         char *buffer;
 2900         int buflen, count, error, i;
 2901 
 2902         mtx_lock(&uma_mtx);
 2903 restart:
 2904         mtx_assert(&uma_mtx, MA_OWNED);
 2905         count = 0;
 2906         LIST_FOREACH(kz, &uma_kegs, uk_link) {
 2907                 LIST_FOREACH(z, &kz->uk_zones, uz_link)
 2908                         count++;
 2909         }
 2910         mtx_unlock(&uma_mtx);
 2911 
 2912         buflen = sizeof(ush) + count * (sizeof(uth) + sizeof(ups) *
 2913             (mp_maxid + 1)) + 1;
 2914         buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 2915 
 2916         mtx_lock(&uma_mtx);
 2917         i = 0;
 2918         LIST_FOREACH(kz, &uma_kegs, uk_link) {
 2919                 LIST_FOREACH(z, &kz->uk_zones, uz_link)
 2920                         i++;
 2921         }
 2922         if (i > count) {
 2923                 free(buffer, M_TEMP);
 2924                 goto restart;
 2925         }
 2926         count =  i;
 2927 
 2928         sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN);
 2929 
 2930         /*
 2931          * Insert stream header.
 2932          */
 2933         bzero(&ush, sizeof(ush));
 2934         ush.ush_version = UMA_STREAM_VERSION;
 2935         ush.ush_maxcpus = (mp_maxid + 1);
 2936         ush.ush_count = count;
 2937         if (sbuf_bcat(&sbuf, &ush, sizeof(ush)) < 0) {
 2938                 mtx_unlock(&uma_mtx);
 2939                 error = ENOMEM;
 2940                 goto out;
 2941         }
 2942 
 2943         LIST_FOREACH(kz, &uma_kegs, uk_link) {
 2944                 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 2945                         bzero(&uth, sizeof(uth));
 2946                         ZONE_LOCK(z);
 2947                         strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
 2948                         uth.uth_align = kz->uk_align;
 2949                         uth.uth_pages = kz->uk_pages;
 2950                         uth.uth_keg_free = kz->uk_free;
 2951                         uth.uth_size = kz->uk_size;
 2952                         uth.uth_rsize = kz->uk_rsize;
 2953                         uth.uth_maxpages = kz->uk_maxpages;
 2954                         if (kz->uk_ppera > 1)
 2955                                 uth.uth_limit = kz->uk_maxpages /
 2956                                     kz->uk_ppera;
 2957                         else
 2958                                 uth.uth_limit = kz->uk_maxpages *
 2959                                     kz->uk_ipers;
 2960 
 2961                         /*
 2962                          * A zone is secondary is it is not the first entry
 2963                          * on the keg's zone list.
 2964                          */
 2965                         if ((kz->uk_flags & UMA_ZONE_SECONDARY) &&
 2966                             (LIST_FIRST(&kz->uk_zones) != z))
 2967                                 uth.uth_zone_flags = UTH_ZONE_SECONDARY;
 2968 
 2969                         LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
 2970                                 uth.uth_zone_free += bucket->ub_cnt;
 2971                         uth.uth_allocs = z->uz_allocs;
 2972                         uth.uth_frees = z->uz_frees;
 2973                         uth.uth_fails = z->uz_fails;
 2974                         if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) {
 2975                                 ZONE_UNLOCK(z);
 2976                                 mtx_unlock(&uma_mtx);
 2977                                 error = ENOMEM;
 2978                                 goto out;
 2979                         }
 2980                         /*
 2981                          * While it is not normally safe to access the cache
 2982                          * bucket pointers while not on the CPU that owns the
 2983                          * cache, we only allow the pointers to be exchanged
 2984                          * without the zone lock held, not invalidated, so
 2985                          * accept the possible race associated with bucket
 2986                          * exchange during monitoring.
 2987                          */
 2988                         for (i = 0; i < (mp_maxid + 1); i++) {
 2989                                 bzero(&ups, sizeof(ups));
 2990                                 if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
 2991                                         goto skip;
 2992                                 if (CPU_ABSENT(i))
 2993                                         goto skip;
 2994                                 cache = &z->uz_cpu[i];
 2995                                 if (cache->uc_allocbucket != NULL)
 2996                                         ups.ups_cache_free +=
 2997                                             cache->uc_allocbucket->ub_cnt;
 2998                                 if (cache->uc_freebucket != NULL)
 2999                                         ups.ups_cache_free +=
 3000                                             cache->uc_freebucket->ub_cnt;
 3001                                 ups.ups_allocs = cache->uc_allocs;
 3002                                 ups.ups_frees = cache->uc_frees;
 3003 skip:
 3004                                 if (sbuf_bcat(&sbuf, &ups, sizeof(ups)) < 0) {
 3005                                         ZONE_UNLOCK(z);
 3006                                         mtx_unlock(&uma_mtx);
 3007                                         error = ENOMEM;
 3008                                         goto out;
 3009                                 }
 3010                         }
 3011                         ZONE_UNLOCK(z);
 3012                 }
 3013         }
 3014         mtx_unlock(&uma_mtx);
 3015         sbuf_finish(&sbuf);
 3016         error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
 3017 out:
 3018         free(buffer, M_TEMP);
 3019         return (error);
 3020 }
 3021 
 3022 #ifdef DDB
 3023 DB_SHOW_COMMAND(uma, db_show_uma)
 3024 {
 3025         u_int64_t allocs, frees;
 3026         uma_bucket_t bucket;
 3027         uma_keg_t kz;
 3028         uma_zone_t z;
 3029         int cachefree;
 3030 
 3031         db_printf("%18s %8s %8s %8s %12s\n", "Zone", "Size", "Used", "Free",
 3032             "Requests");
 3033         LIST_FOREACH(kz, &uma_kegs, uk_link) {
 3034                 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 3035                         if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
 3036                                 allocs = z->uz_allocs;
 3037                                 frees = z->uz_frees;
 3038                                 cachefree = 0;
 3039                         } else
 3040                                 uma_zone_sumstat(z, &cachefree, &allocs,
 3041                                     &frees);
 3042                         if (!((kz->uk_flags & UMA_ZONE_SECONDARY) &&
 3043                             (LIST_FIRST(&kz->uk_zones) != z)))
 3044                                 cachefree += kz->uk_free;
 3045                         LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
 3046                                 cachefree += bucket->ub_cnt;
 3047                         db_printf("%18s %8ju %8jd %8d %12ju\n", z->uz_name,
 3048                             (uintmax_t)kz->uk_size,
 3049                             (intmax_t)(allocs - frees), cachefree,
 3050                             (uintmax_t)allocs);
 3051                 }
 3052         }
 3053 }
 3054 #endif
Cache object: 007952d6a9d0119952b19b2d3a38384c
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/vm/uma_core.c

FreeBSD/Linux Kernel Cross Reference
sys/vm/uma_core.c