The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/vm/uma_core.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
    3  * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
    4  * Copyright (c) 2004-2006 Robert N. M. Watson
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice unmodified, this list of conditions, and the following
   12  *    disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * uma_core.c  Implementation of the Universal Memory allocator
   31  *
   32  * This allocator is intended to replace the multitude of similar object caches
   33  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
   34  * effecient.  A primary design goal is to return unused memory to the rest of
   35  * the system.  This will make the system as a whole more flexible due to the
   36  * ability to move memory to subsystems which most need it instead of leaving
   37  * pools of reserved memory unused.
   38  *
   39  * The basic ideas stem from similar slab/zone based allocators whose algorithms
   40  * are well known.
   41  *
   42  */
   43 
   44 /*
   45  * TODO:
   46  *      - Improve memory usage for large allocations
   47  *      - Investigate cache size adjustments
   48  */
   49 
   50 #include <sys/cdefs.h>
   51 __FBSDID("$FreeBSD: releng/10.0/sys/vm/uma_core.c 258911 2013-12-04 07:46:53Z rodrigc $");
   52 
   53 /* I should really use ktr.. */
   54 /*
   55 #define UMA_DEBUG 1
   56 #define UMA_DEBUG_ALLOC 1
   57 #define UMA_DEBUG_ALLOC_1 1
   58 */
   59 
   60 #include "opt_ddb.h"
   61 #include "opt_param.h"
   62 #include "opt_vm.h"
   63 
   64 #include <sys/param.h>
   65 #include <sys/systm.h>
   66 #include <sys/bitset.h>
   67 #include <sys/kernel.h>
   68 #include <sys/types.h>
   69 #include <sys/queue.h>
   70 #include <sys/malloc.h>
   71 #include <sys/ktr.h>
   72 #include <sys/lock.h>
   73 #include <sys/sysctl.h>
   74 #include <sys/mutex.h>
   75 #include <sys/proc.h>
   76 #include <sys/rwlock.h>
   77 #include <sys/sbuf.h>
   78 #include <sys/smp.h>
   79 #include <sys/vmmeter.h>
   80 
   81 #include <vm/vm.h>
   82 #include <vm/vm_object.h>
   83 #include <vm/vm_page.h>
   84 #include <vm/vm_pageout.h>
   85 #include <vm/vm_param.h>
   86 #include <vm/vm_map.h>
   87 #include <vm/vm_kern.h>
   88 #include <vm/vm_extern.h>
   89 #include <vm/uma.h>
   90 #include <vm/uma_int.h>
   91 #include <vm/uma_dbg.h>
   92 
   93 #include <ddb/ddb.h>
   94 
   95 #ifdef DEBUG_MEMGUARD
   96 #include <vm/memguard.h>
   97 #endif
   98 
   99 /*
  100  * This is the zone and keg from which all zones are spawned.  The idea is that
  101  * even the zone & keg heads are allocated from the allocator, so we use the
  102  * bss section to bootstrap us.
  103  */
  104 static struct uma_keg masterkeg;
  105 static struct uma_zone masterzone_k;
  106 static struct uma_zone masterzone_z;
  107 static uma_zone_t kegs = &masterzone_k;
  108 static uma_zone_t zones = &masterzone_z;
  109 
  110 /* This is the zone from which all of uma_slab_t's are allocated. */
  111 static uma_zone_t slabzone;
  112 static uma_zone_t slabrefzone;  /* With refcounters (for UMA_ZONE_REFCNT) */
  113 
  114 /*
  115  * The initial hash tables come out of this zone so they can be allocated
  116  * prior to malloc coming up.
  117  */
  118 static uma_zone_t hashzone;
  119 
  120 /* The boot-time adjusted value for cache line alignment. */
  121 int uma_align_cache = 64 - 1;
  122 
  123 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
  124 
  125 /*
  126  * Are we allowed to allocate buckets?
  127  */
  128 static int bucketdisable = 1;
  129 
  130 /* Linked list of all kegs in the system */
  131 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
  132 
  133 /* This mutex protects the keg list */
  134 static struct mtx_padalign uma_mtx;
  135 
  136 /* Linked list of boot time pages */
  137 static LIST_HEAD(,uma_slab) uma_boot_pages =
  138     LIST_HEAD_INITIALIZER(uma_boot_pages);
  139 
  140 /* This mutex protects the boot time pages list */
  141 static struct mtx_padalign uma_boot_pages_mtx;
  142 
  143 /* Is the VM done starting up? */
  144 static int booted = 0;
  145 #define UMA_STARTUP     1
  146 #define UMA_STARTUP2    2
  147 
  148 /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
  149 static const u_int uma_max_ipers = SLAB_SETSIZE;
  150 
  151 /*
  152  * Only mbuf clusters use ref zones.  Just provide enough references
  153  * to support the one user.  New code should not use the ref facility.
  154  */
  155 static const u_int uma_max_ipers_ref = PAGE_SIZE / MCLBYTES;
  156 
  157 /*
  158  * This is the handle used to schedule events that need to happen
  159  * outside of the allocation fast path.
  160  */
  161 static struct callout uma_callout;
  162 #define UMA_TIMEOUT     20              /* Seconds for callout interval. */
  163 
  164 /*
  165  * This structure is passed as the zone ctor arg so that I don't have to create
  166  * a special allocation function just for zones.
  167  */
  168 struct uma_zctor_args {
  169         const char *name;
  170         size_t size;
  171         uma_ctor ctor;
  172         uma_dtor dtor;
  173         uma_init uminit;
  174         uma_fini fini;
  175         uma_import import;
  176         uma_release release;
  177         void *arg;
  178         uma_keg_t keg;
  179         int align;
  180         uint32_t flags;
  181 };
  182 
  183 struct uma_kctor_args {
  184         uma_zone_t zone;
  185         size_t size;
  186         uma_init uminit;
  187         uma_fini fini;
  188         int align;
  189         uint32_t flags;
  190 };
  191 
  192 struct uma_bucket_zone {
  193         uma_zone_t      ubz_zone;
  194         char            *ubz_name;
  195         int             ubz_entries;    /* Number of items it can hold. */
  196         int             ubz_maxsize;    /* Maximum allocation size per-item. */
  197 };
  198 
  199 /*
  200  * Compute the actual number of bucket entries to pack them in power
  201  * of two sizes for more efficient space utilization.
  202  */
  203 #define BUCKET_SIZE(n)                                          \
  204     (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
  205 
  206 #define BUCKET_MAX      BUCKET_SIZE(128)
  207 
  208 struct uma_bucket_zone bucket_zones[] = {
  209         { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
  210         { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
  211         { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
  212         { NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
  213         { NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
  214         { NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
  215         { NULL, NULL, 0}
  216 };
  217 
  218 /*
  219  * Flags and enumerations to be passed to internal functions.
  220  */
  221 enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
  222 
  223 /* Prototypes.. */
  224 
  225 static void *noobj_alloc(uma_zone_t, int, uint8_t *, int);
  226 static void *page_alloc(uma_zone_t, int, uint8_t *, int);
  227 static void *startup_alloc(uma_zone_t, int, uint8_t *, int);
  228 static void page_free(void *, int, uint8_t);
  229 static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
  230 static void cache_drain(uma_zone_t);
  231 static void bucket_drain(uma_zone_t, uma_bucket_t);
  232 static void bucket_cache_drain(uma_zone_t zone);
  233 static int keg_ctor(void *, int, void *, int);
  234 static void keg_dtor(void *, int, void *);
  235 static int zone_ctor(void *, int, void *, int);
  236 static void zone_dtor(void *, int, void *);
  237 static int zero_init(void *, int, int);
  238 static void keg_small_init(uma_keg_t keg);
  239 static void keg_large_init(uma_keg_t keg);
  240 static void zone_foreach(void (*zfunc)(uma_zone_t));
  241 static void zone_timeout(uma_zone_t zone);
  242 static int hash_alloc(struct uma_hash *);
  243 static int hash_expand(struct uma_hash *, struct uma_hash *);
  244 static void hash_free(struct uma_hash *hash);
  245 static void uma_timeout(void *);
  246 static void uma_startup3(void);
  247 static void *zone_alloc_item(uma_zone_t, void *, int);
  248 static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
  249 static void bucket_enable(void);
  250 static void bucket_init(void);
  251 static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
  252 static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
  253 static void bucket_zone_drain(void);
  254 static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
  255 static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
  256 static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
  257 static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
  258 static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
  259 static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
  260     uma_fini fini, int align, uint32_t flags);
  261 static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
  262 static void zone_release(uma_zone_t zone, void **bucket, int cnt);
  263 
  264 void uma_print_zone(uma_zone_t);
  265 void uma_print_stats(void);
  266 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
  267 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
  268 
  269 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
  270 
  271 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
  272     0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
  273 
  274 SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
  275     0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
  276 
  277 static int zone_warnings = 1;
  278 TUNABLE_INT("vm.zone_warnings", &zone_warnings);
  279 SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RW, &zone_warnings, 0,
  280     "Warn when UMA zones becomes full");
  281 
  282 /*
  283  * This routine checks to see whether or not it's safe to enable buckets.
  284  */
  285 static void
  286 bucket_enable(void)
  287 {
  288         bucketdisable = vm_page_count_min();
  289 }
  290 
  291 /*
  292  * Initialize bucket_zones, the array of zones of buckets of various sizes.
  293  *
  294  * For each zone, calculate the memory required for each bucket, consisting
  295  * of the header and an array of pointers.
  296  */
  297 static void
  298 bucket_init(void)
  299 {
  300         struct uma_bucket_zone *ubz;
  301         int size;
  302         int i;
  303 
  304         for (i = 0, ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
  305                 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
  306                 size += sizeof(void *) * ubz->ubz_entries;
  307                 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
  308                     NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
  309                     UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
  310         }
  311 }
  312 
  313 /*
  314  * Given a desired number of entries for a bucket, return the zone from which
  315  * to allocate the bucket.
  316  */
  317 static struct uma_bucket_zone *
  318 bucket_zone_lookup(int entries)
  319 {
  320         struct uma_bucket_zone *ubz;
  321 
  322         for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
  323                 if (ubz->ubz_entries >= entries)
  324                         return (ubz);
  325         ubz--;
  326         return (ubz);
  327 }
  328 
  329 static int
  330 bucket_select(int size)
  331 {
  332         struct uma_bucket_zone *ubz;
  333 
  334         ubz = &bucket_zones[0];
  335         if (size > ubz->ubz_maxsize)
  336                 return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
  337 
  338         for (; ubz->ubz_entries != 0; ubz++)
  339                 if (ubz->ubz_maxsize < size)
  340                         break;
  341         ubz--;
  342         return (ubz->ubz_entries);
  343 }
  344 
  345 static uma_bucket_t
  346 bucket_alloc(uma_zone_t zone, void *udata, int flags)
  347 {
  348         struct uma_bucket_zone *ubz;
  349         uma_bucket_t bucket;
  350 
  351         /*
  352          * This is to stop us from allocating per cpu buckets while we're
  353          * running out of vm.boot_pages.  Otherwise, we would exhaust the
  354          * boot pages.  This also prevents us from allocating buckets in
  355          * low memory situations.
  356          */
  357         if (bucketdisable)
  358                 return (NULL);
  359         /*
  360          * To limit bucket recursion we store the original zone flags
  361          * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
  362          * NOVM flag to persist even through deep recursions.  We also
  363          * store ZFLAG_BUCKET once we have recursed attempting to allocate
  364          * a bucket for a bucket zone so we do not allow infinite bucket
  365          * recursion.  This cookie will even persist to frees of unused
  366          * buckets via the allocation path or bucket allocations in the
  367          * free path.
  368          */
  369         if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
  370                 return (NULL);
  371         if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
  372                 udata = (void *)(uintptr_t)zone->uz_flags;
  373         else
  374                 udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
  375         if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
  376                 flags |= M_NOVM;
  377         ubz = bucket_zone_lookup(zone->uz_count);
  378         bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
  379         if (bucket) {
  380 #ifdef INVARIANTS
  381                 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
  382 #endif
  383                 bucket->ub_cnt = 0;
  384                 bucket->ub_entries = ubz->ubz_entries;
  385         }
  386 
  387         return (bucket);
  388 }
  389 
  390 static void
  391 bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
  392 {
  393         struct uma_bucket_zone *ubz;
  394 
  395         KASSERT(bucket->ub_cnt == 0,
  396             ("bucket_free: Freeing a non free bucket."));
  397         if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
  398                 udata = (void *)(uintptr_t)zone->uz_flags;
  399         ubz = bucket_zone_lookup(bucket->ub_entries);
  400         uma_zfree_arg(ubz->ubz_zone, bucket, udata);
  401 }
  402 
  403 static void
  404 bucket_zone_drain(void)
  405 {
  406         struct uma_bucket_zone *ubz;
  407 
  408         for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
  409                 zone_drain(ubz->ubz_zone);
  410 }
  411 
  412 static void
  413 zone_log_warning(uma_zone_t zone)
  414 {
  415         static const struct timeval warninterval = { 300, 0 };
  416 
  417         if (!zone_warnings || zone->uz_warning == NULL)
  418                 return;
  419 
  420         if (ratecheck(&zone->uz_ratecheck, &warninterval))
  421                 printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
  422 }
  423 
  424 static void
  425 zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
  426 {
  427         uma_klink_t klink;
  428 
  429         LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
  430                 kegfn(klink->kl_keg);
  431 }
  432 
  433 /*
  434  * Routine called by timeout which is used to fire off some time interval
  435  * based calculations.  (stats, hash size, etc.)
  436  *
  437  * Arguments:
  438  *      arg   Unused
  439  *
  440  * Returns:
  441  *      Nothing
  442  */
  443 static void
  444 uma_timeout(void *unused)
  445 {
  446         bucket_enable();
  447         zone_foreach(zone_timeout);
  448 
  449         /* Reschedule this event */
  450         callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
  451 }
  452 
  453 /*
  454  * Routine to perform timeout driven calculations.  This expands the
  455  * hashes and does per cpu statistics aggregation.
  456  *
  457  *  Returns nothing.
  458  */
  459 static void
  460 keg_timeout(uma_keg_t keg)
  461 {
  462 
  463         KEG_LOCK(keg);
  464         /*
  465          * Expand the keg hash table.
  466          *
  467          * This is done if the number of slabs is larger than the hash size.
  468          * What I'm trying to do here is completely reduce collisions.  This
  469          * may be a little aggressive.  Should I allow for two collisions max?
  470          */
  471         if (keg->uk_flags & UMA_ZONE_HASH &&
  472             keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
  473                 struct uma_hash newhash;
  474                 struct uma_hash oldhash;
  475                 int ret;
  476 
  477                 /*
  478                  * This is so involved because allocating and freeing
  479                  * while the keg lock is held will lead to deadlock.
  480                  * I have to do everything in stages and check for
  481                  * races.
  482                  */
  483                 newhash = keg->uk_hash;
  484                 KEG_UNLOCK(keg);
  485                 ret = hash_alloc(&newhash);
  486                 KEG_LOCK(keg);
  487                 if (ret) {
  488                         if (hash_expand(&keg->uk_hash, &newhash)) {
  489                                 oldhash = keg->uk_hash;
  490                                 keg->uk_hash = newhash;
  491                         } else
  492                                 oldhash = newhash;
  493 
  494                         KEG_UNLOCK(keg);
  495                         hash_free(&oldhash);
  496                         return;
  497                 }
  498         }
  499         KEG_UNLOCK(keg);
  500 }
  501 
  502 static void
  503 zone_timeout(uma_zone_t zone)
  504 {
  505 
  506         zone_foreach_keg(zone, &keg_timeout);
  507 }
  508 
  509 /*
  510  * Allocate and zero fill the next sized hash table from the appropriate
  511  * backing store.
  512  *
  513  * Arguments:
  514  *      hash  A new hash structure with the old hash size in uh_hashsize
  515  *
  516  * Returns:
  517  *      1 on sucess and 0 on failure.
  518  */
  519 static int
  520 hash_alloc(struct uma_hash *hash)
  521 {
  522         int oldsize;
  523         int alloc;
  524 
  525         oldsize = hash->uh_hashsize;
  526 
  527         /* We're just going to go to a power of two greater */
  528         if (oldsize)  {
  529                 hash->uh_hashsize = oldsize * 2;
  530                 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
  531                 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
  532                     M_UMAHASH, M_NOWAIT);
  533         } else {
  534                 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
  535                 hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
  536                     M_WAITOK);
  537                 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
  538         }
  539         if (hash->uh_slab_hash) {
  540                 bzero(hash->uh_slab_hash, alloc);
  541                 hash->uh_hashmask = hash->uh_hashsize - 1;
  542                 return (1);
  543         }
  544 
  545         return (0);
  546 }
  547 
  548 /*
  549  * Expands the hash table for HASH zones.  This is done from zone_timeout
  550  * to reduce collisions.  This must not be done in the regular allocation
  551  * path, otherwise, we can recurse on the vm while allocating pages.
  552  *
  553  * Arguments:
  554  *      oldhash  The hash you want to expand
  555  *      newhash  The hash structure for the new table
  556  *
  557  * Returns:
  558  *      Nothing
  559  *
  560  * Discussion:
  561  */
  562 static int
  563 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
  564 {
  565         uma_slab_t slab;
  566         int hval;
  567         int i;
  568 
  569         if (!newhash->uh_slab_hash)
  570                 return (0);
  571 
  572         if (oldhash->uh_hashsize >= newhash->uh_hashsize)
  573                 return (0);
  574 
  575         /*
  576          * I need to investigate hash algorithms for resizing without a
  577          * full rehash.
  578          */
  579 
  580         for (i = 0; i < oldhash->uh_hashsize; i++)
  581                 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
  582                         slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
  583                         SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
  584                         hval = UMA_HASH(newhash, slab->us_data);
  585                         SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
  586                             slab, us_hlink);
  587                 }
  588 
  589         return (1);
  590 }
  591 
  592 /*
  593  * Free the hash bucket to the appropriate backing store.
  594  *
  595  * Arguments:
  596  *      slab_hash  The hash bucket we're freeing
  597  *      hashsize   The number of entries in that hash bucket
  598  *
  599  * Returns:
  600  *      Nothing
  601  */
  602 static void
  603 hash_free(struct uma_hash *hash)
  604 {
  605         if (hash->uh_slab_hash == NULL)
  606                 return;
  607         if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
  608                 zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
  609         else
  610                 free(hash->uh_slab_hash, M_UMAHASH);
  611 }
  612 
  613 /*
  614  * Frees all outstanding items in a bucket
  615  *
  616  * Arguments:
  617  *      zone   The zone to free to, must be unlocked.
  618  *      bucket The free/alloc bucket with items, cpu queue must be locked.
  619  *
  620  * Returns:
  621  *      Nothing
  622  */
  623 
  624 static void
  625 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
  626 {
  627         int i;
  628 
  629         if (bucket == NULL)
  630                 return;
  631 
  632         if (zone->uz_fini)
  633                 for (i = 0; i < bucket->ub_cnt; i++) 
  634                         zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
  635         zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
  636         bucket->ub_cnt = 0;
  637 }
  638 
  639 /*
  640  * Drains the per cpu caches for a zone.
  641  *
  642  * NOTE: This may only be called while the zone is being turn down, and not
  643  * during normal operation.  This is necessary in order that we do not have
  644  * to migrate CPUs to drain the per-CPU caches.
  645  *
  646  * Arguments:
  647  *      zone     The zone to drain, must be unlocked.
  648  *
  649  * Returns:
  650  *      Nothing
  651  */
  652 static void
  653 cache_drain(uma_zone_t zone)
  654 {
  655         uma_cache_t cache;
  656         int cpu;
  657 
  658         /*
  659          * XXX: It is safe to not lock the per-CPU caches, because we're
  660          * tearing down the zone anyway.  I.e., there will be no further use
  661          * of the caches at this point.
  662          *
  663          * XXX: It would good to be able to assert that the zone is being
  664          * torn down to prevent improper use of cache_drain().
  665          *
  666          * XXX: We lock the zone before passing into bucket_cache_drain() as
  667          * it is used elsewhere.  Should the tear-down path be made special
  668          * there in some form?
  669          */
  670         CPU_FOREACH(cpu) {
  671                 cache = &zone->uz_cpu[cpu];
  672                 bucket_drain(zone, cache->uc_allocbucket);
  673                 bucket_drain(zone, cache->uc_freebucket);
  674                 if (cache->uc_allocbucket != NULL)
  675                         bucket_free(zone, cache->uc_allocbucket, NULL);
  676                 if (cache->uc_freebucket != NULL)
  677                         bucket_free(zone, cache->uc_freebucket, NULL);
  678                 cache->uc_allocbucket = cache->uc_freebucket = NULL;
  679         }
  680         ZONE_LOCK(zone);
  681         bucket_cache_drain(zone);
  682         ZONE_UNLOCK(zone);
  683 }
  684 
  685 /*
  686  * Drain the cached buckets from a zone.  Expects a locked zone on entry.
  687  */
  688 static void
  689 bucket_cache_drain(uma_zone_t zone)
  690 {
  691         uma_bucket_t bucket;
  692 
  693         /*
  694          * Drain the bucket queues and free the buckets, we just keep two per
  695          * cpu (alloc/free).
  696          */
  697         while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
  698                 LIST_REMOVE(bucket, ub_link);
  699                 ZONE_UNLOCK(zone);
  700                 bucket_drain(zone, bucket);
  701                 bucket_free(zone, bucket, NULL);
  702                 ZONE_LOCK(zone);
  703         }
  704 }
  705 
  706 static void
  707 keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
  708 {
  709         uint8_t *mem;
  710         int i;
  711         uint8_t flags;
  712 
  713         mem = slab->us_data;
  714         flags = slab->us_flags;
  715         i = start;
  716         if (keg->uk_fini != NULL) {
  717                 for (i--; i > -1; i--)
  718                         keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
  719                             keg->uk_size);
  720         }
  721         if (keg->uk_flags & UMA_ZONE_OFFPAGE)
  722                 zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
  723 #ifdef UMA_DEBUG
  724         printf("%s: Returning %d bytes.\n", keg->uk_name,
  725             PAGE_SIZE * keg->uk_ppera);
  726 #endif
  727         keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
  728 }
  729 
  730 /*
  731  * Frees pages from a keg back to the system.  This is done on demand from
  732  * the pageout daemon.
  733  *
  734  * Returns nothing.
  735  */
  736 static void
  737 keg_drain(uma_keg_t keg)
  738 {
  739         struct slabhead freeslabs = { 0 };
  740         uma_slab_t slab;
  741         uma_slab_t n;
  742 
  743         /*
  744          * We don't want to take pages from statically allocated kegs at this
  745          * time
  746          */
  747         if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
  748                 return;
  749 
  750 #ifdef UMA_DEBUG
  751         printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
  752 #endif
  753         KEG_LOCK(keg);
  754         if (keg->uk_free == 0)
  755                 goto finished;
  756 
  757         slab = LIST_FIRST(&keg->uk_free_slab);
  758         while (slab) {
  759                 n = LIST_NEXT(slab, us_link);
  760 
  761                 /* We have no where to free these to */
  762                 if (slab->us_flags & UMA_SLAB_BOOT) {
  763                         slab = n;
  764                         continue;
  765                 }
  766 
  767                 LIST_REMOVE(slab, us_link);
  768                 keg->uk_pages -= keg->uk_ppera;
  769                 keg->uk_free -= keg->uk_ipers;
  770 
  771                 if (keg->uk_flags & UMA_ZONE_HASH)
  772                         UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
  773 
  774                 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
  775 
  776                 slab = n;
  777         }
  778 finished:
  779         KEG_UNLOCK(keg);
  780 
  781         while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
  782                 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
  783                 keg_free_slab(keg, slab, keg->uk_ipers);
  784         }
  785 }
  786 
  787 static void
  788 zone_drain_wait(uma_zone_t zone, int waitok)
  789 {
  790 
  791         /*
  792          * Set draining to interlock with zone_dtor() so we can release our
  793          * locks as we go.  Only dtor() should do a WAITOK call since it
  794          * is the only call that knows the structure will still be available
  795          * when it wakes up.
  796          */
  797         ZONE_LOCK(zone);
  798         while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
  799                 if (waitok == M_NOWAIT)
  800                         goto out;
  801                 mtx_unlock(&uma_mtx);
  802                 msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
  803                 mtx_lock(&uma_mtx);
  804         }
  805         zone->uz_flags |= UMA_ZFLAG_DRAINING;
  806         bucket_cache_drain(zone);
  807         ZONE_UNLOCK(zone);
  808         /*
  809          * The DRAINING flag protects us from being freed while
  810          * we're running.  Normally the uma_mtx would protect us but we
  811          * must be able to release and acquire the right lock for each keg.
  812          */
  813         zone_foreach_keg(zone, &keg_drain);
  814         ZONE_LOCK(zone);
  815         zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
  816         wakeup(zone);
  817 out:
  818         ZONE_UNLOCK(zone);
  819 }
  820 
  821 void
  822 zone_drain(uma_zone_t zone)
  823 {
  824 
  825         zone_drain_wait(zone, M_NOWAIT);
  826 }
  827 
  828 /*
  829  * Allocate a new slab for a keg.  This does not insert the slab onto a list.
  830  *
  831  * Arguments:
  832  *      wait  Shall we wait?
  833  *
  834  * Returns:
  835  *      The slab that was allocated or NULL if there is no memory and the
  836  *      caller specified M_NOWAIT.
  837  */
  838 static uma_slab_t
  839 keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
  840 {
  841         uma_slabrefcnt_t slabref;
  842         uma_alloc allocf;
  843         uma_slab_t slab;
  844         uint8_t *mem;
  845         uint8_t flags;
  846         int i;
  847 
  848         mtx_assert(&keg->uk_lock, MA_OWNED);
  849         slab = NULL;
  850         mem = NULL;
  851 
  852 #ifdef UMA_DEBUG
  853         printf("alloc_slab:  Allocating a new slab for %s\n", keg->uk_name);
  854 #endif
  855         allocf = keg->uk_allocf;
  856         KEG_UNLOCK(keg);
  857 
  858         if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
  859                 slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
  860                 if (slab == NULL)
  861                         goto out;
  862         }
  863 
  864         /*
  865          * This reproduces the old vm_zone behavior of zero filling pages the
  866          * first time they are added to a zone.
  867          *
  868          * Malloced items are zeroed in uma_zalloc.
  869          */
  870 
  871         if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
  872                 wait |= M_ZERO;
  873         else
  874                 wait &= ~M_ZERO;
  875 
  876         if (keg->uk_flags & UMA_ZONE_NODUMP)
  877                 wait |= M_NODUMP;
  878 
  879         /* zone is passed for legacy reasons. */
  880         mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
  881         if (mem == NULL) {
  882                 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
  883                         zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
  884                 slab = NULL;
  885                 goto out;
  886         }
  887 
  888         /* Point the slab into the allocated memory */
  889         if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
  890                 slab = (uma_slab_t )(mem + keg->uk_pgoff);
  891 
  892         if (keg->uk_flags & UMA_ZONE_VTOSLAB)
  893                 for (i = 0; i < keg->uk_ppera; i++)
  894                         vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
  895 
  896         slab->us_keg = keg;
  897         slab->us_data = mem;
  898         slab->us_freecount = keg->uk_ipers;
  899         slab->us_flags = flags;
  900         BIT_FILL(SLAB_SETSIZE, &slab->us_free);
  901 #ifdef INVARIANTS
  902         BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
  903 #endif
  904         if (keg->uk_flags & UMA_ZONE_REFCNT) {
  905                 slabref = (uma_slabrefcnt_t)slab;
  906                 for (i = 0; i < keg->uk_ipers; i++)
  907                         slabref->us_refcnt[i] = 0;
  908         }
  909 
  910         if (keg->uk_init != NULL) {
  911                 for (i = 0; i < keg->uk_ipers; i++)
  912                         if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
  913                             keg->uk_size, wait) != 0)
  914                                 break;
  915                 if (i != keg->uk_ipers) {
  916                         keg_free_slab(keg, slab, i);
  917                         slab = NULL;
  918                         goto out;
  919                 }
  920         }
  921 out:
  922         KEG_LOCK(keg);
  923 
  924         if (slab != NULL) {
  925                 if (keg->uk_flags & UMA_ZONE_HASH)
  926                         UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
  927 
  928                 keg->uk_pages += keg->uk_ppera;
  929                 keg->uk_free += keg->uk_ipers;
  930         }
  931 
  932         return (slab);
  933 }
  934 
  935 /*
  936  * This function is intended to be used early on in place of page_alloc() so
  937  * that we may use the boot time page cache to satisfy allocations before
  938  * the VM is ready.
  939  */
  940 static void *
  941 startup_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
  942 {
  943         uma_keg_t keg;
  944         uma_slab_t tmps;
  945         int pages, check_pages;
  946 
  947         keg = zone_first_keg(zone);
  948         pages = howmany(bytes, PAGE_SIZE);
  949         check_pages = pages - 1;
  950         KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
  951 
  952         /*
  953          * Check our small startup cache to see if it has pages remaining.
  954          */
  955         mtx_lock(&uma_boot_pages_mtx);
  956 
  957         /* First check if we have enough room. */
  958         tmps = LIST_FIRST(&uma_boot_pages);
  959         while (tmps != NULL && check_pages-- > 0)
  960                 tmps = LIST_NEXT(tmps, us_link);
  961         if (tmps != NULL) {
  962                 /*
  963                  * It's ok to lose tmps references.  The last one will
  964                  * have tmps->us_data pointing to the start address of
  965                  * "pages" contiguous pages of memory.
  966                  */
  967                 while (pages-- > 0) {
  968                         tmps = LIST_FIRST(&uma_boot_pages);
  969                         LIST_REMOVE(tmps, us_link);
  970                 }
  971                 mtx_unlock(&uma_boot_pages_mtx);
  972                 *pflag = tmps->us_flags;
  973                 return (tmps->us_data);
  974         }
  975         mtx_unlock(&uma_boot_pages_mtx);
  976         if (booted < UMA_STARTUP2)
  977                 panic("UMA: Increase vm.boot_pages");
  978         /*
  979          * Now that we've booted reset these users to their real allocator.
  980          */
  981 #ifdef UMA_MD_SMALL_ALLOC
  982         keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
  983 #else
  984         keg->uk_allocf = page_alloc;
  985 #endif
  986         return keg->uk_allocf(zone, bytes, pflag, wait);
  987 }
  988 
  989 /*
  990  * Allocates a number of pages from the system
  991  *
  992  * Arguments:
  993  *      bytes  The number of bytes requested
  994  *      wait  Shall we wait?
  995  *
  996  * Returns:
  997  *      A pointer to the alloced memory or possibly
  998  *      NULL if M_NOWAIT is set.
  999  */
 1000 static void *
 1001 page_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
 1002 {
 1003         void *p;        /* Returned page */
 1004 
 1005         *pflag = UMA_SLAB_KMEM;
 1006         p = (void *) kmem_malloc(kmem_arena, bytes, wait);
 1007 
 1008         return (p);
 1009 }
 1010 
 1011 /*
 1012  * Allocates a number of pages from within an object
 1013  *
 1014  * Arguments:
 1015  *      bytes  The number of bytes requested
 1016  *      wait   Shall we wait?
 1017  *
 1018  * Returns:
 1019  *      A pointer to the alloced memory or possibly
 1020  *      NULL if M_NOWAIT is set.
 1021  */
 1022 static void *
 1023 noobj_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
 1024 {
 1025         TAILQ_HEAD(, vm_page) alloctail;
 1026         u_long npages;
 1027         vm_offset_t retkva, zkva;
 1028         vm_page_t p, p_next;
 1029         uma_keg_t keg;
 1030 
 1031         TAILQ_INIT(&alloctail);
 1032         keg = zone_first_keg(zone);
 1033 
 1034         npages = howmany(bytes, PAGE_SIZE);
 1035         while (npages > 0) {
 1036                 p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
 1037                     VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
 1038                 if (p != NULL) {
 1039                         /*
 1040                          * Since the page does not belong to an object, its
 1041                          * listq is unused.
 1042                          */
 1043                         TAILQ_INSERT_TAIL(&alloctail, p, listq);
 1044                         npages--;
 1045                         continue;
 1046                 }
 1047                 if (wait & M_WAITOK) {
 1048                         VM_WAIT;
 1049                         continue;
 1050                 }
 1051 
 1052                 /*
 1053                  * Page allocation failed, free intermediate pages and
 1054                  * exit.
 1055                  */
 1056                 TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
 1057                         vm_page_unwire(p, 0);
 1058                         vm_page_free(p); 
 1059                 }
 1060                 return (NULL);
 1061         }
 1062         *flags = UMA_SLAB_PRIV;
 1063         zkva = keg->uk_kva +
 1064             atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
 1065         retkva = zkva;
 1066         TAILQ_FOREACH(p, &alloctail, listq) {
 1067                 pmap_qenter(zkva, &p, 1);
 1068                 zkva += PAGE_SIZE;
 1069         }
 1070 
 1071         return ((void *)retkva);
 1072 }
 1073 
 1074 /*
 1075  * Frees a number of pages to the system
 1076  *
 1077  * Arguments:
 1078  *      mem   A pointer to the memory to be freed
 1079  *      size  The size of the memory being freed
 1080  *      flags The original p->us_flags field
 1081  *
 1082  * Returns:
 1083  *      Nothing
 1084  */
 1085 static void
 1086 page_free(void *mem, int size, uint8_t flags)
 1087 {
 1088         struct vmem *vmem;
 1089 
 1090         if (flags & UMA_SLAB_KMEM)
 1091                 vmem = kmem_arena;
 1092         else if (flags & UMA_SLAB_KERNEL)
 1093                 vmem = kernel_arena;
 1094         else
 1095                 panic("UMA: page_free used with invalid flags %d", flags);
 1096 
 1097         kmem_free(vmem, (vm_offset_t)mem, size);
 1098 }
 1099 
 1100 /*
 1101  * Zero fill initializer
 1102  *
 1103  * Arguments/Returns follow uma_init specifications
 1104  */
 1105 static int
 1106 zero_init(void *mem, int size, int flags)
 1107 {
 1108         bzero(mem, size);
 1109         return (0);
 1110 }
 1111 
 1112 /*
 1113  * Finish creating a small uma keg.  This calculates ipers, and the keg size.
 1114  *
 1115  * Arguments
 1116  *      keg  The zone we should initialize
 1117  *
 1118  * Returns
 1119  *      Nothing
 1120  */
 1121 static void
 1122 keg_small_init(uma_keg_t keg)
 1123 {
 1124         u_int rsize;
 1125         u_int memused;
 1126         u_int wastedspace;
 1127         u_int shsize;
 1128 
 1129         if (keg->uk_flags & UMA_ZONE_PCPU) {
 1130                 u_int ncpus = mp_ncpus ? mp_ncpus : MAXCPU;
 1131 
 1132                 keg->uk_slabsize = sizeof(struct pcpu);
 1133                 keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
 1134                     PAGE_SIZE);
 1135         } else {
 1136                 keg->uk_slabsize = UMA_SLAB_SIZE;
 1137                 keg->uk_ppera = 1;
 1138         }
 1139 
 1140         /*
 1141          * Calculate the size of each allocation (rsize) according to
 1142          * alignment.  If the requested size is smaller than we have
 1143          * allocation bits for we round it up.
 1144          */
 1145         rsize = keg->uk_size;
 1146         if (rsize < keg->uk_slabsize / SLAB_SETSIZE)
 1147                 rsize = keg->uk_slabsize / SLAB_SETSIZE;
 1148         if (rsize & keg->uk_align)
 1149                 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
 1150         keg->uk_rsize = rsize;
 1151 
 1152         KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
 1153             keg->uk_rsize < sizeof(struct pcpu),
 1154             ("%s: size %u too large", __func__, keg->uk_rsize));
 1155 
 1156         if (keg->uk_flags & UMA_ZONE_REFCNT)
 1157                 rsize += sizeof(uint32_t);
 1158 
 1159         if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 1160                 shsize = 0;
 1161         else 
 1162                 shsize = sizeof(struct uma_slab);
 1163 
 1164         keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
 1165         KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
 1166             ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 1167 
 1168         memused = keg->uk_ipers * rsize + shsize;
 1169         wastedspace = keg->uk_slabsize - memused;
 1170 
 1171         /*
 1172          * We can't do OFFPAGE if we're internal or if we've been
 1173          * asked to not go to the VM for buckets.  If we do this we
 1174          * may end up going to the VM  for slabs which we do not
 1175          * want to do if we're UMA_ZFLAG_CACHEONLY as a result
 1176          * of UMA_ZONE_VM, which clearly forbids it.
 1177          */
 1178         if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
 1179             (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
 1180                 return;
 1181 
 1182         /*
 1183          * See if using an OFFPAGE slab will limit our waste.  Only do
 1184          * this if it permits more items per-slab.
 1185          *
 1186          * XXX We could try growing slabsize to limit max waste as well.
 1187          * Historically this was not done because the VM could not
 1188          * efficiently handle contiguous allocations.
 1189          */
 1190         if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
 1191             (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
 1192                 keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
 1193                 KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
 1194                     ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 1195 #ifdef UMA_DEBUG
 1196                 printf("UMA decided we need offpage slab headers for "
 1197                     "keg: %s, calculated wastedspace = %d, "
 1198                     "maximum wasted space allowed = %d, "
 1199                     "calculated ipers = %d, "
 1200                     "new wasted space = %d\n", keg->uk_name, wastedspace,
 1201                     keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
 1202                     keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
 1203 #endif
 1204                 keg->uk_flags |= UMA_ZONE_OFFPAGE;
 1205         }
 1206 
 1207         if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
 1208             (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
 1209                 keg->uk_flags |= UMA_ZONE_HASH;
 1210 }
 1211 
 1212 /*
 1213  * Finish creating a large (> UMA_SLAB_SIZE) uma kegs.  Just give in and do
 1214  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 1215  * more complicated.
 1216  *
 1217  * Arguments
 1218  *      keg  The keg we should initialize
 1219  *
 1220  * Returns
 1221  *      Nothing
 1222  */
 1223 static void
 1224 keg_large_init(uma_keg_t keg)
 1225 {
 1226 
 1227         KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
 1228         KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
 1229             ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
 1230         KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
 1231             ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
 1232 
 1233         keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
 1234         keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
 1235         keg->uk_ipers = 1;
 1236         keg->uk_rsize = keg->uk_size;
 1237 
 1238         /* We can't do OFFPAGE if we're internal, bail out here. */
 1239         if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
 1240                 return;
 1241 
 1242         keg->uk_flags |= UMA_ZONE_OFFPAGE;
 1243         if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
 1244                 keg->uk_flags |= UMA_ZONE_HASH;
 1245 }
 1246 
 1247 static void
 1248 keg_cachespread_init(uma_keg_t keg)
 1249 {
 1250         int alignsize;
 1251         int trailer;
 1252         int pages;
 1253         int rsize;
 1254 
 1255         KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
 1256             ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
 1257 
 1258         alignsize = keg->uk_align + 1;
 1259         rsize = keg->uk_size;
 1260         /*
 1261          * We want one item to start on every align boundary in a page.  To
 1262          * do this we will span pages.  We will also extend the item by the
 1263          * size of align if it is an even multiple of align.  Otherwise, it
 1264          * would fall on the same boundary every time.
 1265          */
 1266         if (rsize & keg->uk_align)
 1267                 rsize = (rsize & ~keg->uk_align) + alignsize;
 1268         if ((rsize & alignsize) == 0)
 1269                 rsize += alignsize;
 1270         trailer = rsize - keg->uk_size;
 1271         pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
 1272         pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
 1273         keg->uk_rsize = rsize;
 1274         keg->uk_ppera = pages;
 1275         keg->uk_slabsize = UMA_SLAB_SIZE;
 1276         keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
 1277         keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
 1278         KASSERT(keg->uk_ipers <= uma_max_ipers,
 1279             ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
 1280             keg->uk_ipers));
 1281 }
 1282 
 1283 /*
 1284  * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
 1285  * the keg onto the global keg list.
 1286  *
 1287  * Arguments/Returns follow uma_ctor specifications
 1288  *      udata  Actually uma_kctor_args
 1289  */
 1290 static int
 1291 keg_ctor(void *mem, int size, void *udata, int flags)
 1292 {
 1293         struct uma_kctor_args *arg = udata;
 1294         uma_keg_t keg = mem;
 1295         uma_zone_t zone;
 1296 
 1297         bzero(keg, size);
 1298         keg->uk_size = arg->size;
 1299         keg->uk_init = arg->uminit;
 1300         keg->uk_fini = arg->fini;
 1301         keg->uk_align = arg->align;
 1302         keg->uk_free = 0;
 1303         keg->uk_reserve = 0;
 1304         keg->uk_pages = 0;
 1305         keg->uk_flags = arg->flags;
 1306         keg->uk_allocf = page_alloc;
 1307         keg->uk_freef = page_free;
 1308         keg->uk_slabzone = NULL;
 1309 
 1310         /*
 1311          * The master zone is passed to us at keg-creation time.
 1312          */
 1313         zone = arg->zone;
 1314         keg->uk_name = zone->uz_name;
 1315 
 1316         if (arg->flags & UMA_ZONE_VM)
 1317                 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
 1318 
 1319         if (arg->flags & UMA_ZONE_ZINIT)
 1320                 keg->uk_init = zero_init;
 1321 
 1322         if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
 1323                 keg->uk_flags |= UMA_ZONE_VTOSLAB;
 1324 
 1325         if (arg->flags & UMA_ZONE_PCPU)
 1326 #ifdef SMP
 1327                 keg->uk_flags |= UMA_ZONE_OFFPAGE;
 1328 #else
 1329                 keg->uk_flags &= ~UMA_ZONE_PCPU;
 1330 #endif
 1331 
 1332         if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
 1333                 keg_cachespread_init(keg);
 1334         } else if (keg->uk_flags & UMA_ZONE_REFCNT) {
 1335                 if (keg->uk_size >
 1336                     (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) -
 1337                     sizeof(uint32_t)))
 1338                         keg_large_init(keg);
 1339                 else
 1340                         keg_small_init(keg);
 1341         } else {
 1342                 if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
 1343                         keg_large_init(keg);
 1344                 else
 1345                         keg_small_init(keg);
 1346         }
 1347 
 1348         if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 1349                 if (keg->uk_flags & UMA_ZONE_REFCNT) {
 1350                         if (keg->uk_ipers > uma_max_ipers_ref)
 1351                                 panic("Too many ref items per zone: %d > %d\n",
 1352                                     keg->uk_ipers, uma_max_ipers_ref);
 1353                         keg->uk_slabzone = slabrefzone;
 1354                 } else
 1355                         keg->uk_slabzone = slabzone;
 1356         }
 1357 
 1358         /*
 1359          * If we haven't booted yet we need allocations to go through the
 1360          * startup cache until the vm is ready.
 1361          */
 1362         if (keg->uk_ppera == 1) {
 1363 #ifdef UMA_MD_SMALL_ALLOC
 1364                 keg->uk_allocf = uma_small_alloc;
 1365                 keg->uk_freef = uma_small_free;
 1366 
 1367                 if (booted < UMA_STARTUP)
 1368                         keg->uk_allocf = startup_alloc;
 1369 #else
 1370                 if (booted < UMA_STARTUP2)
 1371                         keg->uk_allocf = startup_alloc;
 1372 #endif
 1373         } else if (booted < UMA_STARTUP2 &&
 1374             (keg->uk_flags & UMA_ZFLAG_INTERNAL))
 1375                 keg->uk_allocf = startup_alloc;
 1376 
 1377         /*
 1378          * Initialize keg's lock
 1379          */
 1380         KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
 1381 
 1382         /*
 1383          * If we're putting the slab header in the actual page we need to
 1384          * figure out where in each page it goes.  This calculates a right
 1385          * justified offset into the memory on an ALIGN_PTR boundary.
 1386          */
 1387         if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
 1388                 u_int totsize;
 1389 
 1390                 /* Size of the slab struct and free list */
 1391                 totsize = sizeof(struct uma_slab);
 1392 
 1393                 /* Size of the reference counts. */
 1394                 if (keg->uk_flags & UMA_ZONE_REFCNT)
 1395                         totsize += keg->uk_ipers * sizeof(uint32_t);
 1396 
 1397                 if (totsize & UMA_ALIGN_PTR)
 1398                         totsize = (totsize & ~UMA_ALIGN_PTR) +
 1399                             (UMA_ALIGN_PTR + 1);
 1400                 keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
 1401 
 1402                 /*
 1403                  * The only way the following is possible is if with our
 1404                  * UMA_ALIGN_PTR adjustments we are now bigger than
 1405                  * UMA_SLAB_SIZE.  I haven't checked whether this is
 1406                  * mathematically possible for all cases, so we make
 1407                  * sure here anyway.
 1408                  */
 1409                 totsize = keg->uk_pgoff + sizeof(struct uma_slab);
 1410                 if (keg->uk_flags & UMA_ZONE_REFCNT)
 1411                         totsize += keg->uk_ipers * sizeof(uint32_t);
 1412                 if (totsize > PAGE_SIZE * keg->uk_ppera) {
 1413                         printf("zone %s ipers %d rsize %d size %d\n",
 1414                             zone->uz_name, keg->uk_ipers, keg->uk_rsize,
 1415                             keg->uk_size);
 1416                         panic("UMA slab won't fit.");
 1417                 }
 1418         }
 1419 
 1420         if (keg->uk_flags & UMA_ZONE_HASH)
 1421                 hash_alloc(&keg->uk_hash);
 1422 
 1423 #ifdef UMA_DEBUG
 1424         printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
 1425             zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
 1426             keg->uk_ipers, keg->uk_ppera,
 1427             (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
 1428 #endif
 1429 
 1430         LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
 1431 
 1432         mtx_lock(&uma_mtx);
 1433         LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
 1434         mtx_unlock(&uma_mtx);
 1435         return (0);
 1436 }
 1437 
 1438 /*
 1439  * Zone header ctor.  This initializes all fields, locks, etc.
 1440  *
 1441  * Arguments/Returns follow uma_ctor specifications
 1442  *      udata  Actually uma_zctor_args
 1443  */
 1444 static int
 1445 zone_ctor(void *mem, int size, void *udata, int flags)
 1446 {
 1447         struct uma_zctor_args *arg = udata;
 1448         uma_zone_t zone = mem;
 1449         uma_zone_t z;
 1450         uma_keg_t keg;
 1451 
 1452         bzero(zone, size);
 1453         zone->uz_name = arg->name;
 1454         zone->uz_ctor = arg->ctor;
 1455         zone->uz_dtor = arg->dtor;
 1456         zone->uz_slab = zone_fetch_slab;
 1457         zone->uz_init = NULL;
 1458         zone->uz_fini = NULL;
 1459         zone->uz_allocs = 0;
 1460         zone->uz_frees = 0;
 1461         zone->uz_fails = 0;
 1462         zone->uz_sleeps = 0;
 1463         zone->uz_count = 0;
 1464         zone->uz_flags = 0;
 1465         zone->uz_warning = NULL;
 1466         timevalclear(&zone->uz_ratecheck);
 1467         keg = arg->keg;
 1468 
 1469         ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
 1470 
 1471         /*
 1472          * This is a pure cache zone, no kegs.
 1473          */
 1474         if (arg->import) {
 1475                 if (arg->flags & UMA_ZONE_VM)
 1476                         arg->flags |= UMA_ZFLAG_CACHEONLY;
 1477                 zone->uz_flags = arg->flags;
 1478                 zone->uz_size = arg->size;
 1479                 zone->uz_import = arg->import;
 1480                 zone->uz_release = arg->release;
 1481                 zone->uz_arg = arg->arg;
 1482                 zone->uz_lockptr = &zone->uz_lock;
 1483                 goto out;
 1484         }
 1485 
 1486         /*
 1487          * Use the regular zone/keg/slab allocator.
 1488          */
 1489         zone->uz_import = (uma_import)zone_import;
 1490         zone->uz_release = (uma_release)zone_release;
 1491         zone->uz_arg = zone; 
 1492 
 1493         if (arg->flags & UMA_ZONE_SECONDARY) {
 1494                 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
 1495                 zone->uz_init = arg->uminit;
 1496                 zone->uz_fini = arg->fini;
 1497                 zone->uz_lockptr = &keg->uk_lock;
 1498                 zone->uz_flags |= UMA_ZONE_SECONDARY;
 1499                 mtx_lock(&uma_mtx);
 1500                 ZONE_LOCK(zone);
 1501                 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
 1502                         if (LIST_NEXT(z, uz_link) == NULL) {
 1503                                 LIST_INSERT_AFTER(z, zone, uz_link);
 1504                                 break;
 1505                         }
 1506                 }
 1507                 ZONE_UNLOCK(zone);
 1508                 mtx_unlock(&uma_mtx);
 1509         } else if (keg == NULL) {
 1510                 if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
 1511                     arg->align, arg->flags)) == NULL)
 1512                         return (ENOMEM);
 1513         } else {
 1514                 struct uma_kctor_args karg;
 1515                 int error;
 1516 
 1517                 /* We should only be here from uma_startup() */
 1518                 karg.size = arg->size;
 1519                 karg.uminit = arg->uminit;
 1520                 karg.fini = arg->fini;
 1521                 karg.align = arg->align;
 1522                 karg.flags = arg->flags;
 1523                 karg.zone = zone;
 1524                 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
 1525                     flags);
 1526                 if (error)
 1527                         return (error);
 1528         }
 1529 
 1530         /*
 1531          * Link in the first keg.
 1532          */
 1533         zone->uz_klink.kl_keg = keg;
 1534         LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
 1535         zone->uz_lockptr = &keg->uk_lock;
 1536         zone->uz_size = keg->uk_size;
 1537         zone->uz_flags |= (keg->uk_flags &
 1538             (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
 1539 
 1540         /*
 1541          * Some internal zones don't have room allocated for the per cpu
 1542          * caches.  If we're internal, bail out here.
 1543          */
 1544         if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
 1545                 KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
 1546                     ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
 1547                 return (0);
 1548         }
 1549 
 1550 out:
 1551         if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
 1552                 zone->uz_count = bucket_select(zone->uz_size);
 1553         else
 1554                 zone->uz_count = BUCKET_MAX;
 1555 
 1556         return (0);
 1557 }
 1558 
 1559 /*
 1560  * Keg header dtor.  This frees all data, destroys locks, frees the hash
 1561  * table and removes the keg from the global list.
 1562  *
 1563  * Arguments/Returns follow uma_dtor specifications
 1564  *      udata  unused
 1565  */
 1566 static void
 1567 keg_dtor(void *arg, int size, void *udata)
 1568 {
 1569         uma_keg_t keg;
 1570 
 1571         keg = (uma_keg_t)arg;
 1572         KEG_LOCK(keg);
 1573         if (keg->uk_free != 0) {
 1574                 printf("Freed UMA keg (%s) was not empty (%d items). "
 1575                     " Lost %d pages of memory.\n",
 1576                     keg->uk_name ? keg->uk_name : "",
 1577                     keg->uk_free, keg->uk_pages);
 1578         }
 1579         KEG_UNLOCK(keg);
 1580 
 1581         hash_free(&keg->uk_hash);
 1582 
 1583         KEG_LOCK_FINI(keg);
 1584 }
 1585 
 1586 /*
 1587  * Zone header dtor.
 1588  *
 1589  * Arguments/Returns follow uma_dtor specifications
 1590  *      udata  unused
 1591  */
 1592 static void
 1593 zone_dtor(void *arg, int size, void *udata)
 1594 {
 1595         uma_klink_t klink;
 1596         uma_zone_t zone;
 1597         uma_keg_t keg;
 1598 
 1599         zone = (uma_zone_t)arg;
 1600         keg = zone_first_keg(zone);
 1601 
 1602         if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
 1603                 cache_drain(zone);
 1604 
 1605         mtx_lock(&uma_mtx);
 1606         LIST_REMOVE(zone, uz_link);
 1607         mtx_unlock(&uma_mtx);
 1608         /*
 1609          * XXX there are some races here where
 1610          * the zone can be drained but zone lock
 1611          * released and then refilled before we
 1612          * remove it... we dont care for now
 1613          */
 1614         zone_drain_wait(zone, M_WAITOK);
 1615         /*
 1616          * Unlink all of our kegs.
 1617          */
 1618         while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
 1619                 klink->kl_keg = NULL;
 1620                 LIST_REMOVE(klink, kl_link);
 1621                 if (klink == &zone->uz_klink)
 1622                         continue;
 1623                 free(klink, M_TEMP);
 1624         }
 1625         /*
 1626          * We only destroy kegs from non secondary zones.
 1627          */
 1628         if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
 1629                 mtx_lock(&uma_mtx);
 1630                 LIST_REMOVE(keg, uk_link);
 1631                 mtx_unlock(&uma_mtx);
 1632                 zone_free_item(kegs, keg, NULL, SKIP_NONE);
 1633         }
 1634         ZONE_LOCK_FINI(zone);
 1635 }
 1636 
 1637 /*
 1638  * Traverses every zone in the system and calls a callback
 1639  *
 1640  * Arguments:
 1641  *      zfunc  A pointer to a function which accepts a zone
 1642  *              as an argument.
 1643  *
 1644  * Returns:
 1645  *      Nothing
 1646  */
 1647 static void
 1648 zone_foreach(void (*zfunc)(uma_zone_t))
 1649 {
 1650         uma_keg_t keg;
 1651         uma_zone_t zone;
 1652 
 1653         mtx_lock(&uma_mtx);
 1654         LIST_FOREACH(keg, &uma_kegs, uk_link) {
 1655                 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
 1656                         zfunc(zone);
 1657         }
 1658         mtx_unlock(&uma_mtx);
 1659 }
 1660 
 1661 /* Public functions */
 1662 /* See uma.h */
 1663 void
 1664 uma_startup(void *bootmem, int boot_pages)
 1665 {
 1666         struct uma_zctor_args args;
 1667         uma_slab_t slab;
 1668         u_int slabsize;
 1669         int i;
 1670 
 1671 #ifdef UMA_DEBUG
 1672         printf("Creating uma keg headers zone and keg.\n");
 1673 #endif
 1674         mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
 1675 
 1676         /* "manually" create the initial zone */
 1677         memset(&args, 0, sizeof(args));
 1678         args.name = "UMA Kegs";
 1679         args.size = sizeof(struct uma_keg);
 1680         args.ctor = keg_ctor;
 1681         args.dtor = keg_dtor;
 1682         args.uminit = zero_init;
 1683         args.fini = NULL;
 1684         args.keg = &masterkeg;
 1685         args.align = 32 - 1;
 1686         args.flags = UMA_ZFLAG_INTERNAL;
 1687         /* The initial zone has no Per cpu queues so it's smaller */
 1688         zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
 1689 
 1690 #ifdef UMA_DEBUG
 1691         printf("Filling boot free list.\n");
 1692 #endif
 1693         for (i = 0; i < boot_pages; i++) {
 1694                 slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
 1695                 slab->us_data = (uint8_t *)slab;
 1696                 slab->us_flags = UMA_SLAB_BOOT;
 1697                 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
 1698         }
 1699         mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
 1700 
 1701 #ifdef UMA_DEBUG
 1702         printf("Creating uma zone headers zone and keg.\n");
 1703 #endif
 1704         args.name = "UMA Zones";
 1705         args.size = sizeof(struct uma_zone) +
 1706             (sizeof(struct uma_cache) * (mp_maxid + 1));
 1707         args.ctor = zone_ctor;
 1708         args.dtor = zone_dtor;
 1709         args.uminit = zero_init;
 1710         args.fini = NULL;
 1711         args.keg = NULL;
 1712         args.align = 32 - 1;
 1713         args.flags = UMA_ZFLAG_INTERNAL;
 1714         /* The initial zone has no Per cpu queues so it's smaller */
 1715         zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
 1716 
 1717 #ifdef UMA_DEBUG
 1718         printf("Initializing pcpu cache locks.\n");
 1719 #endif
 1720 #ifdef UMA_DEBUG
 1721         printf("Creating slab and hash zones.\n");
 1722 #endif
 1723 
 1724         /* Now make a zone for slab headers */
 1725         slabzone = uma_zcreate("UMA Slabs",
 1726                                 sizeof(struct uma_slab),
 1727                                 NULL, NULL, NULL, NULL,
 1728                                 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 1729 
 1730         /*
 1731          * We also create a zone for the bigger slabs with reference
 1732          * counts in them, to accomodate UMA_ZONE_REFCNT zones.
 1733          */
 1734         slabsize = sizeof(struct uma_slab_refcnt);
 1735         slabsize += uma_max_ipers_ref * sizeof(uint32_t);
 1736         slabrefzone = uma_zcreate("UMA RCntSlabs",
 1737                                   slabsize,
 1738                                   NULL, NULL, NULL, NULL,
 1739                                   UMA_ALIGN_PTR,
 1740                                   UMA_ZFLAG_INTERNAL);
 1741 
 1742         hashzone = uma_zcreate("UMA Hash",
 1743             sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
 1744             NULL, NULL, NULL, NULL,
 1745             UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
 1746 
 1747         bucket_init();
 1748 
 1749         booted = UMA_STARTUP;
 1750 
 1751 #ifdef UMA_DEBUG
 1752         printf("UMA startup complete.\n");
 1753 #endif
 1754 }
 1755 
 1756 /* see uma.h */
 1757 void
 1758 uma_startup2(void)
 1759 {
 1760         booted = UMA_STARTUP2;
 1761         bucket_enable();
 1762 #ifdef UMA_DEBUG
 1763         printf("UMA startup2 complete.\n");
 1764 #endif
 1765 }
 1766 
 1767 /*
 1768  * Initialize our callout handle
 1769  *
 1770  */
 1771 
 1772 static void
 1773 uma_startup3(void)
 1774 {
 1775 #ifdef UMA_DEBUG
 1776         printf("Starting callout.\n");
 1777 #endif
 1778         callout_init(&uma_callout, CALLOUT_MPSAFE);
 1779         callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
 1780 #ifdef UMA_DEBUG
 1781         printf("UMA startup3 complete.\n");
 1782 #endif
 1783 }
 1784 
 1785 static uma_keg_t
 1786 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
 1787                 int align, uint32_t flags)
 1788 {
 1789         struct uma_kctor_args args;
 1790 
 1791         args.size = size;
 1792         args.uminit = uminit;
 1793         args.fini = fini;
 1794         args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
 1795         args.flags = flags;
 1796         args.zone = zone;
 1797         return (zone_alloc_item(kegs, &args, M_WAITOK));
 1798 }
 1799 
 1800 /* See uma.h */
 1801 void
 1802 uma_set_align(int align)
 1803 {
 1804 
 1805         if (align != UMA_ALIGN_CACHE)
 1806                 uma_align_cache = align;
 1807 }
 1808 
 1809 /* See uma.h */
 1810 uma_zone_t
 1811 uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
 1812                 uma_init uminit, uma_fini fini, int align, uint32_t flags)
 1813 
 1814 {
 1815         struct uma_zctor_args args;
 1816 
 1817         /* This stuff is essential for the zone ctor */
 1818         memset(&args, 0, sizeof(args));
 1819         args.name = name;
 1820         args.size = size;
 1821         args.ctor = ctor;
 1822         args.dtor = dtor;
 1823         args.uminit = uminit;
 1824         args.fini = fini;
 1825         args.align = align;
 1826         args.flags = flags;
 1827         args.keg = NULL;
 1828 
 1829         return (zone_alloc_item(zones, &args, M_WAITOK));
 1830 }
 1831 
 1832 /* See uma.h */
 1833 uma_zone_t
 1834 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
 1835                     uma_init zinit, uma_fini zfini, uma_zone_t master)
 1836 {
 1837         struct uma_zctor_args args;
 1838         uma_keg_t keg;
 1839 
 1840         keg = zone_first_keg(master);
 1841         memset(&args, 0, sizeof(args));
 1842         args.name = name;
 1843         args.size = keg->uk_size;
 1844         args.ctor = ctor;
 1845         args.dtor = dtor;
 1846         args.uminit = zinit;
 1847         args.fini = zfini;
 1848         args.align = keg->uk_align;
 1849         args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
 1850         args.keg = keg;
 1851 
 1852         /* XXX Attaches only one keg of potentially many. */
 1853         return (zone_alloc_item(zones, &args, M_WAITOK));
 1854 }
 1855 
 1856 /* See uma.h */
 1857 uma_zone_t
 1858 uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
 1859                     uma_init zinit, uma_fini zfini, uma_import zimport,
 1860                     uma_release zrelease, void *arg, int flags)
 1861 {
 1862         struct uma_zctor_args args;
 1863 
 1864         memset(&args, 0, sizeof(args));
 1865         args.name = name;
 1866         args.size = size;
 1867         args.ctor = ctor;
 1868         args.dtor = dtor;
 1869         args.uminit = zinit;
 1870         args.fini = zfini;
 1871         args.import = zimport;
 1872         args.release = zrelease;
 1873         args.arg = arg;
 1874         args.align = 0;
 1875         args.flags = flags;
 1876 
 1877         return (zone_alloc_item(zones, &args, M_WAITOK));
 1878 }
 1879 
 1880 static void
 1881 zone_lock_pair(uma_zone_t a, uma_zone_t b)
 1882 {
 1883         if (a < b) {
 1884                 ZONE_LOCK(a);
 1885                 mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
 1886         } else {
 1887                 ZONE_LOCK(b);
 1888                 mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
 1889         }
 1890 }
 1891 
 1892 static void
 1893 zone_unlock_pair(uma_zone_t a, uma_zone_t b)
 1894 {
 1895 
 1896         ZONE_UNLOCK(a);
 1897         ZONE_UNLOCK(b);
 1898 }
 1899 
 1900 int
 1901 uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
 1902 {
 1903         uma_klink_t klink;
 1904         uma_klink_t kl;
 1905         int error;
 1906 
 1907         error = 0;
 1908         klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
 1909 
 1910         zone_lock_pair(zone, master);
 1911         /*
 1912          * zone must use vtoslab() to resolve objects and must already be
 1913          * a secondary.
 1914          */
 1915         if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
 1916             != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
 1917                 error = EINVAL;
 1918                 goto out;
 1919         }
 1920         /*
 1921          * The new master must also use vtoslab().
 1922          */
 1923         if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
 1924                 error = EINVAL;
 1925                 goto out;
 1926         }
 1927         /*
 1928          * Both must either be refcnt, or not be refcnt.
 1929          */
 1930         if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
 1931             (master->uz_flags & UMA_ZONE_REFCNT)) {
 1932                 error = EINVAL;
 1933                 goto out;
 1934         }
 1935         /*
 1936          * The underlying object must be the same size.  rsize
 1937          * may be different.
 1938          */
 1939         if (master->uz_size != zone->uz_size) {
 1940                 error = E2BIG;
 1941                 goto out;
 1942         }
 1943         /*
 1944          * Put it at the end of the list.
 1945          */
 1946         klink->kl_keg = zone_first_keg(master);
 1947         LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
 1948                 if (LIST_NEXT(kl, kl_link) == NULL) {
 1949                         LIST_INSERT_AFTER(kl, klink, kl_link);
 1950                         break;
 1951                 }
 1952         }
 1953         klink = NULL;
 1954         zone->uz_flags |= UMA_ZFLAG_MULTI;
 1955         zone->uz_slab = zone_fetch_slab_multi;
 1956 
 1957 out:
 1958         zone_unlock_pair(zone, master);
 1959         if (klink != NULL)
 1960                 free(klink, M_TEMP);
 1961 
 1962         return (error);
 1963 }
 1964 
 1965 
 1966 /* See uma.h */
 1967 void
 1968 uma_zdestroy(uma_zone_t zone)
 1969 {
 1970 
 1971         zone_free_item(zones, zone, NULL, SKIP_NONE);
 1972 }
 1973 
 1974 /* See uma.h */
 1975 void *
 1976 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 1977 {
 1978         void *item;
 1979         uma_cache_t cache;
 1980         uma_bucket_t bucket;
 1981         int lockfail;
 1982         int cpu;
 1983 
 1984         /* This is the fast path allocation */
 1985 #ifdef UMA_DEBUG_ALLOC_1
 1986         printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
 1987 #endif
 1988         CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
 1989             zone->uz_name, flags);
 1990 
 1991         if (flags & M_WAITOK) {
 1992                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 1993                     "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
 1994         }
 1995 #ifdef DEBUG_MEMGUARD
 1996         if (memguard_cmp_zone(zone)) {
 1997                 item = memguard_alloc(zone->uz_size, flags);
 1998                 if (item != NULL) {
 1999                         /*
 2000                          * Avoid conflict with the use-after-free
 2001                          * protecting infrastructure from INVARIANTS.
 2002                          */
 2003                         if (zone->uz_init != NULL &&
 2004                             zone->uz_init != mtrash_init &&
 2005                             zone->uz_init(item, zone->uz_size, flags) != 0)
 2006                                 return (NULL);
 2007                         if (zone->uz_ctor != NULL &&
 2008                             zone->uz_ctor != mtrash_ctor &&
 2009                             zone->uz_ctor(item, zone->uz_size, udata,
 2010                             flags) != 0) {
 2011                                 zone->uz_fini(item, zone->uz_size);
 2012                                 return (NULL);
 2013                         }
 2014                         return (item);
 2015                 }
 2016                 /* This is unfortunate but should not be fatal. */
 2017         }
 2018 #endif
 2019         /*
 2020          * If possible, allocate from the per-CPU cache.  There are two
 2021          * requirements for safe access to the per-CPU cache: (1) the thread
 2022          * accessing the cache must not be preempted or yield during access,
 2023          * and (2) the thread must not migrate CPUs without switching which
 2024          * cache it accesses.  We rely on a critical section to prevent
 2025          * preemption and migration.  We release the critical section in
 2026          * order to acquire the zone mutex if we are unable to allocate from
 2027          * the current cache; when we re-acquire the critical section, we
 2028          * must detect and handle migration if it has occurred.
 2029          */
 2030         critical_enter();
 2031         cpu = curcpu;
 2032         cache = &zone->uz_cpu[cpu];
 2033 
 2034 zalloc_start:
 2035         bucket = cache->uc_allocbucket;
 2036         if (bucket != NULL && bucket->ub_cnt > 0) {
 2037                 bucket->ub_cnt--;
 2038                 item = bucket->ub_bucket[bucket->ub_cnt];
 2039 #ifdef INVARIANTS
 2040                 bucket->ub_bucket[bucket->ub_cnt] = NULL;
 2041 #endif
 2042                 KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
 2043                 cache->uc_allocs++;
 2044                 critical_exit();
 2045                 if (zone->uz_ctor != NULL &&
 2046                     zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
 2047                         atomic_add_long(&zone->uz_fails, 1);
 2048                         zone_free_item(zone, item, udata, SKIP_DTOR);
 2049                         return (NULL);
 2050                 }
 2051 #ifdef INVARIANTS
 2052                 uma_dbg_alloc(zone, NULL, item);
 2053 #endif
 2054                 if (flags & M_ZERO)
 2055                         bzero(item, zone->uz_size);
 2056                 return (item);
 2057         }
 2058 
 2059         /*
 2060          * We have run out of items in our alloc bucket.
 2061          * See if we can switch with our free bucket.
 2062          */
 2063         bucket = cache->uc_freebucket;
 2064         if (bucket != NULL && bucket->ub_cnt > 0) {
 2065 #ifdef UMA_DEBUG_ALLOC
 2066                 printf("uma_zalloc: Swapping empty with alloc.\n");
 2067 #endif
 2068                 cache->uc_freebucket = cache->uc_allocbucket;
 2069                 cache->uc_allocbucket = bucket;
 2070                 goto zalloc_start;
 2071         }
 2072 
 2073         /*
 2074          * Discard any empty allocation bucket while we hold no locks.
 2075          */
 2076         bucket = cache->uc_allocbucket;
 2077         cache->uc_allocbucket = NULL;
 2078         critical_exit();
 2079         if (bucket != NULL)
 2080                 bucket_free(zone, bucket, udata);
 2081 
 2082         /* Short-circuit for zones without buckets and low memory. */
 2083         if (zone->uz_count == 0 || bucketdisable)
 2084                 goto zalloc_item;
 2085 
 2086         /*
 2087          * Attempt to retrieve the item from the per-CPU cache has failed, so
 2088          * we must go back to the zone.  This requires the zone lock, so we
 2089          * must drop the critical section, then re-acquire it when we go back
 2090          * to the cache.  Since the critical section is released, we may be
 2091          * preempted or migrate.  As such, make sure not to maintain any
 2092          * thread-local state specific to the cache from prior to releasing
 2093          * the critical section.
 2094          */
 2095         lockfail = 0;
 2096         if (ZONE_TRYLOCK(zone) == 0) {
 2097                 /* Record contention to size the buckets. */
 2098                 ZONE_LOCK(zone);
 2099                 lockfail = 1;
 2100         }
 2101         critical_enter();
 2102         cpu = curcpu;
 2103         cache = &zone->uz_cpu[cpu];
 2104 
 2105         /*
 2106          * Since we have locked the zone we may as well send back our stats.
 2107          */
 2108         atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
 2109         atomic_add_long(&zone->uz_frees, cache->uc_frees);
 2110         cache->uc_allocs = 0;
 2111         cache->uc_frees = 0;
 2112 
 2113         /* See if we lost the race to fill the cache. */
 2114         if (cache->uc_allocbucket != NULL) {
 2115                 ZONE_UNLOCK(zone);
 2116                 goto zalloc_start;
 2117         }
 2118 
 2119         /*
 2120          * Check the zone's cache of buckets.
 2121          */
 2122         if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
 2123                 KASSERT(bucket->ub_cnt != 0,
 2124                     ("uma_zalloc_arg: Returning an empty bucket."));
 2125 
 2126                 LIST_REMOVE(bucket, ub_link);
 2127                 cache->uc_allocbucket = bucket;
 2128                 ZONE_UNLOCK(zone);
 2129                 goto zalloc_start;
 2130         }
 2131         /* We are no longer associated with this CPU. */
 2132         critical_exit();
 2133 
 2134         /*
 2135          * We bump the uz count when the cache size is insufficient to
 2136          * handle the working set.
 2137          */
 2138         if (lockfail && zone->uz_count < BUCKET_MAX)
 2139                 zone->uz_count++;
 2140         ZONE_UNLOCK(zone);
 2141 
 2142         /*
 2143          * Now lets just fill a bucket and put it on the free list.  If that
 2144          * works we'll restart the allocation from the begining and it
 2145          * will use the just filled bucket.
 2146          */
 2147         bucket = zone_alloc_bucket(zone, udata, flags);
 2148         if (bucket != NULL) {
 2149                 ZONE_LOCK(zone);
 2150                 critical_enter();
 2151                 cpu = curcpu;
 2152                 cache = &zone->uz_cpu[cpu];
 2153                 /*
 2154                  * See if we lost the race or were migrated.  Cache the
 2155                  * initialized bucket to make this less likely or claim
 2156                  * the memory directly.
 2157                  */
 2158                 if (cache->uc_allocbucket == NULL)
 2159                         cache->uc_allocbucket = bucket;
 2160                 else
 2161                         LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
 2162                 ZONE_UNLOCK(zone);
 2163                 goto zalloc_start;
 2164         }
 2165 
 2166         /*
 2167          * We may not be able to get a bucket so return an actual item.
 2168          */
 2169 #ifdef UMA_DEBUG
 2170         printf("uma_zalloc_arg: Bucketzone returned NULL\n");
 2171 #endif
 2172 
 2173 zalloc_item:
 2174         item = zone_alloc_item(zone, udata, flags);
 2175 
 2176         return (item);
 2177 }
 2178 
 2179 static uma_slab_t
 2180 keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
 2181 {
 2182         uma_slab_t slab;
 2183         int reserve;
 2184 
 2185         mtx_assert(&keg->uk_lock, MA_OWNED);
 2186         slab = NULL;
 2187         reserve = 0;
 2188         if ((flags & M_USE_RESERVE) == 0)
 2189                 reserve = keg->uk_reserve;
 2190 
 2191         for (;;) {
 2192                 /*
 2193                  * Find a slab with some space.  Prefer slabs that are partially
 2194                  * used over those that are totally full.  This helps to reduce
 2195                  * fragmentation.
 2196                  */
 2197                 if (keg->uk_free > reserve) {
 2198                         if (!LIST_EMPTY(&keg->uk_part_slab)) {
 2199                                 slab = LIST_FIRST(&keg->uk_part_slab);
 2200                         } else {
 2201                                 slab = LIST_FIRST(&keg->uk_free_slab);
 2202                                 LIST_REMOVE(slab, us_link);
 2203                                 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
 2204                                     us_link);
 2205                         }
 2206                         MPASS(slab->us_keg == keg);
 2207                         return (slab);
 2208                 }
 2209 
 2210                 /*
 2211                  * M_NOVM means don't ask at all!
 2212                  */
 2213                 if (flags & M_NOVM)
 2214                         break;
 2215 
 2216                 if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
 2217                         keg->uk_flags |= UMA_ZFLAG_FULL;
 2218                         /*
 2219                          * If this is not a multi-zone, set the FULL bit.
 2220                          * Otherwise slab_multi() takes care of it.
 2221                          */
 2222                         if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
 2223                                 zone->uz_flags |= UMA_ZFLAG_FULL;
 2224                                 zone_log_warning(zone);
 2225                         }
 2226                         if (flags & M_NOWAIT)
 2227                                 break;
 2228                         zone->uz_sleeps++;
 2229                         msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
 2230                         continue;
 2231                 }
 2232                 slab = keg_alloc_slab(keg, zone, flags);
 2233                 /*
 2234                  * If we got a slab here it's safe to mark it partially used
 2235                  * and return.  We assume that the caller is going to remove
 2236                  * at least one item.
 2237                  */
 2238                 if (slab) {
 2239                         MPASS(slab->us_keg == keg);
 2240                         LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
 2241                         return (slab);
 2242                 }
 2243                 /*
 2244                  * We might not have been able to get a slab but another cpu
 2245                  * could have while we were unlocked.  Check again before we
 2246                  * fail.
 2247                  */
 2248                 flags |= M_NOVM;
 2249         }
 2250         return (slab);
 2251 }
 2252 
 2253 static uma_slab_t
 2254 zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
 2255 {
 2256         uma_slab_t slab;
 2257 
 2258         if (keg == NULL) {
 2259                 keg = zone_first_keg(zone);
 2260                 KEG_LOCK(keg);
 2261         }
 2262 
 2263         for (;;) {
 2264                 slab = keg_fetch_slab(keg, zone, flags);
 2265                 if (slab)
 2266                         return (slab);
 2267                 if (flags & (M_NOWAIT | M_NOVM))
 2268                         break;
 2269         }
 2270         KEG_UNLOCK(keg);
 2271         return (NULL);
 2272 }
 2273 
 2274 /*
 2275  * uma_zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
 2276  * with the keg locked.  On NULL no lock is held.
 2277  *
 2278  * The last pointer is used to seed the search.  It is not required.
 2279  */
 2280 static uma_slab_t
 2281 zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
 2282 {
 2283         uma_klink_t klink;
 2284         uma_slab_t slab;
 2285         uma_keg_t keg;
 2286         int flags;
 2287         int empty;
 2288         int full;
 2289 
 2290         /*
 2291          * Don't wait on the first pass.  This will skip limit tests
 2292          * as well.  We don't want to block if we can find a provider
 2293          * without blocking.
 2294          */
 2295         flags = (rflags & ~M_WAITOK) | M_NOWAIT;
 2296         /*
 2297          * Use the last slab allocated as a hint for where to start
 2298          * the search.
 2299          */
 2300         if (last != NULL) {
 2301                 slab = keg_fetch_slab(last, zone, flags);
 2302                 if (slab)
 2303                         return (slab);
 2304                 KEG_UNLOCK(last);
 2305         }
 2306         /*
 2307          * Loop until we have a slab incase of transient failures
 2308          * while M_WAITOK is specified.  I'm not sure this is 100%
 2309          * required but we've done it for so long now.
 2310          */
 2311         for (;;) {
 2312                 empty = 0;
 2313                 full = 0;
 2314                 /*
 2315                  * Search the available kegs for slabs.  Be careful to hold the
 2316                  * correct lock while calling into the keg layer.
 2317                  */
 2318                 LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
 2319                         keg = klink->kl_keg;
 2320                         KEG_LOCK(keg);
 2321                         if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
 2322                                 slab = keg_fetch_slab(keg, zone, flags);
 2323                                 if (slab)
 2324                                         return (slab);
 2325                         }
 2326                         if (keg->uk_flags & UMA_ZFLAG_FULL)
 2327                                 full++;
 2328                         else
 2329                                 empty++;
 2330                         KEG_UNLOCK(keg);
 2331                 }
 2332                 if (rflags & (M_NOWAIT | M_NOVM))
 2333                         break;
 2334                 flags = rflags;
 2335                 /*
 2336                  * All kegs are full.  XXX We can't atomically check all kegs
 2337                  * and sleep so just sleep for a short period and retry.
 2338                  */
 2339                 if (full && !empty) {
 2340                         ZONE_LOCK(zone);
 2341                         zone->uz_flags |= UMA_ZFLAG_FULL;
 2342                         zone->uz_sleeps++;
 2343                         zone_log_warning(zone);
 2344                         msleep(zone, zone->uz_lockptr, PVM,
 2345                             "zonelimit", hz/100);
 2346                         zone->uz_flags &= ~UMA_ZFLAG_FULL;
 2347                         ZONE_UNLOCK(zone);
 2348                         continue;
 2349                 }
 2350         }
 2351         return (NULL);
 2352 }
 2353 
 2354 static void *
 2355 slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
 2356 {
 2357         void *item;
 2358         uint8_t freei;
 2359 
 2360         MPASS(keg == slab->us_keg);
 2361         mtx_assert(&keg->uk_lock, MA_OWNED);
 2362 
 2363         freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
 2364         BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
 2365         item = slab->us_data + (keg->uk_rsize * freei);
 2366         slab->us_freecount--;
 2367         keg->uk_free--;
 2368 
 2369         /* Move this slab to the full list */
 2370         if (slab->us_freecount == 0) {
 2371                 LIST_REMOVE(slab, us_link);
 2372                 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
 2373         }
 2374 
 2375         return (item);
 2376 }
 2377 
 2378 static int
 2379 zone_import(uma_zone_t zone, void **bucket, int max, int flags)
 2380 {
 2381         uma_slab_t slab;
 2382         uma_keg_t keg;
 2383         int i;
 2384 
 2385         slab = NULL;
 2386         keg = NULL;
 2387         /* Try to keep the buckets totally full */
 2388         for (i = 0; i < max; ) {
 2389                 if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
 2390                         break;
 2391                 keg = slab->us_keg;
 2392                 while (slab->us_freecount && i < max) { 
 2393                         bucket[i++] = slab_alloc_item(keg, slab);
 2394                         if (keg->uk_free <= keg->uk_reserve)
 2395                                 break;
 2396                 }
 2397                 /* Don't grab more than one slab at a time. */
 2398                 flags &= ~M_WAITOK;
 2399                 flags |= M_NOWAIT;
 2400         }
 2401         if (slab != NULL)
 2402                 KEG_UNLOCK(keg);
 2403 
 2404         return i;
 2405 }
 2406 
 2407 static uma_bucket_t
 2408 zone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
 2409 {
 2410         uma_bucket_t bucket;
 2411         int max;
 2412 
 2413         /* Don't wait for buckets, preserve caller's NOVM setting. */
 2414         bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
 2415         if (bucket == NULL)
 2416                 goto out;
 2417 
 2418         max = MIN(bucket->ub_entries, zone->uz_count);
 2419         bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
 2420             max, flags);
 2421 
 2422         /*
 2423          * Initialize the memory if necessary.
 2424          */
 2425         if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
 2426                 int i;
 2427 
 2428                 for (i = 0; i < bucket->ub_cnt; i++)
 2429                         if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
 2430                             flags) != 0)
 2431                                 break;
 2432                 /*
 2433                  * If we couldn't initialize the whole bucket, put the
 2434                  * rest back onto the freelist.
 2435                  */
 2436                 if (i != bucket->ub_cnt) {
 2437                         zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
 2438                             bucket->ub_cnt - i);
 2439 #ifdef INVARIANTS
 2440                         bzero(&bucket->ub_bucket[i],
 2441                             sizeof(void *) * (bucket->ub_cnt - i));
 2442 #endif
 2443                         bucket->ub_cnt = i;
 2444                 }
 2445         }
 2446 
 2447 out:
 2448         if (bucket == NULL || bucket->ub_cnt == 0) {
 2449                 if (bucket != NULL)
 2450                         bucket_free(zone, bucket, udata);
 2451                 atomic_add_long(&zone->uz_fails, 1);
 2452                 return (NULL);
 2453         }
 2454 
 2455         return (bucket);
 2456 }
 2457 
 2458 /*
 2459  * Allocates a single item from a zone.
 2460  *
 2461  * Arguments
 2462  *      zone   The zone to alloc for.
 2463  *      udata  The data to be passed to the constructor.
 2464  *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
 2465  *
 2466  * Returns
 2467  *      NULL if there is no memory and M_NOWAIT is set
 2468  *      An item if successful
 2469  */
 2470 
 2471 static void *
 2472 zone_alloc_item(uma_zone_t zone, void *udata, int flags)
 2473 {
 2474         void *item;
 2475 
 2476         item = NULL;
 2477 
 2478 #ifdef UMA_DEBUG_ALLOC
 2479         printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
 2480 #endif
 2481         if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
 2482                 goto fail;
 2483         atomic_add_long(&zone->uz_allocs, 1);
 2484 
 2485         /*
 2486          * We have to call both the zone's init (not the keg's init)
 2487          * and the zone's ctor.  This is because the item is going from
 2488          * a keg slab directly to the user, and the user is expecting it
 2489          * to be both zone-init'd as well as zone-ctor'd.
 2490          */
 2491         if (zone->uz_init != NULL) {
 2492                 if (zone->uz_init(item, zone->uz_size, flags) != 0) {
 2493                         zone_free_item(zone, item, udata, SKIP_FINI);
 2494                         goto fail;
 2495                 }
 2496         }
 2497         if (zone->uz_ctor != NULL) {
 2498                 if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
 2499                         zone_free_item(zone, item, udata, SKIP_DTOR);
 2500                         goto fail;
 2501                 }
 2502         }
 2503 #ifdef INVARIANTS
 2504         uma_dbg_alloc(zone, NULL, item);
 2505 #endif
 2506         if (flags & M_ZERO)
 2507                 bzero(item, zone->uz_size);
 2508 
 2509         return (item);
 2510 
 2511 fail:
 2512         atomic_add_long(&zone->uz_fails, 1);
 2513         return (NULL);
 2514 }
 2515 
 2516 /* See uma.h */
 2517 void
 2518 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
 2519 {
 2520         uma_cache_t cache;
 2521         uma_bucket_t bucket;
 2522         int cpu;
 2523 
 2524 #ifdef UMA_DEBUG_ALLOC_1
 2525         printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
 2526 #endif
 2527         CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
 2528             zone->uz_name);
 2529 
 2530         /* uma_zfree(..., NULL) does nothing, to match free(9). */
 2531         if (item == NULL)
 2532                 return;
 2533 #ifdef DEBUG_MEMGUARD
 2534         if (is_memguard_addr(item)) {
 2535                 if (zone->uz_dtor != NULL && zone->uz_dtor != mtrash_dtor)
 2536                         zone->uz_dtor(item, zone->uz_size, udata);
 2537                 if (zone->uz_fini != NULL && zone->uz_fini != mtrash_fini)
 2538                         zone->uz_fini(item, zone->uz_size);
 2539                 memguard_free(item);
 2540                 return;
 2541         }
 2542 #endif
 2543 #ifdef INVARIANTS
 2544         if (zone->uz_flags & UMA_ZONE_MALLOC)
 2545                 uma_dbg_free(zone, udata, item);
 2546         else
 2547                 uma_dbg_free(zone, NULL, item);
 2548 #endif
 2549         if (zone->uz_dtor != NULL)
 2550                 zone->uz_dtor(item, zone->uz_size, udata);
 2551 
 2552         /*
 2553          * The race here is acceptable.  If we miss it we'll just have to wait
 2554          * a little longer for the limits to be reset.
 2555          */
 2556         if (zone->uz_flags & UMA_ZFLAG_FULL)
 2557                 goto zfree_item;
 2558 
 2559         /*
 2560          * If possible, free to the per-CPU cache.  There are two
 2561          * requirements for safe access to the per-CPU cache: (1) the thread
 2562          * accessing the cache must not be preempted or yield during access,
 2563          * and (2) the thread must not migrate CPUs without switching which
 2564          * cache it accesses.  We rely on a critical section to prevent
 2565          * preemption and migration.  We release the critical section in
 2566          * order to acquire the zone mutex if we are unable to free to the
 2567          * current cache; when we re-acquire the critical section, we must
 2568          * detect and handle migration if it has occurred.
 2569          */
 2570 zfree_restart:
 2571         critical_enter();
 2572         cpu = curcpu;
 2573         cache = &zone->uz_cpu[cpu];
 2574 
 2575 zfree_start:
 2576         /*
 2577          * Try to free into the allocbucket first to give LIFO ordering
 2578          * for cache-hot datastructures.  Spill over into the freebucket
 2579          * if necessary.  Alloc will swap them if one runs dry.
 2580          */
 2581         bucket = cache->uc_allocbucket;
 2582         if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
 2583                 bucket = cache->uc_freebucket;
 2584         if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
 2585                 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
 2586                     ("uma_zfree: Freeing to non free bucket index."));
 2587                 bucket->ub_bucket[bucket->ub_cnt] = item;
 2588                 bucket->ub_cnt++;
 2589                 cache->uc_frees++;
 2590                 critical_exit();
 2591                 return;
 2592         }
 2593 
 2594         /*
 2595          * We must go back the zone, which requires acquiring the zone lock,
 2596          * which in turn means we must release and re-acquire the critical
 2597          * section.  Since the critical section is released, we may be
 2598          * preempted or migrate.  As such, make sure not to maintain any
 2599          * thread-local state specific to the cache from prior to releasing
 2600          * the critical section.
 2601          */
 2602         critical_exit();
 2603         if (zone->uz_count == 0 || bucketdisable)
 2604                 goto zfree_item;
 2605 
 2606         ZONE_LOCK(zone);
 2607         critical_enter();
 2608         cpu = curcpu;
 2609         cache = &zone->uz_cpu[cpu];
 2610 
 2611         /*
 2612          * Since we have locked the zone we may as well send back our stats.
 2613          */
 2614         atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
 2615         atomic_add_long(&zone->uz_frees, cache->uc_frees);
 2616         cache->uc_allocs = 0;
 2617         cache->uc_frees = 0;
 2618 
 2619         bucket = cache->uc_freebucket;
 2620         if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
 2621                 ZONE_UNLOCK(zone);
 2622                 goto zfree_start;
 2623         }
 2624         cache->uc_freebucket = NULL;
 2625 
 2626         /* Can we throw this on the zone full list? */
 2627         if (bucket != NULL) {
 2628 #ifdef UMA_DEBUG_ALLOC
 2629                 printf("uma_zfree: Putting old bucket on the free list.\n");
 2630 #endif
 2631                 /* ub_cnt is pointing to the last free item */
 2632                 KASSERT(bucket->ub_cnt != 0,
 2633                     ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
 2634                 LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
 2635         }
 2636 
 2637         /* We are no longer associated with this CPU. */
 2638         critical_exit();
 2639 
 2640         /* And the zone.. */
 2641         ZONE_UNLOCK(zone);
 2642 
 2643 #ifdef UMA_DEBUG_ALLOC
 2644         printf("uma_zfree: Allocating new free bucket.\n");
 2645 #endif
 2646         bucket = bucket_alloc(zone, udata, M_NOWAIT);
 2647         if (bucket) {
 2648                 critical_enter();
 2649                 cpu = curcpu;
 2650                 cache = &zone->uz_cpu[cpu];
 2651                 if (cache->uc_freebucket == NULL) {
 2652                         cache->uc_freebucket = bucket;
 2653                         goto zfree_start;
 2654                 }
 2655                 /*
 2656                  * We lost the race, start over.  We have to drop our
 2657                  * critical section to free the bucket.
 2658                  */
 2659                 critical_exit();
 2660                 bucket_free(zone, bucket, udata);
 2661                 goto zfree_restart;
 2662         }
 2663 
 2664         /*
 2665          * If nothing else caught this, we'll just do an internal free.
 2666          */
 2667 zfree_item:
 2668         zone_free_item(zone, item, udata, SKIP_DTOR);
 2669 
 2670         return;
 2671 }
 2672 
 2673 static void
 2674 slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
 2675 {
 2676         uint8_t freei;
 2677 
 2678         mtx_assert(&keg->uk_lock, MA_OWNED);
 2679         MPASS(keg == slab->us_keg);
 2680 
 2681         /* Do we need to remove from any lists? */
 2682         if (slab->us_freecount+1 == keg->uk_ipers) {
 2683                 LIST_REMOVE(slab, us_link);
 2684                 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
 2685         } else if (slab->us_freecount == 0) {
 2686                 LIST_REMOVE(slab, us_link);
 2687                 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
 2688         }
 2689 
 2690         /* Slab management. */
 2691         freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
 2692         BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
 2693         slab->us_freecount++;
 2694 
 2695         /* Keg statistics. */
 2696         keg->uk_free++;
 2697 }
 2698 
 2699 static void
 2700 zone_release(uma_zone_t zone, void **bucket, int cnt)
 2701 {
 2702         void *item;
 2703         uma_slab_t slab;
 2704         uma_keg_t keg;
 2705         uint8_t *mem;
 2706         int clearfull;
 2707         int i;
 2708 
 2709         clearfull = 0;
 2710         keg = zone_first_keg(zone);
 2711         KEG_LOCK(keg);
 2712         for (i = 0; i < cnt; i++) {
 2713                 item = bucket[i];
 2714                 if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
 2715                         mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
 2716                         if (zone->uz_flags & UMA_ZONE_HASH) {
 2717                                 slab = hash_sfind(&keg->uk_hash, mem);
 2718                         } else {
 2719                                 mem += keg->uk_pgoff;
 2720                                 slab = (uma_slab_t)mem;
 2721                         }
 2722                 } else {
 2723                         slab = vtoslab((vm_offset_t)item);
 2724                         if (slab->us_keg != keg) {
 2725                                 KEG_UNLOCK(keg);
 2726                                 keg = slab->us_keg;
 2727                                 KEG_LOCK(keg);
 2728                         }
 2729                 }
 2730                 slab_free_item(keg, slab, item);
 2731                 if (keg->uk_flags & UMA_ZFLAG_FULL) {
 2732                         if (keg->uk_pages < keg->uk_maxpages) {
 2733                                 keg->uk_flags &= ~UMA_ZFLAG_FULL;
 2734                                 clearfull = 1;
 2735                         }
 2736 
 2737                         /* 
 2738                          * We can handle one more allocation. Since we're
 2739                          * clearing ZFLAG_FULL, wake up all procs blocked
 2740                          * on pages. This should be uncommon, so keeping this
 2741                          * simple for now (rather than adding count of blocked 
 2742                          * threads etc).
 2743                          */
 2744                         wakeup(keg);
 2745                 }
 2746         }
 2747         KEG_UNLOCK(keg);
 2748         if (clearfull) {
 2749                 ZONE_LOCK(zone);
 2750                 zone->uz_flags &= ~UMA_ZFLAG_FULL;
 2751                 wakeup(zone);
 2752                 ZONE_UNLOCK(zone);
 2753         }
 2754 
 2755 }
 2756 
 2757 /*
 2758  * Frees a single item to any zone.
 2759  *
 2760  * Arguments:
 2761  *      zone   The zone to free to
 2762  *      item   The item we're freeing
 2763  *      udata  User supplied data for the dtor
 2764  *      skip   Skip dtors and finis
 2765  */
 2766 static void
 2767 zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
 2768 {
 2769 
 2770 #ifdef INVARIANTS
 2771         if (skip == SKIP_NONE) {
 2772                 if (zone->uz_flags & UMA_ZONE_MALLOC)
 2773                         uma_dbg_free(zone, udata, item);
 2774                 else
 2775                         uma_dbg_free(zone, NULL, item);
 2776         }
 2777 #endif
 2778         if (skip < SKIP_DTOR && zone->uz_dtor)
 2779                 zone->uz_dtor(item, zone->uz_size, udata);
 2780 
 2781         if (skip < SKIP_FINI && zone->uz_fini)
 2782                 zone->uz_fini(item, zone->uz_size);
 2783 
 2784         atomic_add_long(&zone->uz_frees, 1);
 2785         zone->uz_release(zone->uz_arg, &item, 1);
 2786 }
 2787 
 2788 /* See uma.h */
 2789 int
 2790 uma_zone_set_max(uma_zone_t zone, int nitems)
 2791 {
 2792         uma_keg_t keg;
 2793 
 2794         keg = zone_first_keg(zone);
 2795         if (keg == NULL)
 2796                 return (0);
 2797         KEG_LOCK(keg);
 2798         keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
 2799         if (keg->uk_maxpages * keg->uk_ipers < nitems)
 2800                 keg->uk_maxpages += keg->uk_ppera;
 2801         nitems = keg->uk_maxpages * keg->uk_ipers;
 2802         KEG_UNLOCK(keg);
 2803 
 2804         return (nitems);
 2805 }
 2806 
 2807 /* See uma.h */
 2808 int
 2809 uma_zone_get_max(uma_zone_t zone)
 2810 {
 2811         int nitems;
 2812         uma_keg_t keg;
 2813 
 2814         keg = zone_first_keg(zone);
 2815         if (keg == NULL)
 2816                 return (0);
 2817         KEG_LOCK(keg);
 2818         nitems = keg->uk_maxpages * keg->uk_ipers;
 2819         KEG_UNLOCK(keg);
 2820 
 2821         return (nitems);
 2822 }
 2823 
 2824 /* See uma.h */
 2825 void
 2826 uma_zone_set_warning(uma_zone_t zone, const char *warning)
 2827 {
 2828 
 2829         ZONE_LOCK(zone);
 2830         zone->uz_warning = warning;
 2831         ZONE_UNLOCK(zone);
 2832 }
 2833 
 2834 /* See uma.h */
 2835 int
 2836 uma_zone_get_cur(uma_zone_t zone)
 2837 {
 2838         int64_t nitems;
 2839         u_int i;
 2840 
 2841         ZONE_LOCK(zone);
 2842         nitems = zone->uz_allocs - zone->uz_frees;
 2843         CPU_FOREACH(i) {
 2844                 /*
 2845                  * See the comment in sysctl_vm_zone_stats() regarding the
 2846                  * safety of accessing the per-cpu caches. With the zone lock
 2847                  * held, it is safe, but can potentially result in stale data.
 2848                  */
 2849                 nitems += zone->uz_cpu[i].uc_allocs -
 2850                     zone->uz_cpu[i].uc_frees;
 2851         }
 2852         ZONE_UNLOCK(zone);
 2853 
 2854         return (nitems < 0 ? 0 : nitems);
 2855 }
 2856 
 2857 /* See uma.h */
 2858 void
 2859 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
 2860 {
 2861         uma_keg_t keg;
 2862 
 2863         keg = zone_first_keg(zone);
 2864         KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
 2865         KEG_LOCK(keg);
 2866         KASSERT(keg->uk_pages == 0,
 2867             ("uma_zone_set_init on non-empty keg"));
 2868         keg->uk_init = uminit;
 2869         KEG_UNLOCK(keg);
 2870 }
 2871 
 2872 /* See uma.h */
 2873 void
 2874 uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
 2875 {
 2876         uma_keg_t keg;
 2877 
 2878         keg = zone_first_keg(zone);
 2879         KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
 2880         KEG_LOCK(keg);
 2881         KASSERT(keg->uk_pages == 0,
 2882             ("uma_zone_set_fini on non-empty keg"));
 2883         keg->uk_fini = fini;
 2884         KEG_UNLOCK(keg);
 2885 }
 2886 
 2887 /* See uma.h */
 2888 void
 2889 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
 2890 {
 2891 
 2892         ZONE_LOCK(zone);
 2893         KASSERT(zone_first_keg(zone)->uk_pages == 0,
 2894             ("uma_zone_set_zinit on non-empty keg"));
 2895         zone->uz_init = zinit;
 2896         ZONE_UNLOCK(zone);
 2897 }
 2898 
 2899 /* See uma.h */
 2900 void
 2901 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
 2902 {
 2903 
 2904         ZONE_LOCK(zone);
 2905         KASSERT(zone_first_keg(zone)->uk_pages == 0,
 2906             ("uma_zone_set_zfini on non-empty keg"));
 2907         zone->uz_fini = zfini;
 2908         ZONE_UNLOCK(zone);
 2909 }
 2910 
 2911 /* See uma.h */
 2912 /* XXX uk_freef is not actually used with the zone locked */
 2913 void
 2914 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
 2915 {
 2916         uma_keg_t keg;
 2917 
 2918         keg = zone_first_keg(zone);
 2919         KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
 2920         KEG_LOCK(keg);
 2921         keg->uk_freef = freef;
 2922         KEG_UNLOCK(keg);
 2923 }
 2924 
 2925 /* See uma.h */
 2926 /* XXX uk_allocf is not actually used with the zone locked */
 2927 void
 2928 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
 2929 {
 2930         uma_keg_t keg;
 2931 
 2932         keg = zone_first_keg(zone);
 2933         KEG_LOCK(keg);
 2934         keg->uk_allocf = allocf;
 2935         KEG_UNLOCK(keg);
 2936 }
 2937 
 2938 /* See uma.h */
 2939 void
 2940 uma_zone_reserve(uma_zone_t zone, int items)
 2941 {
 2942         uma_keg_t keg;
 2943 
 2944         keg = zone_first_keg(zone);
 2945         if (keg == NULL)
 2946                 return;
 2947         KEG_LOCK(keg);
 2948         keg->uk_reserve = items;
 2949         KEG_UNLOCK(keg);
 2950 
 2951         return;
 2952 }
 2953 
 2954 /* See uma.h */
 2955 int
 2956 uma_zone_reserve_kva(uma_zone_t zone, int count)
 2957 {
 2958         uma_keg_t keg;
 2959         vm_offset_t kva;
 2960         int pages;
 2961 
 2962         keg = zone_first_keg(zone);
 2963         if (keg == NULL)
 2964                 return (0);
 2965         pages = count / keg->uk_ipers;
 2966 
 2967         if (pages * keg->uk_ipers < count)
 2968                 pages++;
 2969 
 2970 #ifdef UMA_MD_SMALL_ALLOC
 2971         if (keg->uk_ppera > 1) {
 2972 #else
 2973         if (1) {
 2974 #endif
 2975                 kva = kva_alloc(pages * UMA_SLAB_SIZE);
 2976                 if (kva == 0)
 2977                         return (0);
 2978         } else
 2979                 kva = 0;
 2980         KEG_LOCK(keg);
 2981         keg->uk_kva = kva;
 2982         keg->uk_offset = 0;
 2983         keg->uk_maxpages = pages;
 2984 #ifdef UMA_MD_SMALL_ALLOC
 2985         keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
 2986 #else
 2987         keg->uk_allocf = noobj_alloc;
 2988 #endif
 2989         keg->uk_flags |= UMA_ZONE_NOFREE;
 2990         KEG_UNLOCK(keg);
 2991 
 2992         return (1);
 2993 }
 2994 
 2995 /* See uma.h */
 2996 void
 2997 uma_prealloc(uma_zone_t zone, int items)
 2998 {
 2999         int slabs;
 3000         uma_slab_t slab;
 3001         uma_keg_t keg;
 3002 
 3003         keg = zone_first_keg(zone);
 3004         if (keg == NULL)
 3005                 return;
 3006         KEG_LOCK(keg);
 3007         slabs = items / keg->uk_ipers;
 3008         if (slabs * keg->uk_ipers < items)
 3009                 slabs++;
 3010         while (slabs > 0) {
 3011                 slab = keg_alloc_slab(keg, zone, M_WAITOK);
 3012                 if (slab == NULL)
 3013                         break;
 3014                 MPASS(slab->us_keg == keg);
 3015                 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
 3016                 slabs--;
 3017         }
 3018         KEG_UNLOCK(keg);
 3019 }
 3020 
 3021 /* See uma.h */
 3022 uint32_t *
 3023 uma_find_refcnt(uma_zone_t zone, void *item)
 3024 {
 3025         uma_slabrefcnt_t slabref;
 3026         uma_slab_t slab;
 3027         uma_keg_t keg;
 3028         uint32_t *refcnt;
 3029         int idx;
 3030 
 3031         slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
 3032         slabref = (uma_slabrefcnt_t)slab;
 3033         keg = slab->us_keg;
 3034         KASSERT(keg->uk_flags & UMA_ZONE_REFCNT,
 3035             ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
 3036         idx = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
 3037         refcnt = &slabref->us_refcnt[idx];
 3038         return refcnt;
 3039 }
 3040 
 3041 /* See uma.h */
 3042 void
 3043 uma_reclaim(void)
 3044 {
 3045 #ifdef UMA_DEBUG
 3046         printf("UMA: vm asked us to release pages!\n");
 3047 #endif
 3048         bucket_enable();
 3049         zone_foreach(zone_drain);
 3050         /*
 3051          * Some slabs may have been freed but this zone will be visited early
 3052          * we visit again so that we can free pages that are empty once other
 3053          * zones are drained.  We have to do the same for buckets.
 3054          */
 3055         zone_drain(slabzone);
 3056         zone_drain(slabrefzone);
 3057         bucket_zone_drain();
 3058 }
 3059 
 3060 /* See uma.h */
 3061 int
 3062 uma_zone_exhausted(uma_zone_t zone)
 3063 {
 3064         int full;
 3065 
 3066         ZONE_LOCK(zone);
 3067         full = (zone->uz_flags & UMA_ZFLAG_FULL);
 3068         ZONE_UNLOCK(zone);
 3069         return (full);  
 3070 }
 3071 
 3072 int
 3073 uma_zone_exhausted_nolock(uma_zone_t zone)
 3074 {
 3075         return (zone->uz_flags & UMA_ZFLAG_FULL);
 3076 }
 3077 
 3078 void *
 3079 uma_large_malloc(int size, int wait)
 3080 {
 3081         void *mem;
 3082         uma_slab_t slab;
 3083         uint8_t flags;
 3084 
 3085         slab = zone_alloc_item(slabzone, NULL, wait);
 3086         if (slab == NULL)
 3087                 return (NULL);
 3088         mem = page_alloc(NULL, size, &flags, wait);
 3089         if (mem) {
 3090                 vsetslab((vm_offset_t)mem, slab);
 3091                 slab->us_data = mem;
 3092                 slab->us_flags = flags | UMA_SLAB_MALLOC;
 3093                 slab->us_size = size;
 3094         } else {
 3095                 zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 3096         }
 3097 
 3098         return (mem);
 3099 }
 3100 
 3101 void
 3102 uma_large_free(uma_slab_t slab)
 3103 {
 3104 
 3105         page_free(slab->us_data, slab->us_size, slab->us_flags);
 3106         zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 3107 }
 3108 
 3109 void
 3110 uma_print_stats(void)
 3111 {
 3112         zone_foreach(uma_print_zone);
 3113 }
 3114 
 3115 static void
 3116 slab_print(uma_slab_t slab)
 3117 {
 3118         printf("slab: keg %p, data %p, freecount %d\n",
 3119                 slab->us_keg, slab->us_data, slab->us_freecount);
 3120 }
 3121 
 3122 static void
 3123 cache_print(uma_cache_t cache)
 3124 {
 3125         printf("alloc: %p(%d), free: %p(%d)\n",
 3126                 cache->uc_allocbucket,
 3127                 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
 3128                 cache->uc_freebucket,
 3129                 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
 3130 }
 3131 
 3132 static void
 3133 uma_print_keg(uma_keg_t keg)
 3134 {
 3135         uma_slab_t slab;
 3136 
 3137         printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
 3138             "out %d free %d limit %d\n",
 3139             keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
 3140             keg->uk_ipers, keg->uk_ppera,
 3141             (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
 3142             (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
 3143         printf("Part slabs:\n");
 3144         LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
 3145                 slab_print(slab);
 3146         printf("Free slabs:\n");
 3147         LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
 3148                 slab_print(slab);
 3149         printf("Full slabs:\n");
 3150         LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
 3151                 slab_print(slab);
 3152 }
 3153 
 3154 void
 3155 uma_print_zone(uma_zone_t zone)
 3156 {
 3157         uma_cache_t cache;
 3158         uma_klink_t kl;
 3159         int i;
 3160 
 3161         printf("zone: %s(%p) size %d flags %#x\n",
 3162             zone->uz_name, zone, zone->uz_size, zone->uz_flags);
 3163         LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
 3164                 uma_print_keg(kl->kl_keg);
 3165         CPU_FOREACH(i) {
 3166                 cache = &zone->uz_cpu[i];
 3167                 printf("CPU %d Cache:\n", i);
 3168                 cache_print(cache);
 3169         }
 3170 }
 3171 
 3172 #ifdef DDB
 3173 /*
 3174  * Generate statistics across both the zone and its per-cpu cache's.  Return
 3175  * desired statistics if the pointer is non-NULL for that statistic.
 3176  *
 3177  * Note: does not update the zone statistics, as it can't safely clear the
 3178  * per-CPU cache statistic.
 3179  *
 3180  * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
 3181  * safe from off-CPU; we should modify the caches to track this information
 3182  * directly so that we don't have to.
 3183  */
 3184 static void
 3185 uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
 3186     uint64_t *freesp, uint64_t *sleepsp)
 3187 {
 3188         uma_cache_t cache;
 3189         uint64_t allocs, frees, sleeps;
 3190         int cachefree, cpu;
 3191 
 3192         allocs = frees = sleeps = 0;
 3193         cachefree = 0;
 3194         CPU_FOREACH(cpu) {
 3195                 cache = &z->uz_cpu[cpu];
 3196                 if (cache->uc_allocbucket != NULL)
 3197                         cachefree += cache->uc_allocbucket->ub_cnt;
 3198                 if (cache->uc_freebucket != NULL)
 3199                         cachefree += cache->uc_freebucket->ub_cnt;
 3200                 allocs += cache->uc_allocs;
 3201                 frees += cache->uc_frees;
 3202         }
 3203         allocs += z->uz_allocs;
 3204         frees += z->uz_frees;
 3205         sleeps += z->uz_sleeps;
 3206         if (cachefreep != NULL)
 3207                 *cachefreep = cachefree;
 3208         if (allocsp != NULL)
 3209                 *allocsp = allocs;
 3210         if (freesp != NULL)
 3211                 *freesp = frees;
 3212         if (sleepsp != NULL)
 3213                 *sleepsp = sleeps;
 3214 }
 3215 #endif /* DDB */
 3216 
 3217 static int
 3218 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
 3219 {
 3220         uma_keg_t kz;
 3221         uma_zone_t z;
 3222         int count;
 3223 
 3224         count = 0;
 3225         mtx_lock(&uma_mtx);
 3226         LIST_FOREACH(kz, &uma_kegs, uk_link) {
 3227                 LIST_FOREACH(z, &kz->uk_zones, uz_link)
 3228                         count++;
 3229         }
 3230         mtx_unlock(&uma_mtx);
 3231         return (sysctl_handle_int(oidp, &count, 0, req));
 3232 }
 3233 
 3234 static int
 3235 sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
 3236 {
 3237         struct uma_stream_header ush;
 3238         struct uma_type_header uth;
 3239         struct uma_percpu_stat ups;
 3240         uma_bucket_t bucket;
 3241         struct sbuf sbuf;
 3242         uma_cache_t cache;
 3243         uma_klink_t kl;
 3244         uma_keg_t kz;
 3245         uma_zone_t z;
 3246         uma_keg_t k;
 3247         int count, error, i;
 3248 
 3249         error = sysctl_wire_old_buffer(req, 0);
 3250         if (error != 0)
 3251                 return (error);
 3252         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 3253 
 3254         count = 0;
 3255         mtx_lock(&uma_mtx);
 3256         LIST_FOREACH(kz, &uma_kegs, uk_link) {
 3257                 LIST_FOREACH(z, &kz->uk_zones, uz_link)
 3258                         count++;
 3259         }
 3260 
 3261         /*
 3262          * Insert stream header.
 3263          */
 3264         bzero(&ush, sizeof(ush));
 3265         ush.ush_version = UMA_STREAM_VERSION;
 3266         ush.ush_maxcpus = (mp_maxid + 1);
 3267         ush.ush_count = count;
 3268         (void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
 3269 
 3270         LIST_FOREACH(kz, &uma_kegs, uk_link) {
 3271                 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 3272                         bzero(&uth, sizeof(uth));
 3273                         ZONE_LOCK(z);
 3274                         strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
 3275                         uth.uth_align = kz->uk_align;
 3276                         uth.uth_size = kz->uk_size;
 3277                         uth.uth_rsize = kz->uk_rsize;
 3278                         LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
 3279                                 k = kl->kl_keg;
 3280                                 uth.uth_maxpages += k->uk_maxpages;
 3281                                 uth.uth_pages += k->uk_pages;
 3282                                 uth.uth_keg_free += k->uk_free;
 3283                                 uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
 3284                                     * k->uk_ipers;
 3285                         }
 3286 
 3287                         /*
 3288                          * A zone is secondary is it is not the first entry
 3289                          * on the keg's zone list.
 3290                          */
 3291                         if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
 3292                             (LIST_FIRST(&kz->uk_zones) != z))
 3293                                 uth.uth_zone_flags = UTH_ZONE_SECONDARY;
 3294 
 3295                         LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
 3296                                 uth.uth_zone_free += bucket->ub_cnt;
 3297                         uth.uth_allocs = z->uz_allocs;
 3298                         uth.uth_frees = z->uz_frees;
 3299                         uth.uth_fails = z->uz_fails;
 3300                         uth.uth_sleeps = z->uz_sleeps;
 3301                         (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
 3302                         /*
 3303                          * While it is not normally safe to access the cache
 3304                          * bucket pointers while not on the CPU that owns the
 3305                          * cache, we only allow the pointers to be exchanged
 3306                          * without the zone lock held, not invalidated, so
 3307                          * accept the possible race associated with bucket
 3308                          * exchange during monitoring.
 3309                          */
 3310                         for (i = 0; i < (mp_maxid + 1); i++) {
 3311                                 bzero(&ups, sizeof(ups));
 3312                                 if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
 3313                                         goto skip;
 3314                                 if (CPU_ABSENT(i))
 3315                                         goto skip;
 3316                                 cache = &z->uz_cpu[i];
 3317                                 if (cache->uc_allocbucket != NULL)
 3318                                         ups.ups_cache_free +=
 3319                                             cache->uc_allocbucket->ub_cnt;
 3320                                 if (cache->uc_freebucket != NULL)
 3321                                         ups.ups_cache_free +=
 3322                                             cache->uc_freebucket->ub_cnt;
 3323                                 ups.ups_allocs = cache->uc_allocs;
 3324                                 ups.ups_frees = cache->uc_frees;
 3325 skip:
 3326                                 (void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
 3327                         }
 3328                         ZONE_UNLOCK(z);
 3329                 }
 3330         }
 3331         mtx_unlock(&uma_mtx);
 3332         error = sbuf_finish(&sbuf);
 3333         sbuf_delete(&sbuf);
 3334         return (error);
 3335 }
 3336 
 3337 #ifdef DDB
 3338 DB_SHOW_COMMAND(uma, db_show_uma)
 3339 {
 3340         uint64_t allocs, frees, sleeps;
 3341         uma_bucket_t bucket;
 3342         uma_keg_t kz;
 3343         uma_zone_t z;
 3344         int cachefree;
 3345 
 3346         db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
 3347             "Requests", "Sleeps");
 3348         LIST_FOREACH(kz, &uma_kegs, uk_link) {
 3349                 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
 3350                         if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
 3351                                 allocs = z->uz_allocs;
 3352                                 frees = z->uz_frees;
 3353                                 sleeps = z->uz_sleeps;
 3354                                 cachefree = 0;
 3355                         } else
 3356                                 uma_zone_sumstat(z, &cachefree, &allocs,
 3357                                     &frees, &sleeps);
 3358                         if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
 3359                             (LIST_FIRST(&kz->uk_zones) != z)))
 3360                                 cachefree += kz->uk_free;
 3361                         LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
 3362                                 cachefree += bucket->ub_cnt;
 3363                         db_printf("%18s %8ju %8jd %8d %12ju %8ju\n", z->uz_name,
 3364                             (uintmax_t)kz->uk_size,
 3365                             (intmax_t)(allocs - frees), cachefree,
 3366                             (uintmax_t)allocs, sleeps);
 3367                         if (db_pager_quit)
 3368                                 return;
 3369                 }
 3370         }
 3371 }
 3372 #endif

Cache object: 3c6f4f48f54bdc9f88c279bf16ca106d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.