FreeBSD/Linux Kernel Cross Reference
sys/vm/uma_core.c


    1 /*
    2  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice unmodified, this list of conditions, and the following
   10  *    disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25  *
   26  * $FreeBSD: releng/5.0/sys/vm/uma_core.c 107048 2002-11-18 08:27:14Z jeff $
   27  *
   28  */
   29 
   30 /*
   31  * uma_core.c  Implementation of the Universal Memory allocator
   32  *
   33  * This allocator is intended to replace the multitude of similar object caches
   34  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
   35  * efficient.  A primary design goal is to return unused memory to the rest of
   36  * the system.  This will make the system as a whole more flexible due to the
   37  * ability to move memory to subsystems that need it most instead of leaving
   38  * pools of reserved memory unused.
   39  *
   40  * The basic ideas stem from similar slab/zone based allocators whose algorithms
   41  * are well known.
   42  *
   43  */
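/*
 * An illustrative sketch of typical use: a consumer creates a zone once and
 * then allocates and frees fixed size items from it.  This sketch assumes the
 * uma_zalloc()/uma_zfree() convenience wrappers declared in uma.h and a
 * hypothetical "struct foo"; the flags and alignment shown are only one
 * reasonable choice, not a requirement of the allocator.
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *
 *	struct foo *fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree(foo_zone, fp);
 *	uma_zdestroy(foo_zone);
 */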
   44 
   45 /*
   46  * TODO:
   47  *      - Improve memory usage for large allocations
   48  *      - Investigate cache size adjustments
   49  */
   50 
   51 /* I should really use ktr.. */
   52 /*
   53 #define UMA_DEBUG 1
   54 #define UMA_DEBUG_ALLOC 1
   55 #define UMA_DEBUG_ALLOC_1 1
   56 */
   57 
   58 
   59 #include "opt_param.h"
   60 #include <sys/param.h>
   61 #include <sys/systm.h>
   62 #include <sys/kernel.h>
   63 #include <sys/types.h>
   64 #include <sys/queue.h>
   65 #include <sys/malloc.h>
   66 #include <sys/lock.h>
   67 #include <sys/sysctl.h>
   68 #include <sys/mutex.h>
   69 #include <sys/proc.h>
   70 #include <sys/smp.h>
   71 #include <sys/vmmeter.h>
   72 
   73 #include <vm/vm.h>
   74 #include <vm/vm_object.h>
   75 #include <vm/vm_page.h>
   76 #include <vm/vm_param.h>
   77 #include <vm/vm_map.h>
   78 #include <vm/vm_kern.h>
   79 #include <vm/vm_extern.h>
   80 #include <vm/uma.h>
   81 #include <vm/uma_int.h>
   82 #include <vm/uma_dbg.h>
   83 
   84 #include <machine/vmparam.h>
   85 
   86 /*
   87  * This is the zone from which all zones are spawned.  The idea is that even 
   88  * the zone heads are allocated from the allocator, so we use the bss section
   89  * to bootstrap us.
   90  */
   91 static struct uma_zone masterzone;
   92 static uma_zone_t zones = &masterzone;
   93 
   94 /* This is the zone from which all uma_slab_t's are allocated. */
   95 static uma_zone_t slabzone;
   96 
   97 /*
   98  * The initial hash tables come out of this zone so they can be allocated
   99  * prior to malloc coming up.
  100  */
  101 static uma_zone_t hashzone;
  102 
  103 /*
  104  * Zone that buckets come from.
  105  */
  106 static uma_zone_t bucketzone;
  107 
  108 /*
  109  * Are we allowed to allocate buckets?
  110  */
  111 static int bucketdisable = 1;
  112 
  113 /* Linked list of all zones in the system */
  114 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones); 
  115 
  116 /* This mutex protects the zone list */
  117 static struct mtx uma_mtx;
  118 
  119 /* Linked list of boot time pages */
  120 static LIST_HEAD(,uma_slab) uma_boot_pages =
  121     LIST_HEAD_INITIALIZER(&uma_boot_pages);
  122 
  123 /* Count of free boottime pages */
  124 static int uma_boot_free = 0;
  125 
  126 /* Is the VM done starting up? */
  127 static int booted = 0;
  128 
  129 /* This is the handle used to schedule our working set calculator */
  130 static struct callout uma_callout;
  131 
  132 /* This is mp_maxid + 1, for use while looping over each cpu */
  133 static int maxcpu;
  134 
  135 /*
  136  * This structure is passed as the zone ctor arg so that I don't have to create
  137  * a special allocation function just for zones.
  138  */
  139 struct uma_zctor_args {
  140         char *name;
  141         size_t size;
  142         uma_ctor ctor;
  143         uma_dtor dtor;
  144         uma_init uminit;
  145         uma_fini fini;
  146         int align;
  147         u_int16_t flags;
  148 };
  149 
  150 /* Prototypes.. */
  151 
  152 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
  153 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
  154 static void page_free(void *, int, u_int8_t);
  155 static uma_slab_t slab_zalloc(uma_zone_t, int);
  156 static void cache_drain(uma_zone_t);
  157 static void bucket_drain(uma_zone_t, uma_bucket_t);
  158 static void zone_drain(uma_zone_t);
  159 static void zone_ctor(void *, int, void *);
  160 static void zone_dtor(void *, int, void *);
  161 static void zero_init(void *, int);
  162 static void zone_small_init(uma_zone_t zone);
  163 static void zone_large_init(uma_zone_t zone);
  164 static void zone_foreach(void (*zfunc)(uma_zone_t));
  165 static void zone_timeout(uma_zone_t zone);
  166 static int hash_alloc(struct uma_hash *);
  167 static int hash_expand(struct uma_hash *, struct uma_hash *);
  168 static void hash_free(struct uma_hash *hash);
  169 static void uma_timeout(void *);
  170 static void uma_startup3(void);
  171 static void *uma_zalloc_internal(uma_zone_t, void *, int);
  172 static void uma_zfree_internal(uma_zone_t, void *, void *, int);
  173 static void bucket_enable(void);
  174 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
  175 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
  176 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
  177 
  178 void uma_print_zone(uma_zone_t);
  179 void uma_print_stats(void);
  180 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
  181 
  182 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
  183     NULL, 0, sysctl_vm_zone, "A", "Zone Info");
  184 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
  185 
  186 /*
  187  * This routine checks whether it's safe to enable buckets.
  188  */
  189 
  190 static void
  191 bucket_enable(void)
  192 {
  193         if (cnt.v_free_count < cnt.v_free_min)
  194                 bucketdisable = 1;
  195         else
  196                 bucketdisable = 0;
  197 }
  198 
  199 
  200 /*
  201  * Routine called by timeout to fire off time-interval-based calculations
  202  * (working set, stats, etc.).
  203  *
  204  * Arguments:
  205  *      arg   Unused
  206  * 
  207  * Returns:
  208  *      Nothing
  209  */
  210 static void
  211 uma_timeout(void *unused)
  212 {
  213         bucket_enable();
  214         zone_foreach(zone_timeout);
  215 
  216         /* Reschedule this event */
  217         callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
  218 }
  219 
  220 /*
  221  * Routine to perform timeout driven calculations.  This does the working set
  222  * calculation as well as hash expansion and per cpu statistics aggregation.
  223  *
  224  *  Arguments:
  225  *      zone  The zone to operate on
  226  *
  227  *  Returns:
  228  *      Nothing
  229  */
  230 static void
  231 zone_timeout(uma_zone_t zone)
  232 {
  233         uma_cache_t cache;
  234         u_int64_t alloc;
  235         int free;
  236         int cpu;
  237 
  238         alloc = 0;
  239         free = 0;
  240 
  241         /*
  242          * Aggregate per cpu cache statistics back to the zone.
  243          *
  244          * I may rewrite this to set a flag in the per cpu cache instead of
  245          * locking.  If the flag is not cleared on the next round I will have
  246          * to lock and do it here instead so that the statistics don't get too
  247          * far out of sync.
  248          */
  249         if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
  250                 for (cpu = 0; cpu < maxcpu; cpu++) {
  251                         if (CPU_ABSENT(cpu))
  252                                 continue;
  253                         CPU_LOCK(zone, cpu); 
  254                         cache = &zone->uz_cpu[cpu];
  255                         /* Add them up, and reset */
  256                         alloc += cache->uc_allocs;
  257                         cache->uc_allocs = 0;
  258                         if (cache->uc_allocbucket)
  259                                 free += cache->uc_allocbucket->ub_ptr + 1;
  260                         if (cache->uc_freebucket)
  261                                 free += cache->uc_freebucket->ub_ptr + 1;
  262                         CPU_UNLOCK(zone, cpu);
  263                 }
  264         }
  265 
  266         /* Now push these stats back into the zone.. */
  267         ZONE_LOCK(zone);
  268         zone->uz_allocs += alloc;
  269 
  270         /*
  271          * cachefree is an instantaneous snapshot of what is in the per cpu
  272          * caches, not an accurate counter
  273          */
  274         zone->uz_cachefree = free;
  275 
  276         /*
  277          * Expand the zone hash table.
  278          * 
  279          * This is done if the number of slabs is larger than the hash size.
  280          * What I'm trying to do here is eliminate collisions entirely.  This
  281          * may be a little aggressive.  Should I allow for two collisions max?
  282          */
  283 
  284         if (zone->uz_flags & UMA_ZFLAG_HASH &&
  285             zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
  286                 struct uma_hash newhash;
  287                 struct uma_hash oldhash;
  288                 int ret;
  289 
  290                 /*
  291                  * This is so involved because allocating and freeing 
  292                  * while the zone lock is held will lead to deadlock.
  293                  * I have to do everything in stages and check for
  294                  * races.
  295                  */
  296                 newhash = zone->uz_hash;
  297                 ZONE_UNLOCK(zone);
  298                 ret = hash_alloc(&newhash);
  299                 ZONE_LOCK(zone);
  300                 if (ret) {
  301                         if (hash_expand(&zone->uz_hash, &newhash)) {
  302                                 oldhash = zone->uz_hash;
  303                                 zone->uz_hash = newhash;
  304                         } else
  305                                 oldhash = newhash;
  306 
  307                         ZONE_UNLOCK(zone);
  308                         hash_free(&oldhash);
  309                         ZONE_LOCK(zone);
  310                 }
  311         }
  312 
  313         /*
  314          * Here we compute the working set size as the total number of items 
  315          * left outstanding since the last time interval.  This is slightly
  316          * suboptimal. What we really want is the highest number of outstanding
  317          * items during the last time quantum.  This should be close enough.
  318          *
  319          * The working set size is used to throttle the zone_drain function.
  320          * We don't want to return memory that we may need again immediately.
  321          */
  322         alloc = zone->uz_allocs - zone->uz_oallocs;
  323         zone->uz_oallocs = zone->uz_allocs;
  324         zone->uz_wssize = alloc;
  325 
  326         ZONE_UNLOCK(zone);
  327 }
  328 
  329 /*
  330  * Allocate and zero fill the next sized hash table from the appropriate
  331  * backing store.
  332  *
  333  * Arguments:
  334  *      hash  A new hash structure with the old hash size in uh_hashsize
  335  *
  336  * Returns:
  337  *      1 on success and 0 on failure.
  338  */
  339 static int
  340 hash_alloc(struct uma_hash *hash)
  341 {
  342         int oldsize;
  343         int alloc;
  344 
  345         oldsize = hash->uh_hashsize;
  346 
  347         /* We're just going to the next larger power of two */
  348         if (oldsize)  {
  349                 hash->uh_hashsize = oldsize * 2;
  350                 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
  351                 /* XXX Shouldn't be abusing DEVBUF here */
  352                 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
  353                     M_DEVBUF, M_NOWAIT);
  354         } else {
  355                 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
  356                 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
  357                     M_WAITOK);
  358                 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
  359         }
  360         if (hash->uh_slab_hash) {
  361                 bzero(hash->uh_slab_hash, alloc);
  362                 hash->uh_hashmask = hash->uh_hashsize - 1;
  363                 return (1);
  364         }
  365 
  366         return (0);
  367 }
  368 
  369 /*
  370  * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
  371  * to reduce collisions.  This must not be done in the regular allocation path;
  372  * otherwise, we can recurse on the vm while allocating pages.
  373  *
  374  * Arguments:
  375  *      oldhash  The hash you want to expand 
  376  *      newhash  The hash structure for the new table
  377  *
  378  * Returns:
  379  *      1 if the table was expanded, 0 otherwise.
  380  *
  381  * Discussion:
  382  */
  383 static int
  384 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
  385 {
  386         uma_slab_t slab;
  387         int hval;
  388         int i;
  389 
  390         if (!newhash->uh_slab_hash)
  391                 return (0);
  392 
  393         if (oldhash->uh_hashsize >= newhash->uh_hashsize)
  394                 return (0);
  395 
  396         /*
  397          * I need to investigate hash algorithms for resizing without a
  398          * full rehash.
  399          */
  400 
  401         for (i = 0; i < oldhash->uh_hashsize; i++)
  402                 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
  403                         slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
  404                         SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
  405                         hval = UMA_HASH(newhash, slab->us_data);
  406                         SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
  407                             slab, us_hlink);
  408                 }
  409 
  410         return (1);
  411 }
  412 
  413 /*
  414  * Free the hash bucket to the appropriate backing store.
  415  *
  416  * Arguments:
  417  *      hash  The hash structure whose slab hash table is being freed;
  418  *            its uh_hashsize selects the backing store to return it to
  419  *
  420  * Returns:
  421  *      Nothing
  422  */
  423 static void
  424 hash_free(struct uma_hash *hash)
  425 {
  426         if (hash->uh_slab_hash == NULL)
  427                 return;
  428         if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
  429                 uma_zfree_internal(hashzone,
  430                     hash->uh_slab_hash, NULL, 0);
  431         else
  432                 free(hash->uh_slab_hash, M_DEVBUF);
  433 }
  434 
  435 /*
  436  * Frees all outstanding items in a bucket
  437  *
  438  * Arguments:
  439  *      zone   The zone to free to, must be unlocked.
  440  *      bucket The free/alloc bucket with items, cpu queue must be locked.
  441  *
  442  * Returns:
  443  *      Nothing
  444  */
  445 
  446 static void
  447 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
  448 {
  449         uma_slab_t slab;
  450         int mzone;
  451         void *item;
  452 
  453         if (bucket == NULL)
  454                 return;
  455 
  456         slab = NULL;
  457         mzone = 0;
  458 
  459         /* We have to look up the slab again for malloc.. */
  460         if (zone->uz_flags & UMA_ZFLAG_MALLOC)
  461                 mzone = 1;
  462 
  463         while (bucket->ub_ptr > -1)  {
  464                 item = bucket->ub_bucket[bucket->ub_ptr];
  465 #ifdef INVARIANTS
  466                 bucket->ub_bucket[bucket->ub_ptr] = NULL;
  467                 KASSERT(item != NULL,
  468                     ("bucket_drain: botched ptr, item is NULL"));
  469 #endif
  470                 bucket->ub_ptr--;
  471                 /* 
  472                  * This is extremely inefficient.  The slab pointer was passed
  473                  * to uma_zfree_arg, but we lost it because the buckets don't
  474                  * hold them.  This will go away when free() gets a size passed
  475                  * to it.
  476                  */
  477                 if (mzone)
  478                         slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
  479                 uma_zfree_internal(zone, item, slab, 1);
  480         }
  481 }
  482 
  483 /*
  484  * Drains the per cpu caches for a zone.
  485  *
  486  * Arguments:
  487  *      zone  The zone to drain, must be unlocked.
  488  *
  489  * Returns:
  490  *      Nothing
  491  *
  492  * This function returns with the zone locked so that the per cpu queues
  493  * cannot be filled until zone_drain is finished.
  494  *
  495  */
  496 static void
  497 cache_drain(uma_zone_t zone)
  498 {
  499         uma_bucket_t bucket;
  500         uma_cache_t cache;
  501         int cpu;
  502 
  503         /*
  504          * Flush out the per cpu queues.
  505          *
  506          * XXX This causes unnecessary thrashing due to immediately having
  507          * empty per cpu queues.  I need to improve this.
  508          */
  509 
  510         /*
  511          * We have to lock each cpu cache before locking the zone
  512          */
  513         ZONE_UNLOCK(zone);
  514 
  515         for (cpu = 0; cpu < maxcpu; cpu++) {
  516                 if (CPU_ABSENT(cpu))
  517                         continue;
  518                 CPU_LOCK(zone, cpu);
  519                 cache = &zone->uz_cpu[cpu];
  520                 bucket_drain(zone, cache->uc_allocbucket);
  521                 bucket_drain(zone, cache->uc_freebucket);
  522         }
  523 
  524         /*
  525          * Drain the bucket queues and free the buckets, we just keep two per
  526          * cpu (alloc/free).
  527          */
  528         ZONE_LOCK(zone);
  529         while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
  530                 LIST_REMOVE(bucket, ub_link);
  531                 ZONE_UNLOCK(zone);
  532                 bucket_drain(zone, bucket);
  533                 uma_zfree_internal(bucketzone, bucket, NULL, 0);
  534                 ZONE_LOCK(zone);
  535         }
  536 
  537         /* Now we do the free queue.. */
  538         while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
  539                 LIST_REMOVE(bucket, ub_link);
  540                 uma_zfree_internal(bucketzone, bucket, NULL, 0);
  541         }
  542 
  543         /* We unlock here, but they will all block until the zone is unlocked */
  544         for (cpu = 0; cpu < maxcpu; cpu++) {
  545                 if (CPU_ABSENT(cpu))
  546                         continue;
  547                 CPU_UNLOCK(zone, cpu);
  548         }
  549 
  550         zone->uz_cachefree = 0;
  551 }
  552 
  553 /*
  554  * Frees pages from a zone back to the system.  This is done on demand from
  555  * the pageout daemon.
  556  *
  557  * Arguments:
  558  *      zone  The zone to free pages from; its per cpu caches are
  559  *            drained first unless the zone is internal
  560  *
  561  * Returns:
  562  *      Nothing.
  563  */
  564 static void
  565 zone_drain(uma_zone_t zone)
  566 {
  567         struct slabhead freeslabs = {};
  568         uma_slab_t slab;
  569         uma_slab_t n;
  570         u_int64_t extra;
  571         u_int8_t flags;
  572         u_int8_t *mem;
  573         int i;
  574 
  575         /*
  576          * We don't want to take pages from statically allocated zones at this
  577          * time.
  578          */
  579         if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
  580                 return;
  581 
  582         ZONE_LOCK(zone);
  583 
  584         if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
  585                 cache_drain(zone);
  586 
  587         if (zone->uz_free < zone->uz_wssize)
  588                 goto finished;
  589 #ifdef UMA_DEBUG
  590         printf("%s working set size: %llu free items: %u\n",
  591             zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
  592 #endif
  593         extra = zone->uz_free - zone->uz_wssize;
  594         extra /= zone->uz_ipers;
  595 
  596         /* extra is now the number of extra slabs that we can free */
  597 
  598         if (extra == 0)
  599                 goto finished;
  600 
  601         slab = LIST_FIRST(&zone->uz_free_slab);
  602         while (slab && extra) {
  603                 n = LIST_NEXT(slab, us_link);
  604 
  605                 /* We have nowhere to free these to */
  606                 if (slab->us_flags & UMA_SLAB_BOOT) {
  607                         slab = n;
  608                         continue;
  609                 }
  610 
  611                 LIST_REMOVE(slab, us_link);
  612                 zone->uz_pages -= zone->uz_ppera;
  613                 zone->uz_free -= zone->uz_ipers;
  614 
  615                 if (zone->uz_flags & UMA_ZFLAG_HASH)
  616                         UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
  617 
  618                 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
  619 
  620                 slab = n;
  621                 extra--;
  622         }
  623 finished:
  624         ZONE_UNLOCK(zone);
  625 
  626         while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
  627                 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
  628                 if (zone->uz_fini)
  629                         for (i = 0; i < zone->uz_ipers; i++)
  630                                 zone->uz_fini(
  631                                     slab->us_data + (zone->uz_rsize * i),
  632                                     zone->uz_size);
  633                 flags = slab->us_flags;
  634                 mem = slab->us_data;
  635 
  636                 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
  637                         uma_zfree_internal(slabzone, slab, NULL, 0);
  638                 if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
  639                         vm_object_t obj;
  640 
  641                         if (flags & UMA_SLAB_KMEM)
  642                                 obj = kmem_object;
  643                         else
  644                                 obj = NULL;
  645                         for (i = 0; i < zone->uz_ppera; i++)
  646                                 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
  647                                     obj);
  648                 }
  649 #ifdef UMA_DEBUG
  650                 printf("%s: Returning %d bytes.\n",
  651                     zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
  652 #endif
  653                 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
  654         }
  655 
  656 }
  657 
  658 /*
  659  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
  660  *
  661  * Arguments:
  662  *      zone  The zone to allocate slabs for
  663  *      wait  Shall we wait?
  664  *
  665  * Returns:
  666  *      The slab that was allocated or NULL if there is no memory and the
  667  *      caller specified M_NOWAIT.
  668  *      
  669  */
  670 static uma_slab_t 
  671 slab_zalloc(uma_zone_t zone, int wait)
  672 {
  673         uma_slab_t slab;        /* Starting slab */
  674         u_int8_t *mem;
  675         u_int8_t flags;
  676         int i;
  677 
  678         slab = NULL;
  679 
  680 #ifdef UMA_DEBUG
  681         printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
  682 #endif
  683         ZONE_UNLOCK(zone);
  684 
  685         if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
  686                 slab = uma_zalloc_internal(slabzone, NULL, wait);
  687                 if (slab == NULL) {
  688                         ZONE_LOCK(zone);
  689                         return NULL;
  690                 }
  691         }
  692 
  693         /*
  694          * This reproduces the old vm_zone behavior of zero filling pages the
  695          * first time they are added to a zone.
  696          *
  697          * Malloced items are zeroed in uma_zalloc.
  698          */
  699 
  700         if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
  701                 wait |= M_ZERO;
  702         else
  703                 wait &= ~M_ZERO;
  704 
  705         if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
  706                 mtx_lock(&Giant);
  707                 mem = zone->uz_allocf(zone, 
  708                     zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
  709                 mtx_unlock(&Giant);
  710                 if (mem == NULL) {
  711                         ZONE_LOCK(zone);
  712                         return (NULL);
  713                 }
  714         } else {
  715                 uma_slab_t tmps;
  716 
  717                 if (zone->uz_ppera > 1)
  718                         panic("UMA: Attemping to allocate multiple pages before vm has started.\n");
  719                 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
  720                         panic("Mallocing before uma_startup2 has been called.\n");
  721                 if (uma_boot_free == 0)
  722                         panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
  723                 tmps = LIST_FIRST(&uma_boot_pages);
  724                 LIST_REMOVE(tmps, us_link);
  725                 uma_boot_free--;
  726                 mem = tmps->us_data;
  727                 flags = tmps->us_flags;
  728         }
  729 
  730         /* Point the slab into the allocated memory */
  731         if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
  732                 slab = (uma_slab_t )(mem + zone->uz_pgoff);
  733 
  734         if (zone->uz_flags & UMA_ZFLAG_MALLOC)
  735                 for (i = 0; i < zone->uz_ppera; i++)
  736                         vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
  737 
  738         slab->us_zone = zone;
  739         slab->us_data = mem;
  740 
  741         /*
  742          * This is intended to spread data out across cache lines.
  743          *
  744          * This code doesn't seem to work properly on x86, and on alpha
  745          * it makes absolutely no performance difference. I'm sure it could
  746          * use some tuning, but Sun makes outrageous claims about its
  747          * performance.
  748          */
  749 #if 0
  750         if (zone->uz_cachemax) {
  751                 slab->us_data += zone->uz_cacheoff;
  752                 zone->uz_cacheoff += UMA_CACHE_INC;
  753                 if (zone->uz_cacheoff > zone->uz_cachemax)
  754                         zone->uz_cacheoff = 0;
  755         }
  756 #endif
  757         
  758         slab->us_freecount = zone->uz_ipers;
  759         slab->us_firstfree = 0;
  760         slab->us_flags = flags;
  761         for (i = 0; i < zone->uz_ipers; i++)
  762                 slab->us_freelist[i] = i+1;
  763 
  764         if (zone->uz_init)
  765                 for (i = 0; i < zone->uz_ipers; i++)
  766                         zone->uz_init(slab->us_data + (zone->uz_rsize * i),
  767                             zone->uz_size);
  768         ZONE_LOCK(zone);
  769 
  770         if (zone->uz_flags & UMA_ZFLAG_HASH)
  771                 UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
  772 
  773         zone->uz_pages += zone->uz_ppera;
  774         zone->uz_free += zone->uz_ipers;
  775 
  776 
  777         return (slab);
  778 }
  779 
  780 /*
  781  * Allocates a number of pages from the system
  782  *
  783  * Arguments:
  784  *      zone  Unused
  785  *      bytes  The number of bytes requested
  786  *      wait  Shall we wait?
  787  *
  788  * Returns:
  789  *      A pointer to the allocated memory or possibly
  790  *      NULL if M_NOWAIT is set.
  791  */
  792 static void *
  793 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
  794 {
  795         void *p;        /* Returned page */
  796 
  797         *pflag = UMA_SLAB_KMEM;
  798         p = (void *) kmem_malloc(kmem_map, bytes, wait);
  799   
  800         return (p);
  801 }
  802 
  803 /*
  804  * Allocates a number of pages from within an object
  805  *
  806  * Arguments:
  807  *      zone   The zone whose VM object and kva range back the pages
  808  *      bytes  The number of bytes requested
  809  *      wait   Shall we wait?
  810  *
  811  * Returns:
  812  *      A pointer to the allocated memory or possibly
  813  *      NULL if M_NOWAIT is set.
  814  *
  815  * TODO: If we fail during a multi-page allocation, release the pages that have
  816  *       already been allocated.
  817  */
  818 static void *
  819 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
  820 {
  821         vm_offset_t zkva;
  822         vm_offset_t retkva;
  823         vm_page_t p;
  824         int pages;
  825 
  826         retkva = 0;
  827         pages = zone->uz_pages;
  828 
  829         /* 
  830          * This looks a little weird since we're getting one page at a time
  831          */
  832         while (bytes > 0) {
  833                 p = vm_page_alloc(zone->uz_obj, pages,
  834                     VM_ALLOC_INTERRUPT);
  835                 if (p == NULL)
  836                         return (NULL);
  837 
  838                 zkva = zone->uz_kva + pages * PAGE_SIZE;
  839                 if (retkva == 0)
  840                         retkva = zkva;
  841                 pmap_qenter(zkva, &p, 1);
  842                 bytes -= PAGE_SIZE;
  843                 pages += 1;
  844         }
  845 
  846         *flags = UMA_SLAB_PRIV;
  847 
  848         return ((void *)retkva);
  849 }
  850 
  851 /*
  852  * Frees a number of pages to the system
  853  * 
  854  * Arguments:
  855  *      mem   A pointer to the memory to be freed
  856  *      size  The size of the memory being freed
  857  *      flags The original p->us_flags field
  858  *
  859  * Returns:
  860  *      Nothing
  861  *
  862  */
  863 static void
  864 page_free(void *mem, int size, u_int8_t flags)
  865 {
  866         vm_map_t map;
  867 
  868         if (flags & UMA_SLAB_KMEM)
  869                 map = kmem_map;
  870         else
  871                 panic("UMA: page_free used with invalid flags %d\n", flags);
  872 
  873         kmem_free(map, (vm_offset_t)mem, size);
  874 }
  875 
  876 /*
  877  * Zero fill initializer
  878  *
  879  * Arguments/Returns follow uma_init specifications
  880  *
  881  */
  882 static void
  883 zero_init(void *mem, int size)
  884 {
  885         bzero(mem, size);
  886 }
  887 
  888 /*
  889  * Finish creating a small uma zone.  This calculates ipers and the zone size.
  890  *
  891  * Arguments
  892  *      zone  The zone we should initialize
  893  *
  894  * Returns
  895  *      Nothing
  896  */
  897 static void
  898 zone_small_init(uma_zone_t zone)
  899 {
  900         int rsize;
  901         int memused;
  902         int ipers;
  903 
  904         rsize = zone->uz_size;
  905 
  906         if (rsize < UMA_SMALLEST_UNIT)
  907                 rsize = UMA_SMALLEST_UNIT;
  908 
  909         if (rsize & zone->uz_align)
  910                 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
  911 
  912         zone->uz_rsize = rsize;
  913 
  914         rsize += 1;     /* Account for the byte of linkage */
  915         zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
  916         zone->uz_ppera = 1;
  917 
  918         memused = zone->uz_ipers * zone->uz_rsize;
  919 
  920         /* Can we do any better? */
  921         if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
  922                 if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 
  923                         return;
  924                 ipers = UMA_SLAB_SIZE / zone->uz_rsize;
  925                 if (ipers > zone->uz_ipers) {
  926                         zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
  927                         if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
  928                                 zone->uz_flags |= UMA_ZFLAG_HASH;
  929                         zone->uz_ipers = ipers;
  930                 }
  931         }
  932 
  933 }
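/*
 * Worked example with assumed numbers: given a 4 KB slab and a slab header
 * of, say, 100 bytes, a 100 byte item with an 8 byte alignment requirement
 * rounds up to rsize = 104; the one byte free list linkage makes that 105
 * bytes per item, so ipers = (4096 - 100) / 105 = 38 with the header kept
 * inside the slab, versus 4096 / 104 = 39 items if the header is pushed
 * off page.
 */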
  934 
  935 /*
  936  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do 
  937  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
  938  * more complicated.
  939  *
  940  * Arguments
  941  *      zone  The zone we should initialize
  942  *
  943  * Returns
  944  *      Nothing
  945  */
  946 static void
  947 zone_large_init(uma_zone_t zone)
  948 {       
  949         int pages;
  950 
  951         pages = zone->uz_size / UMA_SLAB_SIZE;
  952 
  953         /* Account for remainder */
  954         if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
  955                 pages++;
  956 
  957         zone->uz_ppera = pages;
  958         zone->uz_ipers = 1;
  959 
  960         zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
  961         if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
  962                 zone->uz_flags |= UMA_ZFLAG_HASH;
  963 
  964         zone->uz_rsize = zone->uz_size;
  965 }
  966 
  967 /* 
  968  * Zone header ctor.  This initializes all fields, locks, etc., and inserts
  969  * the zone onto the global zone list.
  970  *
  971  * Arguments/Returns follow uma_ctor specifications
  972  *      udata  Actually uma_zctor_args
  973  *
  974  */
  975 
  976 static void
  977 zone_ctor(void *mem, int size, void *udata)
  978 {
  979         struct uma_zctor_args *arg = udata;
  980         uma_zone_t zone = mem;
  981         int privlc;
  982         int cplen;
  983         int cpu;
  984 
  985         bzero(zone, size);
  986         zone->uz_name = arg->name;
  987         zone->uz_size = arg->size;
  988         zone->uz_ctor = arg->ctor;
  989         zone->uz_dtor = arg->dtor;
  990         zone->uz_init = arg->uminit;
  991         zone->uz_fini = arg->fini;
  992         zone->uz_align = arg->align;
  993         zone->uz_free = 0;
  994         zone->uz_pages = 0;
  995         zone->uz_flags = 0;
  996         zone->uz_allocf = page_alloc;
  997         zone->uz_freef = page_free;
  998 
  999         if (arg->flags & UMA_ZONE_ZINIT)
 1000                 zone->uz_init = zero_init;
 1001 
 1002         if (arg->flags & UMA_ZONE_INTERNAL)
 1003                 zone->uz_flags |= UMA_ZFLAG_INTERNAL;
 1004 
 1005         if (arg->flags & UMA_ZONE_MALLOC)
 1006                 zone->uz_flags |= UMA_ZFLAG_MALLOC;
 1007 
 1008         if (arg->flags & UMA_ZONE_NOFREE)
 1009                 zone->uz_flags |= UMA_ZFLAG_NOFREE;
 1010 
 1011         if (arg->flags & UMA_ZONE_VM)
 1012                 zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
 1013 
 1014         if (zone->uz_size > UMA_SLAB_SIZE)
 1015                 zone_large_init(zone);
 1016         else
 1017                 zone_small_init(zone);
 1018 #ifdef UMA_MD_SMALL_ALLOC
 1019         if (zone->uz_ppera == 1) {
 1020                 zone->uz_allocf = uma_small_alloc;
 1021                 zone->uz_freef = uma_small_free;
 1022         }
 1023 #endif  /* UMA_MD_SMALL_ALLOC */
 1024 
 1025         if (arg->flags & UMA_ZONE_MTXCLASS)
 1026                 privlc = 1;
 1027         else
 1028                 privlc = 0;
 1029 
 1030         /* We do this so that the per cpu lock name is unique for each zone */
 1031         memcpy(zone->uz_lname, "PCPU ", 5);
 1032         cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
 1033         memcpy(zone->uz_lname+5, zone->uz_name, cplen);
 1034         zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
 1035 
 1036         /*
 1037          * If we're putting the slab header in the actual page we need to
 1038          * figure out where in each page it goes.  This calculates a
 1039          * right-justified offset into the memory on a UMA_ALIGN_PTR boundary.
 1040          */
 1041         if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
 1042                 int totsize;
 1043                 int waste;
 1044 
 1045                 /* Size of the slab struct and free list */
 1046                 totsize = sizeof(struct uma_slab) + zone->uz_ipers;
 1047                 if (totsize & UMA_ALIGN_PTR)
 1048                         totsize = (totsize & ~UMA_ALIGN_PTR) +
 1049                             (UMA_ALIGN_PTR + 1);
 1050                 zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
 1051 
 1052                 waste = zone->uz_pgoff;
 1053                 waste -= (zone->uz_ipers * zone->uz_rsize);
 1054 
 1055                 /*
 1056                  * This calculates how much space we have for cache line size
 1057                  * optimizations.  It works by offsetting each slab slightly.
 1058                  * Currently it breaks on x86, and so it is disabled.
 1059                  */
 1060 
 1061                 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
 1062                         zone->uz_cachemax = waste - UMA_CACHE_INC;
 1063                         zone->uz_cacheoff = 0;
 1064                 } 
 1065 
 1066                 totsize = zone->uz_pgoff + sizeof(struct uma_slab)
 1067                     + zone->uz_ipers;
 1068                 /* I don't think it's possible, but I'll make sure anyway */
 1069                 if (totsize > UMA_SLAB_SIZE) {
 1070                         printf("zone %s ipers %d rsize %d size %d\n",
 1071                             zone->uz_name, zone->uz_ipers, zone->uz_rsize,
 1072                             zone->uz_size);
 1073                         panic("UMA slab won't fit.\n");
 1074                 }
 1075         }
 1076 
 1077         if (zone->uz_flags & UMA_ZFLAG_HASH)
 1078                 hash_alloc(&zone->uz_hash);
 1079 
 1080 #ifdef UMA_DEBUG
 1081         printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
 1082             zone->uz_name, zone,
 1083             zone->uz_size, zone->uz_ipers,
 1084             zone->uz_ppera, zone->uz_pgoff);
 1085 #endif
 1086         ZONE_LOCK_INIT(zone, privlc);
 1087 
 1088         mtx_lock(&uma_mtx);
 1089         LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
 1090         mtx_unlock(&uma_mtx);
 1091 
 1092         /*
 1093          * Some internal zones don't have room allocated for the per cpu
 1094          * caches.  If we're internal, bail out here.
 1095          */
 1096 
 1097         if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
 1098                 return;
 1099 
 1100         if (zone->uz_ipers < UMA_BUCKET_SIZE)
 1101                 zone->uz_count = zone->uz_ipers - 1;
 1102         else
 1103                 zone->uz_count = UMA_BUCKET_SIZE - 1;
 1104 
 1105         for (cpu = 0; cpu < maxcpu; cpu++)
 1106                 CPU_LOCK_INIT(zone, cpu, privlc);
 1107 }
 1108 
 1109 /* 
 1110  * Zone header dtor.  This frees all data, destroys locks, frees the hash table
 1111  * and removes the zone from the global list.
 1112  *
 1113  * Arguments/Returns follow uma_dtor specifications
 1114  *      udata  unused
 1115  */
 1116 
 1117 static void
 1118 zone_dtor(void *arg, int size, void *udata)
 1119 {
 1120         uma_zone_t zone;
 1121         int cpu;
 1122 
 1123         zone = (uma_zone_t)arg;
 1124 
 1125         ZONE_LOCK(zone);
 1126         zone->uz_wssize = 0;
 1127         ZONE_UNLOCK(zone);
 1128 
 1129         mtx_lock(&uma_mtx);
 1130         LIST_REMOVE(zone, uz_link);
 1131         zone_drain(zone);
 1132         mtx_unlock(&uma_mtx);
 1133 
 1134         ZONE_LOCK(zone);
 1135         if (zone->uz_free != 0)
 1136                 printf("Zone %s was not empty.  Lost %d pages of memory.\n",
 1137                     zone->uz_name, zone->uz_pages);
 1138 
 1139         if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
 1140                 for (cpu = 0; cpu < maxcpu; cpu++)
 1141                         CPU_LOCK_FINI(zone, cpu);
 1142 
 1143         ZONE_UNLOCK(zone);
 1144         if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
 1145                 hash_free(&zone->uz_hash);
 1146 
 1147         ZONE_LOCK_FINI(zone);
 1148 }
 1149 /*
 1150  * Traverses every zone in the system and calls a callback
 1151  *
 1152  * Arguments:
 1153  *      zfunc  A pointer to a function which accepts a zone
 1154  *              as an argument.
 1155  * 
 1156  * Returns:
 1157  *      Nothing
 1158  */
 1159 static void 
 1160 zone_foreach(void (*zfunc)(uma_zone_t))
 1161 {
 1162         uma_zone_t zone;
 1163 
 1164         mtx_lock(&uma_mtx);
 1165         LIST_FOREACH(zone, &uma_zones, uz_link) {
 1166                 zfunc(zone);
 1167         }
 1168         mtx_unlock(&uma_mtx);
 1169 }
 1170 
 1171 /* Public functions */
 1172 /* See uma.h */
 1173 void
 1174 uma_startup(void *bootmem)
 1175 {
 1176         struct uma_zctor_args args;
 1177         uma_slab_t slab;
 1178         int slabsize;
 1179         int i;
 1180 
 1181 #ifdef UMA_DEBUG
 1182         printf("Creating uma zone headers zone.\n");
 1183 #endif
 1184 #ifdef SMP
 1185         maxcpu = mp_maxid + 1;
 1186 #else
 1187         maxcpu = 1;
 1188 #endif
 1189 #ifdef UMA_DEBUG 
 1190         printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
 1191         Debugger("stop");
 1192 #endif
 1193         mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
 1194         /* "manually" Create the initial zone */
 1195         args.name = "UMA Zones";
 1196         args.size = sizeof(struct uma_zone) +
 1197             (sizeof(struct uma_cache) * (maxcpu - 1));
 1198         args.ctor = zone_ctor;
 1199         args.dtor = zone_dtor;
 1200         args.uminit = zero_init;
 1201         args.fini = NULL;
 1202         args.align = 32 - 1;
 1203         args.flags = UMA_ZONE_INTERNAL;
 1204         /* The initial zone has no per cpu queues, so it's smaller */
 1205         zone_ctor(zones, sizeof(struct uma_zone), &args);
 1206 
 1207 #ifdef UMA_DEBUG
 1208         printf("Filling boot free list.\n");
 1209 #endif
 1210         for (i = 0; i < UMA_BOOT_PAGES; i++) {
 1211                 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
 1212                 slab->us_data = (u_int8_t *)slab;
 1213                 slab->us_flags = UMA_SLAB_BOOT;
 1214                 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
 1215                 uma_boot_free++;
 1216         }
 1217 
 1218 #ifdef UMA_DEBUG
 1219         printf("Creating slab zone.\n");
 1220 #endif
 1221 
 1222         /*
 1223          * This is the max number of free list items we'll have with
 1224          * offpage slabs.
 1225          */
 1226 
 1227         slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
 1228         slabsize /= UMA_MAX_WASTE;
 1229         slabsize++;                     /* In case the division rounded down */
 1230         slabsize += sizeof(struct uma_slab);
 1231 
 1232         /* Now make a zone for slab headers */
 1233         slabzone = uma_zcreate("UMA Slabs",
 1234                                 slabsize,
 1235                                 NULL, NULL, NULL, NULL,
 1236                                 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
 1237 
 1238         hashzone = uma_zcreate("UMA Hash",
 1239             sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
 1240             NULL, NULL, NULL, NULL,
 1241             UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
 1242 
 1243         bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
 1244             NULL, NULL, NULL, NULL,
 1245             UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
 1246 
 1247 #ifdef UMA_MD_SMALL_ALLOC
 1248         booted = 1;
 1249 #endif
 1250 
 1251 #ifdef UMA_DEBUG
 1252         printf("UMA startup complete.\n");
 1253 #endif
 1254 }
 1255 
 1256 /* see uma.h */
 1257 void
 1258 uma_startup2(void)
 1259 {
 1260         booted = 1;
 1261         bucket_enable();
 1262 #ifdef UMA_DEBUG
 1263         printf("UMA startup2 complete.\n");
 1264 #endif
 1265 }
 1266 
 1267 /*
 1268  * Initialize our callout handle
 1269  *
 1270  */
 1271 
 1272 static void
 1273 uma_startup3(void)
 1274 {
 1275 #ifdef UMA_DEBUG
 1276         printf("Starting callout.\n");
 1277 #endif
 1278         callout_init(&uma_callout, 0);
 1279         callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
 1280 #ifdef UMA_DEBUG
 1281         printf("UMA startup3 complete.\n");
 1282 #endif
 1283 }
 1284 
 1285 /* See uma.h */
 1286 uma_zone_t  
 1287 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
 1288                 uma_init uminit, uma_fini fini, int align, u_int16_t flags)
 1289                      
 1290 {
 1291         struct uma_zctor_args args;
 1292 
 1293         /* This stuff is essential for the zone ctor */
 1294         args.name = name;
 1295         args.size = size;
 1296         args.ctor = ctor;
 1297         args.dtor = dtor;
 1298         args.uminit = uminit;
 1299         args.fini = fini;
 1300         args.align = align;
 1301         args.flags = flags;
 1302 
 1303         return (uma_zalloc_internal(zones, &args, M_WAITOK));
 1304 }
 1305 
 1306 /* See uma.h */
 1307 void
 1308 uma_zdestroy(uma_zone_t zone)
 1309 {
 1310         uma_zfree_internal(zones, zone, NULL, 0);
 1311 }
 1312 
 1313 /* See uma.h */
 1314 void *
 1315 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 1316 {
 1317         void *item;
 1318         uma_cache_t cache;
 1319         uma_bucket_t bucket;
 1320         int cpu;
 1321 
 1322         /* This is the fast path allocation */
 1323 #ifdef UMA_DEBUG_ALLOC_1
 1324         printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
 1325 #endif
 1326 
 1327         if (!(flags & M_NOWAIT)) {
 1328                 KASSERT(curthread->td_intr_nesting_level == 0,
 1329                    ("malloc(M_WAITOK) in interrupt context"));
 1330                 WITNESS_SLEEP(1, NULL);
 1331         }
 1332 
 1333 zalloc_restart:
 1334         cpu = PCPU_GET(cpuid);
 1335         CPU_LOCK(zone, cpu);
 1336         cache = &zone->uz_cpu[cpu];
 1337 
 1338 zalloc_start:
 1339         bucket = cache->uc_allocbucket;
 1340 
 1341         if (bucket) {
 1342                 if (bucket->ub_ptr > -1) {
 1343                         item = bucket->ub_bucket[bucket->ub_ptr];
 1344 #ifdef INVARIANTS
 1345                         bucket->ub_bucket[bucket->ub_ptr] = NULL;
 1346 #endif
 1347                         bucket->ub_ptr--;
 1348                         KASSERT(item != NULL,
 1349                             ("uma_zalloc: Bucket pointer mangled."));
 1350                         cache->uc_allocs++;
 1351 #ifdef INVARIANTS
 1352                         ZONE_LOCK(zone);
 1353                         uma_dbg_alloc(zone, NULL, item);
 1354                         ZONE_UNLOCK(zone);
 1355 #endif
 1356                         CPU_UNLOCK(zone, cpu);
 1357                         if (zone->uz_ctor)
 1358                                 zone->uz_ctor(item, zone->uz_size, udata);
 1359                         if (flags & M_ZERO)
 1360                                 bzero(item, zone->uz_size);
 1361                         return (item);
 1362                 } else if (cache->uc_freebucket) {
 1363                         /*
 1364                          * We have run out of items in our allocbucket.
 1365                          * See if we can switch with our free bucket.
 1366                          */
 1367                         if (cache->uc_freebucket->ub_ptr > -1) {
 1368                                 uma_bucket_t swap;
 1369 
 1370 #ifdef UMA_DEBUG_ALLOC
 1371                                 printf("uma_zalloc: Swapping empty with alloc.\n");
 1372 #endif
 1373                                 swap = cache->uc_freebucket;
 1374                                 cache->uc_freebucket = cache->uc_allocbucket;
 1375                                 cache->uc_allocbucket = swap;
 1376 
 1377                                 goto zalloc_start;
 1378                         }
 1379                 }
 1380         }
 1381         ZONE_LOCK(zone);
 1382         /* Since we have locked the zone we may as well send back our stats */
 1383         zone->uz_allocs += cache->uc_allocs;
 1384         cache->uc_allocs = 0;
 1385 
 1386         /* Our old one is now a free bucket */
 1387         if (cache->uc_allocbucket) {
 1388                 KASSERT(cache->uc_allocbucket->ub_ptr == -1,
 1389                     ("uma_zalloc_arg: Freeing a non free bucket."));
 1390                 LIST_INSERT_HEAD(&zone->uz_free_bucket,
 1391                     cache->uc_allocbucket, ub_link);
 1392                 cache->uc_allocbucket = NULL;
 1393         }
 1394 
 1395         /* Check the free list for a new alloc bucket */
 1396         if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
 1397                 KASSERT(bucket->ub_ptr != -1,
 1398                     ("uma_zalloc_arg: Returning an empty bucket."));
 1399 
 1400                 LIST_REMOVE(bucket, ub_link);
 1401                 cache->uc_allocbucket = bucket;
 1402                 ZONE_UNLOCK(zone);
 1403                 goto zalloc_start;
 1404         } 
 1405         /* We are no longer associated with this cpu!!! */
 1406         CPU_UNLOCK(zone, cpu);
 1407 
 1408         /* Bump up our uz_count so we get here less */
 1409         if (zone->uz_count < UMA_BUCKET_SIZE - 1)
 1410                 zone->uz_count++;
 1411 
 1412         /*
 1413          * Now let's just fill a bucket and put it on the free list.  If that
 1414          * works we'll restart the allocation from the beginning.
 1415          */
 1416 
 1417         if (uma_zalloc_bucket(zone, flags)) {
 1418                 ZONE_UNLOCK(zone);
 1419                 goto zalloc_restart;
 1420         }
 1421         ZONE_UNLOCK(zone);
 1422         /*
 1423          * We may not be able to get a bucket, so return an actual item.
 1424          */
 1425 #ifdef UMA_DEBUG
 1426         printf("uma_zalloc_arg: Bucketzone returned NULL\n");
 1427 #endif
 1428 
 1429         return (uma_zalloc_internal(zone, udata, flags));
 1430 }
 1431 
 1432 static uma_slab_t
 1433 uma_zone_slab(uma_zone_t zone, int flags)
 1434 {
 1435         uma_slab_t slab;
 1436 
 1437         /* 
 1438          * This is to prevent us from recursively trying to allocate
 1439          * buckets.  The problem is that if an allocation forces us to
 1440          * grab a new bucket we will call page_alloc, which will go off
 1441          * and cause the vm to allocate vm_map_entries.  If we need new
 1442          * buckets there too we will recurse in kmem_alloc and bad 
 1443          * things happen.  So instead we return a NULL bucket, and make
 1444          * the code that allocates buckets smart enough to deal with it
 1445          */ 
 1446         if (zone == bucketzone && zone->uz_recurse != 0)
 1447                 return (NULL);
 1448 
 1449         slab = NULL;
 1450 
 1451         for (;;) {
 1452                 /*
 1453                  * Find a slab with some space.  Prefer slabs that are partially
 1454                  * used over those that are totally full.  This helps to reduce
 1455                  * fragmentation.
 1456                  */
 1457                 if (zone->uz_free != 0) {
 1458                         if (!LIST_EMPTY(&zone->uz_part_slab)) {
 1459                                 slab = LIST_FIRST(&zone->uz_part_slab);
 1460                         } else {
 1461                                 slab = LIST_FIRST(&zone->uz_free_slab);
 1462                                 LIST_REMOVE(slab, us_link);
 1463                                 LIST_INSERT_HEAD(&zone->uz_part_slab, slab,
 1464                                 us_link);
 1465                         }
 1466                         return (slab);
 1467                 }
 1468 
 1469                 /*
 1470                  * M_NOVM means don't ask at all!
 1471                  */
 1472                 if (flags & M_NOVM)
 1473                         break;
 1474 
 1475                 if (zone->uz_maxpages &&
 1476                     zone->uz_pages >= zone->uz_maxpages) {
 1477                         zone->uz_flags |= UMA_ZFLAG_FULL;
 1478 
 1479                         if (flags & M_WAITOK)
 1480                                 msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
 1481                         else 
 1482                                 break;
 1483                         continue;
 1484                 }
 1485                 zone->uz_recurse++;
 1486                 slab = slab_zalloc(zone, flags);
 1487                 zone->uz_recurse--;
 1488                 /* 
 1489                  * If we got a slab here it's safe to mark it partially used
 1490                  * and return.  We assume that the caller is going to remove
 1491                  * at least one item.
 1492                  */
 1493                 if (slab) {
 1494                         LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
 1495                         return (slab);
 1496                 }
 1497                 /* 
 1498                  * We might not have been able to get a slab but another cpu
 1499                  * could have while we were unlocked.  Check again before we
 1500                  * fail.
 1501                  */
 1502                 if ((flags & M_WAITOK) == 0)
 1503                         flags |= M_NOVM;
 1504         }
 1505         return (slab);
 1506 }
 1507 
 1508 static __inline void *
 1509 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
 1510 {
 1511         void *item;
 1512         u_int8_t freei;
 1513         
 1514         freei = slab->us_firstfree;
 1515         slab->us_firstfree = slab->us_freelist[freei];
 1516         item = slab->us_data + (zone->uz_rsize * freei);
 1517 
 1518         slab->us_freecount--;
 1519         zone->uz_free--;
 1520 #ifdef INVARIANTS
 1521         uma_dbg_alloc(zone, slab, item);
 1522 #endif
 1523         /* Move this slab to the full list */
 1524         if (slab->us_freecount == 0) {
 1525                 LIST_REMOVE(slab, us_link);
 1526                 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
 1527         }
 1528 
 1529         return (item);
 1530 }
 1531 
 1532 static int
 1533 uma_zalloc_bucket(uma_zone_t zone, int flags)
 1534 {
 1535         uma_bucket_t bucket;
 1536         uma_slab_t slab;
 1537 
 1538         /*
 1539          * Try this zone's free list first so we don't allocate extra buckets.
 1540          */
 1541 
 1542         if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
 1543                 KASSERT(bucket->ub_ptr == -1,
 1544                     ("uma_zalloc_bucket: Bucket on free list is not empty."));
 1545                 LIST_REMOVE(bucket, ub_link);
 1546         } else {
 1547                 int bflags;
 1548 
 1549                 bflags = flags;
 1550                 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
 1551                         bflags |= M_NOVM;
 1552 
 1553                 ZONE_UNLOCK(zone);
 1554                 bucket = uma_zalloc_internal(bucketzone,
 1555                     NULL, bflags);
 1556                 ZONE_LOCK(zone);
 1557                 if (bucket != NULL) {
 1558 #ifdef INVARIANTS
 1559                         bzero(bucket, bucketzone->uz_size);
 1560 #endif
 1561                         bucket->ub_ptr = -1;
 1562                 }
 1563         }
 1564 
 1565         if (bucket == NULL)
 1566                 return (0);
 1567 
 1568 #ifdef SMP
 1569         /*
 1570          * Limit the number of simultaneous bucket fills for a zone to
 1571          * the number of per-cpu caches it has, so that we do not
 1572          * allocate more memory than we really need.
 1573          */
 1574         if (zone->uz_fills >= mp_ncpus)
 1575                 goto done;
 1576 
 1577 #endif
 1578         zone->uz_fills++;
 1579 
 1580         /* Try to keep the buckets totally full */
 1581         while ((slab = uma_zone_slab(zone, flags)) != NULL &&
 1582             bucket->ub_ptr < zone->uz_count) {
 1583                 while (slab->us_freecount &&
 1584                     bucket->ub_ptr < zone->uz_count) {
 1585                         bucket->ub_bucket[++bucket->ub_ptr] =
 1586                             uma_slab_alloc(zone, slab);
 1587                 }
 1588                 /* Don't block on the next fill */
 1589                 flags |= M_NOWAIT;
 1590                 flags &= ~M_WAITOK;
 1591         }
 1592 
 1593         zone->uz_fills--;
 1594 
 1595         if (bucket->ub_ptr != -1) {
 1596                 LIST_INSERT_HEAD(&zone->uz_full_bucket,
 1597                     bucket, ub_link);
 1598                 return (1);
 1599         }
 1600 #ifdef SMP
 1601 done:
 1602 #endif
 1603         uma_zfree_internal(bucketzone, bucket, NULL, 0);
 1604 
 1605         return (0);
 1606 }
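
/*
 * Throughout this file a bucket's ub_ptr is the index of its last valid item,
 * with -1 meaning "empty", so items are pushed with ++bucket->ub_ptr.  The
 * fill loop above also strips M_WAITOK after the first slab, so only the
 * first slab allocation may sleep.  The #if 0 block below is a small
 * userspace sketch of that fill convention with hypothetical sizes and names.
 */
#if 0   /* illustrative sketch -- not part of the allocator */
#include <stdio.h>

#define TOY_BUCKET_SIZE 16              /* capacity of the toy bucket */

static int slab_free = 10;              /* free items left in a pretend slab */

static int
take_item(void)                         /* stands in for uma_slab_alloc() */
{
        return (--slab_free);
}

int
main(void)
{
        int bucket[TOY_BUCKET_SIZE];
        int ub_ptr = -1;                /* -1 == empty, as in UMA buckets */

        /* Fill while the slab has items and the bucket has room. */
        while (slab_free > 0 && ub_ptr < TOY_BUCKET_SIZE - 1)
                bucket[++ub_ptr] = take_item();

        printf("bucket holds %d of %d items\n", ub_ptr + 1, TOY_BUCKET_SIZE);
        return (0);
}
#endif
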
 1607 /*
 1608  * Allocates an item for an internal zone
 1609  *
 1610  * Arguments:
 1611  *      zone   The zone to alloc for.
 1612  *      udata  The data to be passed to the constructor.
 1613  *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
 1614  *
 1615  * Returns:
 1616  *      NULL if there is no memory and M_NOWAIT is set
 1617  *      An item if successful
 1618  */
 1619 
 1620 static void *
 1621 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
 1622 {
 1623         uma_slab_t slab;
 1624         void *item;
 1625 
 1626         item = NULL;
 1627 
 1628         /*
 1629          * This stops us from allocating per-cpu buckets while we are
 1630          * still running on the limited UMA_BOOT_PAGES pool.  Otherwise
 1631          * we would exhaust the boot pages.
 1632          */
 1633 
 1634         if (bucketdisable && zone == bucketzone)
 1635                 return (NULL);
 1636 
 1637 #ifdef UMA_DEBUG_ALLOC
 1638         printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
 1639 #endif
 1640         ZONE_LOCK(zone);
 1641 
 1642         slab = uma_zone_slab(zone, flags);
 1643         if (slab == NULL) {
 1644                 ZONE_UNLOCK(zone);
 1645                 return (NULL);
 1646         }
 1647 
 1648         item = uma_slab_alloc(zone, slab);
 1649 
 1650         ZONE_UNLOCK(zone);
 1651 
 1652         if (zone->uz_ctor != NULL) 
 1653                 zone->uz_ctor(item, zone->uz_size, udata);
 1654         if (flags & M_ZERO)
 1655                 bzero(item, zone->uz_size);
 1656 
 1657         return (item);
 1658 }
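
/*
 * Allocations from a zone normally go through the public uma(9) interface
 * declared in vm/uma.h.  The #if 0 block below is a hedged usage sketch with
 * hypothetical names (foo_zone, foo_ctor); the exact uma_zcreate() prototype
 * and the ctor/dtor typedefs should be taken from vm/uma.h rather than from
 * this sketch.
 */
#if 0   /* illustrative sketch -- not part of the allocator */
#include <sys/param.h>
#include <sys/malloc.h>
#include <vm/uma.h>

struct foo {
        int     f_refs;
};

static uma_zone_t foo_zone;

/* The ctor signature matches the uz_ctor call made above. */
static void
foo_ctor(void *mem, int size, void *arg)
{
        struct foo *fp = mem;

        fp->f_refs = 1;
}

static void
foo_setup(void)
{
        /* name, size, ctor, dtor, uminit, fini, align, flags */
        foo_zone = uma_zcreate("foo", sizeof(struct foo), foo_ctor, NULL,
            NULL, NULL, UMA_ALIGN_PTR, 0);
}

static struct foo *
foo_alloc(void)
{
        return (uma_zalloc(foo_zone, M_WAITOK));
}

static void
foo_free(struct foo *fp)
{
        uma_zfree(foo_zone, fp);
}
#endif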
 1659 
 1660 /* See uma.h */
 1661 void
 1662 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
 1663 {
 1664         uma_cache_t cache;
 1665         uma_bucket_t bucket;
 1666         int bflags;
 1667         int cpu;
 1668 
 1669         /* This is the fast path free */
 1670 #ifdef UMA_DEBUG_ALLOC_1
 1671         printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
 1672 #endif
 1673         /*
 1674          * The race here is acceptable.  If we miss it we'll just have to wait
 1675          * a little longer for the limits to be reset.
 1676          */
 1677 
 1678         if (zone->uz_flags & UMA_ZFLAG_FULL)
 1679                 goto zfree_internal;
 1680 
 1681         if (zone->uz_dtor)
 1682                 zone->uz_dtor(item, zone->uz_size, udata);
 1683 
 1684 zfree_restart:
 1685         cpu = PCPU_GET(cpuid);
 1686         CPU_LOCK(zone, cpu);
 1687         cache = &zone->uz_cpu[cpu];
 1688 
 1689 zfree_start:
 1690         bucket = cache->uc_freebucket;
 1691 
 1692         if (bucket) {
 1693                 /*
 1694                  * Do we have room in our bucket? It is OK for this uz count
 1695                  * check to be slightly out of sync.
 1696                  */
 1697 
 1698                 if (bucket->ub_ptr < zone->uz_count) {
 1699                         bucket->ub_ptr++;
 1700                         KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
 1701                             ("uma_zfree: Freeing to non free bucket index."));
 1702                         bucket->ub_bucket[bucket->ub_ptr] = item;
 1703 #ifdef INVARIANTS
 1704                         ZONE_LOCK(zone);
 1705                         if (zone->uz_flags & UMA_ZFLAG_MALLOC)
 1706                                 uma_dbg_free(zone, udata, item);
 1707                         else
 1708                                 uma_dbg_free(zone, NULL, item);
 1709                         ZONE_UNLOCK(zone);
 1710 #endif
 1711                         CPU_UNLOCK(zone, cpu);
 1712                         return;
 1713                 } else if (cache->uc_allocbucket) {
 1714 #ifdef UMA_DEBUG_ALLOC
 1715                         printf("uma_zfree: Swapping buckets.\n");
 1716 #endif
 1717                         /*
 1718                          * We have run out of space in our freebucket.
 1719                          * See if we can switch with our alloc bucket.
 1720                          */
 1721                         if (cache->uc_allocbucket->ub_ptr < 
 1722                             cache->uc_freebucket->ub_ptr) {
 1723                                 uma_bucket_t swap;
 1724 
 1725                                 swap = cache->uc_freebucket;
 1726                                 cache->uc_freebucket = cache->uc_allocbucket;
 1727                                 cache->uc_allocbucket = swap;
 1728 
 1729                                 goto zfree_start;
 1730                         }
 1731                 }
 1732         } 
 1733 
 1734         /*
 1735          * We can get here for two reasons:
 1736          *
 1737          * 1) The buckets are NULL
 1738          * 2) The alloc and free buckets are both somewhat full.
 1739          *
 1740          */
 1741 
 1742         ZONE_LOCK(zone);
 1743 
 1744         bucket = cache->uc_freebucket;
 1745         cache->uc_freebucket = NULL;
 1746 
 1747         /* Can we throw this on the zone full list? */
 1748         if (bucket != NULL) {
 1749 #ifdef UMA_DEBUG_ALLOC
 1750                 printf("uma_zfree: Putting old bucket on the free list.\n");
 1751 #endif
 1752                 /* ub_ptr is pointing to the last free item */
 1753                 KASSERT(bucket->ub_ptr != -1,
 1754                     ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
 1755                 LIST_INSERT_HEAD(&zone->uz_full_bucket,
 1756                     bucket, ub_link);
 1757         }
 1758         if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
 1759                 LIST_REMOVE(bucket, ub_link);
 1760                 ZONE_UNLOCK(zone);
 1761                 cache->uc_freebucket = bucket;
 1762                 goto zfree_start;
 1763         }
 1764         /* We're done with this CPU now */
 1765         CPU_UNLOCK(zone, cpu);
 1766 
 1767         /* And the zone.. */
 1768         ZONE_UNLOCK(zone);
 1769 
 1770 #ifdef UMA_DEBUG_ALLOC
 1771         printf("uma_zfree: Allocating new free bucket.\n");
 1772 #endif
 1773         bflags = M_NOWAIT;
 1774 
 1775         if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
 1776                 bflags |= M_NOVM;
 1777 #ifdef INVARIANTS
 1778         bflags |= M_ZERO;
 1779 #endif
 1780         bucket = uma_zalloc_internal(bucketzone,
 1781             NULL, bflags);
 1782         if (bucket) {
 1783                 bucket->ub_ptr = -1;
 1784                 ZONE_LOCK(zone);
 1785                 LIST_INSERT_HEAD(&zone->uz_free_bucket,
 1786                     bucket, ub_link);
 1787                 ZONE_UNLOCK(zone);
 1788                 goto zfree_restart;
 1789         }
 1790 
 1791         /*
 1792          * If nothing else caught this, we'll just do an internal free.
 1793          */
 1794 
 1795 zfree_internal:
 1796 
 1797         uma_zfree_internal(zone, item, udata, 0);
 1798 
 1799         return;
 1800 
 1801 }
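
/*
 * The free fast path above keeps two buckets per cpu and swaps them when the
 * free bucket fills but the alloc bucket still has room.  The #if 0 block
 * below is a minimal userspace sketch of that swap (hypothetical toy_* names,
 * no per-cpu data or locking); where this sketch returns 0, the real code
 * hands the full bucket back to the zone instead.
 */
#if 0   /* illustrative sketch -- not part of the allocator */
#include <stdio.h>

#define TOY_BUCKET_SIZE 4

struct toy_bucket {
        int     ub_ptr;                 /* last valid index, -1 == empty */
        void    *ub_bucket[TOY_BUCKET_SIZE];
};

struct toy_cache {
        struct toy_bucket *uc_allocbucket;
        struct toy_bucket *uc_freebucket;
};

/* Push a freed item, swapping buckets as uma_zfree_arg() does above. */
static int
cache_free(struct toy_cache *cache, void *item)
{
        struct toy_bucket *bucket, *swap;

        bucket = cache->uc_freebucket;
        if (bucket->ub_ptr < TOY_BUCKET_SIZE - 1) {
                bucket->ub_bucket[++bucket->ub_ptr] = item;
                return (1);
        }
        if (cache->uc_allocbucket->ub_ptr < bucket->ub_ptr) {
                swap = cache->uc_freebucket;
                cache->uc_freebucket = cache->uc_allocbucket;
                cache->uc_allocbucket = swap;
                return (cache_free(cache, item));
        }
        return (0);                     /* both buckets are full */
}

int
main(void)
{
        struct toy_bucket ab = { .ub_ptr = -1 }, fb = { .ub_ptr = -1 };
        struct toy_cache cache = { &ab, &fb };  /* alloc bucket, free bucket */
        int i, items[10];

        for (i = 0; i < 10; i++)
                if (!cache_free(&cache, &items[i]))
                        printf("item %d would be handed to the zone\n", i);
        return (0);
}
#endif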
 1802 
 1803 /*
 1804  * Frees an item back to an INTERNAL zone
 1805  *
 1806  * Arguments:
 1807  *      zone   The zone to free to
 1808  *      item   The item we're freeing
 1809  *      udata  User supplied data for the dtor
 1810  *      skip   Skip the dtor, it was done in uma_zfree_arg
 1811  */
 1812 
 1813 static void
 1814 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
 1815 {
 1816         uma_slab_t slab;
 1817         u_int8_t *mem;
 1818         u_int8_t freei;
 1819 
 1820         if (!skip && zone->uz_dtor)
 1821                 zone->uz_dtor(item, zone->uz_size, udata);
 1822 
 1823         ZONE_LOCK(zone);
 1824 
 1825         if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
 1826                 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
 1827                 if (zone->uz_flags & UMA_ZFLAG_HASH)
 1828                         slab = hash_sfind(&zone->uz_hash, mem);
 1829                 else {
 1830                         mem += zone->uz_pgoff;
 1831                         slab = (uma_slab_t)mem;
 1832                 }
 1833         } else {
 1834                 slab = (uma_slab_t)udata;
 1835         }
 1836 
 1837         /* Move the slab between lists if its state changes. */
 1838         if (slab->us_freecount+1 == zone->uz_ipers) {
 1839                 LIST_REMOVE(slab, us_link);
 1840                 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
 1841         } else if (slab->us_freecount == 0) {
 1842                 LIST_REMOVE(slab, us_link);
 1843                 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
 1844         }
 1845 
 1846         /* Put the item back on the slab's embedded freelist. */
 1847         freei = ((unsigned long)item - (unsigned long)slab->us_data)
 1848                 / zone->uz_rsize;
 1849 
 1850 #ifdef INVARIANTS
 1851         if (!skip)
 1852                 uma_dbg_free(zone, slab, item);
 1853 #endif
 1854 
 1855         slab->us_freelist[freei] = slab->us_firstfree;
 1856         slab->us_firstfree = freei;
 1857         slab->us_freecount++;
 1858 
 1859         /* Zone statistics */
 1860         zone->uz_free++;
 1861 
 1862         if (zone->uz_flags & UMA_ZFLAG_FULL) {
 1863                 if (zone->uz_pages < zone->uz_maxpages)
 1864                         zone->uz_flags &= ~UMA_ZFLAG_FULL;
 1865 
 1866                 /* We can handle one more allocation */
 1867                 wakeup_one(zone);
 1868         }
 1869 
 1870         ZONE_UNLOCK(zone);
 1871 }
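
/*
 * For non-malloc zones the enclosing slab is located above by masking the
 * item address down to its UMA_SLAB_SIZE boundary (and then via uz_pgoff or
 * the hash table), and the item's index is its byte offset from us_data
 * divided by uz_rsize.  The #if 0 block below is a small userspace sketch of
 * that arithmetic; the sizes are hypothetical and it assumes the item storage
 * starts right at the slab boundary.
 */
#if 0   /* illustrative sketch -- not part of the allocator */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TOY_SLAB_SIZE   4096                    /* stands in for UMA_SLAB_SIZE */
#define TOY_SLAB_MASK   (TOY_SLAB_SIZE - 1)     /* stands in for UMA_SLAB_MASK */
#define TOY_ITEM_SIZE   64                      /* stands in for uz_rsize */

int
main(void)
{
        char *slab_base, *item;
        uintptr_t mem;
        unsigned freei;

        /* One naturally aligned "slab", so the mask can recover its base. */
        if ((slab_base = aligned_alloc(TOY_SLAB_SIZE, TOY_SLAB_SIZE)) == NULL)
                return (1);
        item = slab_base + 5 * TOY_ITEM_SIZE;   /* pretend item 5 was handed out */

        mem = (uintptr_t)item & ~(uintptr_t)TOY_SLAB_MASK;
        freei = ((uintptr_t)item - mem) / TOY_ITEM_SIZE;

        printf("slab %p, item index %u\n", (void *)mem, freei);
        free(slab_base);
        return (0);
}
#endif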
 1872 
 1873 /* See uma.h */
 1874 void
 1875 uma_zone_set_max(uma_zone_t zone, int nitems)
 1876 {
 1877         ZONE_LOCK(zone);
 1878         if (zone->uz_ppera > 1)
 1879                 zone->uz_maxpages = nitems * zone->uz_ppera;
 1880         else 
 1881                 zone->uz_maxpages = nitems / zone->uz_ipers;
 1882 
 1883         if (zone->uz_maxpages * zone->uz_ipers < nitems)
 1884                 zone->uz_maxpages++;
 1885 
 1886         ZONE_UNLOCK(zone);
 1887 }
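
/*
 * When each item fits within one page, the limit above is nitems / uz_ipers
 * pages rounded up; for example, 25 items at 10 items per page become 3
 * pages.  The #if 0 block below is a tiny userspace sketch of that round-up,
 * with hypothetical names.
 */
#if 0   /* illustrative sketch -- not part of the allocator */
#include <stdio.h>

/* Round-up division, as used to turn an item limit into a page limit. */
static int
items_to_pages(int nitems, int ipers)
{
        int pages;

        pages = nitems / ipers;
        if (pages * ipers < nitems)
                pages++;
        return (pages);
}

int
main(void)
{
        printf("%d\n", items_to_pages(25, 10));         /* prints 3 */
        return (0);
}
#endif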
 1888 
 1889 /* See uma.h */
 1890 void
 1891 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
 1892 {
 1893         ZONE_LOCK(zone);
 1894 
 1895         zone->uz_freef = freef;
 1896 
 1897         ZONE_UNLOCK(zone);
 1898 }
 1899 
 1900 /* See uma.h */
 1901 void
 1902 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
 1903 {
 1904         ZONE_LOCK(zone);
 1905 
 1906         zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
 1907         zone->uz_allocf = allocf;
 1908 
 1909         ZONE_UNLOCK(zone);
 1910 }
 1911 
 1912 /* See uma.h */
 1913 int
 1914 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
 1915 {
 1916         int pages;
 1917         vm_offset_t kva;
 1918 
 1919         mtx_lock(&Giant);
 1920 
 1921         pages = count / zone->uz_ipers;
 1922 
 1923         if (pages * zone->uz_ipers < count)
 1924                 pages++;
 1925 
 1926         kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
 1927 
 1928         if (kva == 0) {
 1929                 mtx_unlock(&Giant);
 1930                 return (0);
 1931         }
 1932 
 1933 
 1934         if (obj == NULL)
 1935                 obj = vm_object_allocate(OBJT_DEFAULT,
 1936                     pages);
 1937         else 
 1938                 _vm_object_allocate(OBJT_DEFAULT,
 1939                     pages, obj);
 1940 
 1941         ZONE_LOCK(zone);
 1942         zone->uz_kva = kva;
 1943         zone->uz_obj = obj;
 1944         zone->uz_maxpages = pages;
 1945 
 1946         zone->uz_allocf = obj_alloc;
 1947         zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
 1948 
 1949         ZONE_UNLOCK(zone);
 1950         mtx_unlock(&Giant);
 1951 
 1952         return (1);
 1953 }
 1954 
 1955 /* See uma.h */
 1956 void
 1957 uma_prealloc(uma_zone_t zone, int items)
 1958 {
 1959         int slabs;
 1960         uma_slab_t slab;
 1961 
 1962         ZONE_LOCK(zone);
 1963         slabs = items / zone->uz_ipers;
 1964         if (slabs * zone->uz_ipers < items)
 1965                 slabs++;
 1966 
 1967         while (slabs > 0) {
 1968                 slab = slab_zalloc(zone, M_WAITOK);
 1969                 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
 1970                 slabs--;
 1971         }
 1972         ZONE_UNLOCK(zone);
 1973 }
 1974 
 1975 /* See uma.h */
 1976 void
 1977 uma_reclaim(void)
 1978 {
 1979         /*
 1980          * You might think that the delay below would improve performance since
 1981          * the allocator will give away memory that it may ask for immediately.
 1982          * Really, it makes things worse, since cpu cycles are so much cheaper
 1983          * than disk activity.
 1984          */
 1985 #if 0
 1986         static struct timeval tv = {0};
 1987         struct timeval now;
 1988         getmicrouptime(&now);
 1989         if (now.tv_sec > tv.tv_sec + 30)
 1990                 tv = now;
 1991         else
 1992                 return;
 1993 #endif
 1994 #ifdef UMA_DEBUG
 1995         printf("UMA: vm asked us to release pages!\n");
 1996 #endif
 1997         bucket_enable();
 1998         zone_foreach(zone_drain);
 1999 
 2000         /*
 2001          * Some slabs may have been freed, but the slab zone was drained
 2002          * early in the walk above; drain it again so that pages emptied
 2003          * by the other zones can be freed.  Likewise for buckets.
 2004          */
 2005         zone_drain(slabzone);
 2006         zone_drain(bucketzone);
 2007 }
 2008 
 2009 void *
 2010 uma_large_malloc(int size, int wait)
 2011 {
 2012         void *mem;
 2013         uma_slab_t slab;
 2014         u_int8_t flags;
 2015 
 2016         slab = uma_zalloc_internal(slabzone, NULL, wait);
 2017         if (slab == NULL)
 2018                 return (NULL);
 2019 
 2020         mem = page_alloc(NULL, size, &flags, wait);
 2021         if (mem) {
 2022                 vsetslab((vm_offset_t)mem, slab);
 2023                 slab->us_data = mem;
 2024                 slab->us_flags = flags | UMA_SLAB_MALLOC;
 2025                 slab->us_size = size;
 2026         } else {
 2027                 uma_zfree_internal(slabzone, slab, NULL, 0);
 2028         }
 2029 
 2030 
 2031         return (mem);
 2032 }
 2033 
 2034 void
 2035 uma_large_free(uma_slab_t slab)
 2036 {
 2037         vsetobj((vm_offset_t)slab->us_data, kmem_object);
 2038         page_free(slab->us_data, slab->us_size, slab->us_flags);
 2039         uma_zfree_internal(slabzone, slab, NULL, 0);
 2040 }
 2041 
 2042 void
 2043 uma_print_stats(void)
 2044 {
 2045         zone_foreach(uma_print_zone);
 2046 }
 2047 
 2048 void
 2049 uma_print_zone(uma_zone_t zone)
 2050 {
 2051         printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
 2052             zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
 2053             zone->uz_ipers, zone->uz_ppera,
 2054             (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
 2055 }
 2056 
 2057 /*
 2058  * Sysctl handler for vm.zone 
 2059  *
 2060  * stolen from vm_zone.c
 2061  */
 2062 static int
 2063 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
 2064 {
 2065         int error, len, cnt;
 2066         const int linesize = 128;       /* conservative */
 2067         int totalfree;
 2068         char *tmpbuf, *offset;
 2069         uma_zone_t z;
 2070         char *p;
 2071 
 2072         cnt = 0;
 2073         mtx_lock(&uma_mtx);
 2074         LIST_FOREACH(z, &uma_zones, uz_link)
 2075                 cnt++;
 2076         mtx_unlock(&uma_mtx);
 2077         MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
 2078                         M_TEMP, M_WAITOK);
 2079         len = snprintf(tmpbuf, linesize,
 2080             "\nITEM            SIZE     LIMIT     USED    FREE  REQUESTS\n\n");
 2081         if (cnt == 0)
 2082                 tmpbuf[len - 1] = '\0';
 2083         error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
 2084         if (error || cnt == 0)
 2085                 goto out;
 2086         offset = tmpbuf;
 2087         mtx_lock(&uma_mtx);
 2088         LIST_FOREACH(z, &uma_zones, uz_link) {
 2089                 if (cnt == 0)   /* list may have changed size */
 2090                         break;
 2091                 ZONE_LOCK(z);
 2092                 totalfree = z->uz_free + z->uz_cachefree;
 2093                 len = snprintf(offset, linesize,
 2094                     "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
 2095                     z->uz_name, z->uz_size,
 2096                     z->uz_maxpages * z->uz_ipers,
 2097                     (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
 2098                     totalfree,
 2099                     (unsigned long long)z->uz_allocs);
 2100                 ZONE_UNLOCK(z);
 2101                 for (p = offset + 12; p > offset && *p == ' '; --p)
 2102                         /* nothing */ ;
 2103                 p[1] = ':';
 2104                 cnt--;
 2105                 offset += len;
 2106         }
 2107         mtx_unlock(&uma_mtx);
 2108         *offset++ = '\0';
 2109         error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
 2110 out:
 2111         FREE(tmpbuf, M_TEMP);
 2112         return (error);
 2113 }
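
/*
 * The handler above builds its report by reserving a conservative linesize
 * bytes per zone, then writing each line with snprintf() and advancing an
 * offset pointer by the returned length.  The #if 0 block below is a small
 * userspace sketch of that pattern with hypothetical zone names and counts.
 */
#if 0   /* illustrative sketch -- not part of the allocator */
#include <stdio.h>
#include <stdlib.h>

#define TOY_LINESIZE    128

int
main(void)
{
        const char *names[] = { "mbuf", "socket", "vnode" };
        int cnt = 3, i;
        char *tmpbuf, *offset;

        if ((tmpbuf = malloc((cnt + 1) * TOY_LINESIZE)) == NULL)
                return (1);
        offset = tmpbuf;
        offset += snprintf(offset, TOY_LINESIZE, "ITEM            USED\n");
        for (i = 0; i < cnt; i++)
                offset += snprintf(offset, TOY_LINESIZE, "%-12.12s%8d\n",
                    names[i], (i + 1) * 10);
        fputs(tmpbuf, stdout);
        free(tmpbuf);
        return (0);
}
#endif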
