FreeBSD/Linux Kernel Cross Reference
sys/vm/uma_core.c


    1 /*
    2  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice unmodified, this list of conditions, and the following
   10  *    disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25  *
   26  * $FreeBSD: releng/5.1/sys/vm/uma_core.c 114149 2003-04-28 06:11:32Z alc $
   27  *
   28  */
   29 
   30 /*
   31  * uma_core.c  Implementation of the Universal Memory allocator
   32  *
   33  * This allocator is intended to replace the multitude of similar object caches
   34  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
   35  * efficient.  A primary design goal is to return unused memory to the rest of
   36  * the system.  This will make the system as a whole more flexible due to the 
   37  * ability to move memory to subsystems which most need it instead of leaving
   38  * pools of reserved memory unused.
   39  *
   40  * The basic ideas stem from similar slab/zone based allocators whose algorithms
   41  * are well known.
   42  *
   43  */
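
/*
 * Usage sketch: how a hypothetical client subsystem consumes the public API
 * implemented in this file.  uma_zcreate(), uma_zalloc() and uma_zfree() are
 * the real entry points from <vm/uma.h>; the "foo" structure, zone name and
 * helper functions are made up for illustration.
 */
#if 0
struct foo {
	int	f_refcount;
	char	f_name[16];
};

static uma_zone_t foo_zone;

static void
foo_setup(void)
{
	/* One zone per object type; all items share size and alignment. */
	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

static struct foo *
foo_alloc(void)
{
	/* M_WAITOK/M_NOWAIT/M_ZERO behave as they do for malloc(9). */
	return (uma_zalloc(foo_zone, M_WAITOK | M_ZERO));
}

static void
foo_free(struct foo *fp)
{
	uma_zfree(foo_zone, fp);
}
#endif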
   44 
   45 /*
   46  * TODO:
   47  *      - Improve memory usage for large allocations
   48  *      - Investigate cache size adjustments
   49  */
   50 
   51 /* I should really use ktr.. */
   52 /*
   53 #define UMA_DEBUG 1
   54 #define UMA_DEBUG_ALLOC 1
   55 #define UMA_DEBUG_ALLOC_1 1
   56 */
   57 
   58 
   59 #include "opt_param.h"
   60 #include <sys/param.h>
   61 #include <sys/systm.h>
   62 #include <sys/kernel.h>
   63 #include <sys/types.h>
   64 #include <sys/queue.h>
   65 #include <sys/malloc.h>
   66 #include <sys/lock.h>
   67 #include <sys/sysctl.h>
   68 #include <sys/mutex.h>
   69 #include <sys/proc.h>
   70 #include <sys/smp.h>
   71 #include <sys/vmmeter.h>
   72 
   73 #include <vm/vm.h>
   74 #include <vm/vm_object.h>
   75 #include <vm/vm_page.h>
   76 #include <vm/vm_param.h>
   77 #include <vm/vm_map.h>
   78 #include <vm/vm_kern.h>
   79 #include <vm/vm_extern.h>
   80 #include <vm/uma.h>
   81 #include <vm/uma_int.h>
   82 #include <vm/uma_dbg.h>
   83 
   84 #include <machine/vmparam.h>
   85 
   86 /*
   87  * This is the zone from which all zones are spawned.  The idea is that even 
   88  * the zone heads are allocated from the allocator, so we use the bss section
   89  * to bootstrap us.
   90  */
   91 static struct uma_zone masterzone;
   92 static uma_zone_t zones = &masterzone;
   93 
   94 /* This is the zone from which all of uma_slab_t's are allocated. */
   95 static uma_zone_t slabzone;
   96 
   97 /*
   98  * The initial hash tables come out of this zone so they can be allocated
   99  * prior to malloc coming up.
  100  */
  101 static uma_zone_t hashzone;
  102 
  103 /*
  104  * Zone that buckets come from.
  105  */
  106 static uma_zone_t bucketzone;
  107 
  108 /*
  109  * Are we allowed to allocate buckets?
  110  */
  111 static int bucketdisable = 1;
  112 
  113 /* Linked list of all zones in the system */
  114 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones); 
  115 
  116 /* This mutex protects the zone list */
  117 static struct mtx uma_mtx;
  118 
  119 /* Linked list of boot time pages */
  120 static LIST_HEAD(,uma_slab) uma_boot_pages =
  121     LIST_HEAD_INITIALIZER(&uma_boot_pages);
  122 
  123 /* Count of free boottime pages */
  124 static int uma_boot_free = 0;
  125 
  126 /* Is the VM done starting up? */
  127 static int booted = 0;
  128 
  129 /* This is the handle used to schedule our working set calculator */
  130 static struct callout uma_callout;
  131 
  132 /* This is mp_maxid + 1, for use while looping over each cpu */
  133 static int maxcpu;
  134 
  135 /*
  136  * This structure is passed as the zone ctor arg so that I don't have to create
  137  * a special allocation function just for zones.
  138  */
  139 struct uma_zctor_args {
  140         char *name;
  141         size_t size;
  142         uma_ctor ctor;
  143         uma_dtor dtor;
  144         uma_init uminit;
  145         uma_fini fini;
  146         int align;
  147         u_int16_t flags;
  148 };
  149 
  150 /* Prototypes.. */
  151 
  152 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
  153 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
  154 static void page_free(void *, int, u_int8_t);
  155 static uma_slab_t slab_zalloc(uma_zone_t, int);
  156 static void cache_drain(uma_zone_t);
  157 static void bucket_drain(uma_zone_t, uma_bucket_t);
  158 static void zone_drain(uma_zone_t);
  159 static void zone_ctor(void *, int, void *);
  160 static void zone_dtor(void *, int, void *);
  161 static void zero_init(void *, int);
  162 static void zone_small_init(uma_zone_t zone);
  163 static void zone_large_init(uma_zone_t zone);
  164 static void zone_foreach(void (*zfunc)(uma_zone_t));
  165 static void zone_timeout(uma_zone_t zone);
  166 static int hash_alloc(struct uma_hash *);
  167 static int hash_expand(struct uma_hash *, struct uma_hash *);
  168 static void hash_free(struct uma_hash *hash);
  169 static void uma_timeout(void *);
  170 static void uma_startup3(void);
  171 static void *uma_zalloc_internal(uma_zone_t, void *, int);
  172 static void uma_zfree_internal(uma_zone_t, void *, void *, int);
  173 static void bucket_enable(void);
  174 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
  175 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
  176 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
  177 
  178 void uma_print_zone(uma_zone_t);
  179 void uma_print_stats(void);
  180 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
  181 
  182 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
  183     NULL, 0, sysctl_vm_zone, "A", "Zone Info");
  184 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
  185 
  186 /*
  187  * This routine checks to see whether or not it's safe to enable buckets.
  188  */
  189 
  190 static void
  191 bucket_enable(void)
  192 {
  193         if (cnt.v_free_count < cnt.v_free_min)
  194                 bucketdisable = 1;
  195         else
  196                 bucketdisable = 0;
  197 }
  198 
  199 
  200 /*
  201  * Routine called by timeout which is used to fire off some time interval
  202  * based calculations.  (working set, stats, etc.)
  203  *
  204  * Arguments:
  205  *      arg   Unused
  206  * 
  207  * Returns:
  208  *      Nothing
  209  */
  210 static void
  211 uma_timeout(void *unused)
  212 {
  213         bucket_enable();
  214         zone_foreach(zone_timeout);
  215 
  216         /* Reschedule this event */
  217         callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
  218 }
  219 
  220 /*
  221  * Routine to perform timeout driven calculations.  This does the working set
  222  * as well as hash expanding, and per cpu statistics aggregation.
  223  *
  224  *  Arguments:
  225  *      zone  The zone to operate on
  226  *
  227  *  Returns:
  228  *      Nothing
  229  */
  230 static void
  231 zone_timeout(uma_zone_t zone)
  232 {
  233         uma_cache_t cache;
  234         u_int64_t alloc;
  235         int free;
  236         int cpu;
  237 
  238         alloc = 0;
  239         free = 0;
  240 
  241         /*
  242          * Aggregate per cpu cache statistics back to the zone.
  243          *
  244          * I may rewrite this to set a flag in the per cpu cache instead of
  245          * locking.  If the flag is not cleared on the next round I will have
  246          * to lock and do it here instead so that the statistics don't get too
  247          * far out of sync.
  248          */
  249         if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
  250                 for (cpu = 0; cpu < maxcpu; cpu++) {
  251                         if (CPU_ABSENT(cpu))
  252                                 continue;
  253                         CPU_LOCK(zone, cpu); 
  254                         cache = &zone->uz_cpu[cpu];
  255                         /* Add them up, and reset */
  256                         alloc += cache->uc_allocs;
  257                         cache->uc_allocs = 0;
  258                         if (cache->uc_allocbucket)
  259                                 free += cache->uc_allocbucket->ub_ptr + 1;
  260                         if (cache->uc_freebucket)
  261                                 free += cache->uc_freebucket->ub_ptr + 1;
  262                         CPU_UNLOCK(zone, cpu);
  263                 }
  264         }
  265 
  266         /* Now push these stats back into the zone.. */
  267         ZONE_LOCK(zone);
  268         zone->uz_allocs += alloc;
  269 
  270         /*
  271          * cachefree is an instantaneous snapshot of what is in the per cpu
  272          * caches, not an accurate counter
  273          */
  274         zone->uz_cachefree = free;
  275 
  276         /*
  277          * Expand the zone hash table.
  278          * 
  279          * This is done if the number of slabs is larger than the hash size.
  280          * What I'm trying to do here is eliminate collisions entirely.  This
  281          * may be a little aggressive.  Should I allow for two collisions max?
  282          */
  283 
  284         if (zone->uz_flags & UMA_ZFLAG_HASH &&
  285             zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
  286                 struct uma_hash newhash;
  287                 struct uma_hash oldhash;
  288                 int ret;
  289 
  290                 /*
  291                  * This is so involved because allocating and freeing 
  292                  * while the zone lock is held will lead to deadlock.
  293                  * I have to do everything in stages and check for
  294                  * races.
  295                  */
  296                 newhash = zone->uz_hash;
  297                 ZONE_UNLOCK(zone);
  298                 ret = hash_alloc(&newhash);
  299                 ZONE_LOCK(zone);
  300                 if (ret) {
  301                         if (hash_expand(&zone->uz_hash, &newhash)) {
  302                                 oldhash = zone->uz_hash;
  303                                 zone->uz_hash = newhash;
  304                         } else
  305                                 oldhash = newhash;
  306 
  307                         ZONE_UNLOCK(zone);
  308                         hash_free(&oldhash);
  309                         ZONE_LOCK(zone);
  310                 }
  311         }
  312 
  313         /*
  314          * Here we compute the working set size as the total number of items 
  315          * left outstanding since the last time interval.  This is slightly
  316          * suboptimal. What we really want is the highest number of outstanding
  317          * items during the last time quantum.  This should be close enough.
  318          *
  319          * The working set size is used to throttle the zone_drain function.
  320          * We don't want to return memory that we may need again immediately.
  321          */
  322         alloc = zone->uz_allocs - zone->uz_oallocs;
  323         zone->uz_oallocs = zone->uz_allocs;
  324         zone->uz_wssize = alloc;
  325 
  326         ZONE_UNLOCK(zone);
  327 }
  328 
  329 /*
  330  * Allocate and zero fill the next sized hash table from the appropriate
  331  * backing store.
  332  *
  333  * Arguments:
  334  *      hash  A new hash structure with the old hash size in uh_hashsize
  335  *
  336  * Returns:
  337  *      1 on success and 0 on failure.
  338  */
  339 static int
  340 hash_alloc(struct uma_hash *hash)
  341 {
  342         int oldsize;
  343         int alloc;
  344 
  345         oldsize = hash->uh_hashsize;
  346 
  347         /* We're just going to go to a power of two greater */
  348         if (oldsize)  {
  349                 hash->uh_hashsize = oldsize * 2;
  350                 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
  351                 /* XXX Shouldn't be abusing DEVBUF here */
  352                 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
  353                     M_DEVBUF, M_NOWAIT);
  354         } else {
  355                 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
  356                 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
  357                     M_WAITOK);
  358                 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
  359         }
  360         if (hash->uh_slab_hash) {
  361                 bzero(hash->uh_slab_hash, alloc);
  362                 hash->uh_hashmask = hash->uh_hashsize - 1;
  363                 return (1);
  364         }
  365 
  366         return (0);
  367 }
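
/*
 * A note on the power-of-two sizing above: with uh_hashsize a power of two,
 * uh_hashmask == uh_hashsize - 1, so a bucket index is a single AND with the
 * mask rather than a modulo.  A rough sketch of the index computation that
 * the UMA_HASH() macro from uma_int.h boils down to (the UMA_SLAB_SHIFT
 * constant is assumed here):
 */
#if 0
static __inline int
example_hash_index(struct uma_hash *hash, u_int8_t *data)
{
	/* Drop the in-slab offset bits, then mask down to the table size. */
	return (((vm_offset_t)data >> UMA_SLAB_SHIFT) & hash->uh_hashmask);
}
#endif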
  368 
  369 /*
  370  * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
  371  * to reduce collisions.  This must not be done in the regular allocation path,
  372  * otherwise, we can recurse on the vm while allocating pages.
  373  *
  374  * Arguments:
  375  *      oldhash  The hash you want to expand 
  376  *      newhash  The hash structure for the new table
  377  *
  378  * Returns:
  379  *      Nothing
  380  *
  381  * Discussion:
  382  */
  383 static int
  384 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
  385 {
  386         uma_slab_t slab;
  387         int hval;
  388         int i;
  389 
  390         if (!newhash->uh_slab_hash)
  391                 return (0);
  392 
  393         if (oldhash->uh_hashsize >= newhash->uh_hashsize)
  394                 return (0);
  395 
  396         /*
  397          * I need to investigate hash algorithms for resizing without a
  398          * full rehash.
  399          */
  400 
  401         for (i = 0; i < oldhash->uh_hashsize; i++)
  402                 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
  403                         slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
  404                         SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
  405                         hval = UMA_HASH(newhash, slab->us_data);
  406                         SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
  407                             slab, us_hlink);
  408                 }
  409 
  410         return (1);
  411 }
  412 
  413 /*
  414  * Free the hash bucket to the appropriate backing store.
  415  *
  416  * Arguments:
  417  *      hash  The hash structure whose slab bucket array we're freeing
  418  *            back to the appropriate backing store
  419  *
  420  * Returns:
  421  *      Nothing
  422  */
  423 static void
  424 hash_free(struct uma_hash *hash)
  425 {
  426         if (hash->uh_slab_hash == NULL)
  427                 return;
  428         if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
  429                 uma_zfree_internal(hashzone,
  430                     hash->uh_slab_hash, NULL, 0);
  431         else
  432                 free(hash->uh_slab_hash, M_DEVBUF);
  433 }
  434 
  435 /*
  436  * Frees all outstanding items in a bucket
  437  *
  438  * Arguments:
  439  *      zone   The zone to free to, must be unlocked.
  440  *      bucket The free/alloc bucket with items, cpu queue must be locked.
  441  *
  442  * Returns:
  443  *      Nothing
  444  */
  445 
  446 static void
  447 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
  448 {
  449         uma_slab_t slab;
  450         int mzone;
  451         void *item;
  452 
  453         if (bucket == NULL)
  454                 return;
  455 
  456         slab = NULL;
  457         mzone = 0;
  458 
  459         /* We have to lookup the slab again for malloc.. */
  460         if (zone->uz_flags & UMA_ZFLAG_MALLOC)
  461                 mzone = 1;
  462 
  463         while (bucket->ub_ptr > -1)  {
  464                 item = bucket->ub_bucket[bucket->ub_ptr];
  465 #ifdef INVARIANTS
  466                 bucket->ub_bucket[bucket->ub_ptr] = NULL;
  467                 KASSERT(item != NULL,
  468                     ("bucket_drain: botched ptr, item is NULL"));
  469 #endif
  470                 bucket->ub_ptr--;
  471                 /* 
  472                  * This is extremely inefficient.  The slab pointer was passed
  473                  * to uma_zfree_arg, but we lost it because the buckets don't
  474                  * hold them.  This will go away when free() gets a size passed
  475                  * to it.
  476                  */
  477                 if (mzone)
  478                         slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
  479                 uma_zfree_internal(zone, item, slab, 1);
  480         }
  481 }
  482 
  483 /*
  484  * Drains the per cpu caches for a zone.
  485  *
  486  * Arguments:
  487  *      zone  The zone to drain, must be unlocked.
  488  *
  489  * Returns:
  490  *      Nothing
  491  *
  492  * This function returns with the zone locked so that the per cpu queues can
  493  * not be filled until zone_drain is finished.
  494  *
  495  */
  496 static void
  497 cache_drain(uma_zone_t zone)
  498 {
  499         uma_bucket_t bucket;
  500         uma_cache_t cache;
  501         int cpu;
  502 
  503         /*
  504          * Flush out the per cpu queues.
  505          *
  506          * XXX This causes unnecessary thrashing due to immediately having
  507          * empty per cpu queues.  I need to improve this.
  508          */
  509 
  510         /*
  511          * We have to lock each cpu cache before locking the zone
  512          */
  513         ZONE_UNLOCK(zone);
  514 
  515         for (cpu = 0; cpu < maxcpu; cpu++) {
  516                 if (CPU_ABSENT(cpu))
  517                         continue;
  518                 CPU_LOCK(zone, cpu);
  519                 cache = &zone->uz_cpu[cpu];
  520                 bucket_drain(zone, cache->uc_allocbucket);
  521                 bucket_drain(zone, cache->uc_freebucket);
  522         }
  523 
  524         /*
  525          * Drain the bucket queues and free the buckets, we just keep two per
  526          * cpu (alloc/free).
  527          */
  528         ZONE_LOCK(zone);
  529         while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
  530                 LIST_REMOVE(bucket, ub_link);
  531                 ZONE_UNLOCK(zone);
  532                 bucket_drain(zone, bucket);
  533                 uma_zfree_internal(bucketzone, bucket, NULL, 0);
  534                 ZONE_LOCK(zone);
  535         }
  536 
  537         /* Now we do the free queue.. */
  538         while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
  539                 LIST_REMOVE(bucket, ub_link);
  540                 uma_zfree_internal(bucketzone, bucket, NULL, 0);
  541         }
  542 
  543         /* We unlock here, but they will all block until the zone is unlocked */
  544         for (cpu = 0; cpu < maxcpu; cpu++) {
  545                 if (CPU_ABSENT(cpu))
  546                         continue;
  547                 CPU_UNLOCK(zone, cpu);
  548         }
  549 
  550         zone->uz_cachefree = 0;
  551 }
  552 
  553 /*
  554  * Frees pages from a zone back to the system.  This is done on demand from
  555  * the pageout daemon.
  556  *
  557  * Arguments:
  558  *      zone  The zone to free pages from
  559  *      all   Should we drain all items?
  560  *
  561  * Returns:
  562  *      Nothing.
  563  */
  564 static void
  565 zone_drain(uma_zone_t zone)
  566 {
  567         struct slabhead freeslabs = {};
  568         uma_slab_t slab;
  569         uma_slab_t n;
  570         u_int64_t extra;
  571         u_int8_t flags;
  572         u_int8_t *mem;
  573         int i;
  574 
  575         /*
  576          * We don't want to take pages from statically allocated zones at this
  577          * time
  578          */
  579         if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
  580                 return;
  581 
  582         ZONE_LOCK(zone);
  583 
  584         if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
  585                 cache_drain(zone);
  586 
  587         if (zone->uz_free < zone->uz_wssize)
  588                 goto finished;
  589 #ifdef UMA_DEBUG
  590         printf("%s working set size: %llu free items: %u\n",
  591             zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
  592 #endif
  593         extra = zone->uz_free - zone->uz_wssize;
  594         extra /= zone->uz_ipers;
  595 
  596         /* extra is now the number of extra slabs that we can free */
  597 
  598         if (extra == 0)
  599                 goto finished;
  600 
  601         slab = LIST_FIRST(&zone->uz_free_slab);
  602         while (slab && extra) {
  603                 n = LIST_NEXT(slab, us_link);
  604 
  605                 /* We have nowhere to free these to */
  606                 if (slab->us_flags & UMA_SLAB_BOOT) {
  607                         slab = n;
  608                         continue;
  609                 }
  610 
  611                 LIST_REMOVE(slab, us_link);
  612                 zone->uz_pages -= zone->uz_ppera;
  613                 zone->uz_free -= zone->uz_ipers;
  614 
  615                 if (zone->uz_flags & UMA_ZFLAG_HASH)
  616                         UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
  617 
  618                 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
  619 
  620                 slab = n;
  621                 extra--;
  622         }
  623 finished:
  624         ZONE_UNLOCK(zone);
  625 
  626         while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
  627                 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
  628                 if (zone->uz_fini)
  629                         for (i = 0; i < zone->uz_ipers; i++)
  630                                 zone->uz_fini(
  631                                     slab->us_data + (zone->uz_rsize * i),
  632                                     zone->uz_size);
  633                 flags = slab->us_flags;
  634                 mem = slab->us_data;
  635 
  636                 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
  637                         uma_zfree_internal(slabzone, slab, NULL, 0);
  638                 if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
  639                         vm_object_t obj;
  640 
  641                         if (flags & UMA_SLAB_KMEM)
  642                                 obj = kmem_object;
  643                         else
  644                                 obj = NULL;
  645                         for (i = 0; i < zone->uz_ppera; i++)
  646                                 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
  647                                     obj);
  648                 }
  649 #ifdef UMA_DEBUG
  650                 printf("%s: Returning %d bytes.\n",
  651                     zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
  652 #endif
  653                 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
  654         }
  655 
  656 }
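
/*
 * zone_drain() is normally reached through uma_reclaim() (defined later in
 * this file), which the pageout daemon calls on demand when the system runs
 * low on pages.  A sketch of a low-memory hook doing the same; the handler
 * name and where it gets registered are hypothetical:
 */
#if 0
static void
example_lowmem_handler(void *arg __unused)
{
	/* Hand cached UMA memory from every zone back to the VM. */
	uma_reclaim();
}
#endif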
  657 
  658 /*
  659  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
  660  *
  661  * Arguments:
  662  *      zone  The zone to allocate slabs for
  663  *      wait  Shall we wait?
  664  *
  665  * Returns:
  666  *      The slab that was allocated or NULL if there is no memory and the
  667  *      caller specified M_NOWAIT.
  668  *      
  669  */
  670 static uma_slab_t 
  671 slab_zalloc(uma_zone_t zone, int wait)
  672 {
  673         uma_slab_t slab;        /* Starting slab */
  674         u_int8_t *mem;
  675         u_int8_t flags;
  676         int i;
  677 
  678         slab = NULL;
  679 
  680 #ifdef UMA_DEBUG
  681         printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
  682 #endif
  683         ZONE_UNLOCK(zone);
  684 
  685         if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
  686                 slab = uma_zalloc_internal(slabzone, NULL, wait);
  687                 if (slab == NULL) {
  688                         ZONE_LOCK(zone);
  689                         return NULL;
  690                 }
  691         }
  692 
  693         /*
  694          * This reproduces the old vm_zone behavior of zero filling pages the
  695          * first time they are added to a zone.
  696          *
  697          * Malloced items are zeroed in uma_zalloc.
  698          */
  699 
  700         if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
  701                 wait |= M_ZERO;
  702         else
  703                 wait &= ~M_ZERO;
  704 
  705         if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
  706                 if ((wait & M_NOWAIT) == 0) {
  707                         mtx_lock(&Giant);
  708                         mem = zone->uz_allocf(zone, 
  709                             zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
  710                         mtx_unlock(&Giant);
  711                 } else {
  712                         mem = zone->uz_allocf(zone, 
  713                             zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
  714                 }
  715                 if (mem == NULL) {
  716                         ZONE_LOCK(zone);
  717                         return (NULL);
  718                 }
  719         } else {
  720                 uma_slab_t tmps;
  721 
  722                 if (zone->uz_ppera > 1)
  723                         panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
  724                 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
  725                         panic("Mallocing before uma_startup2 has been called.\n");
  726                 if (uma_boot_free == 0)
  727                         panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
  728                 tmps = LIST_FIRST(&uma_boot_pages);
  729                 LIST_REMOVE(tmps, us_link);
  730                 uma_boot_free--;
  731                 mem = tmps->us_data;
  732                 flags = tmps->us_flags;
  733         }
  734 
  735         /* Point the slab into the allocated memory */
  736         if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
  737                 slab = (uma_slab_t )(mem + zone->uz_pgoff);
  738 
  739         if (zone->uz_flags & UMA_ZFLAG_MALLOC)
  740                 for (i = 0; i < zone->uz_ppera; i++)
  741                         vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
  742 
  743         slab->us_zone = zone;
  744         slab->us_data = mem;
  745 
  746         /*
  747          * This is intended to spread data out across cache lines.
  748          *
  749          * This code doesn't seem to work properly on x86, and on alpha
  750          * it makes absolutely no performance difference. I'm sure it could
  751          * use some tuning, but Sun makes outrageous claims about its
  752          * performance.
  753          */
  754 #if 0
  755         if (zone->uz_cachemax) {
  756                 slab->us_data += zone->uz_cacheoff;
  757                 zone->uz_cacheoff += UMA_CACHE_INC;
  758                 if (zone->uz_cacheoff > zone->uz_cachemax)
  759                         zone->uz_cacheoff = 0;
  760         }
  761 #endif
  762         
  763         slab->us_freecount = zone->uz_ipers;
  764         slab->us_firstfree = 0;
  765         slab->us_flags = flags;
  766         for (i = 0; i < zone->uz_ipers; i++)
  767                 slab->us_freelist[i] = i+1;
  768 
  769         if (zone->uz_init)
  770                 for (i = 0; i < zone->uz_ipers; i++)
  771                         zone->uz_init(slab->us_data + (zone->uz_rsize * i),
  772                             zone->uz_size);
  773         ZONE_LOCK(zone);
  774 
  775         if (zone->uz_flags & UMA_ZFLAG_HASH)
  776                 UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
  777 
  778         zone->uz_pages += zone->uz_ppera;
  779         zone->uz_free += zone->uz_ipers;
  780 
  781 
  782         return (slab);
  783 }
  784 
  785 /*
  786  * Allocates a number of pages from the system
  787  *
  788  * Arguments:
  789  *      zone  Unused
  790  *      bytes  The number of bytes requested
  791  *      wait  Shall we wait?
  792  *
  793  * Returns:
  794  *      A pointer to the allocated memory or possibly
  795  *      NULL if M_NOWAIT is set.
  796  */
  797 static void *
  798 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
  799 {
  800         void *p;        /* Returned page */
  801 
  802         *pflag = UMA_SLAB_KMEM;
  803         p = (void *) kmem_malloc(kmem_map, bytes, wait);
  804   
  805         return (p);
  806 }
  807 
  808 /*
  809  * Allocates a number of pages from within an object
  810  *
  811  * Arguments:
  812  *      zone   Unused
  813  *      bytes  The number of bytes requested
  814  *      wait   Shall we wait?
  815  *
  816  * Returns:
  817  *      A pointer to the allocated memory or possibly
  818  *      NULL if M_NOWAIT is set.
  819  *
  820  * TODO: If we fail during a multi-page allocation release the pages that have
  821  *       already been allocated.
  822  */
  823 static void *
  824 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
  825 {
  826         vm_offset_t zkva;
  827         vm_offset_t retkva;
  828         vm_page_t p;
  829         int pages;
  830 
  831         retkva = 0;
  832         pages = zone->uz_pages;
  833 
  834         /* 
  835          * This looks a little weird since we're getting one page at a time
  836          */
  837         while (bytes > 0) {
  838                 VM_OBJECT_LOCK(zone->uz_obj);
  839                 p = vm_page_alloc(zone->uz_obj, pages,
  840                     VM_ALLOC_INTERRUPT);
  841                 VM_OBJECT_UNLOCK(zone->uz_obj);
  842                 if (p == NULL)
  843                         return (NULL);
  844 
  845                 zkva = zone->uz_kva + pages * PAGE_SIZE;
  846                 if (retkva == 0)
  847                         retkva = zkva;
  848                 pmap_qenter(zkva, &p, 1);
  849                 bytes -= PAGE_SIZE;
  850                 pages += 1;
  851         }
  852 
  853         *flags = UMA_SLAB_PRIV;
  854 
  855         return ((void *)retkva);
  856 }
  857 
  858 /*
  859  * Frees a number of pages to the system
  860  * 
  861  * Arguments:
  862  *      mem   A pointer to the memory to be freed
  863  *      size  The size of the memory being freed
  864  *      flags The original p->us_flags field
  865  *
  866  * Returns:
  867  *      Nothing
  868  *
  869  */
  870 static void
  871 page_free(void *mem, int size, u_int8_t flags)
  872 {
  873         vm_map_t map;
  874 
  875         if (flags & UMA_SLAB_KMEM)
  876                 map = kmem_map;
  877         else
  878                 panic("UMA: page_free used with invalid flags %d\n", flags);
  879 
  880         kmem_free(map, (vm_offset_t)mem, size);
  881 }
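
/*
 * page_alloc()/page_free() are only the default backend.  A zone can supply
 * its own page supplier through uma_zone_set_allocf()/uma_zone_set_freef()
 * from <vm/uma.h>.  A sketch of a backend drawing pages from a hypothetical
 * private submap, "example_map", mirroring the pair above:
 */
#if 0
static vm_map_t example_map;	/* hypothetical private submap */

static void *
example_page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	/* Not UMA_SLAB_KMEM, so the default page_free() is never used. */
	*pflag = UMA_SLAB_PRIV;
	return ((void *)kmem_malloc(example_map, bytes, wait));
}

static void
example_page_free(void *mem, int size, u_int8_t flags)
{
	kmem_free(example_map, (vm_offset_t)mem, size);
}

static void
example_zone_setup(uma_zone_t zone)
{
	uma_zone_set_allocf(zone, example_page_alloc);
	uma_zone_set_freef(zone, example_page_free);
}
#endif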
  882 
  883 /*
  884  * Zero fill initializer
  885  *
  886  * Arguments/Returns follow uma_init specifications
  887  *
  888  */
  889 static void
  890 zero_init(void *mem, int size)
  891 {
  892         bzero(mem, size);
  893 }
  894 
  895 /*
  896  * Finish creating a small uma zone.  This calculates ipers, and the zone size.
  897  *
  898  * Arguments
  899  *      zone  The zone we should initialize
  900  *
  901  * Returns
  902  *      Nothing
  903  */
  904 static void
  905 zone_small_init(uma_zone_t zone)
  906 {
  907         int rsize;
  908         int memused;
  909         int ipers;
  910 
  911         rsize = zone->uz_size;
  912 
  913         if (rsize < UMA_SMALLEST_UNIT)
  914                 rsize = UMA_SMALLEST_UNIT;
  915 
  916         if (rsize & zone->uz_align)
  917                 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
  918 
  919         zone->uz_rsize = rsize;
  920 
  921         rsize += 1;     /* Account for the byte of linkage */
  922         zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
  923         zone->uz_ppera = 1;
  924 
  925         memused = zone->uz_ipers * zone->uz_rsize;
  926 
  927         /* Can we do any better? */
  928         if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
  929                 if (zone->uz_flags & UMA_ZFLAG_INTERNAL) 
  930                         return;
  931                 ipers = UMA_SLAB_SIZE / zone->uz_rsize;
  932                 if (ipers > zone->uz_ipers) {
  933                         zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
  934                         if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
  935                                 zone->uz_flags |= UMA_ZFLAG_HASH;
  936                         zone->uz_ipers = ipers;
  937                 }
  938         }
  939 
  940 }
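
/*
 * Worked example of the sizing above, assuming UMA_SLAB_SIZE == PAGE_SIZE ==
 * 4096 and a 64-byte item: rsize stays 64 (already aligned), the byte of
 * freelist linkage makes each item cost 65 bytes, and
 * ipers = (4096 - sizeof(struct uma_slab)) / 65 with the header kept inside
 * the page.  The zone only switches to UMA_ZFLAG_OFFPAGE (plus UMA_ZFLAG_HASH
 * for non-malloc zones) when the leftover space reaches UMA_MAX_WASTE and the
 * offpage layout's 4096 / 64 = 64 items beats that ipers.
 */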
  941 
  942 /*
  943  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do 
  944  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
  945  * more complicated.
  946  *
  947  * Arguments
  948  *      zone  The zone we should initialize
  949  *
  950  * Returns
  951  *      Nothing
  952  */
  953 static void
  954 zone_large_init(uma_zone_t zone)
  955 {       
  956         int pages;
  957 
  958         pages = zone->uz_size / UMA_SLAB_SIZE;
  959 
  960         /* Account for remainder */
  961         if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
  962                 pages++;
  963 
  964         zone->uz_ppera = pages;
  965         zone->uz_ipers = 1;
  966 
  967         zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
  968         if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
  969                 zone->uz_flags |= UMA_ZFLAG_HASH;
  970 
  971         zone->uz_rsize = zone->uz_size;
  972 }
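
/*
 * Example of the arithmetic above: a 9000-byte item with UMA_SLAB_SIZE ==
 * 4096 needs 9000 / 4096 = 2 pages plus one more for the remainder, so
 * uz_ppera = 3 and each slab holds exactly one item, with the slab header
 * always kept offpage.
 */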
  973 
  974 /* 
  975  * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
  976  * the zone onto the global zone list.
  977  *
  978  * Arguments/Returns follow uma_ctor specifications
  979  *      udata  Actually uma_zctor_args
  980  *
  981  */
  982 
  983 static void
  984 zone_ctor(void *mem, int size, void *udata)
  985 {
  986         struct uma_zctor_args *arg = udata;
  987         uma_zone_t zone = mem;
  988         int privlc;
  989         int cplen;
  990         int cpu;
  991 
  992         bzero(zone, size);
  993         zone->uz_name = arg->name;
  994         zone->uz_size = arg->size;
  995         zone->uz_ctor = arg->ctor;
  996         zone->uz_dtor = arg->dtor;
  997         zone->uz_init = arg->uminit;
  998         zone->uz_fini = arg->fini;
  999         zone->uz_align = arg->align;
 1000         zone->uz_free = 0;
 1001         zone->uz_pages = 0;
 1002         zone->uz_flags = 0;
 1003         zone->uz_allocf = page_alloc;
 1004         zone->uz_freef = page_free;
 1005 
 1006         if (arg->flags & UMA_ZONE_ZINIT)
 1007                 zone->uz_init = zero_init;
 1008 
 1009         if (arg->flags & UMA_ZONE_INTERNAL)
 1010                 zone->uz_flags |= UMA_ZFLAG_INTERNAL;
 1011 
 1012         if (arg->flags & UMA_ZONE_MALLOC)
 1013                 zone->uz_flags |= UMA_ZFLAG_MALLOC;
 1014 
 1015         if (arg->flags & UMA_ZONE_NOFREE)
 1016                 zone->uz_flags |= UMA_ZFLAG_NOFREE;
 1017 
 1018         if (arg->flags & UMA_ZONE_VM)
 1019                 zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
 1020 
 1021         if (zone->uz_size > UMA_SLAB_SIZE)
 1022                 zone_large_init(zone);
 1023         else
 1024                 zone_small_init(zone);
 1025 #ifdef UMA_MD_SMALL_ALLOC
 1026         if (zone->uz_ppera == 1) {
 1027                 zone->uz_allocf = uma_small_alloc;
 1028                 zone->uz_freef = uma_small_free;
 1029         }
 1030 #endif  /* UMA_MD_SMALL_ALLOC */
 1031 
 1032         if (arg->flags & UMA_ZONE_MTXCLASS)
 1033                 privlc = 1;
 1034         else
 1035                 privlc = 0;
 1036 
 1037         /* We do this so that the per cpu lock name is unique for each zone */
 1038         memcpy(zone->uz_lname, "PCPU ", 5);
 1039         cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
 1040         memcpy(zone->uz_lname+5, zone->uz_name, cplen);
 1041         zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
 1042 
 1043         /*
 1044          * If we're putting the slab header in the actual page we need to
 1045          * figure out where in each page it goes.  This calculates a right 
 1046          * justified offset into the memory on an ALIGN_PTR boundary.
 1047          */
 1048         if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
 1049                 int totsize;
 1050                 int waste;
 1051 
 1052                 /* Size of the slab struct and free list */
 1053                 totsize = sizeof(struct uma_slab) + zone->uz_ipers;
 1054                 if (totsize & UMA_ALIGN_PTR)
 1055                         totsize = (totsize & ~UMA_ALIGN_PTR) +
 1056                             (UMA_ALIGN_PTR + 1);
 1057                 zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
 1058 
 1059                 waste = zone->uz_pgoff;
 1060                 waste -= (zone->uz_ipers * zone->uz_rsize);
 1061 
 1062                 /*
 1063                  * This calculates how much space we have for cache line size
 1064          * optimizations.  It works by offsetting each slab slightly.
 1065                  * Currently it breaks on x86, and so it is disabled.
 1066                  */
 1067 
 1068                 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
 1069                         zone->uz_cachemax = waste - UMA_CACHE_INC;
 1070                         zone->uz_cacheoff = 0;
 1071                 } 
 1072 
 1073                 totsize = zone->uz_pgoff + sizeof(struct uma_slab)
 1074                     + zone->uz_ipers;
 1075                 /* I don't think it's possible, but I'll make sure anyway */
 1076                 if (totsize > UMA_SLAB_SIZE) {
 1077                         printf("zone %s ipers %d rsize %d size %d\n",
 1078                             zone->uz_name, zone->uz_ipers, zone->uz_rsize,
 1079                             zone->uz_size);
 1080                         panic("UMA slab won't fit.\n");
 1081                 }
 1082         }
 1083 
 1084         if (zone->uz_flags & UMA_ZFLAG_HASH)
 1085                 hash_alloc(&zone->uz_hash);
 1086 
 1087 #ifdef UMA_DEBUG
 1088         printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
 1089             zone->uz_name, zone,
 1090             zone->uz_size, zone->uz_ipers,
 1091             zone->uz_ppera, zone->uz_pgoff);
 1092 #endif
 1093         ZONE_LOCK_INIT(zone, privlc);
 1094 
 1095         mtx_lock(&uma_mtx);
 1096         LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
 1097         mtx_unlock(&uma_mtx);
 1098 
 1099         /*
 1100          * Some internal zones don't have room allocated for the per cpu
 1101          * caches.  If we're internal, bail out here.
 1102          */
 1103 
 1104         if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
 1105                 return;
 1106 
 1107         if (zone->uz_ipers < UMA_BUCKET_SIZE)
 1108                 zone->uz_count = zone->uz_ipers - 1;
 1109         else
 1110                 zone->uz_count = UMA_BUCKET_SIZE - 1;
 1111 
 1112         for (cpu = 0; cpu < maxcpu; cpu++)
 1113                 CPU_LOCK_INIT(zone, cpu, privlc);
 1114 }
 1115 
 1116 /* 
 1117  * Zone header dtor.  This frees all data, destroys locks, frees the hash table
 1118  * and removes the zone from the global list.
 1119  *
 1120  * Arguments/Returns follow uma_dtor specifications
 1121  *      udata  unused
 1122  */
 1123 
 1124 static void
 1125 zone_dtor(void *arg, int size, void *udata)
 1126 {
 1127         uma_zone_t zone;
 1128         int cpu;
 1129 
 1130         zone = (uma_zone_t)arg;
 1131 
 1132         ZONE_LOCK(zone);
 1133         zone->uz_wssize = 0;
 1134         ZONE_UNLOCK(zone);
 1135 
 1136         mtx_lock(&uma_mtx);
 1137         LIST_REMOVE(zone, uz_link);
 1138         zone_drain(zone);
 1139         mtx_unlock(&uma_mtx);
 1140 
 1141         ZONE_LOCK(zone);
 1142         if (zone->uz_free != 0)
 1143                 printf("Zone %s was not empty (%d items).  Lost %d pages of memory.\n",
 1144                     zone->uz_name, zone->uz_free, zone->uz_pages);
 1145 
 1146         if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
 1147                 for (cpu = 0; cpu < maxcpu; cpu++)
 1148                         CPU_LOCK_FINI(zone, cpu);
 1149 
 1150         ZONE_UNLOCK(zone);
 1151         if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
 1152                 hash_free(&zone->uz_hash);
 1153 
 1154         ZONE_LOCK_FINI(zone);
 1155 }
 1156 /*
 1157  * Traverses every zone in the system and calls a callback
 1158  *
 1159  * Arguments:
 1160  *      zfunc  A pointer to a function which accepts a zone
 1161  *              as an argument.
 1162  * 
 1163  * Returns:
 1164  *      Nothing
 1165  */
 1166 static void 
 1167 zone_foreach(void (*zfunc)(uma_zone_t))
 1168 {
 1169         uma_zone_t zone;
 1170 
 1171         mtx_lock(&uma_mtx);
 1172         LIST_FOREACH(zone, &uma_zones, uz_link) {
 1173                 zfunc(zone);
 1174         }
 1175         mtx_unlock(&uma_mtx);
 1176 }
 1177 
 1178 /* Public functions */
 1179 /* See uma.h */
 1180 void
 1181 uma_startup(void *bootmem)
 1182 {
 1183         struct uma_zctor_args args;
 1184         uma_slab_t slab;
 1185         int slabsize;
 1186         int i;
 1187 
 1188 #ifdef UMA_DEBUG
 1189         printf("Creating uma zone headers zone.\n");
 1190 #endif
 1191 #ifdef SMP
 1192         maxcpu = mp_maxid + 1;
 1193 #else
 1194         maxcpu = 1;
 1195 #endif
 1196 #ifdef UMA_DEBUG 
 1197         printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
 1198         Debugger("stop");
 1199 #endif
 1200         mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
 1201         /* "Manually" create the initial zone */
 1202         args.name = "UMA Zones";
 1203         args.size = sizeof(struct uma_zone) +
 1204             (sizeof(struct uma_cache) * (maxcpu - 1));
 1205         args.ctor = zone_ctor;
 1206         args.dtor = zone_dtor;
 1207         args.uminit = zero_init;
 1208         args.fini = NULL;
 1209         args.align = 32 - 1;
 1210         args.flags = UMA_ZONE_INTERNAL;
 1211         /* The initial zone has no Per cpu queues so it's smaller */
 1212         zone_ctor(zones, sizeof(struct uma_zone), &args);
 1213 
 1214 #ifdef UMA_DEBUG
 1215         printf("Filling boot free list.\n");
 1216 #endif
 1217         for (i = 0; i < UMA_BOOT_PAGES; i++) {
 1218                 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
 1219                 slab->us_data = (u_int8_t *)slab;
 1220                 slab->us_flags = UMA_SLAB_BOOT;
 1221                 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
 1222                 uma_boot_free++;
 1223         }
 1224 
 1225 #ifdef UMA_DEBUG
 1226         printf("Creating slab zone.\n");
 1227 #endif
 1228 
 1229         /*
 1230          * This is the max number of free list items we'll have with
 1231          * offpage slabs.
 1232          */
 1233 
 1234         slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
 1235         slabsize /= UMA_MAX_WASTE;
 1236         slabsize++;                     /* In case the division rounded down */
 1237         slabsize += sizeof(struct uma_slab);
 1238 
 1239         /* Now make a zone for slab headers */
 1240         slabzone = uma_zcreate("UMA Slabs",
 1241                                 slabsize,
 1242                                 NULL, NULL, NULL, NULL,
 1243                                 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
 1244 
 1245         hashzone = uma_zcreate("UMA Hash",
 1246             sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
 1247             NULL, NULL, NULL, NULL,
 1248             UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
 1249 
 1250         bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
 1251             NULL, NULL, NULL, NULL,
 1252             UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
 1253 
 1254 #ifdef UMA_MD_SMALL_ALLOC
 1255         booted = 1;
 1256 #endif
 1257 
 1258 #ifdef UMA_DEBUG
 1259         printf("UMA startup complete.\n");
 1260 #endif
 1261 }
 1262 
 1263 /* see uma.h */
 1264 void
 1265 uma_startup2(void)
 1266 {
 1267         booted = 1;
 1268         bucket_enable();
 1269 #ifdef UMA_DEBUG
 1270         printf("UMA startup2 complete.\n");
 1271 #endif
 1272 }
 1273 
 1274 /*
 1275  * Initialize our callout handle
 1276  *
 1277  */
 1278 
 1279 static void
 1280 uma_startup3(void)
 1281 {
 1282 #ifdef UMA_DEBUG
 1283         printf("Starting callout.\n");
 1284 #endif
 1285         callout_init(&uma_callout, 0);
 1286         callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
 1287 #ifdef UMA_DEBUG
 1288         printf("UMA startup3 complete.\n");
 1289 #endif
 1290 }
 1291 
 1292 /* See uma.h */
 1293 uma_zone_t  
 1294 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
 1295                 uma_init uminit, uma_fini fini, int align, u_int16_t flags)
 1296                      
 1297 {
 1298         struct uma_zctor_args args;
 1299 
 1300         /* This stuff is essential for the zone ctor */
 1301         args.name = name;
 1302         args.size = size;
 1303         args.ctor = ctor;
 1304         args.dtor = dtor;
 1305         args.uminit = uminit;
 1306         args.fini = fini;
 1307         args.align = align;
 1308         args.flags = flags;
 1309 
 1310         return (uma_zalloc_internal(zones, &args, M_WAITOK));
 1311 }
 1312 
 1313 /* See uma.h */
 1314 void
 1315 uma_zdestroy(uma_zone_t zone)
 1316 {
 1317         uma_zfree_internal(zones, zone, NULL, 0);
 1318 }
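
/*
 * Sketch of a zone that uses the uminit/fini hooks, which run when backing
 * memory enters and leaves the zone, as opposed to ctor/dtor, which run on
 * every uma_zalloc()/uma_zfree().  The "bar" type and its mutex are
 * hypothetical; the point is that the mutex is set up once per item and
 * survives alloc/free cycles until zone_drain() or uma_zdestroy() tears the
 * slab down.
 */
#if 0
struct bar {
	struct mtx	b_lock;
	int		b_state;
};

static uma_zone_t bar_zone;

static void
bar_zinit(void *mem, int size)
{
	struct bar *b = mem;

	mtx_init(&b->b_lock, "bar lock", NULL, MTX_DEF);
}

static void
bar_zfini(void *mem, int size)
{
	struct bar *b = mem;

	mtx_destroy(&b->b_lock);
}

static void
bar_setup(void)
{
	bar_zone = uma_zcreate("bar", sizeof(struct bar), NULL, NULL,
	    bar_zinit, bar_zfini, UMA_ALIGN_PTR, 0);
}
#endif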
 1319 
 1320 /* See uma.h */
 1321 void *
 1322 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 1323 {
 1324         void *item;
 1325         uma_cache_t cache;
 1326         uma_bucket_t bucket;
 1327         int cpu;
 1328 
 1329         /* This is the fast path allocation */
 1330 #ifdef UMA_DEBUG_ALLOC_1
 1331         printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
 1332 #endif
 1333 
 1334         if (!(flags & M_NOWAIT)) {
 1335                 KASSERT(curthread->td_intr_nesting_level == 0,
 1336                    ("malloc(M_WAITOK) in interrupt context"));
 1337                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 1338                     "malloc() of \"%s\"", zone->uz_name);
 1339         }
 1340 
 1341 zalloc_restart:
 1342         cpu = PCPU_GET(cpuid);
 1343         CPU_LOCK(zone, cpu);
 1344         cache = &zone->uz_cpu[cpu];
 1345 
 1346 zalloc_start:
 1347         bucket = cache->uc_allocbucket;
 1348 
 1349         if (bucket) {
 1350                 if (bucket->ub_ptr > -1) {
 1351                         item = bucket->ub_bucket[bucket->ub_ptr];
 1352 #ifdef INVARIANTS
 1353                         bucket->ub_bucket[bucket->ub_ptr] = NULL;
 1354 #endif
 1355                         bucket->ub_ptr--;
 1356                         KASSERT(item != NULL,
 1357                             ("uma_zalloc: Bucket pointer mangled."));
 1358                         cache->uc_allocs++;
 1359 #ifdef INVARIANTS
 1360                         ZONE_LOCK(zone);
 1361                         uma_dbg_alloc(zone, NULL, item);
 1362                         ZONE_UNLOCK(zone);
 1363 #endif
 1364                         CPU_UNLOCK(zone, cpu);
 1365                         if (zone->uz_ctor)
 1366                                 zone->uz_ctor(item, zone->uz_size, udata);
 1367                         if (flags & M_ZERO)
 1368                                 bzero(item, zone->uz_size);
 1369                         return (item);
 1370                 } else if (cache->uc_freebucket) {
 1371                         /*
 1372                          * We have run out of items in our allocbucket.
 1373                          * See if we can switch with our free bucket.
 1374                          */
 1375                         if (cache->uc_freebucket->ub_ptr > -1) {
 1376                                 uma_bucket_t swap;
 1377 
 1378 #ifdef UMA_DEBUG_ALLOC
 1379                                 printf("uma_zalloc: Swapping empty with alloc.\n");
 1380 #endif
 1381                                 swap = cache->uc_freebucket;
 1382                                 cache->uc_freebucket = cache->uc_allocbucket;
 1383                                 cache->uc_allocbucket = swap;
 1384 
 1385                                 goto zalloc_start;
 1386                         }
 1387                 }
 1388         }
 1389         ZONE_LOCK(zone);
 1390         /* Since we have locked the zone we may as well send back our stats */
 1391         zone->uz_allocs += cache->uc_allocs;
 1392         cache->uc_allocs = 0;
 1393 
 1394         /* Our old one is now a free bucket */
 1395         if (cache->uc_allocbucket) {
 1396                 KASSERT(cache->uc_allocbucket->ub_ptr == -1,
 1397                     ("uma_zalloc_arg: Freeing a non free bucket."));
 1398                 LIST_INSERT_HEAD(&zone->uz_free_bucket,
 1399                     cache->uc_allocbucket, ub_link);
 1400                 cache->uc_allocbucket = NULL;
 1401         }
 1402 
 1403         /* Check the free list for a new alloc bucket */
 1404         if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
 1405                 KASSERT(bucket->ub_ptr != -1,
 1406                     ("uma_zalloc_arg: Returning an empty bucket."));
 1407 
 1408                 LIST_REMOVE(bucket, ub_link);
 1409                 cache->uc_allocbucket = bucket;
 1410                 ZONE_UNLOCK(zone);
 1411                 goto zalloc_start;
 1412         } 
 1413         /* We are no longer associated with this cpu!!! */
 1414         CPU_UNLOCK(zone, cpu);
 1415 
 1416         /* Bump up our uz_count so we get here less */
 1417         if (zone->uz_count < UMA_BUCKET_SIZE - 1)
 1418                 zone->uz_count++;
 1419 
 1420         /*
 1421          * Now let's just fill a bucket and put it on the free list.  If that
 1422          * works we'll restart the allocation from the beginning.
 1423          */
 1424 
 1425         if (uma_zalloc_bucket(zone, flags)) {
 1426                 ZONE_UNLOCK(zone);
 1427                 goto zalloc_restart;
 1428         }
 1429         ZONE_UNLOCK(zone);
 1430         /*
 1431          * We may not be able to get a bucket so return an actual item.
 1432          */
 1433 #ifdef UMA_DEBUG
 1434         printf("uma_zalloc_arg: Bucketzone returned NULL\n");
 1435 #endif
 1436 
 1437         return (uma_zalloc_internal(zone, udata, flags));
 1438 }
 1439 
 1440 static uma_slab_t
 1441 uma_zone_slab(uma_zone_t zone, int flags)
 1442 {
 1443         uma_slab_t slab;
 1444 
 1445         /* 
 1446          * This is to prevent us from recursively trying to allocate
 1447          * buckets.  The problem is that if an allocation forces us to
 1448          * grab a new bucket we will call page_alloc, which will go off
 1449          * and cause the vm to allocate vm_map_entries.  If we need new
 1450          * buckets there too we will recurse in kmem_alloc and bad 
 1451          * things happen.  So instead we return a NULL bucket, and make
 1452          * the code that allocates buckets smart enough to deal with it
 1453          */ 
 1454         if (zone == bucketzone && zone->uz_recurse != 0)
 1455                 return (NULL);
 1456 
 1457         slab = NULL;
 1458 
 1459         for (;;) {
 1460                 /*
 1461                  * Find a slab with some space.  Prefer slabs that are partially
 1462                  * used over those that are totally full.  This helps to reduce
 1463                  * fragmentation.
 1464                  */
 1465                 if (zone->uz_free != 0) {
 1466                         if (!LIST_EMPTY(&zone->uz_part_slab)) {
 1467                                 slab = LIST_FIRST(&zone->uz_part_slab);
 1468                         } else {
 1469                                 slab = LIST_FIRST(&zone->uz_free_slab);
 1470                                 LIST_REMOVE(slab, us_link);
 1471                                 LIST_INSERT_HEAD(&zone->uz_part_slab, slab,
 1472                                 us_link);
 1473                         }
 1474                         return (slab);
 1475                 }
 1476 
 1477                 /*
 1478                  * M_NOVM means don't ask at all!
 1479                  */
 1480                 if (flags & M_NOVM)
 1481                         break;
 1482 
 1483                 if (zone->uz_maxpages &&
 1484                     zone->uz_pages >= zone->uz_maxpages) {
 1485                         zone->uz_flags |= UMA_ZFLAG_FULL;
 1486 
 1487                         if (flags & M_NOWAIT)
 1488                                 break;
 1489                         else 
 1490                                 msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
 1491                         continue;
 1492                 }
 1493                 zone->uz_recurse++;
 1494                 slab = slab_zalloc(zone, flags);
 1495                 zone->uz_recurse--;
 1496                 /* 
 1497                  * If we got a slab here it's safe to mark it partially used
 1498                  * and return.  We assume that the caller is going to remove
 1499                  * at least one item.
 1500                  */
 1501                 if (slab) {
 1502                         LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
 1503                         return (slab);
 1504                 }
 1505                 /* 
 1506                  * We might not have been able to get a slab but another cpu
 1507                  * could have while we were unlocked.  Check again before we
 1508                  * fail.
 1509                  */
 1510                 if (flags & M_NOWAIT)
 1511                         flags |= M_NOVM;
 1512         }
 1513         return (slab);
 1514 }
 1515 
 1516 static __inline void *
 1517 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
 1518 {
 1519         void *item;
 1520         u_int8_t freei;
 1521         
 1522         freei = slab->us_firstfree;
 1523         slab->us_firstfree = slab->us_freelist[freei];
 1524         item = slab->us_data + (zone->uz_rsize * freei);
 1525 
 1526         slab->us_freecount--;
 1527         zone->uz_free--;
 1528 #ifdef INVARIANTS
 1529         uma_dbg_alloc(zone, slab, item);
 1530 #endif
 1531         /* Move this slab to the full list */
 1532         if (slab->us_freecount == 0) {
 1533                 LIST_REMOVE(slab, us_link);
 1534                 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
 1535         }
 1536 
 1537         return (item);
 1538 }
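
/*
 * The allocation above pops an item index off the slab's embedded free
 * list: us_firstfree is the index of the first free item and
 * us_freelist[i] holds the index of the free item that follows item i,
 * so no per-item pointers are needed.  The block below is a minimal
 * stand-alone userland model of that encoding; all toy_* names and the
 * item count are illustrative only.
 */
#if 0
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_NITEMS      8

struct toy_slab {
        uint8_t firstfree;              /* index of the first free item */
        uint8_t freelist[TOY_NITEMS];   /* index of the next free item */
        uint8_t freecount;
};

static void
toy_slab_init(struct toy_slab *s)
{
        int i;

        for (i = 0; i < TOY_NITEMS; i++)
                s->freelist[i] = i + 1;
        s->firstfree = 0;
        s->freecount = TOY_NITEMS;
}

static int
toy_slab_alloc(struct toy_slab *s)
{
        int i;

        assert(s->freecount > 0);
        i = s->firstfree;
        s->firstfree = s->freelist[i];  /* pop the head of the free list */
        s->freecount--;
        return (i);
}

static void
toy_slab_free(struct toy_slab *s, int i)
{
        s->freelist[i] = s->firstfree;  /* push i back on the head */
        s->firstfree = i;
        s->freecount++;
}

int
main(void)
{
        struct toy_slab s;
        int a;

        toy_slab_init(&s);
        a = toy_slab_alloc(&s);         /* index 0 */
        (void)toy_slab_alloc(&s);       /* index 1 */
        toy_slab_free(&s, a);
        printf("next alloc: %d (the most recently freed index)\n",
            toy_slab_alloc(&s));
        return (0);
}
#endif
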
 1539 
 1540 static int
 1541 uma_zalloc_bucket(uma_zone_t zone, int flags)
 1542 {
 1543         uma_bucket_t bucket;
 1544         uma_slab_t slab;
 1545 
 1546         /*
 1547          * Try this zone's free list first so we don't allocate extra buckets.
 1548          */
 1549 
 1550         if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
 1551                 KASSERT(bucket->ub_ptr == -1,
 1552                     ("uma_zalloc_bucket: Bucket on free list is not empty."));
 1553                 LIST_REMOVE(bucket, ub_link);
 1554         } else {
 1555                 int bflags;
 1556 
 1557                 bflags = flags;
 1558                 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
 1559                         bflags |= M_NOVM;
 1560 
 1561                 ZONE_UNLOCK(zone);
 1562                 bucket = uma_zalloc_internal(bucketzone,
 1563                     NULL, bflags);
 1564                 ZONE_LOCK(zone);
 1565                 if (bucket != NULL) {
 1566 #ifdef INVARIANTS
 1567                         bzero(bucket, bucketzone->uz_size);
 1568 #endif
 1569                         bucket->ub_ptr = -1;
 1570                 }
 1571         }
 1572 
 1573         if (bucket == NULL)
 1574                 return (0);
 1575 
 1576 #ifdef SMP
 1577         /*
 1578          * This code is here to limit the number of simultaneous bucket fills
 1579          * for any given zone to the number of per cpu caches in this zone. This
 1580          * is done so that we don't allocate more memory than we really need.
 1581          */
 1582         if (zone->uz_fills >= mp_ncpus)
 1583                 goto done;
 1584 
 1585 #endif
 1586         zone->uz_fills++;
 1587 
 1588         /* Try to keep the buckets totally full */
 1589         while ((slab = uma_zone_slab(zone, flags)) != NULL &&
 1590             bucket->ub_ptr < zone->uz_count) {
 1591                 while (slab->us_freecount &&
 1592                     bucket->ub_ptr < zone->uz_count) {
 1593                         bucket->ub_bucket[++bucket->ub_ptr] =
 1594                             uma_slab_alloc(zone, slab);
 1595                 }
 1596                 /* Don't block on the next fill */
 1597                 flags |= M_NOWAIT;
 1598         }
 1599 
 1600         zone->uz_fills--;
 1601 
 1602         if (bucket->ub_ptr != -1) {
 1603                 LIST_INSERT_HEAD(&zone->uz_full_bucket,
 1604                     bucket, ub_link);
 1605                 return (1);
 1606         }
 1607 #ifdef SMP
 1608 done:
 1609 #endif
 1610         uma_zfree_internal(bucketzone, bucket, NULL, 0);
 1611 
 1612         return (0);
 1613 }
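
/*
 * A bucket is a fixed-size stack of item pointers: ub_ptr is the index of
 * the topmost valid entry, so -1 means empty, a push pre-increments and a
 * pop post-decrements.  Below is a stand-alone userland model of that
 * convention; the toy_* names and bucket size are illustrative only (the
 * kernel additionally caps pushes at the zone's dynamic uz_count).
 */
#if 0
#include <assert.h>
#include <stdio.h>

#define TOY_BUCKET_SIZE 16

struct toy_bucket {
        int     ptr;                    /* topmost valid index, -1 if empty */
        void    *items[TOY_BUCKET_SIZE];
};

static int
toy_bucket_push(struct toy_bucket *b, void *item)
{
        if (b->ptr + 1 == TOY_BUCKET_SIZE)
                return (0);             /* bucket is full */
        b->items[++b->ptr] = item;
        return (1);
}

static void *
toy_bucket_pop(struct toy_bucket *b)
{
        if (b->ptr == -1)
                return (NULL);          /* bucket is empty */
        return (b->items[b->ptr--]);
}

int
main(void)
{
        struct toy_bucket b = { .ptr = -1 };
        int x = 42;

        assert(toy_bucket_push(&b, &x));
        printf("popped %d\n", *(int *)toy_bucket_pop(&b));
        printf("empty again: %s\n", b.ptr == -1 ? "yes" : "no");
        return (0);
}
#endif
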
 1614 /*
 1615  * Allocates an item for an internal zone
 1616  *
 1617  * Arguments:
 1618  *      zone   The zone to alloc for.
 1619  *      udata  The data to be passed to the constructor.
 1620  *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
 1621  *
 1622  * Returns:
 1623  *      NULL if there is no memory and M_NOWAIT is set
 1624  *      An item if successful
 1625  */
 1626 
 1627 static void *
 1628 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
 1629 {
 1630         uma_slab_t slab;
 1631         void *item;
 1632 
 1633         item = NULL;
 1634 
 1635         /*
 1636          * This is to stop us from allocating per cpu buckets while we're
 1637          * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
 1638          * boot pages.
 1639          */
 1640 
 1641         if (bucketdisable && zone == bucketzone)
 1642                 return (NULL);
 1643 
 1644 #ifdef UMA_DEBUG_ALLOC
 1645         printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
 1646 #endif
 1647         ZONE_LOCK(zone);
 1648 
 1649         slab = uma_zone_slab(zone, flags);
 1650         if (slab == NULL) {
 1651                 ZONE_UNLOCK(zone);
 1652                 return (NULL);
 1653         }
 1654 
 1655         item = uma_slab_alloc(zone, slab);
 1656 
 1657         ZONE_UNLOCK(zone);
 1658 
 1659         if (zone->uz_ctor != NULL) 
 1660                 zone->uz_ctor(item, zone->uz_size, udata);
 1661         if (flags & M_ZERO)
 1662                 bzero(item, zone->uz_size);
 1663 
 1664         return (item);
 1665 }
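
/*
 * As the comment above notes, callers pass M_WAITOK or M_NOWAIT plus an
 * optional M_ZERO, and with M_NOWAIT the return value may be NULL.  The
 * block below is a sketch of a typical consumer of the public uma(9)
 * interface (uma_zcreate/uma_zalloc/uma_zfree); "struct foo", the zone
 * name, and the wrapper functions are hypothetical, and zone creation
 * would normally be hooked up through SYSINIT.
 */
#if 0
#include <sys/param.h>
#include <sys/kernel.h>
#include <vm/uma.h>

struct foo {
        int     f_refs;
};

static uma_zone_t foo_zone;

static void
foo_zone_init(void)
{
        /* No ctor/dtor or init/fini; default pointer alignment. */
        foo_zone = uma_zcreate("foo", sizeof(struct foo),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

static struct foo *
foo_alloc(void)
{
        struct foo *fp;

        fp = uma_zalloc(foo_zone, M_NOWAIT | M_ZERO);
        if (fp == NULL)
                return (NULL);          /* M_NOWAIT allocations may fail */
        fp->f_refs = 1;
        return (fp);
}

static void
foo_free(struct foo *fp)
{
        uma_zfree(foo_zone, fp);
}
#endif
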
 1666 
 1667 /* See uma.h */
 1668 void
 1669 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
 1670 {
 1671         uma_cache_t cache;
 1672         uma_bucket_t bucket;
 1673         int bflags;
 1674         int cpu;
 1675 
 1676         /* This is the fast path free */
 1677 #ifdef UMA_DEBUG_ALLOC_1
 1678         printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
 1679 #endif
 1680         /*
 1681          * The race here is acceptable.  If we miss it we'll just have to wait
 1682          * a little longer for the limits to be reset.
 1683          */
 1684 
 1685         if (zone->uz_flags & UMA_ZFLAG_FULL)
 1686                 goto zfree_internal;
 1687 
 1688         if (zone->uz_dtor)
 1689                 zone->uz_dtor(item, zone->uz_size, udata);
 1690 
 1691 zfree_restart:
 1692         cpu = PCPU_GET(cpuid);
 1693         CPU_LOCK(zone, cpu);
 1694         cache = &zone->uz_cpu[cpu];
 1695 
 1696 zfree_start:
 1697         bucket = cache->uc_freebucket;
 1698 
 1699         if (bucket) {
 1700                 /*
 1701          * Do we have room in our bucket?  It is OK for this uz_count
 1702                  * check to be slightly out of sync.
 1703                  */
 1704 
 1705                 if (bucket->ub_ptr < zone->uz_count) {
 1706                         bucket->ub_ptr++;
 1707                         KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
 1708                             ("uma_zfree: Freeing to non free bucket index."));
 1709                         bucket->ub_bucket[bucket->ub_ptr] = item;
 1710 #ifdef INVARIANTS
 1711                         ZONE_LOCK(zone);
 1712                         if (zone->uz_flags & UMA_ZFLAG_MALLOC)
 1713                                 uma_dbg_free(zone, udata, item);
 1714                         else
 1715                                 uma_dbg_free(zone, NULL, item);
 1716                         ZONE_UNLOCK(zone);
 1717 #endif
 1718                         CPU_UNLOCK(zone, cpu);
 1719                         return;
 1720                 } else if (cache->uc_allocbucket) {
 1721 #ifdef UMA_DEBUG_ALLOC
 1722                         printf("uma_zfree: Swapping buckets.\n");
 1723 #endif
 1724                         /*
 1725                          * We have run out of space in our freebucket.
 1726                          * See if we can switch with our alloc bucket.
 1727                          */
 1728                         if (cache->uc_allocbucket->ub_ptr < 
 1729                             cache->uc_freebucket->ub_ptr) {
 1730                                 uma_bucket_t swap;
 1731 
 1732                                 swap = cache->uc_freebucket;
 1733                                 cache->uc_freebucket = cache->uc_allocbucket;
 1734                                 cache->uc_allocbucket = swap;
 1735 
 1736                                 goto zfree_start;
 1737                         }
 1738                 }
 1739         } 
 1740 
 1741         /*
 1742          * We can get here for two reasons:
 1743          *
 1744          * 1) The buckets are NULL
 1745          * 2) The alloc and free buckets are both somewhat full.
 1746          *
 1747          */
 1748 
 1749         ZONE_LOCK(zone);
 1750 
 1751         bucket = cache->uc_freebucket;
 1752         cache->uc_freebucket = NULL;
 1753 
 1754         /* Can we throw this on the zone full list? */
 1755         if (bucket != NULL) {
 1756 #ifdef UMA_DEBUG_ALLOC
 1757                 printf("uma_zfree: Putting old bucket on the free list.\n");
 1758 #endif
 1759                 /* ub_ptr is pointing to the last free item */
 1760                 KASSERT(bucket->ub_ptr != -1,
 1761                     ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
 1762                 LIST_INSERT_HEAD(&zone->uz_full_bucket,
 1763                     bucket, ub_link);
 1764         }
 1765         if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
 1766                 LIST_REMOVE(bucket, ub_link);
 1767                 ZONE_UNLOCK(zone);
 1768                 cache->uc_freebucket = bucket;
 1769                 goto zfree_start;
 1770         }
 1771         /* We're done with this CPU now */
 1772         CPU_UNLOCK(zone, cpu);
 1773 
 1774         /* And the zone.. */
 1775         ZONE_UNLOCK(zone);
 1776 
 1777 #ifdef UMA_DEBUG_ALLOC
 1778         printf("uma_zfree: Allocating new free bucket.\n");
 1779 #endif
 1780         bflags = M_NOWAIT;
 1781 
 1782         if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
 1783                 bflags |= M_NOVM;
 1784 #ifdef INVARIANTS
 1785         bflags |= M_ZERO;
 1786 #endif
 1787         bucket = uma_zalloc_internal(bucketzone,
 1788             NULL, bflags);
 1789         if (bucket) {
 1790                 bucket->ub_ptr = -1;
 1791                 ZONE_LOCK(zone);
 1792                 LIST_INSERT_HEAD(&zone->uz_free_bucket,
 1793                     bucket, ub_link);
 1794                 ZONE_UNLOCK(zone);
 1795                 goto zfree_restart;
 1796         }
 1797 
 1798         /*
 1799          * If nothing else caught this, we'll just do an internal free.
 1800          */
 1801 
 1802 zfree_internal:
 1803 
 1804         uma_zfree_internal(zone, item, udata, 0);
 1805 
 1806         return;
 1807 
 1808 }
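
/*
 * The fast path above keeps two buckets per CPU: frees go to uc_freebucket,
 * and when it fills it is swapped with uc_allocbucket if that one currently
 * holds fewer items, so frees keep hitting the per-CPU cache without taking
 * the zone lock.  Below is a stand-alone userland model of that swap
 * decision; the toy_* names and sizes are illustrative only.
 */
#if 0
#include <stdio.h>

#define TOY_BUCKET_SIZE 4

struct toy_bucket {
        int     ptr;                    /* topmost valid index, -1 if empty */
        void    *items[TOY_BUCKET_SIZE];
};

struct toy_cache {
        struct toy_bucket *allocbucket;
        struct toy_bucket *freebucket;
};

/* Returns 1 if the item was cached, 0 if it must go back to the zone. */
static int
toy_cache_free(struct toy_cache *c, void *item)
{
        struct toy_bucket *b, *swap;

        b = c->freebucket;
        if (b->ptr + 1 < TOY_BUCKET_SIZE) {
                b->items[++b->ptr] = item;
                return (1);
        }
        /* Free bucket is full; swap only if the alloc bucket is emptier. */
        if (c->allocbucket->ptr < b->ptr) {
                swap = c->freebucket;
                c->freebucket = c->allocbucket;
                c->allocbucket = swap;
                return (toy_cache_free(c, item));
        }
        return (0);
}

int
main(void)
{
        struct toy_bucket fb = { .ptr = TOY_BUCKET_SIZE - 1 }; /* treated as full */
        struct toy_bucket ab = { .ptr = -1 };                  /* empty */
        struct toy_cache c = { .allocbucket = &ab, .freebucket = &fb };
        int x = 1;

        printf("cached after swap: %d\n", toy_cache_free(&c, &x));
        printf("free bucket now holds %d item(s)\n", c.freebucket->ptr + 1);
        return (0);
}
#endif
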
 1809 
 1810 /*
 1811  * Frees an item to an INTERNAL zone or allocates a free bucket
 1812  *
 1813  * Arguments:
 1814  *      zone   The zone to free to
 1815  *      item   The item we're freeing
 1816  *      udata  User supplied data for the dtor
 1817  *      skip   Skip the dtor, it was done in uma_zfree_arg
 1818  */
 1819 
 1820 static void
 1821 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
 1822 {
 1823         uma_slab_t slab;
 1824         u_int8_t *mem;
 1825         u_int8_t freei;
 1826 
 1827         if (!skip && zone->uz_dtor)
 1828                 zone->uz_dtor(item, zone->uz_size, udata);
 1829 
 1830         ZONE_LOCK(zone);
 1831 
 1832         if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
 1833                 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
 1834                 if (zone->uz_flags & UMA_ZFLAG_HASH)
 1835                         slab = hash_sfind(&zone->uz_hash, mem);
 1836                 else {
 1837                         mem += zone->uz_pgoff;
 1838                         slab = (uma_slab_t)mem;
 1839                 }
 1840         } else {
 1841                 slab = (uma_slab_t)udata;
 1842         }
 1843 
 1844         /* Do we need to remove from any lists? */
 1845         if (slab->us_freecount+1 == zone->uz_ipers) {
 1846                 LIST_REMOVE(slab, us_link);
 1847                 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
 1848         } else if (slab->us_freecount == 0) {
 1849                 LIST_REMOVE(slab, us_link);
 1850                 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
 1851         }
 1852 
 1853         /* Return the item to the slab's embedded free list. */
 1854         freei = ((unsigned long)item - (unsigned long)slab->us_data)
 1855                 / zone->uz_rsize;
 1856 
 1857 #ifdef INVARIANTS
 1858         if (!skip)
 1859                 uma_dbg_free(zone, slab, item);
 1860 #endif
 1861 
 1862         slab->us_freelist[freei] = slab->us_firstfree;
 1863         slab->us_firstfree = freei;
 1864         slab->us_freecount++;
 1865 
 1866         /* Zone statistics */
 1867         zone->uz_free++;
 1868 
 1869         if (zone->uz_flags & UMA_ZFLAG_FULL) {
 1870                 if (zone->uz_pages < zone->uz_maxpages)
 1871                         zone->uz_flags &= ~UMA_ZFLAG_FULL;
 1872 
 1873                 /* We can handle one more allocation */
 1874                 wakeup_one(zone);
 1875         }
 1876 
 1877         ZONE_UNLOCK(zone);
 1878 }
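
/*
 * For non-malloc zones without an external hash, the slab header is found
 * above by masking the item address down to its slab boundary and adding
 * the zone's header offset, which works because every slab occupies one
 * aligned UMA_SLAB_SIZE region.  Below is a stand-alone userland model of
 * that lookup; the toy_* names and the 4096-byte size are illustrative.
 */
#if 0
#define _XOPEN_SOURCE 600               /* for posix_memalign() */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TOY_SLAB_SIZE   4096UL
#define TOY_SLAB_MASK   (TOY_SLAB_SIZE - 1)

struct toy_slab {
        int     freecount;
};

/* Header at the end of the region; items occupy the space before it. */
#define TOY_PGOFF       (TOY_SLAB_SIZE - sizeof(struct toy_slab))

static struct toy_slab *
toy_item_to_slab(void *item)
{
        uintptr_t base;

        base = (uintptr_t)item & ~TOY_SLAB_MASK;    /* start of the slab */
        return ((struct toy_slab *)(base + TOY_PGOFF));
}

int
main(void)
{
        void *region, *item;
        struct toy_slab *slab;

        if (posix_memalign(&region, TOY_SLAB_SIZE, TOY_SLAB_SIZE) != 0)
                return (1);
        slab = (struct toy_slab *)((char *)region + TOY_PGOFF);
        slab->freecount = 7;

        item = (char *)region + 128;                /* some item in the slab */
        printf("recovered freecount: %d\n", toy_item_to_slab(item)->freecount);
        free(region);
        return (0);
}
#endif
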
 1879 
 1880 /* See uma.h */
 1881 void
 1882 uma_zone_set_max(uma_zone_t zone, int nitems)
 1883 {
 1884         ZONE_LOCK(zone);
 1885         if (zone->uz_ppera > 1)
 1886                 zone->uz_maxpages = nitems * zone->uz_ppera;
 1887         else 
 1888                 zone->uz_maxpages = nitems / zone->uz_ipers;
 1889 
 1890         if (zone->uz_maxpages * zone->uz_ipers < nitems)
 1891                 zone->uz_maxpages++;
 1892 
 1893         ZONE_UNLOCK(zone);
 1894 }
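
/*
 * uma_zone_set_max() stores the item limit as a page limit: with more
 * than one page per allocation it is nitems * uz_ppera, otherwise
 * nitems / uz_ipers rounded up so that at least nitems items still fit.
 * Below is a stand-alone check of that arithmetic; the values are made up.
 */
#if 0
#include <stdio.h>

static int
toy_maxpages(int nitems, int ipers, int ppera)
{
        int maxpages;

        if (ppera > 1)
                maxpages = nitems * ppera;
        else
                maxpages = nitems / ipers;
        if (maxpages * ipers < nitems)  /* round up so nitems items fit */
                maxpages++;
        return (maxpages);
}

int
main(void)
{
        /* 100 items at 32 items per page needs 4 pages, not 3. */
        printf("maxpages = %d\n", toy_maxpages(100, 32, 1));
        return (0);
}
#endif
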
 1895 
 1896 /* See uma.h */
 1897 void
 1898 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
 1899 {
 1900         ZONE_LOCK(zone);
 1901 
 1902         zone->uz_freef = freef;
 1903 
 1904         ZONE_UNLOCK(zone);
 1905 }
 1906 
 1907 /* See uma.h */
 1908 void
 1909 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
 1910 {
 1911         ZONE_LOCK(zone);
 1912 
 1913         zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
 1914         zone->uz_allocf = allocf;
 1915 
 1916         ZONE_UNLOCK(zone);
 1917 }
 1918 
 1919 /* See uma.h */
 1920 int
 1921 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
 1922 {
 1923         int pages;
 1924         vm_offset_t kva;
 1925 
 1926         mtx_lock(&Giant);
 1927 
 1928         pages = count / zone->uz_ipers;
 1929 
 1930         if (pages * zone->uz_ipers < count)
 1931                 pages++;
 1932 
 1933         kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
 1934 
 1935         if (kva == 0) {
 1936                 mtx_unlock(&Giant);
 1937                 return (0);
 1938         }
 1939 
 1940 
 1941         if (obj == NULL)
 1942                 obj = vm_object_allocate(OBJT_DEFAULT,
 1943                     pages);
 1944         else {
 1945                 VM_OBJECT_LOCK_INIT(obj);
 1946                 _vm_object_allocate(OBJT_DEFAULT,
 1947                     pages, obj);
 1948         }
 1949         ZONE_LOCK(zone);
 1950         zone->uz_kva = kva;
 1951         zone->uz_obj = obj;
 1952         zone->uz_maxpages = pages;
 1953 
 1954         zone->uz_allocf = obj_alloc;
 1955         zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
 1956 
 1957         ZONE_UNLOCK(zone);
 1958         mtx_unlock(&Giant);
 1959 
 1960         return (1);
 1961 }
 1962 
 1963 /* See uma.h */
 1964 void
 1965 uma_prealloc(uma_zone_t zone, int items)
 1966 {
 1967         int slabs;
 1968         uma_slab_t slab;
 1969 
 1970         ZONE_LOCK(zone);
 1971         slabs = items / zone->uz_ipers;
 1972         if (slabs * zone->uz_ipers < items)
 1973                 slabs++;
 1974 
 1975         while (slabs > 0) {
 1976                 slab = slab_zalloc(zone, M_WAITOK);
 1977                 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
 1978                 slabs--;
 1979         }
 1980         ZONE_UNLOCK(zone);
 1981 }
 1982 
 1983 /* See uma.h */
 1984 void
 1985 uma_reclaim(void)
 1986 {
 1987         /*
 1988          * You might think that the delay below would improve performance since
 1989          * the allocator will give away memory that it may ask for immediately.
 1990          * Really, it makes things worse, since cpu cycles are so much cheaper
 1991          * than disk activity.
 1992          */
 1993 #if 0
 1994         static struct timeval tv = {0};
 1995         struct timeval now;
 1996         getmicrouptime(&now);
 1997         if (now.tv_sec > tv.tv_sec + 30)
 1998                 tv = now;
 1999         else
 2000                 return;
 2001 #endif
 2002 #ifdef UMA_DEBUG
 2003         printf("UMA: vm asked us to release pages!\n");
 2004 #endif
 2005         bucket_enable();
 2006         zone_foreach(zone_drain);
 2007 
 2008         /*
 2009          * Some slabs may have been freed, but this zone was visited early
 2010          * in the pass above, so visit it again to free pages that become
 2011          * empty once the other zones are drained.  Do the same for buckets.
 2012          */
 2013         zone_drain(slabzone);
 2014         zone_drain(bucketzone);
 2015 }
 2016 
 2017 void *
 2018 uma_large_malloc(int size, int wait)
 2019 {
 2020         void *mem;
 2021         uma_slab_t slab;
 2022         u_int8_t flags;
 2023 
 2024         slab = uma_zalloc_internal(slabzone, NULL, wait);
 2025         if (slab == NULL)
 2026                 return (NULL);
 2027 
 2028         /* XXX: kmem_malloc panics if Giant isn't held and sleep allowed */
 2029         if ((wait & M_NOWAIT) == 0 && !mtx_owned(&Giant)) {
 2030                 mtx_lock(&Giant);
 2031                 mem = page_alloc(NULL, size, &flags, wait);
 2032                 mtx_unlock(&Giant);
 2033         } else
 2034                 mem = page_alloc(NULL, size, &flags, wait);
 2035         if (mem) {
 2036                 vsetslab((vm_offset_t)mem, slab);
 2037                 slab->us_data = mem;
 2038                 slab->us_flags = flags | UMA_SLAB_MALLOC;
 2039                 slab->us_size = size;
 2040         } else {
 2041                 uma_zfree_internal(slabzone, slab, NULL, 0);
 2042         }
 2043 
 2044 
 2045         return (mem);
 2046 }
 2047 
 2048 void
 2049 uma_large_free(uma_slab_t slab)
 2050 {
 2051         vsetobj((vm_offset_t)slab->us_data, kmem_object);
 2052         /* 
 2053          * XXX: We get a lock order reversal if we don't have Giant:
 2054          * vm_map_remove (locks system map) -> vm_map_delete ->
 2055          *    vm_map_entry_unwire -> vm_fault_unwire -> mtx_lock(&Giant)
 2056          */
 2057         if (!mtx_owned(&Giant)) {
 2058                 mtx_lock(&Giant);
 2059                 page_free(slab->us_data, slab->us_size, slab->us_flags);
 2060                 mtx_unlock(&Giant);
 2061         } else
 2062                 page_free(slab->us_data, slab->us_size, slab->us_flags);
 2063         uma_zfree_internal(slabzone, slab, NULL, 0);
 2064 }
 2065 
 2066 void
 2067 uma_print_stats(void)
 2068 {
 2069         zone_foreach(uma_print_zone);
 2070 }
 2071 
 2072 void
 2073 uma_print_zone(uma_zone_t zone)
 2074 {
 2075         printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
 2076             zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
 2077             zone->uz_ipers, zone->uz_ppera,
 2078             (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
 2079 }
 2080 
 2081 /*
 2082  * Sysctl handler for vm.zone 
 2083  *
 2084  * stolen from vm_zone.c
 2085  */
 2086 static int
 2087 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
 2088 {
 2089         int error, len, cnt;
 2090         const int linesize = 128;       /* conservative */
 2091         int totalfree;
 2092         char *tmpbuf, *offset;
 2093         uma_zone_t z;
 2094         char *p;
 2095 
 2096         cnt = 0;
 2097         mtx_lock(&uma_mtx);
 2098         LIST_FOREACH(z, &uma_zones, uz_link)
 2099                 cnt++;
 2100         mtx_unlock(&uma_mtx);
 2101         MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
 2102                         M_TEMP, M_WAITOK);
 2103         len = snprintf(tmpbuf, linesize,
 2104             "\nITEM            SIZE     LIMIT     USED    FREE  REQUESTS\n\n");
 2105         if (cnt == 0)
 2106                 tmpbuf[len - 1] = '\0';
 2107         error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
 2108         if (error || cnt == 0)
 2109                 goto out;
 2110         offset = tmpbuf;
 2111         mtx_lock(&uma_mtx);
 2112         LIST_FOREACH(z, &uma_zones, uz_link) {
 2113                 if (cnt == 0)   /* list may have changed size */
 2114                         break;
 2115                 ZONE_LOCK(z);
 2116                 totalfree = z->uz_free + z->uz_cachefree;
 2117                 len = snprintf(offset, linesize,
 2118                     "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
 2119                     z->uz_name, z->uz_size,
 2120                     z->uz_maxpages * z->uz_ipers,
 2121                     (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
 2122                     totalfree,
 2123                     (unsigned long long)z->uz_allocs);
 2124                 ZONE_UNLOCK(z);
 2125                 for (p = offset + 12; p > offset && *p == ' '; --p)
 2126                         /* nothing */ ;
 2127                 p[1] = ':';
 2128                 cnt--;
 2129                 offset += len;
 2130         }
 2131         mtx_unlock(&uma_mtx);
 2132         *offset++ = '\0';
 2133         error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
 2134 out:
 2135         FREE(tmpbuf, M_TEMP);
 2136         return (error);
 2137 }
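
/*
 * The handler above prints each zone name left-justified in a 12-character
 * field and then walks back from the end of that field to place a ':'
 * right after the last character of the name.  Below is a stand-alone
 * userland model of that column formatting; the name and value are made up.
 */
#if 0
#include <stdio.h>

int
main(void)
{
        char line[64];
        char *p;

        snprintf(line, sizeof(line), "%-12.12s  %6u", "pipe", 123U);
        for (p = line + 12; p > line && *p == ' '; --p)
                /* nothing */ ;
        p[1] = ':';                     /* ':' lands right after "pipe" */
        printf("%s\n", line);
        return (0);
}
#endif
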
