FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_pool.c
1 /* $NetBSD: subr_pool.c,v 1.170.4.1 2008/11/17 18:46:11 snj Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
9 * Simulation Facility, NASA Ames Research Center, and by Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.170.4.1 2008/11/17 18:46:11 snj Exp $");
35
36 #include "opt_ddb.h"
37 #include "opt_pool.h"
38 #include "opt_poollog.h"
39 #include "opt_lockdebug.h"
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/bitops.h>
44 #include <sys/proc.h>
45 #include <sys/errno.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
48 #include <sys/pool.h>
49 #include <sys/syslog.h>
50 #include <sys/debug.h>
51 #include <sys/lockdebug.h>
52 #include <sys/xcall.h>
53 #include <sys/cpu.h>
54 #include <sys/atomic.h>
55
56 #include <uvm/uvm.h>
57
58 /*
59 * Pool resource management utility.
60 *
61 * Memory is allocated in pages which are split into pieces according to
62 * the pool item size. Each page is kept on one of three lists in the
63 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
64 * for empty, full and partially-full pages respectively. The individual
65 * pool items are on a linked list headed by `ph_itemlist' in each page
66 * header. The memory for building the page list is either taken from
67 * the allocated pages themselves (for small pool items) or taken from
68 * an internal pool of page headers (`phpool').
69 */
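/*
 * A minimal usage sketch, compiled out ("struct frob" and frob_pool
 * are hypothetical names, not part of this file):
 */
#if 0
static struct pool frob_pool;

static void
frob_pool_example(void)
{
	struct frob *f;

	/* A pool of fixed-size items; a NULL allocator selects the
	 * default kmem-backed page allocator. */
	pool_init(&frob_pool, sizeof(struct frob), 0, 0, 0,
	    "frobpl", NULL, IPL_NONE);

	f = pool_get(&frob_pool, PR_WAITOK);	/* may sleep for memory */
	/* ... use f ... */
	pool_put(&frob_pool, f);	/* back onto its page's item list */
}
#endif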
70
71 /* List of all pools */
72 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
73
74 /* Private pool for page header structures */
75 #define PHPOOL_MAX 8
76 static struct pool phpool[PHPOOL_MAX];
77 #define PHPOOL_FREELIST_NELEM(idx) \
78 (((idx) == 0) ? 0 : BITMAP_SIZE * (1 << (idx)))
79
80 #ifdef POOL_SUBPAGE
81 /* Pool of subpages for use by normal pools. */
82 static struct pool psppool;
83 #endif
84
85 static SLIST_HEAD(, pool_allocator) pa_deferinitq =
86 SLIST_HEAD_INITIALIZER(pa_deferinitq);
87
88 static void *pool_page_alloc_meta(struct pool *, int);
89 static void pool_page_free_meta(struct pool *, void *);
90
91 /* allocator for pool metadata */
92 struct pool_allocator pool_allocator_meta = {
93 pool_page_alloc_meta, pool_page_free_meta,
94 .pa_backingmapptr = &kmem_map,
95 };
96
97 /* # of seconds to retain page after last use */
98 int pool_inactive_time = 10;
99
100 /* Next candidate for drainage (see pool_drain()) */
101 static struct pool *drainpp;
102
103 /* This lock protects both pool_head and drainpp. */
104 static kmutex_t pool_head_lock;
105 static kcondvar_t pool_busy;
106
107 typedef uint32_t pool_item_bitmap_t;
108 #define BITMAP_SIZE (CHAR_BIT * sizeof(pool_item_bitmap_t))
109 #define BITMAP_MASK (BITMAP_SIZE - 1)
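/*
 * For example, with 32-bit bitmap words item index 37 maps to word
 * 37 / BITMAP_SIZE = 1, bit 37 & BITMAP_MASK = 5; a set bit marks a
 * free item (see pr_item_notouch_init() below).
 */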
110
111 struct pool_item_header {
112 /* Page headers */
113 LIST_ENTRY(pool_item_header)
114 ph_pagelist; /* pool page list */
115 SPLAY_ENTRY(pool_item_header)
116 ph_node; /* Off-page page headers */
117 void * ph_page; /* this page's address */
118 uint32_t ph_time; /* last referenced */
119 uint16_t ph_nmissing; /* # of chunks in use */
120 uint16_t ph_off; /* start offset in page */
121 union {
122 /* !PR_NOTOUCH */
123 struct {
124 LIST_HEAD(, pool_item)
125 phu_itemlist; /* chunk list for this page */
126 } phu_normal;
127 /* PR_NOTOUCH */
128 struct {
129 pool_item_bitmap_t phu_bitmap[1];
130 } phu_notouch;
131 } ph_u;
132 };
133 #define ph_itemlist ph_u.phu_normal.phu_itemlist
134 #define ph_bitmap ph_u.phu_notouch.phu_bitmap
135
136 struct pool_item {
137 #ifdef DIAGNOSTIC
138 u_int pi_magic;
139 #endif
140 #define PI_MAGIC 0xdeaddeadU
141 /* Other entries use only this list entry */
142 LIST_ENTRY(pool_item) pi_list;
143 };
144
145 #define POOL_NEEDS_CATCHUP(pp) \
146 ((pp)->pr_nitems < (pp)->pr_minitems)
147
148 /*
149 * Pool cache management.
150 *
151 * Pool caches provide a way for constructed objects to be cached by the
152  * pool subsystem. This can improve performance by avoiding needless
153  * object construction/destruction, which is deferred until absolutely
154  * necessary.
155 *
156 * Caches are grouped into cache groups. Each cache group references up
157 * to PCG_NUMOBJECTS constructed objects. When a cache allocates an
158 * object from the pool, it calls the object's constructor and places it
159 * into a cache group. When a cache group frees an object back to the
160 * pool, it first calls the object's destructor. This allows the object
161 * to persist in constructed form while freed to the cache.
162 *
163 * The pool references each cache, so that when a pool is drained by the
164 * pagedaemon, it can drain each individual cache as well. Each time a
165 * cache is drained, the most idle cache group is freed to the pool in
166 * its entirety.
167 *
168  * Pool caches are laid on top of pools. By layering them, we can avoid
169 * the complexity of cache management for pools which would not benefit
170 * from it.
171 */
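/*
 * A cache-layer usage sketch, compiled out (frob_ctor, frob_dtor and
 * "struct frob" are hypothetical; pool_cache_get() and
 * pool_cache_put() are declared in <sys/pool.h>):
 */
#if 0
static pool_cache_t frob_cache;

static void
frob_cache_example(void)
{
	struct frob *f;

	frob_cache = pool_cache_init(sizeof(struct frob), coherency_unit,
	    0, 0, "frobcache", NULL, IPL_NONE, frob_ctor, frob_dtor, NULL);

	f = pool_cache_get(frob_cache, PR_WAITOK); /* constructed object */
	/* ... use f ... */
	pool_cache_put(frob_cache, f);	/* destruction is deferred */
}
#endif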
172
173 static struct pool pcg_normal_pool;
174 static struct pool pcg_large_pool;
175 static struct pool cache_pool;
176 static struct pool cache_cpu_pool;
177
178 /* List of all caches. */
179 TAILQ_HEAD(,pool_cache) pool_cache_head =
180 TAILQ_HEAD_INITIALIZER(pool_cache_head);
181
182 int pool_cache_disable; /* global disable for caching */
183 static const pcg_t pcg_dummy; /* zero sized: always empty, yet always full */
184
185 static bool pool_cache_put_slow(pool_cache_cpu_t *, int,
186 void *);
187 static bool pool_cache_get_slow(pool_cache_cpu_t *, int,
188 void **, paddr_t *, int);
189 static void pool_cache_cpu_init1(struct cpu_info *, pool_cache_t);
190 static void pool_cache_invalidate_groups(pool_cache_t, pcg_t *);
191 static void pool_cache_xcall(pool_cache_t);
192
193 static int pool_catchup(struct pool *);
194 static void pool_prime_page(struct pool *, void *,
195 struct pool_item_header *);
196 static void pool_update_curpage(struct pool *);
197
198 static int pool_grow(struct pool *, int);
199 static void *pool_allocator_alloc(struct pool *, int);
200 static void pool_allocator_free(struct pool *, void *);
201
202 static void pool_print_pagelist(struct pool *, struct pool_pagelist *,
203 void (*)(const char *, ...));
204 static void pool_print1(struct pool *, const char *,
205 void (*)(const char *, ...));
206
207 static int pool_chk_page(struct pool *, const char *,
208 struct pool_item_header *);
209
210 /*
211 * Pool log entry. An array of these is allocated in pool_init().
212 */
213 struct pool_log {
214 const char *pl_file;
215 long pl_line;
216 int pl_action;
217 #define PRLOG_GET 1
218 #define PRLOG_PUT 2
219 void *pl_addr;
220 };
221
222 #ifdef POOL_DIAGNOSTIC
223 /* Number of entries in pool log buffers */
224 #ifndef POOL_LOGSIZE
225 #define POOL_LOGSIZE 10
226 #endif
227
228 int pool_logsize = POOL_LOGSIZE;
229
230 static inline void
231 pr_log(struct pool *pp, void *v, int action, const char *file, long line)
232 {
233 int n = pp->pr_curlogentry;
234 struct pool_log *pl;
235
236 if ((pp->pr_roflags & PR_LOGGING) == 0)
237 return;
238
239 /*
240 * Fill in the current entry. Wrap around and overwrite
241 * the oldest entry if necessary.
242 */
243 pl = &pp->pr_log[n];
244 pl->pl_file = file;
245 pl->pl_line = line;
246 pl->pl_action = action;
247 pl->pl_addr = v;
248 if (++n >= pp->pr_logsize)
249 n = 0;
250 pp->pr_curlogentry = n;
251 }
252
253 static void
254 pr_printlog(struct pool *pp, struct pool_item *pi,
255 void (*pr)(const char *, ...))
256 {
257 int i = pp->pr_logsize;
258 int n = pp->pr_curlogentry;
259
260 if ((pp->pr_roflags & PR_LOGGING) == 0)
261 return;
262
263 /*
264 * Print all entries in this pool's log.
265 */
266 while (i-- > 0) {
267 struct pool_log *pl = &pp->pr_log[n];
268 if (pl->pl_action != 0) {
269 if (pi == NULL || pi == pl->pl_addr) {
270 (*pr)("\tlog entry %d:\n", i);
271 (*pr)("\t\taction = %s, addr = %p\n",
272 pl->pl_action == PRLOG_GET ? "get" : "put",
273 pl->pl_addr);
274 (*pr)("\t\tfile: %s at line %lu\n",
275 pl->pl_file, pl->pl_line);
276 }
277 }
278 if (++n >= pp->pr_logsize)
279 n = 0;
280 }
281 }
282
283 static inline void
284 pr_enter(struct pool *pp, const char *file, long line)
285 {
286
287 if (__predict_false(pp->pr_entered_file != NULL)) {
288 printf("pool %s: reentrancy at file %s line %ld\n",
289 pp->pr_wchan, file, line);
290 printf(" previous entry at file %s line %ld\n",
291 pp->pr_entered_file, pp->pr_entered_line);
292 panic("pr_enter");
293 }
294
295 pp->pr_entered_file = file;
296 pp->pr_entered_line = line;
297 }
298
299 static inline void
300 pr_leave(struct pool *pp)
301 {
302
303 if (__predict_false(pp->pr_entered_file == NULL)) {
304 printf("pool %s not entered?\n", pp->pr_wchan);
305 panic("pr_leave");
306 }
307
308 pp->pr_entered_file = NULL;
309 pp->pr_entered_line = 0;
310 }
311
312 static inline void
313 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...))
314 {
315
316 if (pp->pr_entered_file != NULL)
317 (*pr)("\n\tcurrently entered from file %s line %ld\n",
318 pp->pr_entered_file, pp->pr_entered_line);
319 }
320 #else
321 #define pr_log(pp, v, action, file, line)
322 #define pr_printlog(pp, pi, pr)
323 #define pr_enter(pp, file, line)
324 #define pr_leave(pp)
325 #define pr_enter_check(pp, pr)
326 #endif /* POOL_DIAGNOSTIC */
327
328 static inline unsigned int
329 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph,
330 const void *v)
331 {
332 const char *cp = v;
333 unsigned int idx;
334
335 KASSERT(pp->pr_roflags & PR_NOTOUCH);
336 idx = (cp - (char *)ph->ph_page - ph->ph_off) / pp->pr_size;
337 KASSERT(idx < pp->pr_itemsperpage);
338 return idx;
339 }
340
341 static inline void
342 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph,
343 void *obj)
344 {
345 unsigned int idx = pr_item_notouch_index(pp, ph, obj);
346 pool_item_bitmap_t *bitmap = ph->ph_bitmap + (idx / BITMAP_SIZE);
347 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK);
348
349 KASSERT((*bitmap & mask) == 0);
350 *bitmap |= mask;
351 }
352
353 static inline void *
354 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph)
355 {
356 pool_item_bitmap_t *bitmap = ph->ph_bitmap;
357 unsigned int idx;
358 int i;
359
360 for (i = 0; ; i++) {
361 int bit;
362
363 KASSERT((i * BITMAP_SIZE) < pp->pr_itemsperpage);
364 bit = ffs32(bitmap[i]);
365 if (bit) {
366 pool_item_bitmap_t mask;
367
368 bit--;
369 idx = (i * BITMAP_SIZE) + bit;
370 mask = 1 << bit;
371 KASSERT((bitmap[i] & mask) != 0);
372 bitmap[i] &= ~mask;
373 break;
374 }
375 }
376 KASSERT(idx < pp->pr_itemsperpage);
377 return (char *)ph->ph_page + ph->ph_off + idx * pp->pr_size;
378 }
379
380 static inline void
381 pr_item_notouch_init(const struct pool *pp, struct pool_item_header *ph)
382 {
383 pool_item_bitmap_t *bitmap = ph->ph_bitmap;
384 const int n = howmany(pp->pr_itemsperpage, BITMAP_SIZE);
385 int i;
386
387 for (i = 0; i < n; i++) {
388 bitmap[i] = (pool_item_bitmap_t)-1;
389 }
390 }
391
392 static inline int
393 phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
394 {
395
396 /*
397  * we consider a pool_item_header with a smaller ph_page to be bigger.
398 * (this unnatural ordering is for the benefit of pr_find_pagehead.)
399 */
400
401 if (a->ph_page < b->ph_page)
402 return (1);
403 else if (a->ph_page > b->ph_page)
404 return (-1);
405 else
406 return (0);
407 }
408
409 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
410 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
411
412 static inline struct pool_item_header *
413 pr_find_pagehead_noalign(struct pool *pp, void *v)
414 {
415 struct pool_item_header *ph, tmp;
416
417 tmp.ph_page = (void *)(uintptr_t)v;
418 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
419 if (ph == NULL) {
420 ph = SPLAY_ROOT(&pp->pr_phtree);
421 if (ph != NULL && phtree_compare(&tmp, ph) >= 0) {
422 ph = SPLAY_NEXT(phtree, &pp->pr_phtree, ph);
423 }
424 KASSERT(ph == NULL || phtree_compare(&tmp, ph) < 0);
425 }
426
427 return ph;
428 }
429
430 /*
431 * Return the pool page header based on item address.
432 */
433 static inline struct pool_item_header *
434 pr_find_pagehead(struct pool *pp, void *v)
435 {
436 struct pool_item_header *ph, tmp;
437
438 if ((pp->pr_roflags & PR_NOALIGN) != 0) {
439 ph = pr_find_pagehead_noalign(pp, v);
440 } else {
441 void *page =
442 (void *)((uintptr_t)v & pp->pr_alloc->pa_pagemask);
443
444 if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
445 ph = (struct pool_item_header *)((char *)page + pp->pr_phoffset);
446 } else {
447 tmp.ph_page = page;
448 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
449 }
450 }
451
452 KASSERT(ph == NULL || ((pp->pr_roflags & PR_PHINPAGE) != 0) ||
453 ((char *)ph->ph_page <= (char *)v &&
454 (char *)v < (char *)ph->ph_page + pp->pr_alloc->pa_pagesz));
455 return ph;
456 }
457
458 static void
459 pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq)
460 {
461 struct pool_item_header *ph;
462
463 while ((ph = LIST_FIRST(pq)) != NULL) {
464 LIST_REMOVE(ph, ph_pagelist);
465 pool_allocator_free(pp, ph->ph_page);
466 if ((pp->pr_roflags & PR_PHINPAGE) == 0)
467 pool_put(pp->pr_phpool, ph);
468 }
469 }
470
471 /*
472 * Remove a page from the pool.
473 */
474 static inline void
475 pr_rmpage(struct pool *pp, struct pool_item_header *ph,
476 struct pool_pagelist *pq)
477 {
478
479 KASSERT(mutex_owned(&pp->pr_lock));
480
481 /*
482 * If the page was idle, decrement the idle page count.
483 */
484 if (ph->ph_nmissing == 0) {
485 #ifdef DIAGNOSTIC
486 if (pp->pr_nidle == 0)
487 panic("pr_rmpage: nidle inconsistent");
488 if (pp->pr_nitems < pp->pr_itemsperpage)
489 panic("pr_rmpage: nitems inconsistent");
490 #endif
491 pp->pr_nidle--;
492 }
493
494 pp->pr_nitems -= pp->pr_itemsperpage;
495
496 /*
497 * Unlink the page from the pool and queue it for release.
498 */
499 LIST_REMOVE(ph, ph_pagelist);
500 if ((pp->pr_roflags & PR_PHINPAGE) == 0)
501 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
502 LIST_INSERT_HEAD(pq, ph, ph_pagelist);
503
504 pp->pr_npages--;
505 pp->pr_npagefree++;
506
507 pool_update_curpage(pp);
508 }
509
510 static bool
511 pa_starved_p(struct pool_allocator *pa)
512 {
513
514 if (pa->pa_backingmap != NULL) {
515 return vm_map_starved_p(pa->pa_backingmap);
516 }
517 return false;
518 }
519
520 static int
521 pool_reclaim_callback(struct callback_entry *ce, void *obj, void *arg)
522 {
523 struct pool *pp = obj;
524 struct pool_allocator *pa = pp->pr_alloc;
525
526 KASSERT(&pp->pr_reclaimerentry == ce);
527 pool_reclaim(pp);
528 if (!pa_starved_p(pa)) {
529 return CALLBACK_CHAIN_ABORT;
530 }
531 return CALLBACK_CHAIN_CONTINUE;
532 }
533
534 static void
535 pool_reclaim_register(struct pool *pp)
536 {
537 struct vm_map *map = pp->pr_alloc->pa_backingmap;
538 int s;
539
540 if (map == NULL) {
541 return;
542 }
543
544 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */
545 callback_register(&vm_map_to_kernel(map)->vmk_reclaim_callback,
546 &pp->pr_reclaimerentry, pp, pool_reclaim_callback);
547 splx(s);
548 }
549
550 static void
551 pool_reclaim_unregister(struct pool *pp)
552 {
553 struct vm_map *map = pp->pr_alloc->pa_backingmap;
554 int s;
555
556 if (map == NULL) {
557 return;
558 }
559
560 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */
561 callback_unregister(&vm_map_to_kernel(map)->vmk_reclaim_callback,
562 &pp->pr_reclaimerentry);
563 splx(s);
564 }
565
566 static void
567 pa_reclaim_register(struct pool_allocator *pa)
568 {
569 struct vm_map *map = *pa->pa_backingmapptr;
570 struct pool *pp;
571
572 KASSERT(pa->pa_backingmap == NULL);
573 if (map == NULL) {
574 SLIST_INSERT_HEAD(&pa_deferinitq, pa, pa_q);
575 return;
576 }
577 pa->pa_backingmap = map;
578 TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
579 pool_reclaim_register(pp);
580 }
581 }
582
583 /*
584 * Initialize all the pools listed in the "pools" link set.
585 */
586 void
587 pool_subsystem_init(void)
588 {
589 struct pool_allocator *pa;
590 __link_set_decl(pools, struct link_pool_init);
591 struct link_pool_init * const *pi;
592
593 mutex_init(&pool_head_lock, MUTEX_DEFAULT, IPL_NONE);
594 cv_init(&pool_busy, "poolbusy");
595
596 __link_set_foreach(pi, pools)
597 pool_init((*pi)->pp, (*pi)->size, (*pi)->align,
598 (*pi)->align_offset, (*pi)->flags, (*pi)->wchan,
599 (*pi)->palloc, (*pi)->ipl);
600
601 while ((pa = SLIST_FIRST(&pa_deferinitq)) != NULL) {
602 KASSERT(pa->pa_backingmapptr != NULL);
603 KASSERT(*pa->pa_backingmapptr != NULL);
604 SLIST_REMOVE_HEAD(&pa_deferinitq, pa_q);
605 pa_reclaim_register(pa);
606 }
607
608 pool_init(&cache_pool, sizeof(struct pool_cache), coherency_unit,
609 0, 0, "pcache", &pool_allocator_nointr, IPL_NONE);
610
611 pool_init(&cache_cpu_pool, sizeof(pool_cache_cpu_t), coherency_unit,
612 0, 0, "pcachecpu", &pool_allocator_nointr, IPL_NONE);
613 }
614
615 /*
616 * Initialize the given pool resource structure.
617 *
618 * We export this routine to allow other kernel parts to declare
619 * static pools that must be initialized before malloc() is available.
620 */
621 void
622 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
623 const char *wchan, struct pool_allocator *palloc, int ipl)
624 {
625 struct pool *pp1;
626 size_t trysize, phsize;
627 int off, slack;
628
629 #ifdef DEBUG
630 /*
631 * Check that the pool hasn't already been initialised and
632 * added to the list of all pools.
633 */
634 TAILQ_FOREACH(pp1, &pool_head, pr_poollist) {
635 if (pp == pp1)
636 panic("pool_init: pool %s already initialised",
637 wchan);
638 }
639 #endif
640
641 #ifdef POOL_DIAGNOSTIC
642 /*
643 * Always log if POOL_DIAGNOSTIC is defined.
644 */
645 if (pool_logsize != 0)
646 flags |= PR_LOGGING;
647 #endif
648
649 if (palloc == NULL)
650 palloc = &pool_allocator_kmem;
651 #ifdef POOL_SUBPAGE
652 if (size > palloc->pa_pagesz) {
653 if (palloc == &pool_allocator_kmem)
654 palloc = &pool_allocator_kmem_fullpage;
655 else if (palloc == &pool_allocator_nointr)
656 palloc = &pool_allocator_nointr_fullpage;
657 }
658 #endif /* POOL_SUBPAGE */
659 if ((palloc->pa_flags & PA_INITIALIZED) == 0) {
660 if (palloc->pa_pagesz == 0)
661 palloc->pa_pagesz = PAGE_SIZE;
662
663 TAILQ_INIT(&palloc->pa_list);
664
665 mutex_init(&palloc->pa_lock, MUTEX_DEFAULT, IPL_VM);
666 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
667 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
668
669 if (palloc->pa_backingmapptr != NULL) {
670 pa_reclaim_register(palloc);
671 }
672 palloc->pa_flags |= PA_INITIALIZED;
673 }
674
675 if (align == 0)
676 align = ALIGN(1);
677
678 if ((flags & PR_NOTOUCH) == 0 && size < sizeof(struct pool_item))
679 size = sizeof(struct pool_item);
680
681 size = roundup(size, align);
682 #ifdef DIAGNOSTIC
683 if (size > palloc->pa_pagesz)
684 panic("pool_init: pool item size (%zu) too large", size);
685 #endif
686
687 /*
688 * Initialize the pool structure.
689 */
690 LIST_INIT(&pp->pr_emptypages);
691 LIST_INIT(&pp->pr_fullpages);
692 LIST_INIT(&pp->pr_partpages);
693 pp->pr_cache = NULL;
694 pp->pr_curpage = NULL;
695 pp->pr_npages = 0;
696 pp->pr_minitems = 0;
697 pp->pr_minpages = 0;
698 pp->pr_maxpages = UINT_MAX;
699 pp->pr_roflags = flags;
700 pp->pr_flags = 0;
701 pp->pr_size = size;
702 pp->pr_align = align;
703 pp->pr_wchan = wchan;
704 pp->pr_alloc = palloc;
705 pp->pr_nitems = 0;
706 pp->pr_nout = 0;
707 pp->pr_hardlimit = UINT_MAX;
708 pp->pr_hardlimit_warning = NULL;
709 pp->pr_hardlimit_ratecap.tv_sec = 0;
710 pp->pr_hardlimit_ratecap.tv_usec = 0;
711 pp->pr_hardlimit_warning_last.tv_sec = 0;
712 pp->pr_hardlimit_warning_last.tv_usec = 0;
713 pp->pr_drain_hook = NULL;
714 pp->pr_drain_hook_arg = NULL;
715 pp->pr_freecheck = NULL;
716
717 /*
718  * Decide whether to put the page header off-page, to avoid
719  * wasting too large a part of the page with too big an item.
720  * Off-page page headers go into a splay tree, so we can match
721  * a returned item with its header based on the page address.
722  * We use 1/16 of the page size and about 8 times the item
723  * size as the threshold (XXX: tune).
724 *
725 * However, we'll put the header into the page if we can put
726 * it without wasting any items.
727 *
728 * Silently enforce `0 <= ioff < align'.
729 */
730 pp->pr_itemoffset = ioff %= align;
731 /* See the comment below about reserved bytes. */
732 trysize = palloc->pa_pagesz - ((align - ioff) % align);
733 phsize = ALIGN(sizeof(struct pool_item_header));
734 if ((pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) == 0 &&
735 (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) ||
736 trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) {
737 /* Use the end of the page for the page header */
738 pp->pr_roflags |= PR_PHINPAGE;
739 pp->pr_phoffset = off = palloc->pa_pagesz - phsize;
740 } else {
741 /* The page header will be taken from our page header pool */
742 pp->pr_phoffset = 0;
743 off = palloc->pa_pagesz;
744 SPLAY_INIT(&pp->pr_phtree);
745 }
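	/*
	 * Illustrative numbers (assuming a 4096-byte page and a phsize
	 * of roughly 56 bytes): items smaller than MIN(4096/16, 56*8)
	 * = 256 bytes keep the header in-page outright.  A 768-byte
	 * item also qualifies via the second test, since 4096/768 ==
	 * (4096-56)/768 == 5: the header occupies slack that could not
	 * have held another item anyway.
	 */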
746
747 /*
748 * Alignment is to take place at `ioff' within the item. This means
749 * we must reserve up to `align - 1' bytes on the page to allow
750 * appropriate positioning of each item.
751 */
752 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
753 KASSERT(pp->pr_itemsperpage != 0);
754 if ((pp->pr_roflags & PR_NOTOUCH)) {
755 int idx;
756
757 for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx);
758 idx++) {
759 /* nothing */
760 }
761 if (idx >= PHPOOL_MAX) {
762 /*
763  * if you see this panic, consider tweaking
764 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM.
765 */
766 panic("%s: too large itemsperpage(%d) for PR_NOTOUCH",
767 pp->pr_wchan, pp->pr_itemsperpage);
768 }
769 pp->pr_phpool = &phpool[idx];
770 } else if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
771 pp->pr_phpool = &phpool[0];
772 }
773 #if defined(DIAGNOSTIC)
774 else {
775 pp->pr_phpool = NULL;
776 }
777 #endif
778
779 /*
780 * Use the slack between the chunks and the page header
781 * for "cache coloring".
782 */
783 slack = off - pp->pr_itemsperpage * pp->pr_size;
784 pp->pr_maxcolor = (slack / align) * align;
785 pp->pr_curcolor = 0;
786
787 pp->pr_nget = 0;
788 pp->pr_nfail = 0;
789 pp->pr_nput = 0;
790 pp->pr_npagealloc = 0;
791 pp->pr_npagefree = 0;
792 pp->pr_hiwat = 0;
793 pp->pr_nidle = 0;
794 pp->pr_refcnt = 0;
795
796 #ifdef POOL_DIAGNOSTIC
797 if (flags & PR_LOGGING) {
798 if (kmem_map == NULL ||
799 (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
800 M_TEMP, M_NOWAIT)) == NULL)
801 pp->pr_roflags &= ~PR_LOGGING;
802 pp->pr_curlogentry = 0;
803 pp->pr_logsize = pool_logsize;
804 }
805 #endif
806
807 pp->pr_entered_file = NULL;
808 pp->pr_entered_line = 0;
809
810 mutex_init(&pp->pr_lock, MUTEX_DEFAULT, ipl);
811 cv_init(&pp->pr_cv, wchan);
812 pp->pr_ipl = ipl;
813
814 /*
815 * Initialize private page header pool and cache magazine pool if we
816 * haven't done so yet.
817 * XXX LOCKING.
818 */
819 if (phpool[0].pr_size == 0) {
820 int idx;
821 for (idx = 0; idx < PHPOOL_MAX; idx++) {
822 static char phpool_names[PHPOOL_MAX][6+1+6+1];
823 int nelem;
824 size_t sz;
825
826 nelem = PHPOOL_FREELIST_NELEM(idx);
827 snprintf(phpool_names[idx], sizeof(phpool_names[idx]),
828 "phpool-%d", nelem);
829 sz = sizeof(struct pool_item_header);
830 if (nelem) {
831 sz = offsetof(struct pool_item_header,
832 ph_bitmap[howmany(nelem, BITMAP_SIZE)]);
833 }
834 pool_init(&phpool[idx], sz, 0, 0, 0,
835 phpool_names[idx], &pool_allocator_meta, IPL_VM);
836 }
837 #ifdef POOL_SUBPAGE
838 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0,
839 PR_RECURSIVE, "psppool", &pool_allocator_meta, IPL_VM);
840 #endif
841
842 size = sizeof(pcg_t) +
843 (PCG_NOBJECTS_NORMAL - 1) * sizeof(pcgpair_t);
844 pool_init(&pcg_normal_pool, size, coherency_unit, 0, 0,
845 "pcgnormal", &pool_allocator_meta, IPL_VM);
846
847 size = sizeof(pcg_t) +
848 (PCG_NOBJECTS_LARGE - 1) * sizeof(pcgpair_t);
849 pool_init(&pcg_large_pool, size, coherency_unit, 0, 0,
850 "pcglarge", &pool_allocator_meta, IPL_VM);
851 }
852
853 /* Insert into the list of all pools. */
854 if (__predict_true(!cold))
855 mutex_enter(&pool_head_lock);
856 TAILQ_FOREACH(pp1, &pool_head, pr_poollist) {
857 if (strcmp(pp1->pr_wchan, pp->pr_wchan) > 0)
858 break;
859 }
860 if (pp1 == NULL)
861 TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
862 else
863 TAILQ_INSERT_BEFORE(pp1, pp, pr_poollist);
864 if (__predict_true(!cold))
865 mutex_exit(&pool_head_lock);
866
867 /* Insert this into the list of pools using this allocator. */
868 if (__predict_true(!cold))
869 mutex_enter(&palloc->pa_lock);
870 TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
871 if (__predict_true(!cold))
872 mutex_exit(&palloc->pa_lock);
873
874 pool_reclaim_register(pp);
875 }
876
877 /*
878  * Decommission a pool resource.
879 */
880 void
881 pool_destroy(struct pool *pp)
882 {
883 struct pool_pagelist pq;
884 struct pool_item_header *ph;
885
886 /* Remove from global pool list */
887 mutex_enter(&pool_head_lock);
888 while (pp->pr_refcnt != 0)
889 cv_wait(&pool_busy, &pool_head_lock);
890 TAILQ_REMOVE(&pool_head, pp, pr_poollist);
891 if (drainpp == pp)
892 drainpp = NULL;
893 mutex_exit(&pool_head_lock);
894
895 /* Remove this pool from its allocator's list of pools. */
896 pool_reclaim_unregister(pp);
897 mutex_enter(&pp->pr_alloc->pa_lock);
898 TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
899 mutex_exit(&pp->pr_alloc->pa_lock);
900
901 mutex_enter(&pp->pr_lock);
902
903 KASSERT(pp->pr_cache == NULL);
904
905 #ifdef DIAGNOSTIC
906 if (pp->pr_nout != 0) {
907 pr_printlog(pp, NULL, printf);
908 panic("pool_destroy: pool busy: still out: %u",
909 pp->pr_nout);
910 }
911 #endif
912
913 KASSERT(LIST_EMPTY(&pp->pr_fullpages));
914 KASSERT(LIST_EMPTY(&pp->pr_partpages));
915
916 /* Remove all pages */
917 LIST_INIT(&pq);
918 while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
919 pr_rmpage(pp, ph, &pq);
920
921 mutex_exit(&pp->pr_lock);
922
923 pr_pagelist_free(pp, &pq);
924
925 #ifdef POOL_DIAGNOSTIC
926 if ((pp->pr_roflags & PR_LOGGING) != 0)
927 free(pp->pr_log, M_TEMP);
928 #endif
929
930 cv_destroy(&pp->pr_cv);
931 mutex_destroy(&pp->pr_lock);
932 }
933
934 void
935 pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
936 {
937
938 /* XXX no locking -- must be used just after pool_init() */
939 #ifdef DIAGNOSTIC
940 if (pp->pr_drain_hook != NULL)
941 panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
942 #endif
943 pp->pr_drain_hook = fn;
944 pp->pr_drain_hook_arg = arg;
945 }
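/*
 * A drain hook sketch, compiled out (frob_drain and frob_pool are
 * hypothetical). The hook is always invoked with the pool unlocked:
 */
#if 0
static void
frob_drain(void *arg, int flags)
{
	/* Release privately held frob items back to the pool,
	 * honouring PR_NOWAIT vs. PR_WAITOK in flags. */
}

static void
frob_hook_example(void)
{
	pool_set_drain_hook(&frob_pool, frob_drain, NULL);
}
#endif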
946
947 static struct pool_item_header *
948 pool_alloc_item_header(struct pool *pp, void *storage, int flags)
949 {
950 struct pool_item_header *ph;
951
952 if ((pp->pr_roflags & PR_PHINPAGE) != 0)
953 ph = (struct pool_item_header *) ((char *)storage + pp->pr_phoffset);
954 else
955 ph = pool_get(pp->pr_phpool, flags);
956
957 return (ph);
958 }
959
960 /*
961 * Grab an item from the pool.
962 */
963 void *
964 #ifdef POOL_DIAGNOSTIC
965 _pool_get(struct pool *pp, int flags, const char *file, long line)
966 #else
967 pool_get(struct pool *pp, int flags)
968 #endif
969 {
970 struct pool_item *pi;
971 struct pool_item_header *ph;
972 void *v;
973
974 #ifdef DIAGNOSTIC
975 if (__predict_false(pp->pr_itemsperpage == 0))
976 panic("pool_get: pool %p: pr_itemsperpage is zero, "
977 "pool not initialized?", pp);
978 if (__predict_false(curlwp == NULL && doing_shutdown == 0 &&
979 (flags & PR_WAITOK) != 0))
980 panic("pool_get: %s: must have NOWAIT", pp->pr_wchan);
981
982 #endif /* DIAGNOSTIC */
983 #ifdef LOCKDEBUG
984 if (flags & PR_WAITOK) {
985 ASSERT_SLEEPABLE();
986 }
987 #endif
988
989 mutex_enter(&pp->pr_lock);
990 pr_enter(pp, file, line);
991
992 startover:
993 /*
994 * Check to see if we've reached the hard limit. If we have,
995 * and we can wait, then wait until an item has been returned to
996 * the pool.
997 */
998 #ifdef DIAGNOSTIC
999 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
1000 pr_leave(pp);
1001 mutex_exit(&pp->pr_lock);
1002 panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
1003 }
1004 #endif
1005 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
1006 if (pp->pr_drain_hook != NULL) {
1007 /*
1008 * Since the drain hook is going to free things
1009 * back to the pool, unlock, call the hook, re-lock,
1010 * and check the hardlimit condition again.
1011 */
1012 pr_leave(pp);
1013 mutex_exit(&pp->pr_lock);
1014 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
1015 mutex_enter(&pp->pr_lock);
1016 pr_enter(pp, file, line);
1017 if (pp->pr_nout < pp->pr_hardlimit)
1018 goto startover;
1019 }
1020
1021 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
1022 /*
1023 * XXX: A warning isn't logged in this case. Should
1024 * it be?
1025 */
1026 pp->pr_flags |= PR_WANTED;
1027 pr_leave(pp);
1028 cv_wait(&pp->pr_cv, &pp->pr_lock);
1029 pr_enter(pp, file, line);
1030 goto startover;
1031 }
1032
1033 /*
1034 * Log a message that the hard limit has been hit.
1035 */
1036 if (pp->pr_hardlimit_warning != NULL &&
1037 ratecheck(&pp->pr_hardlimit_warning_last,
1038 &pp->pr_hardlimit_ratecap))
1039 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
1040
1041 pp->pr_nfail++;
1042
1043 pr_leave(pp);
1044 mutex_exit(&pp->pr_lock);
1045 return (NULL);
1046 }
1047
1048 /*
1049 * The convention we use is that if `curpage' is not NULL, then
1050 * it points at a non-empty bucket. In particular, `curpage'
1051 * never points at a page header which has PR_PHINPAGE set and
1052 * has no items in its bucket.
1053 */
1054 if ((ph = pp->pr_curpage) == NULL) {
1055 int error;
1056
1057 #ifdef DIAGNOSTIC
1058 if (pp->pr_nitems != 0) {
1059 mutex_exit(&pp->pr_lock);
1060 printf("pool_get: %s: curpage NULL, nitems %u\n",
1061 pp->pr_wchan, pp->pr_nitems);
1062 panic("pool_get: nitems inconsistent");
1063 }
1064 #endif
1065
1066 /*
1067 * Call the back-end page allocator for more memory.
1068 * Release the pool lock, as the back-end page allocator
1069 * may block.
1070 */
1071 pr_leave(pp);
1072 error = pool_grow(pp, flags);
1073 pr_enter(pp, file, line);
1074 if (error != 0) {
1075 /*
1076 * We were unable to allocate a page or item
1077 * header, but we released the lock during
1078 * allocation, so perhaps items were freed
1079 * back to the pool. Check for this case.
1080 */
1081 if (pp->pr_curpage != NULL)
1082 goto startover;
1083
1084 pp->pr_nfail++;
1085 pr_leave(pp);
1086 mutex_exit(&pp->pr_lock);
1087 return (NULL);
1088 }
1089
1090 /* Start the allocation process over. */
1091 goto startover;
1092 }
1093 if (pp->pr_roflags & PR_NOTOUCH) {
1094 #ifdef DIAGNOSTIC
1095 if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) {
1096 pr_leave(pp);
1097 mutex_exit(&pp->pr_lock);
1098 panic("pool_get: %s: page empty", pp->pr_wchan);
1099 }
1100 #endif
1101 v = pr_item_notouch_get(pp, ph);
1102 #ifdef POOL_DIAGNOSTIC
1103 pr_log(pp, v, PRLOG_GET, file, line);
1104 #endif
1105 } else {
1106 v = pi = LIST_FIRST(&ph->ph_itemlist);
1107 if (__predict_false(v == NULL)) {
1108 pr_leave(pp);
1109 mutex_exit(&pp->pr_lock);
1110 panic("pool_get: %s: page empty", pp->pr_wchan);
1111 }
1112 #ifdef DIAGNOSTIC
1113 if (__predict_false(pp->pr_nitems == 0)) {
1114 pr_leave(pp);
1115 mutex_exit(&pp->pr_lock);
1116 printf("pool_get: %s: items on itemlist, nitems %u\n",
1117 pp->pr_wchan, pp->pr_nitems);
1118 panic("pool_get: nitems inconsistent");
1119 }
1120 #endif
1121
1122 #ifdef POOL_DIAGNOSTIC
1123 pr_log(pp, v, PRLOG_GET, file, line);
1124 #endif
1125
1126 #ifdef DIAGNOSTIC
1127 if (__predict_false(pi->pi_magic != PI_MAGIC)) {
1128 pr_printlog(pp, pi, printf);
1129 panic("pool_get(%s): free list modified: "
1130 "magic=%x; page %p; item addr %p\n",
1131 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
1132 }
1133 #endif
1134
1135 /*
1136 * Remove from item list.
1137 */
1138 LIST_REMOVE(pi, pi_list);
1139 }
1140 pp->pr_nitems--;
1141 pp->pr_nout++;
1142 if (ph->ph_nmissing == 0) {
1143 #ifdef DIAGNOSTIC
1144 if (__predict_false(pp->pr_nidle == 0))
1145 panic("pool_get: nidle inconsistent");
1146 #endif
1147 pp->pr_nidle--;
1148
1149 /*
1150 * This page was previously empty. Move it to the list of
1151 * partially-full pages. This page is already curpage.
1152 */
1153 LIST_REMOVE(ph, ph_pagelist);
1154 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
1155 }
1156 ph->ph_nmissing++;
1157 if (ph->ph_nmissing == pp->pr_itemsperpage) {
1158 #ifdef DIAGNOSTIC
1159 if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 &&
1160 !LIST_EMPTY(&ph->ph_itemlist))) {
1161 pr_leave(pp);
1162 mutex_exit(&pp->pr_lock);
1163 panic("pool_get: %s: nmissing inconsistent",
1164 pp->pr_wchan);
1165 }
1166 #endif
1167 /*
1168 * This page is now full. Move it to the full list
1169 * and select a new current page.
1170 */
1171 LIST_REMOVE(ph, ph_pagelist);
1172 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
1173 pool_update_curpage(pp);
1174 }
1175
1176 pp->pr_nget++;
1177 pr_leave(pp);
1178
1179 /*
1180 * If we have a low water mark and we are now below that low
1181 * water mark, add more items to the pool.
1182 */
1183 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
1184 /*
1185 * XXX: Should we log a warning? Should we set up a timeout
1186 * to try again in a second or so? The latter could break
1187 * a caller's assumptions about interrupt protection, etc.
1188 */
1189 }
1190
1191 mutex_exit(&pp->pr_lock);
1192 KASSERT((((vaddr_t)v + pp->pr_itemoffset) & (pp->pr_align - 1)) == 0);
1193 FREECHECK_OUT(&pp->pr_freecheck, v);
1194 return (v);
1195 }
1196
1197 /*
1198 * Internal version of pool_put(). Pool is already locked/entered.
1199 */
1200 static void
1201 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq)
1202 {
1203 struct pool_item *pi = v;
1204 struct pool_item_header *ph;
1205
1206 KASSERT(mutex_owned(&pp->pr_lock));
1207 FREECHECK_IN(&pp->pr_freecheck, v);
1208 LOCKDEBUG_MEM_CHECK(v, pp->pr_size);
1209
1210 #ifdef DIAGNOSTIC
1211 if (__predict_false(pp->pr_nout == 0)) {
1212 printf("pool %s: putting with none out\n",
1213 pp->pr_wchan);
1214 panic("pool_put");
1215 }
1216 #endif
1217
1218 if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) {
1219 pr_printlog(pp, NULL, printf);
1220 panic("pool_put: %s: page header missing", pp->pr_wchan);
1221 }
1222
1223 /*
1224 * Return to item list.
1225 */
1226 if (pp->pr_roflags & PR_NOTOUCH) {
1227 pr_item_notouch_put(pp, ph, v);
1228 } else {
1229 #ifdef DIAGNOSTIC
1230 pi->pi_magic = PI_MAGIC;
1231 #endif
1232 #ifdef DEBUG
1233 {
1234 int i, *ip = v;
1235
1236 for (i = 0; i < pp->pr_size / sizeof(int); i++) {
1237 *ip++ = PI_MAGIC;
1238 }
1239 }
1240 #endif
1241
1242 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
1243 }
1244 KDASSERT(ph->ph_nmissing != 0);
1245 ph->ph_nmissing--;
1246 pp->pr_nput++;
1247 pp->pr_nitems++;
1248 pp->pr_nout--;
1249
1250 /* Cancel "pool empty" condition if it exists */
1251 if (pp->pr_curpage == NULL)
1252 pp->pr_curpage = ph;
1253
1254 if (pp->pr_flags & PR_WANTED) {
1255 pp->pr_flags &= ~PR_WANTED;
1256 cv_broadcast(&pp->pr_cv);
1257 }
1258
1259 /*
1260 * If this page is now empty, do one of two things:
1261 *
1262 * (1) If we have more pages than the page high water mark,
1263 * free the page back to the system. ONLY CONSIDER
1264 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE
1265 * CLAIM.
1266 *
1267 * (2) Otherwise, move the page to the empty page list.
1268 *
1269 * Either way, select a new current page (so we use a partially-full
1270 * page if one is available).
1271 */
1272 if (ph->ph_nmissing == 0) {
1273 pp->pr_nidle++;
1274 if (pp->pr_npages > pp->pr_minpages &&
1275 pp->pr_npages > pp->pr_maxpages) {
1276 pr_rmpage(pp, ph, pq);
1277 } else {
1278 LIST_REMOVE(ph, ph_pagelist);
1279 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
1280
1281 /*
1282 * Update the timestamp on the page. A page must
1283 * be idle for some period of time before it can
1284 * be reclaimed by the pagedaemon. This minimizes
1285 * ping-pong'ing for memory.
1286 *
1287 * note for 64-bit time_t: truncating to 32-bit is not
1288 * a problem for our usage.
1289 */
1290 ph->ph_time = time_uptime;
1291 }
1292 pool_update_curpage(pp);
1293 }
1294
1295 /*
1296 * If the page was previously completely full, move it to the
1297 * partially-full list and make it the current page. The next
1298 * allocation will get the item from this page, instead of
1299 * further fragmenting the pool.
1300 */
1301 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
1302 LIST_REMOVE(ph, ph_pagelist);
1303 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
1304 pp->pr_curpage = ph;
1305 }
1306 }
1307
1308 /*
1309 * Return resource to the pool.
1310 */
1311 #ifdef POOL_DIAGNOSTIC
1312 void
1313 _pool_put(struct pool *pp, void *v, const char *file, long line)
1314 {
1315 struct pool_pagelist pq;
1316
1317 LIST_INIT(&pq);
1318
1319 mutex_enter(&pp->pr_lock);
1320 pr_enter(pp, file, line);
1321
1322 pr_log(pp, v, PRLOG_PUT, file, line);
1323
1324 pool_do_put(pp, v, &pq);
1325
1326 pr_leave(pp);
1327 mutex_exit(&pp->pr_lock);
1328
1329 pr_pagelist_free(pp, &pq);
1330 }
1331 #undef pool_put
1332 #endif /* POOL_DIAGNOSTIC */
1333
1334 void
1335 pool_put(struct pool *pp, void *v)
1336 {
1337 struct pool_pagelist pq;
1338
1339 LIST_INIT(&pq);
1340
1341 mutex_enter(&pp->pr_lock);
1342 pool_do_put(pp, v, &pq);
1343 mutex_exit(&pp->pr_lock);
1344
1345 pr_pagelist_free(pp, &pq);
1346 }
1347
1348 #ifdef POOL_DIAGNOSTIC
1349 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__)
1350 #endif
1351
1352 /*
1353 * pool_grow: grow a pool by a page.
1354 *
1355 * => called with pool locked.
1356 * => unlock and relock the pool.
1357 * => return with pool locked.
1358 */
1359
1360 static int
1361 pool_grow(struct pool *pp, int flags)
1362 {
1363 struct pool_item_header *ph = NULL;
1364 char *cp;
1365
1366 mutex_exit(&pp->pr_lock);
1367 cp = pool_allocator_alloc(pp, flags);
1368 if (__predict_true(cp != NULL)) {
1369 ph = pool_alloc_item_header(pp, cp, flags);
1370 }
1371 if (__predict_false(cp == NULL || ph == NULL)) {
1372 if (cp != NULL) {
1373 pool_allocator_free(pp, cp);
1374 }
1375 mutex_enter(&pp->pr_lock);
1376 return ENOMEM;
1377 }
1378
1379 mutex_enter(&pp->pr_lock);
1380 pool_prime_page(pp, cp, ph);
1381 pp->pr_npagealloc++;
1382 return 0;
1383 }
1384
1385 /*
1386 * Add N items to the pool.
1387 */
1388 int
1389 pool_prime(struct pool *pp, int n)
1390 {
1391 int newpages;
1392 int error = 0;
1393
1394 mutex_enter(&pp->pr_lock);
1395
1396 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1397
1398 while (newpages-- > 0) {
1399 error = pool_grow(pp, PR_NOWAIT);
1400 if (error) {
1401 break;
1402 }
1403 pp->pr_minpages++;
1404 }
1405
1406 if (pp->pr_minpages >= pp->pr_maxpages)
1407 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */
1408
1409 mutex_exit(&pp->pr_lock);
1410 return error;
1411 }
1412
1413 /*
1414 * Add a page worth of items to the pool.
1415 *
1416 * Note, we must be called with the pool descriptor LOCKED.
1417 */
1418 static void
1419 pool_prime_page(struct pool *pp, void *storage, struct pool_item_header *ph)
1420 {
1421 struct pool_item *pi;
1422 void *cp = storage;
1423 const unsigned int align = pp->pr_align;
1424 const unsigned int ioff = pp->pr_itemoffset;
1425 int n;
1426
1427 KASSERT(mutex_owned(&pp->pr_lock));
1428
1429 #ifdef DIAGNOSTIC
1430 if ((pp->pr_roflags & PR_NOALIGN) == 0 &&
1431 ((uintptr_t)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
1432 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
1433 #endif
1434
1435 /*
1436 * Insert page header.
1437 */
1438 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
1439 LIST_INIT(&ph->ph_itemlist);
1440 ph->ph_page = storage;
1441 ph->ph_nmissing = 0;
1442 ph->ph_time = time_uptime;
1443 if ((pp->pr_roflags & PR_PHINPAGE) == 0)
1444 SPLAY_INSERT(phtree, &pp->pr_phtree, ph);
1445
1446 pp->pr_nidle++;
1447
1448 /*
1449 * Color this page.
1450 */
1451 ph->ph_off = pp->pr_curcolor;
1452 cp = (char *)cp + ph->ph_off;
1453 if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
1454 pp->pr_curcolor = 0;
1455
1456 /*
1457  * Adjust storage to apply alignment to `pr_itemoffset' in each item.
1458 */
1459 if (ioff != 0)
1460 cp = (char *)cp + align - ioff;
1461
1462 KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0);
1463
1464 /*
1465 * Insert remaining chunks on the bucket list.
1466 */
1467 n = pp->pr_itemsperpage;
1468 pp->pr_nitems += n;
1469
1470 if (pp->pr_roflags & PR_NOTOUCH) {
1471 pr_item_notouch_init(pp, ph);
1472 } else {
1473 while (n--) {
1474 pi = (struct pool_item *)cp;
1475
1476 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);
1477
1478 /* Insert on page list */
1479 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
1480 #ifdef DIAGNOSTIC
1481 pi->pi_magic = PI_MAGIC;
1482 #endif
1483 cp = (char *)cp + pp->pr_size;
1484
1485 KASSERT((((vaddr_t)cp + ioff) & (align - 1)) == 0);
1486 }
1487 }
1488
1489 /*
1490 * If the pool was depleted, point at the new page.
1491 */
1492 if (pp->pr_curpage == NULL)
1493 pp->pr_curpage = ph;
1494
1495 if (++pp->pr_npages > pp->pr_hiwat)
1496 pp->pr_hiwat = pp->pr_npages;
1497 }
1498
1499 /*
1500  * Used by pool_get() when nitems drops below the low water mark: grow
1501  * the pool until pr_nitems catches up with that mark.
1502 *
1503 * Note 1, we never wait for memory here, we let the caller decide what to do.
1504 *
1505 * Note 2, we must be called with the pool already locked, and we return
1506 * with it locked.
1507 */
1508 static int
1509 pool_catchup(struct pool *pp)
1510 {
1511 int error = 0;
1512
1513 while (POOL_NEEDS_CATCHUP(pp)) {
1514 error = pool_grow(pp, PR_NOWAIT);
1515 if (error) {
1516 break;
1517 }
1518 }
1519 return error;
1520 }
1521
1522 static void
1523 pool_update_curpage(struct pool *pp)
1524 {
1525
1526 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
1527 if (pp->pr_curpage == NULL) {
1528 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
1529 }
1530 KASSERT((pp->pr_curpage == NULL && pp->pr_nitems == 0) ||
1531 (pp->pr_curpage != NULL && pp->pr_nitems > 0));
1532 }
1533
1534 void
1535 pool_setlowat(struct pool *pp, int n)
1536 {
1537
1538 mutex_enter(&pp->pr_lock);
1539
1540 pp->pr_minitems = n;
1541 pp->pr_minpages = (n == 0)
1542 ? 0
1543 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1544
1545 /* Make sure we're caught up with the newly-set low water mark. */
1546 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
1547 /*
1548 * XXX: Should we log a warning? Should we set up a timeout
1549 * to try again in a second or so? The latter could break
1550 * a caller's assumptions about interrupt protection, etc.
1551 */
1552 }
1553
1554 mutex_exit(&pp->pr_lock);
1555 }
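/*
 * For example, pool_setlowat(pp, 100) on a pool with 32 items per
 * page sets pr_minpages to roundup(100, 32) / 32 = 4 and immediately
 * tries to grow the pool up to the new mark via pool_catchup().
 */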
1556
1557 void
1558 pool_sethiwat(struct pool *pp, int n)
1559 {
1560
1561 mutex_enter(&pp->pr_lock);
1562
1563 pp->pr_maxpages = (n == 0)
1564 ? 0
1565 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1566
1567 mutex_exit(&pp->pr_lock);
1568 }
1569
1570 void
1571 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
1572 {
1573
1574 mutex_enter(&pp->pr_lock);
1575
1576 pp->pr_hardlimit = n;
1577 pp->pr_hardlimit_warning = warnmess;
1578 pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1579 pp->pr_hardlimit_warning_last.tv_sec = 0;
1580 pp->pr_hardlimit_warning_last.tv_usec = 0;
1581
1582 /*
1583 * In-line version of pool_sethiwat(), because we don't want to
1584 * release the lock.
1585 */
1586 pp->pr_maxpages = (n == 0)
1587 ? 0
1588 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1589
1590 mutex_exit(&pp->pr_lock);
1591 }
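/*
 * For example (a hypothetical call), to cap a pool at 1024
 * outstanding items and log the warning at most once every
 * 30 seconds:
 *
 *	pool_sethardlimit(&frob_pool, 1024, "frob_pool limit reached", 30);
 */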
1592
1593 /*
1594 * Release all complete pages that have not been used recently.
1595 */
1596 int
1597 #ifdef POOL_DIAGNOSTIC
1598 _pool_reclaim(struct pool *pp, const char *file, long line)
1599 #else
1600 pool_reclaim(struct pool *pp)
1601 #endif
1602 {
1603 struct pool_item_header *ph, *phnext;
1604 struct pool_pagelist pq;
1605 uint32_t curtime;
1606 bool klock;
1607 int rv;
1608
1609 if (pp->pr_drain_hook != NULL) {
1610 /*
1611 * The drain hook must be called with the pool unlocked.
1612 */
1613 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT);
1614 }
1615
1616 /*
1617  * XXXSMP We take the kernel lock here because we do not want
1618  * to cause non-MPSAFE code to block.
1619 */
1620 if (pp->pr_ipl == IPL_SOFTNET || pp->pr_ipl == IPL_SOFTCLOCK ||
1621 pp->pr_ipl == IPL_SOFTSERIAL) {
1622 KERNEL_LOCK(1, NULL);
1623 klock = true;
1624 } else
1625 klock = false;
1626
1627 /* Reclaim items from the pool's cache (if any). */
1628 if (pp->pr_cache != NULL)
1629 pool_cache_invalidate(pp->pr_cache);
1630
1631 if (mutex_tryenter(&pp->pr_lock) == 0) {
1632 if (klock) {
1633 KERNEL_UNLOCK_ONE(NULL);
1634 }
1635 return (0);
1636 }
1637 pr_enter(pp, file, line);
1638
1639 LIST_INIT(&pq);
1640
1641 curtime = time_uptime;
1642
1643 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1644 phnext = LIST_NEXT(ph, ph_pagelist);
1645
1646 /* Check our minimum page claim */
1647 if (pp->pr_npages <= pp->pr_minpages)
1648 break;
1649
1650 KASSERT(ph->ph_nmissing == 0);
1651 if (curtime - ph->ph_time < pool_inactive_time
1652 && !pa_starved_p(pp->pr_alloc))
1653 continue;
1654
1655 /*
1656 * If freeing this page would put us below
1657 * the low water mark, stop now.
1658 */
1659 if ((pp->pr_nitems - pp->pr_itemsperpage) <
1660 pp->pr_minitems)
1661 break;
1662
1663 pr_rmpage(pp, ph, &pq);
1664 }
1665
1666 pr_leave(pp);
1667 mutex_exit(&pp->pr_lock);
1668
1669 if (LIST_EMPTY(&pq))
1670 rv = 0;
1671 else {
1672 pr_pagelist_free(pp, &pq);
1673 rv = 1;
1674 }
1675
1676 if (klock) {
1677 KERNEL_UNLOCK_ONE(NULL);
1678 }
1679
1680 return (rv);
1681 }
1682
1683 /*
1684  * Drain pools, one at a time. This is a two-stage process:
1685 * drain_start kicks off a cross call to drain CPU-level caches
1686 * if the pool has an associated pool_cache. drain_end waits
1687 * for those cross calls to finish, and then drains the cache
1688 * (if any) and pool.
1689 *
1690 * Note, must never be called from interrupt context.
1691 */
1692 void
1693 pool_drain_start(struct pool **ppp, uint64_t *wp)
1694 {
1695 struct pool *pp;
1696
1697 KASSERT(!TAILQ_EMPTY(&pool_head));
1698
1699 pp = NULL;
1700
1701 /* Find next pool to drain, and add a reference. */
1702 mutex_enter(&pool_head_lock);
1703 do {
1704 if (drainpp == NULL) {
1705 drainpp = TAILQ_FIRST(&pool_head);
1706 }
1707 if (drainpp != NULL) {
1708 pp = drainpp;
1709 drainpp = TAILQ_NEXT(pp, pr_poollist);
1710 }
1711 /*
1712 * Skip completely idle pools. We depend on at least
1713 * one pool in the system being active.
1714 */
1715 } while (pp == NULL || pp->pr_npages == 0);
1716 pp->pr_refcnt++;
1717 mutex_exit(&pool_head_lock);
1718
1719 /* If there is a pool_cache, drain CPU level caches. */
1720 *ppp = pp;
1721 if (pp->pr_cache != NULL) {
1722 *wp = xc_broadcast(0, (xcfunc_t)pool_cache_xcall,
1723 pp->pr_cache, NULL);
1724 }
1725 }
1726
1727 void
1728 pool_drain_end(struct pool *pp, uint64_t where)
1729 {
1730
1731 if (pp == NULL)
1732 return;
1733
1734 KASSERT(pp->pr_refcnt > 0);
1735
1736 /* Wait for remote draining to complete. */
1737 if (pp->pr_cache != NULL)
1738 xc_wait(where);
1739
1740 	/* Drain the cache (if any) and pool. */
1741 pool_reclaim(pp);
1742
1743 	/* Finally, release our reference to the pool. */
1744 mutex_enter(&pool_head_lock);
1745 pp->pr_refcnt--;
1746 cv_broadcast(&pool_busy);
1747 mutex_exit(&pool_head_lock);
1748 }
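/*
 * The expected calling pattern for the two-stage drain, compiled out
 * (a sketch; the pagedaemon is the usual caller):
 */
#if 0
static void
drain_one_pool(void)
{
	struct pool *pp;
	uint64_t where;

	pool_drain_start(&pp, &where);	/* choose a pool, start cross call */
	/* ... other reclamation work can proceed here ... */
	pool_drain_end(pp, where);	/* wait, reclaim, drop the reference */
}
#endif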
1749
1750 /*
1751 * Diagnostic helpers.
1752 */
1753 void
1754 pool_print(struct pool *pp, const char *modif)
1755 {
1756
1757 pool_print1(pp, modif, printf);
1758 }
1759
1760 void
1761 pool_printall(const char *modif, void (*pr)(const char *, ...))
1762 {
1763 struct pool *pp;
1764
1765 TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
1766 pool_printit(pp, modif, pr);
1767 }
1768 }
1769
1770 void
1771 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
1772 {
1773
1774 if (pp == NULL) {
1775 (*pr)("Must specify a pool to print.\n");
1776 return;
1777 }
1778
1779 pool_print1(pp, modif, pr);
1780 }
1781
1782 static void
1783 pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl,
1784 void (*pr)(const char *, ...))
1785 {
1786 struct pool_item_header *ph;
1787 #ifdef DIAGNOSTIC
1788 struct pool_item *pi;
1789 #endif
1790
1791 LIST_FOREACH(ph, pl, ph_pagelist) {
1792 (*pr)("\t\tpage %p, nmissing %d, time %" PRIu32 "\n",
1793 ph->ph_page, ph->ph_nmissing, ph->ph_time);
1794 #ifdef DIAGNOSTIC
1795 if (!(pp->pr_roflags & PR_NOTOUCH)) {
1796 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1797 if (pi->pi_magic != PI_MAGIC) {
1798 (*pr)("\t\t\titem %p, magic 0x%x\n",
1799 pi, pi->pi_magic);
1800 }
1801 }
1802 }
1803 #endif
1804 }
1805 }
1806
1807 static void
1808 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
1809 {
1810 struct pool_item_header *ph;
1811 pool_cache_t pc;
1812 pcg_t *pcg;
1813 pool_cache_cpu_t *cc;
1814 uint64_t cpuhit, cpumiss;
1815 int i, print_log = 0, print_pagelist = 0, print_cache = 0;
1816 char c;
1817
1818 while ((c = *modif++) != '\0') {
1819 if (c == 'l')
1820 print_log = 1;
1821 if (c == 'p')
1822 print_pagelist = 1;
1823 if (c == 'c')
1824 print_cache = 1;
1825 }
1826
1827 if ((pc = pp->pr_cache) != NULL) {
1828 (*pr)("POOL CACHE");
1829 } else {
1830 (*pr)("POOL");
1831 }
1832
1833 (*pr)(" %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
1834 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
1835 pp->pr_roflags);
1836 (*pr)("\talloc %p\n", pp->pr_alloc);
1837 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1838 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1839 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1840 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1841
1842 (*pr)("\tnget %lu, nfail %lu, nput %lu\n",
1843 pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1844 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1845 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1846
1847 if (print_pagelist == 0)
1848 goto skip_pagelist;
1849
1850 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
1851 (*pr)("\n\tempty page list:\n");
1852 pool_print_pagelist(pp, &pp->pr_emptypages, pr);
1853 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
1854 (*pr)("\n\tfull page list:\n");
1855 pool_print_pagelist(pp, &pp->pr_fullpages, pr);
1856 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
1857 (*pr)("\n\tpartial-page list:\n");
1858 pool_print_pagelist(pp, &pp->pr_partpages, pr);
1859
1860 if (pp->pr_curpage == NULL)
1861 (*pr)("\tno current page\n");
1862 else
1863 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1864
1865 skip_pagelist:
1866 if (print_log == 0)
1867 goto skip_log;
1868
1869 (*pr)("\n");
1870 if ((pp->pr_roflags & PR_LOGGING) == 0)
1871 (*pr)("\tno log\n");
1872 else {
1873 pr_printlog(pp, NULL, pr);
1874 }
1875
1876 skip_log:
1877
1878 #define PR_GROUPLIST(pcg) \
1879 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); \
1880 for (i = 0; i < pcg->pcg_size; i++) { \
1881 if (pcg->pcg_objects[i].pcgo_pa != \
1882 POOL_PADDR_INVALID) { \
1883 (*pr)("\t\t\t%p, 0x%llx\n", \
1884 pcg->pcg_objects[i].pcgo_va, \
1885 (unsigned long long) \
1886 pcg->pcg_objects[i].pcgo_pa); \
1887 } else { \
1888 (*pr)("\t\t\t%p\n", \
1889 pcg->pcg_objects[i].pcgo_va); \
1890 } \
1891 }
1892
1893 if (pc != NULL) {
1894 cpuhit = 0;
1895 cpumiss = 0;
1896 for (i = 0; i < MAXCPUS; i++) {
1897 if ((cc = pc->pc_cpus[i]) == NULL)
1898 continue;
1899 cpuhit += cc->cc_hits;
1900 cpumiss += cc->cc_misses;
1901 }
1902 (*pr)("\tcpu layer hits %llu misses %llu\n", cpuhit, cpumiss);
1903 (*pr)("\tcache layer hits %llu misses %llu\n",
1904 pc->pc_hits, pc->pc_misses);
1905 (*pr)("\tcache layer entry uncontended %llu contended %llu\n",
1906 pc->pc_hits + pc->pc_misses - pc->pc_contended,
1907 pc->pc_contended);
1908 (*pr)("\tcache layer empty groups %u full groups %u\n",
1909 pc->pc_nempty, pc->pc_nfull);
1910 if (print_cache) {
1911 (*pr)("\tfull cache groups:\n");
1912 for (pcg = pc->pc_fullgroups; pcg != NULL;
1913 pcg = pcg->pcg_next) {
1914 PR_GROUPLIST(pcg);
1915 }
1916 (*pr)("\tempty cache groups:\n");
1917 for (pcg = pc->pc_emptygroups; pcg != NULL;
1918 pcg = pcg->pcg_next) {
1919 PR_GROUPLIST(pcg);
1920 }
1921 }
1922 }
1923 #undef PR_GROUPLIST
1924
1925 pr_enter_check(pp, pr);
1926 }
1927
1928 static int
1929 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
1930 {
1931 struct pool_item *pi;
1932 void *page;
1933 int n;
1934
1935 if ((pp->pr_roflags & PR_NOALIGN) == 0) {
1936 page = (void *)((uintptr_t)ph & pp->pr_alloc->pa_pagemask);
1937 if (page != ph->ph_page &&
1938 (pp->pr_roflags & PR_PHINPAGE) != 0) {
1939 if (label != NULL)
1940 printf("%s: ", label);
1941 printf("pool(%p:%s): page inconsistency: page %p;"
1942 " at page head addr %p (p %p)\n", pp,
1943 pp->pr_wchan, ph->ph_page,
1944 ph, page);
1945 return 1;
1946 }
1947 }
1948
1949 if ((pp->pr_roflags & PR_NOTOUCH) != 0)
1950 return 0;
1951
1952 for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0;
1953 pi != NULL;
1954 pi = LIST_NEXT(pi,pi_list), n++) {
1955
1956 #ifdef DIAGNOSTIC
1957 if (pi->pi_magic != PI_MAGIC) {
1958 if (label != NULL)
1959 printf("%s: ", label);
1960 printf("pool(%s): free list modified: magic=%x;"
1961 " page %p; item ordinal %d; addr %p\n",
1962 pp->pr_wchan, pi->pi_magic, ph->ph_page,
1963 n, pi);
1964 panic("pool");
1965 }
1966 #endif
1967 if ((pp->pr_roflags & PR_NOALIGN) != 0) {
1968 continue;
1969 }
1970 page = (void *)((uintptr_t)pi & pp->pr_alloc->pa_pagemask);
1971 if (page == ph->ph_page)
1972 continue;
1973
1974 if (label != NULL)
1975 printf("%s: ", label);
1976 printf("pool(%p:%s): page inconsistency: page %p;"
1977 " item ordinal %d; addr %p (p %p)\n", pp,
1978 pp->pr_wchan, ph->ph_page,
1979 n, pi, page);
1980 return 1;
1981 }
1982 return 0;
1983 }
1984
1985
1986 int
1987 pool_chk(struct pool *pp, const char *label)
1988 {
1989 struct pool_item_header *ph;
1990 int r = 0;
1991
1992 mutex_enter(&pp->pr_lock);
1993 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) {
1994 r = pool_chk_page(pp, label, ph);
1995 if (r) {
1996 goto out;
1997 }
1998 }
1999 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
2000 r = pool_chk_page(pp, label, ph);
2001 if (r) {
2002 goto out;
2003 }
2004 }
2005 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
2006 r = pool_chk_page(pp, label, ph);
2007 if (r) {
2008 goto out;
2009 }
2010 }
2011
2012 out:
2013 mutex_exit(&pp->pr_lock);
2014 return (r);
2015 }
2016
2017 /*
2018 * pool_cache_init:
2019 *
2020 * Initialize a pool cache.
2021 */
2022 pool_cache_t
2023 pool_cache_init(size_t size, u_int align, u_int align_offset, u_int flags,
2024 const char *wchan, struct pool_allocator *palloc, int ipl,
2025 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *), void *arg)
2026 {
2027 pool_cache_t pc;
2028
2029 pc = pool_get(&cache_pool, PR_WAITOK);
2030 if (pc == NULL)
2031 return NULL;
2032
2033 pool_cache_bootstrap(pc, size, align, align_offset, flags, wchan,
2034 palloc, ipl, ctor, dtor, arg);
2035
2036 return pc;
2037 }
2038
2039 /*
2040 * pool_cache_bootstrap:
2041 *
2042 * Kernel-private version of pool_cache_init(). The caller
2043 * provides initial storage.
2044 */
2045 void
2046 pool_cache_bootstrap(pool_cache_t pc, size_t size, u_int align,
2047 u_int align_offset, u_int flags, const char *wchan,
2048 struct pool_allocator *palloc, int ipl,
2049 int (*ctor)(void *, void *, int), void (*dtor)(void *, void *),
2050 void *arg)
2051 {
2052 CPU_INFO_ITERATOR cii;
2053 pool_cache_t pc1;
2054 struct cpu_info *ci;
2055 struct pool *pp;
2056
2057 pp = &pc->pc_pool;
2058 if (palloc == NULL && ipl == IPL_NONE)
2059 palloc = &pool_allocator_nointr;
2060 pool_init(pp, size, align, align_offset, flags, wchan, palloc, ipl);
2061 mutex_init(&pc->pc_lock, MUTEX_DEFAULT, ipl);
2062
2063 if (ctor == NULL) {
2064 ctor = (int (*)(void *, void *, int))nullop;
2065 }
2066 if (dtor == NULL) {
2067 dtor = (void (*)(void *, void *))nullop;
2068 }
2069
2070 pc->pc_emptygroups = NULL;
2071 pc->pc_fullgroups = NULL;
2072 pc->pc_partgroups = NULL;
2073 pc->pc_ctor = ctor;
2074 pc->pc_dtor = dtor;
2075 pc->pc_arg = arg;
2076 pc->pc_hits = 0;
2077 pc->pc_misses = 0;
2078 pc->pc_nempty = 0;
2079 pc->pc_npart = 0;
2080 pc->pc_nfull = 0;
2081 pc->pc_contended = 0;
2082 pc->pc_refcnt = 0;
2083 pc->pc_freecheck = NULL;
2084
2085 if ((flags & PR_LARGECACHE) != 0) {
2086 pc->pc_pcgsize = PCG_NOBJECTS_LARGE;
2087 pc->pc_pcgpool = &pcg_large_pool;
2088 } else {
2089 pc->pc_pcgsize = PCG_NOBJECTS_NORMAL;
2090 pc->pc_pcgpool = &pcg_normal_pool;
2091 }
2092
2093 /* Allocate per-CPU caches. */
2094 memset(pc->pc_cpus, 0, sizeof(pc->pc_cpus));
2095 pc->pc_ncpu = 0;
2096 if (ncpu < 2) {
2097 /* XXX For sparc: boot CPU is not attached yet. */
2098 pool_cache_cpu_init1(curcpu(), pc);
2099 } else {
2100 for (CPU_INFO_FOREACH(cii, ci)) {
2101 pool_cache_cpu_init1(ci, pc);
2102 }
2103 }
2104
2105 /* Add to list of all pools. */
2106 if (__predict_true(!cold))
2107 mutex_enter(&pool_head_lock);
2108 TAILQ_FOREACH(pc1, &pool_cache_head, pc_cachelist) {
2109 if (strcmp(pc1->pc_pool.pr_wchan, pc->pc_pool.pr_wchan) > 0)
2110 break;
2111 }
2112 if (pc1 == NULL)
2113 TAILQ_INSERT_TAIL(&pool_cache_head, pc, pc_cachelist);
2114 else
2115 TAILQ_INSERT_BEFORE(pc1, pc, pc_cachelist);
2116 if (__predict_true(!cold))
2117 mutex_exit(&pool_head_lock);
2118
2119 membar_sync();
2120 pp->pr_cache = pc;
2121 }
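/*
 * A sketch of the bootstrap pattern (names hypothetical): callers
 * that must create a cache before pool_get() works supply static
 * storage and call pool_cache_bootstrap() directly.  A NULL ctor or
 * dtor is legal; nullop is substituted above.
 *
 *	static struct pool_cache foo_cache_store;
 *
 *	pool_cache_bootstrap(&foo_cache_store, sizeof(struct foo),
 *	    coherency_unit, 0, 0, "foopl", NULL, IPL_NONE,
 *	    NULL, NULL, NULL);
 */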
2122
2123 /*
2124 * pool_cache_destroy:
2125 *
2126 * Destroy a pool cache.
2127 */
2128 void
2129 pool_cache_destroy(pool_cache_t pc)
2130 {
2131 struct pool *pp = &pc->pc_pool;
2132 pool_cache_cpu_t *cc;
2133 pcg_t *pcg;
2134 int i;
2135
2136 /* Remove it from the global list. */
2137 mutex_enter(&pool_head_lock);
2138 while (pc->pc_refcnt != 0)
2139 cv_wait(&pool_busy, &pool_head_lock);
2140 TAILQ_REMOVE(&pool_cache_head, pc, pc_cachelist);
2141 mutex_exit(&pool_head_lock);
2142
2143 /* First, invalidate the entire cache. */
2144 pool_cache_invalidate(pc);
2145
2146 /* Disassociate it from the pool. */
2147 mutex_enter(&pp->pr_lock);
2148 pp->pr_cache = NULL;
2149 mutex_exit(&pp->pr_lock);
2150
2151 /* Destroy per-CPU data */
2152 for (i = 0; i < MAXCPUS; i++) {
2153 if ((cc = pc->pc_cpus[i]) == NULL)
2154 continue;
2155 if ((pcg = cc->cc_current) != &pcg_dummy) {
2156 pcg->pcg_next = NULL;
2157 pool_cache_invalidate_groups(pc, pcg);
2158 }
2159 if ((pcg = cc->cc_previous) != &pcg_dummy) {
2160 pcg->pcg_next = NULL;
2161 pool_cache_invalidate_groups(pc, pcg);
2162 }
2163 if (cc != &pc->pc_cpu0)
2164 pool_put(&cache_cpu_pool, cc);
2165 }
2166
2167 /* Finally, destroy it. */
2168 mutex_destroy(&pc->pc_lock);
2169 pool_destroy(pp);
2170 pool_put(&cache_pool, pc);
2171 }
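/*
 * Teardown sketch (continuing the hypothetical foo_cache example):
 * every object must have been returned with pool_cache_put() before
 * destruction; use pool_cache_invalidate() on its own to drop cached
 * constructed objects without tearing the whole cache down.
 *
 *	pool_cache_destroy(foo_cache);
 */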
2172
2173 /*
2174 * pool_cache_cpu_init1:
2175 *
2176 * Called for each pool_cache whenever a new CPU is attached.
2177 */
2178 static void
2179 pool_cache_cpu_init1(struct cpu_info *ci, pool_cache_t pc)
2180 {
2181 pool_cache_cpu_t *cc;
2182 int index;
2183
2184 index = ci->ci_index;
2185
2186 KASSERT(index < MAXCPUS);
2187
2188 if ((cc = pc->pc_cpus[index]) != NULL) {
2189 KASSERT(cc->cc_cpuindex == index);
2190 return;
2191 }
2192
2193 /*
2194 	 * The first CPU's structure is embedded in the cache, hence 'free'.
2195 	 * This needs to be the case for bootstrap: we may not be able to
2195 	 * allocate yet.
2196 */
2197 if (pc->pc_ncpu == 0) {
2198 cc = &pc->pc_cpu0;
2199 pc->pc_ncpu = 1;
2200 } else {
2201 mutex_enter(&pc->pc_lock);
2202 pc->pc_ncpu++;
2203 mutex_exit(&pc->pc_lock);
2204 cc = pool_get(&cache_cpu_pool, PR_WAITOK);
2205 }
2206
2207 cc->cc_ipl = pc->pc_pool.pr_ipl;
2208 cc->cc_iplcookie = makeiplcookie(cc->cc_ipl);
2209 cc->cc_cache = pc;
2210 cc->cc_cpuindex = index;
2211 cc->cc_hits = 0;
2212 cc->cc_misses = 0;
2213 cc->cc_current = __UNCONST(&pcg_dummy);
2214 cc->cc_previous = __UNCONST(&pcg_dummy);
2215
2216 pc->pc_cpus[index] = cc;
2217 }
2218
2219 /*
2220 * pool_cache_cpu_init:
2221 *
2222 * Called whenever a new CPU is attached.
2223 */
2224 void
2225 pool_cache_cpu_init(struct cpu_info *ci)
2226 {
2227 pool_cache_t pc;
2228
2229 mutex_enter(&pool_head_lock);
2230 TAILQ_FOREACH(pc, &pool_cache_head, pc_cachelist) {
2231 pc->pc_refcnt++;
2232 mutex_exit(&pool_head_lock);
2233
2234 pool_cache_cpu_init1(ci, pc);
2235
2236 mutex_enter(&pool_head_lock);
2237 pc->pc_refcnt--;
2238 cv_broadcast(&pool_busy);
2239 }
2240 mutex_exit(&pool_head_lock);
2241 }
2242
2243 /*
2244 * pool_cache_reclaim:
2245 *
2246 * Reclaim memory from a pool cache.
2247 */
2248 bool
2249 pool_cache_reclaim(pool_cache_t pc)
2250 {
2251
2252 return pool_reclaim(&pc->pc_pool);
2253 }
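/*
 * Sketch (hypothetical caller): under memory pressure, the pagedaemon
 * or a drain hook can ask the cache's backing pool to return idle
 * pages; the return value reports whether anything was freed.
 *
 *	if (pool_cache_reclaim(foo_cache))
 *		printf("foo_cache: released idle pages\n");
 */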
2254
2255 static void
2256 pool_cache_destruct_object1(pool_cache_t pc, void *object)
2257 {
2258
2259 (*pc->pc_dtor)(pc->pc_arg, object);
2260 pool_put(&pc->pc_pool, object);
2261 }
2262
2263 /*
2264 * pool_cache_destruct_object:
2265 *
2266 * Force destruction of an object and its release back into
2267 * the pool.
2268 */
2269 void
2270 pool_cache_destruct_object(pool_cache_t pc, void *object)
2271 {
2272
2273 FREECHECK_IN(&pc->pc_freecheck, object);
2274
2275 pool_cache_destruct_object1(pc, object);
2276 }
2277
2278 /*
2279 * pool_cache_invalidate_groups:
2280 *
2281 * Invalidate a chain of groups and destruct all objects.
2282 */
2283 static void
2284 pool_cache_invalidate_groups(pool_cache_t pc, pcg_t *pcg)
2285 {
2286 void *object;
2287 pcg_t *next;
2288 int i;
2289
2290 for (; pcg != NULL; pcg = next) {
2291 next = pcg->pcg_next;
2292
2293 for (i = 0; i < pcg->pcg_avail; i++) {
2294 object = pcg->pcg_objects[i].pcgo_va;
2295 pool_cache_destruct_object1(pc, object);
2296 }
2297
2298 if (pcg->pcg_size == PCG_NOBJECTS_LARGE) {
2299 pool_put(&pcg_large_pool, pcg);
2300 } else {
2301 KASSERT(pcg->pcg_size == PCG_NOBJECTS_NORMAL);
2302 pool_put(&pcg_normal_pool, pcg);
2303 }
2304 }
2305 }
2306
2307 /*
2308 * pool_cache_invalidate:
2309 *
2310 * Invalidate a pool cache (destruct and release all of the
2311 * cached objects). Does not reclaim objects from the pool.
2312 */
2313 void
2314 pool_cache_invalidate(pool_cache_t pc)
2315 {
2316 pcg_t *full, *empty, *part;
2317
2318 mutex_enter(&pc->pc_lock);
2319 full = pc->pc_fullgroups;
2320 empty = pc->pc_emptygroups;
2321 part = pc->pc_partgroups;
2322 pc->pc_fullgroups = NULL;
2323 pc->pc_emptygroups = NULL;
2324 pc->pc_partgroups = NULL;
2325 pc->pc_nfull = 0;
2326 pc->pc_nempty = 0;
2327 pc->pc_npart = 0;
2328 mutex_exit(&pc->pc_lock);
2329
2330 pool_cache_invalidate_groups(pc, full);
2331 pool_cache_invalidate_groups(pc, empty);
2332 pool_cache_invalidate_groups(pc, part);
2333 }
2334
2335 void
2336 pool_cache_set_drain_hook(pool_cache_t pc, void (*fn)(void *, int), void *arg)
2337 {
2338
2339 pool_set_drain_hook(&pc->pc_pool, fn, arg);
2340 }
2341
2342 void
2343 pool_cache_setlowat(pool_cache_t pc, int n)
2344 {
2345
2346 pool_setlowat(&pc->pc_pool, n);
2347 }
2348
2349 void
2350 pool_cache_sethiwat(pool_cache_t pc, int n)
2351 {
2352
2353 pool_sethiwat(&pc->pc_pool, n);
2354 }
2355
2356 void
2357 pool_cache_sethardlimit(pool_cache_t pc, int n, const char *warnmess, int ratecap)
2358 {
2359
2360 pool_sethardlimit(&pc->pc_pool, n, warnmess, ratecap);
2361 }
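/*
 * These wrappers simply tune the cache's backing pool.  Sketch with
 * hypothetical values: keep roughly 16 constructed items on hand,
 * cache at most 1024 idle items, and cap the pool at 2048 items,
 * logging the warning message at most once every 10 seconds:
 *
 *	pool_cache_setlowat(foo_cache, 16);
 *	pool_cache_sethiwat(foo_cache, 1024);
 *	pool_cache_sethardlimit(foo_cache, 2048,
 *	    "foo_cache: hard limit reached", 10);
 */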
2362
2363 static bool __noinline
2364 pool_cache_get_slow(pool_cache_cpu_t *cc, int s, void **objectp,
2365 paddr_t *pap, int flags)
2366 {
2367 pcg_t *pcg, *cur;
2368 uint64_t ncsw;
2369 pool_cache_t pc;
2370 void *object;
2371
2372 KASSERT(cc->cc_current->pcg_avail == 0);
2373 KASSERT(cc->cc_previous->pcg_avail == 0);
2374
2375 pc = cc->cc_cache;
2376 cc->cc_misses++;
2377
2378 /*
2379 	 * Nothing was available locally.  Try to grab a group
2380 * from the cache.
2381 */
2382 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) {
2383 ncsw = curlwp->l_ncsw;
2384 mutex_enter(&pc->pc_lock);
2385 pc->pc_contended++;
2386
2387 /*
2388 * If we context switched while locking, then
2389 * our view of the per-CPU data is invalid:
2390 * retry.
2391 */
2392 if (curlwp->l_ncsw != ncsw) {
2393 mutex_exit(&pc->pc_lock);
2394 return true;
2395 }
2396 }
2397
2398 if (__predict_true((pcg = pc->pc_fullgroups) != NULL)) {
2399 /*
2400 * If there's a full group, release our empty
2401 * group back to the cache. Install the full
2402 * group as cc_current and return.
2403 */
2404 if (__predict_true((cur = cc->cc_current) != &pcg_dummy)) {
2405 KASSERT(cur->pcg_avail == 0);
2406 cur->pcg_next = pc->pc_emptygroups;
2407 pc->pc_emptygroups = cur;
2408 pc->pc_nempty++;
2409 }
2410 KASSERT(pcg->pcg_avail == pcg->pcg_size);
2411 cc->cc_current = pcg;
2412 pc->pc_fullgroups = pcg->pcg_next;
2413 pc->pc_hits++;
2414 pc->pc_nfull--;
2415 mutex_exit(&pc->pc_lock);
2416 return true;
2417 }
2418
2419 /*
2420 * Nothing available locally or in cache. Take the slow
2421 * path: fetch a new object from the pool and construct
2422 * it.
2423 */
2424 pc->pc_misses++;
2425 mutex_exit(&pc->pc_lock);
2426 splx(s);
2427
2428 object = pool_get(&pc->pc_pool, flags);
2429 *objectp = object;
2430 if (__predict_false(object == NULL))
2431 return false;
2432
2433 if (__predict_false((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0)) {
2434 pool_put(&pc->pc_pool, object);
2435 *objectp = NULL;
2436 return false;
2437 }
2438
2439 KASSERT((((vaddr_t)object + pc->pc_pool.pr_itemoffset) &
2440 (pc->pc_pool.pr_align - 1)) == 0);
2441
2442 if (pap != NULL) {
2443 #ifdef POOL_VTOPHYS
2444 *pap = POOL_VTOPHYS(object);
2445 #else
2446 *pap = POOL_PADDR_INVALID;
2447 #endif
2448 }
2449
2450 FREECHECK_OUT(&pc->pc_freecheck, object);
2451 return false;
2452 }
2453
2454 /*
2455 * pool_cache_get{,_paddr}:
2456 *
2457 * Get an object from a pool cache (optionally returning
2458 * the physical address of the object).
2459 */
2460 void *
2461 pool_cache_get_paddr(pool_cache_t pc, int flags, paddr_t *pap)
2462 {
2463 pool_cache_cpu_t *cc;
2464 pcg_t *pcg;
2465 void *object;
2466 int s;
2467
2468 #ifdef LOCKDEBUG
2469 if (flags & PR_WAITOK) {
2470 ASSERT_SLEEPABLE();
2471 }
2472 #endif
2473
2474 /* Lock out interrupts and disable preemption. */
2475 s = splvm();
2476 while (/* CONSTCOND */ true) {
2477 		/* Try to allocate an object from the current group. */
2478 cc = pc->pc_cpus[curcpu()->ci_index];
2479 KASSERT(cc->cc_cache == pc);
2480 pcg = cc->cc_current;
2481 if (__predict_true(pcg->pcg_avail > 0)) {
2482 object = pcg->pcg_objects[--pcg->pcg_avail].pcgo_va;
2483 if (__predict_false(pap != NULL))
2484 *pap = pcg->pcg_objects[pcg->pcg_avail].pcgo_pa;
2485 #if defined(DIAGNOSTIC)
2486 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = NULL;
2487 KASSERT(pcg->pcg_avail < pcg->pcg_size);
2488 KASSERT(object != NULL);
2489 #endif
2490 cc->cc_hits++;
2491 splx(s);
2492 FREECHECK_OUT(&pc->pc_freecheck, object);
2493 return object;
2494 }
2495
2496 /*
2497 * That failed. If the previous group isn't empty, swap
2498 * it with the current group and allocate from there.
2499 */
2500 pcg = cc->cc_previous;
2501 if (__predict_true(pcg->pcg_avail > 0)) {
2502 cc->cc_previous = cc->cc_current;
2503 cc->cc_current = pcg;
2504 continue;
2505 }
2506
2507 /*
2508 * Can't allocate from either group: try the slow path.
2509 * If get_slow() allocated an object for us, or if
2510 * no more objects are available, it will return false.
2511 * Otherwise, we need to retry.
2512 */
2513 if (!pool_cache_get_slow(cc, s, &object, pap, flags))
2514 break;
2515 }
2516
2517 return object;
2518 }
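/*
 * Sketch of the _paddr form (buf_cache and device_post_buffer() are
 * hypothetical): callers that must hand the object's physical address
 * to a device can have it filled in at get time rather than
 * translating the virtual address afterwards:
 *
 *	paddr_t pa;
 *	void *buf;
 *
 *	buf = pool_cache_get_paddr(buf_cache, PR_NOWAIT, &pa);
 *	if (buf != NULL)
 *		device_post_buffer(sc, buf, pa);
 */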
2519
2520 static bool __noinline
2521 pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object)
2522 {
2523 pcg_t *pcg, *cur;
2524 uint64_t ncsw;
2525 pool_cache_t pc;
2526
2527 KASSERT(cc->cc_current->pcg_avail == cc->cc_current->pcg_size);
2528 KASSERT(cc->cc_previous->pcg_avail == cc->cc_previous->pcg_size);
2529
2530 pc = cc->cc_cache;
2531 pcg = NULL;
2532 cc->cc_misses++;
2533
2534 /*
2535 * If there are no empty groups in the cache then allocate one
2536 * while still unlocked.
2537 */
2538 if (__predict_false(pc->pc_emptygroups == NULL)) {
2539 if (__predict_true(!pool_cache_disable)) {
2540 pcg = pool_get(pc->pc_pcgpool, PR_NOWAIT);
2541 }
2542 if (__predict_true(pcg != NULL)) {
2543 pcg->pcg_avail = 0;
2544 pcg->pcg_size = pc->pc_pcgsize;
2545 }
2546 }
2547
2548 /* Lock the cache. */
2549 if (__predict_false(!mutex_tryenter(&pc->pc_lock))) {
2550 ncsw = curlwp->l_ncsw;
2551 mutex_enter(&pc->pc_lock);
2552 pc->pc_contended++;
2553
2554 /*
2555 * If we context switched while locking, then our view of
2556 * the per-CPU data is invalid: retry.
2557 */
2558 if (__predict_false(curlwp->l_ncsw != ncsw)) {
2559 mutex_exit(&pc->pc_lock);
2560 if (pcg != NULL) {
2561 pool_put(pc->pc_pcgpool, pcg);
2562 }
2563 return true;
2564 }
2565 }
2566
2567 	/* If we didn't allocate an empty group, take one from the cache. */
2568 if (pcg == NULL && pc->pc_emptygroups != NULL) {
2569 pcg = pc->pc_emptygroups;
2570 pc->pc_emptygroups = pcg->pcg_next;
2571 pc->pc_nempty--;
2572 }
2573
2574 /*
2575 	 * If there's an empty group, release our full group back
2576 	 * to the cache.  Install the empty group on the local CPU
2577 * and return.
2578 */
2579 if (pcg != NULL) {
2580 KASSERT(pcg->pcg_avail == 0);
2581 if (__predict_false(cc->cc_previous == &pcg_dummy)) {
2582 cc->cc_previous = pcg;
2583 } else {
2584 cur = cc->cc_current;
2585 if (__predict_true(cur != &pcg_dummy)) {
2586 KASSERT(cur->pcg_avail == cur->pcg_size);
2587 cur->pcg_next = pc->pc_fullgroups;
2588 pc->pc_fullgroups = cur;
2589 pc->pc_nfull++;
2590 }
2591 cc->cc_current = pcg;
2592 }
2593 pc->pc_hits++;
2594 mutex_exit(&pc->pc_lock);
2595 return true;
2596 }
2597
2598 /*
2599 * Nothing available locally or in cache, and we didn't
2600 * allocate an empty group. Take the slow path and destroy
2601 * the object here and now.
2602 */
2603 pc->pc_misses++;
2604 mutex_exit(&pc->pc_lock);
2605 splx(s);
2606 pool_cache_destruct_object(pc, object);
2607
2608 return false;
2609 }
2610
2611 /*
2612 * pool_cache_put{,_paddr}:
2613 *
2614 * Put an object back to the pool cache (optionally caching the
2615 * physical address of the object).
2616 */
2617 void
2618 pool_cache_put_paddr(pool_cache_t pc, void *object, paddr_t pa)
2619 {
2620 pool_cache_cpu_t *cc;
2621 pcg_t *pcg;
2622 int s;
2623
2624 FREECHECK_IN(&pc->pc_freecheck, object);
2625
2626 /* Lock out interrupts and disable preemption. */
2627 s = splvm();
2628 while (/* CONSTCOND */ true) {
2629 		/* If the current group isn't full, release the object there. */
2630 cc = pc->pc_cpus[curcpu()->ci_index];
2631 KASSERT(cc->cc_cache == pc);
2632 pcg = cc->cc_current;
2633 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) {
2634 pcg->pcg_objects[pcg->pcg_avail].pcgo_va = object;
2635 pcg->pcg_objects[pcg->pcg_avail].pcgo_pa = pa;
2636 pcg->pcg_avail++;
2637 cc->cc_hits++;
2638 splx(s);
2639 return;
2640 }
2641
2642 /*
2643 * That failed. If the previous group isn't full, swap
2644 * it with the current group and try again.
2645 */
2646 pcg = cc->cc_previous;
2647 if (__predict_true(pcg->pcg_avail < pcg->pcg_size)) {
2648 cc->cc_previous = cc->cc_current;
2649 cc->cc_current = pcg;
2650 continue;
2651 }
2652
2653 /*
2654 * Can't free to either group: try the slow path.
2655 * If put_slow() releases the object for us, it
2656 * will return false. Otherwise we need to retry.
2657 */
2658 if (!pool_cache_put_slow(cc, s, object))
2659 break;
2660 }
2661 }
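/*
 * Sketch, matching the get side above: the physical address is handed
 * back at put time so it can be cached alongside the object and
 * returned for free by a later pool_cache_get_paddr():
 *
 *	pool_cache_put_paddr(buf_cache, buf, pa);
 */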
2662
2663 /*
2664 * pool_cache_xcall:
2665 *
2666 * Transfer objects from the per-CPU cache to the global cache.
2667 * Run within a cross-call thread.
2668 */
2669 static void
2670 pool_cache_xcall(pool_cache_t pc)
2671 {
2672 pool_cache_cpu_t *cc;
2673 pcg_t *prev, *cur, **list;
2674 int s;
2675
2676 s = splvm();
2677 mutex_enter(&pc->pc_lock);
2678 cc = pc->pc_cpus[curcpu()->ci_index];
2679 cur = cc->cc_current;
2680 cc->cc_current = __UNCONST(&pcg_dummy);
2681 prev = cc->cc_previous;
2682 cc->cc_previous = __UNCONST(&pcg_dummy);
2683 if (cur != &pcg_dummy) {
2684 if (cur->pcg_avail == cur->pcg_size) {
2685 list = &pc->pc_fullgroups;
2686 pc->pc_nfull++;
2687 } else if (cur->pcg_avail == 0) {
2688 list = &pc->pc_emptygroups;
2689 pc->pc_nempty++;
2690 } else {
2691 list = &pc->pc_partgroups;
2692 pc->pc_npart++;
2693 }
2694 cur->pcg_next = *list;
2695 *list = cur;
2696 }
2697 if (prev != &pcg_dummy) {
2698 if (prev->pcg_avail == prev->pcg_size) {
2699 list = &pc->pc_fullgroups;
2700 pc->pc_nfull++;
2701 } else if (prev->pcg_avail == 0) {
2702 list = &pc->pc_emptygroups;
2703 pc->pc_nempty++;
2704 } else {
2705 list = &pc->pc_partgroups;
2706 pc->pc_npart++;
2707 }
2708 prev->pcg_next = *list;
2709 *list = prev;
2710 }
2711 mutex_exit(&pc->pc_lock);
2712 splx(s);
2713 }
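/*
 * Sketch of how the transfer is requested (assuming the cross-call
 * API from <sys/xcall.h>): broadcast pool_cache_xcall() to every CPU
 * and wait for all of them to run it before reclaiming the pool:
 *
 *	uint64_t where;
 *
 *	where = xc_broadcast(0, (xcfun_t)pool_cache_xcall, pc, NULL);
 *	xc_wait(where);
 */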
2714
2715 /*
2716 * Pool backend allocators.
2717 *
2718 * Each pool has a backend allocator that handles allocation, deallocation,
2719 * and any additional draining that might be needed.
2720 *
2721 * We provide two standard allocators:
2722 *
2723 * pool_allocator_kmem - the default when no allocator is specified
2724 *
2725 * pool_allocator_nointr - used for pools that will not be accessed
2726 * in interrupt context.
2727 */
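/*
 * Sketch of a custom allocator (foo_map and the foo_* functions are
 * hypothetical): a pool that takes its pages from a private submap
 * supplies its own pa_alloc/pa_free pair; a pa_pagesz of 0 selects
 * the default page size:
 *
 *	static void *
 *	foo_page_alloc(struct pool *pp, int flags)
 *	{
 *		return (void *)uvm_km_alloc_poolpage(foo_map,
 *		    (flags & PR_WAITOK) != 0);
 *	}
 *
 *	static void
 *	foo_page_free(struct pool *pp, void *v)
 *	{
 *		uvm_km_free_poolpage(foo_map, (vaddr_t)v);
 *	}
 *
 *	struct pool_allocator foo_allocator = {
 *		foo_page_alloc, foo_page_free, 0,
 *	};
 */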
2728 void *pool_page_alloc(struct pool *, int);
2729 void pool_page_free(struct pool *, void *);
2730
2731 #ifdef POOL_SUBPAGE
2732 struct pool_allocator pool_allocator_kmem_fullpage = {
2733 pool_page_alloc, pool_page_free, 0,
2734 .pa_backingmapptr = &kmem_map,
2735 };
2736 #else
2737 struct pool_allocator pool_allocator_kmem = {
2738 pool_page_alloc, pool_page_free, 0,
2739 .pa_backingmapptr = &kmem_map,
2740 };
2741 #endif
2742
2743 void *pool_page_alloc_nointr(struct pool *, int);
2744 void pool_page_free_nointr(struct pool *, void *);
2745
2746 #ifdef POOL_SUBPAGE
2747 struct pool_allocator pool_allocator_nointr_fullpage = {
2748 pool_page_alloc_nointr, pool_page_free_nointr, 0,
2749 .pa_backingmapptr = &kernel_map,
2750 };
2751 #else
2752 struct pool_allocator pool_allocator_nointr = {
2753 pool_page_alloc_nointr, pool_page_free_nointr, 0,
2754 .pa_backingmapptr = &kernel_map,
2755 };
2756 #endif
2757
2758 #ifdef POOL_SUBPAGE
2759 void *pool_subpage_alloc(struct pool *, int);
2760 void pool_subpage_free(struct pool *, void *);
2761
2762 struct pool_allocator pool_allocator_kmem = {
2763 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
2764 .pa_backingmapptr = &kmem_map,
2765 };
2766
2767 void *pool_subpage_alloc_nointr(struct pool *, int);
2768 void pool_subpage_free_nointr(struct pool *, void *);
2769
2770 struct pool_allocator pool_allocator_nointr = {
2771 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
2772 .pa_backingmapptr = &kmem_map,
2773 };
2774 #endif /* POOL_SUBPAGE */
2775
2776 static void *
2777 pool_allocator_alloc(struct pool *pp, int flags)
2778 {
2779 struct pool_allocator *pa = pp->pr_alloc;
2780 void *res;
2781
2782 res = (*pa->pa_alloc)(pp, flags);
2783 if (res == NULL && (flags & PR_WAITOK) == 0) {
2784 /*
2785 * We only run the drain hook here if PR_NOWAIT.
2786 * In other cases, the hook will be run in
2787 * pool_reclaim().
2788 */
2789 if (pp->pr_drain_hook != NULL) {
2790 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
2791 res = (*pa->pa_alloc)(pp, flags);
2792 }
2793 }
2794 return res;
2795 }
2796
2797 static void
2798 pool_allocator_free(struct pool *pp, void *v)
2799 {
2800 struct pool_allocator *pa = pp->pr_alloc;
2801
2802 (*pa->pa_free)(pp, v);
2803 }
2804
2805 void *
2806 pool_page_alloc(struct pool *pp, int flags)
2807 {
2808 bool waitok = (flags & PR_WAITOK) ? true : false;
2809
2810 return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok));
2811 }
2812
2813 void
2814 pool_page_free(struct pool *pp, void *v)
2815 {
2816
2817 uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v);
2818 }
2819
2820 static void *
2821 pool_page_alloc_meta(struct pool *pp, int flags)
2822 {
2823 bool waitok = (flags & PR_WAITOK) ? true : false;
2824
2825 return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok));
2826 }
2827
2828 static void
2829 pool_page_free_meta(struct pool *pp, void *v)
2830 {
2831
2832 uvm_km_free_poolpage(kmem_map, (vaddr_t) v);
2833 }
2834
2835 #ifdef POOL_SUBPAGE
2836 /* Sub-page allocator, for machines with large hardware pages. */
2837 void *
2838 pool_subpage_alloc(struct pool *pp, int flags)
2839 {
2840 return pool_get(&psppool, flags);
2841 }
2842
2843 void
2844 pool_subpage_free(struct pool *pp, void *v)
2845 {
2846 pool_put(&psppool, v);
2847 }
2848
2849 /* We don't provide a real nointr allocator. Maybe later. */
2850 void *
2851 pool_subpage_alloc_nointr(struct pool *pp, int flags)
2852 {
2853
2854 return (pool_subpage_alloc(pp, flags));
2855 }
2856
2857 void
2858 pool_subpage_free_nointr(struct pool *pp, void *v)
2859 {
2860
2861 pool_subpage_free(pp, v);
2862 }
2863 #endif /* POOL_SUBPAGE */
2864 void *
2865 pool_page_alloc_nointr(struct pool *pp, int flags)
2866 {
2867 bool waitok = (flags & PR_WAITOK) ? true : false;
2868
2869 return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok));
2870 }
2871
2872 void
2873 pool_page_free_nointr(struct pool *pp, void *v)
2874 {
2875
2876 uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v);
2877 }
2878
2879 #if defined(DDB)
2880 static bool
2881 pool_in_page(struct pool *pp, struct pool_item_header *ph, uintptr_t addr)
2882 {
2883
2884 return (uintptr_t)ph->ph_page <= addr &&
2885 addr < (uintptr_t)ph->ph_page + pp->pr_alloc->pa_pagesz;
2886 }
2887
2888 static bool
2889 pool_in_item(struct pool *pp, void *item, uintptr_t addr)
2890 {
2891
2892 return (uintptr_t)item <= addr && addr < (uintptr_t)item + pp->pr_size;
2893 }
2894
2895 static bool
2896 pool_in_cg(struct pool *pp, struct pool_cache_group *pcg, uintptr_t addr)
2897 {
2898 int i;
2899
2900 if (pcg == NULL) {
2901 return false;
2902 }
2903 for (i = 0; i < pcg->pcg_avail; i++) {
2904 if (pool_in_item(pp, pcg->pcg_objects[i].pcgo_va, addr)) {
2905 return true;
2906 }
2907 }
2908 return false;
2909 }
2910
2911 static bool
2912 pool_allocated(struct pool *pp, struct pool_item_header *ph, uintptr_t addr)
2913 {
2914
2915 if ((pp->pr_roflags & PR_NOTOUCH) != 0) {
2916 unsigned int idx = pr_item_notouch_index(pp, ph, (void *)addr);
2917 pool_item_bitmap_t *bitmap =
2918 ph->ph_bitmap + (idx / BITMAP_SIZE);
2919 pool_item_bitmap_t mask = 1 << (idx & BITMAP_MASK);
2920
2921 return (*bitmap & mask) == 0;
2922 } else {
2923 struct pool_item *pi;
2924
2925 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) {
2926 if (pool_in_item(pp, pi, addr)) {
2927 return false;
2928 }
2929 }
2930 return true;
2931 }
2932 }
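/*
 * Worked example of the bitmap case, assuming the 32-bit
 * pool_item_bitmap_t this file uses (BITMAP_SIZE 32, BITMAP_MASK 31):
 * for item index 70, the word is ph_bitmap[70 / 32] = ph_bitmap[2]
 * and the mask is 1 << (70 & 31) = 1 << 6.  A set bit marks the item
 * free, so a clear bit reports it as allocated.
 */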
2933
2934 void
2935 pool_whatis(uintptr_t addr, void (*pr)(const char *, ...))
2936 {
2937 struct pool *pp;
2938
2939 TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
2940 struct pool_item_header *ph;
2941 uintptr_t item;
2942 bool allocated = true;
2943 bool incache = false;
2944 bool incpucache = false;
2945 char cpucachestr[32];
2946
2947 if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
2948 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
2949 if (pool_in_page(pp, ph, addr)) {
2950 goto found;
2951 }
2952 }
2953 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
2954 if (pool_in_page(pp, ph, addr)) {
2955 allocated =
2956 pool_allocated(pp, ph, addr);
2957 goto found;
2958 }
2959 }
2960 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) {
2961 if (pool_in_page(pp, ph, addr)) {
2962 allocated = false;
2963 goto found;
2964 }
2965 }
2966 continue;
2967 } else {
2968 ph = pr_find_pagehead_noalign(pp, (void *)addr);
2969 if (ph == NULL || !pool_in_page(pp, ph, addr)) {
2970 continue;
2971 }
2972 allocated = pool_allocated(pp, ph, addr);
2973 }
2974 found:
2975 if (allocated && pp->pr_cache) {
2976 pool_cache_t pc = pp->pr_cache;
2977 struct pool_cache_group *pcg;
2978 int i;
2979
2980 for (pcg = pc->pc_fullgroups; pcg != NULL;
2981 pcg = pcg->pcg_next) {
2982 if (pool_in_cg(pp, pcg, addr)) {
2983 incache = true;
2984 goto print;
2985 }
2986 }
2987 for (i = 0; i < MAXCPUS; i++) {
2988 pool_cache_cpu_t *cc;
2989
2990 if ((cc = pc->pc_cpus[i]) == NULL) {
2991 continue;
2992 }
2993 if (pool_in_cg(pp, cc->cc_current, addr) ||
2994 pool_in_cg(pp, cc->cc_previous, addr)) {
2995 struct cpu_info *ci =
2996 cpu_lookup(i);
2997
2998 incpucache = true;
2999 snprintf(cpucachestr,
3000 sizeof(cpucachestr),
3001 "cached by CPU %u",
3002 ci->ci_index);
3003 goto print;
3004 }
3005 }
3006 }
3007 print:
3008 item = (uintptr_t)ph->ph_page + ph->ph_off;
3009 item = item + rounddown(addr - item, pp->pr_size);
3010 (*pr)("%p is %p+%zu in POOL '%s' (%s)\n",
3011 		    (void *)addr, (void *)item, (size_t)(addr - item),
3012 pp->pr_wchan,
3013 incpucache ? cpucachestr :
3014 incache ? "cached" : allocated ? "allocated" : "free");
3015 }
3016 }
3017 #endif /* defined(DDB) */