FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_pool.c
1 /* $NetBSD: subr_pool.c,v 1.124 2006/11/01 10:17:58 yamt Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
9 * Simulation Facility, NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.124 2006/11/01 10:17:58 yamt Exp $");
42
43 #include "opt_pool.h"
44 #include "opt_poollog.h"
45 #include "opt_lockdebug.h"
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/proc.h>
50 #include <sys/errno.h>
51 #include <sys/kernel.h>
52 #include <sys/malloc.h>
53 #include <sys/lock.h>
54 #include <sys/pool.h>
55 #include <sys/syslog.h>
56
57 #include <uvm/uvm.h>
58
59 /*
60 * Pool resource management utility.
61 *
62 * Memory is allocated in pages which are split into pieces according to
63 * the pool item size. Each page is kept on one of three lists in the
64 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
65 * for empty, full and partially-full pages respectively. The individual
66 * pool items are on a linked list headed by `ph_itemlist' in each page
67 * header. The memory for building the page list is either taken from
68 * the allocated pages themselves (for small pool items) or taken from
69 * an internal pool of page headers (`phpool').
70 */
71
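/*
 * Illustrative usage sketch, not part of the original file: `struct
 * foo', `foo_pool' and the "foopl" wait channel are invented names;
 * pool_init(), pool_get(), pool_put(), PR_WAITOK and
 * pool_allocator_nointr are the interfaces defined or referenced in
 * this file.
 */
#if 0	/* example only */
struct foo {
	int	f_refcnt;
	/* ... */
};

static struct pool foo_pool;

static void
foo_init(void)
{

	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0,
	    "foopl", &pool_allocator_nointr);
}

static struct foo *
foo_alloc(void)
{

	/* May sleep until an item or a new page becomes available. */
	return pool_get(&foo_pool, PR_WAITOK);
}

static void
foo_free(struct foo *f)
{

	pool_put(&foo_pool, f);
}
#endif	/* example only */
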
72 /* List of all pools */
73 LIST_HEAD(,pool) pool_head = LIST_HEAD_INITIALIZER(pool_head);
74
75 /* Private pool for page header structures */
76 #define PHPOOL_MAX 8
77 static struct pool phpool[PHPOOL_MAX];
78 #define PHPOOL_FREELIST_NELEM(idx) (((idx) == 0) ? 0 : (1 << (idx)))
79
80 #ifdef POOL_SUBPAGE
81 /* Pool of subpages for use by normal pools. */
82 static struct pool psppool;
83 #endif
84
85 static SLIST_HEAD(, pool_allocator) pa_deferinitq =
86 SLIST_HEAD_INITIALIZER(pa_deferinitq);
87
88 static void *pool_page_alloc_meta(struct pool *, int);
89 static void pool_page_free_meta(struct pool *, void *);
90
91 /* allocator for pool metadata */
92 static struct pool_allocator pool_allocator_meta = {
93 pool_page_alloc_meta, pool_page_free_meta,
94 .pa_backingmapptr = &kmem_map,
95 };
96
97 /* # of seconds to retain page after last use */
98 int pool_inactive_time = 10;
99
100 /* Next candidate for drainage (see pool_drain()) */
101 static struct pool *drainpp;
102
103 /* This spin lock protects both pool_head and drainpp. */
104 struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;
105
106 typedef uint8_t pool_item_freelist_t;
107
108 struct pool_item_header {
109 /* Page headers */
110 LIST_ENTRY(pool_item_header)
111 ph_pagelist; /* pool page list */
112 SPLAY_ENTRY(pool_item_header)
113 ph_node; /* Off-page page headers */
114 caddr_t ph_page; /* this page's address */
115 struct timeval ph_time; /* last referenced */
116 union {
117 /* !PR_NOTOUCH */
118 struct {
119 LIST_HEAD(, pool_item)
120 phu_itemlist; /* chunk list for this page */
121 } phu_normal;
122 /* PR_NOTOUCH */
123 struct {
124 uint16_t
125 phu_off; /* start offset in page */
126 pool_item_freelist_t
127 phu_firstfree; /* first free item */
128 /*
129 * XXX it might be better to use
130 * a simple bitmap and ffs(3)
131 */
132 } phu_notouch;
133 } ph_u;
134 uint16_t ph_nmissing; /* # of chunks in use */
135 };
136 #define ph_itemlist ph_u.phu_normal.phu_itemlist
137 #define ph_off ph_u.phu_notouch.phu_off
138 #define ph_firstfree ph_u.phu_notouch.phu_firstfree
139
140 struct pool_item {
141 #ifdef DIAGNOSTIC
142 u_int pi_magic;
143 #endif
144 #define PI_MAGIC 0xdeadbeefU
145 /* Other entries use only this list entry */
146 LIST_ENTRY(pool_item) pi_list;
147 };
148
149 #define POOL_NEEDS_CATCHUP(pp) \
150 ((pp)->pr_nitems < (pp)->pr_minitems)
151
152 /*
153 * Pool cache management.
154 *
155 * Pool caches provide a way for constructed objects to be cached by the
156  * pool subsystem.  This can improve performance by avoiding needless
157  * object construction and destruction, which are deferred until
158  * absolutely necessary.
159 *
160 * Caches are grouped into cache groups. Each cache group references
161 * up to 16 constructed objects. When a cache allocates an object
162 * from the pool, it calls the object's constructor and places it into
163 * a cache group. When a cache group frees an object back to the pool,
164 * it first calls the object's destructor. This allows the object to
165  * persist in constructed form while it sits free in the cache.
166 *
167 * Multiple caches may exist for each pool. This allows a single
168 * object type to have multiple constructed forms. The pool references
169 * each cache, so that when a pool is drained by the pagedaemon, it can
170 * drain each individual cache as well. Each time a cache is drained,
171 * the most idle cache group is freed to the pool in its entirety.
172 *
173  * Pool caches are laid on top of pools. By layering them, we can avoid
174 * the complexity of cache management for pools which would not benefit
175 * from it.
176 */
177
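/*
 * Illustrative sketch, not part of the original file: attaching a
 * cache to a pool so that objects keep their constructed state across
 * free/allocate cycles.  `foo_cache', `foo_ctor', `foo_dtor' and
 * `foo_pool' are invented names; pool_cache_init() and the *_paddr
 * functions are defined below, and pool_cache_get()/pool_cache_put()
 * are assumed to be the usual wrappers declared in <sys/pool.h>.
 */
#if 0	/* example only */
static struct pool_cache foo_cache;

static int
foo_ctor(void *arg, void *obj, int flags)
{

	/* Expensive one-time construction of the object. */
	return 0;
}

static void
foo_dtor(void *arg, void *obj)
{

	/* Undo foo_ctor(); runs only when the object leaves the cache. */
}

static void
foo_cache_example(void)
{
	void *obj;

	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);
	obj = pool_cache_get(&foo_cache, PR_WAITOK);
	/* ... use obj ... */
	pool_cache_put(&foo_cache, obj);
}
#endif	/* example only */
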
178 /* The cache group pool. */
179 static struct pool pcgpool;
180
181 static void pool_cache_reclaim(struct pool_cache *, struct pool_pagelist *,
182 struct pool_cache_grouplist *);
183 static void pcg_grouplist_free(struct pool_cache_grouplist *);
184
185 static int pool_catchup(struct pool *);
186 static void pool_prime_page(struct pool *, caddr_t,
187 struct pool_item_header *);
188 static void pool_update_curpage(struct pool *);
189
190 static int pool_grow(struct pool *, int);
191 static void *pool_allocator_alloc(struct pool *, int);
192 static void pool_allocator_free(struct pool *, void *);
193
194 static void pool_print_pagelist(struct pool *, struct pool_pagelist *,
195 void (*)(const char *, ...));
196 static void pool_print1(struct pool *, const char *,
197 void (*)(const char *, ...));
198
199 static int pool_chk_page(struct pool *, const char *,
200 struct pool_item_header *);
201
202 /*
203 * Pool log entry. An array of these is allocated in pool_init().
204 */
205 struct pool_log {
206 const char *pl_file;
207 long pl_line;
208 int pl_action;
209 #define PRLOG_GET 1
210 #define PRLOG_PUT 2
211 void *pl_addr;
212 };
213
214 #ifdef POOL_DIAGNOSTIC
215 /* Number of entries in pool log buffers */
216 #ifndef POOL_LOGSIZE
217 #define POOL_LOGSIZE 10
218 #endif
219
220 int pool_logsize = POOL_LOGSIZE;
221
222 static inline void
223 pr_log(struct pool *pp, void *v, int action, const char *file, long line)
224 {
225 int n = pp->pr_curlogentry;
226 struct pool_log *pl;
227
228 if ((pp->pr_roflags & PR_LOGGING) == 0)
229 return;
230
231 /*
232 * Fill in the current entry. Wrap around and overwrite
233 * the oldest entry if necessary.
234 */
235 pl = &pp->pr_log[n];
236 pl->pl_file = file;
237 pl->pl_line = line;
238 pl->pl_action = action;
239 pl->pl_addr = v;
240 if (++n >= pp->pr_logsize)
241 n = 0;
242 pp->pr_curlogentry = n;
243 }
244
245 static void
246 pr_printlog(struct pool *pp, struct pool_item *pi,
247 void (*pr)(const char *, ...))
248 {
249 int i = pp->pr_logsize;
250 int n = pp->pr_curlogentry;
251
252 if ((pp->pr_roflags & PR_LOGGING) == 0)
253 return;
254
255 /*
256 * Print all entries in this pool's log.
257 */
258 while (i-- > 0) {
259 struct pool_log *pl = &pp->pr_log[n];
260 if (pl->pl_action != 0) {
261 if (pi == NULL || pi == pl->pl_addr) {
262 (*pr)("\tlog entry %d:\n", i);
263 (*pr)("\t\taction = %s, addr = %p\n",
264 pl->pl_action == PRLOG_GET ? "get" : "put",
265 pl->pl_addr);
266 (*pr)("\t\tfile: %s at line %lu\n",
267 pl->pl_file, pl->pl_line);
268 }
269 }
270 if (++n >= pp->pr_logsize)
271 n = 0;
272 }
273 }
274
275 static inline void
276 pr_enter(struct pool *pp, const char *file, long line)
277 {
278
279 if (__predict_false(pp->pr_entered_file != NULL)) {
280 printf("pool %s: reentrancy at file %s line %ld\n",
281 pp->pr_wchan, file, line);
282 printf(" previous entry at file %s line %ld\n",
283 pp->pr_entered_file, pp->pr_entered_line);
284 panic("pr_enter");
285 }
286
287 pp->pr_entered_file = file;
288 pp->pr_entered_line = line;
289 }
290
291 static inline void
292 pr_leave(struct pool *pp)
293 {
294
295 if (__predict_false(pp->pr_entered_file == NULL)) {
296 printf("pool %s not entered?\n", pp->pr_wchan);
297 panic("pr_leave");
298 }
299
300 pp->pr_entered_file = NULL;
301 pp->pr_entered_line = 0;
302 }
303
304 static inline void
305 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...))
306 {
307
308 if (pp->pr_entered_file != NULL)
309 (*pr)("\n\tcurrently entered from file %s line %ld\n",
310 pp->pr_entered_file, pp->pr_entered_line);
311 }
312 #else
313 #define pr_log(pp, v, action, file, line)
314 #define pr_printlog(pp, pi, pr)
315 #define pr_enter(pp, file, line)
316 #define pr_leave(pp)
317 #define pr_enter_check(pp, pr)
318 #endif /* POOL_DIAGNOSTIC */
319
320 static inline int
321 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph,
322 const void *v)
323 {
324 const char *cp = v;
325 int idx;
326
327 KASSERT(pp->pr_roflags & PR_NOTOUCH);
328 idx = (cp - ph->ph_page - ph->ph_off) / pp->pr_size;
329 KASSERT(idx < pp->pr_itemsperpage);
330 return idx;
331 }
332
333 #define PR_FREELIST_ALIGN(p) \
334 roundup((uintptr_t)(p), sizeof(pool_item_freelist_t))
335 #define PR_FREELIST(ph) ((pool_item_freelist_t *)PR_FREELIST_ALIGN((ph) + 1))
336 #define PR_INDEX_USED ((pool_item_freelist_t)-1)
337 #define PR_INDEX_EOL ((pool_item_freelist_t)-2)
338
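/*
 * Worked example of the index-linked free list: a page holding four
 * items of which only item 2 is allocated looks like
 *
 *	ph_firstfree = 0
 *	freelist[]   = { 1, 3, PR_INDEX_USED, PR_INDEX_EOL }
 *
 * i.e. the free items chain 0 -> 1 -> 3 -> end-of-list, while the
 * allocated slot is marked PR_INDEX_USED so that a double free trips
 * the KASSERTs below.
 */
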
339 static inline void
340 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph,
341 void *obj)
342 {
343 int idx = pr_item_notouch_index(pp, ph, obj);
344 pool_item_freelist_t *freelist = PR_FREELIST(ph);
345
346 KASSERT(freelist[idx] == PR_INDEX_USED);
347 freelist[idx] = ph->ph_firstfree;
348 ph->ph_firstfree = idx;
349 }
350
351 static inline void *
352 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph)
353 {
354 int idx = ph->ph_firstfree;
355 pool_item_freelist_t *freelist = PR_FREELIST(ph);
356
357 KASSERT(freelist[idx] != PR_INDEX_USED);
358 ph->ph_firstfree = freelist[idx];
359 freelist[idx] = PR_INDEX_USED;
360
361 return ph->ph_page + ph->ph_off + idx * pp->pr_size;
362 }
363
364 static inline int
365 phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
366 {
367
368 /*
369  * We consider a pool_item_header with a smaller ph_page to be bigger.
370  * (This unnatural ordering is for the benefit of pr_find_pagehead.)
371 */
372
373 if (a->ph_page < b->ph_page)
374 return (1);
375 else if (a->ph_page > b->ph_page)
376 return (-1);
377 else
378 return (0);
379 }
380
381 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
382 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
383
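/*
 * Worked example of the inverted ordering (addresses invented): take a
 * PR_NOALIGN pool with pages starting at 0x1000 and 0x3000, and an
 * item v = 0x2500 inside the first page.  SPLAY_FIND() on v itself
 * fails; because "bigger" means "smaller ph_page", the splayed root is
 * one of v's neighbours in that order, and stepping to the successor
 * when the root's ph_page is still above v leaves pr_find_pagehead()
 * holding the header with the largest ph_page below v -- here 0x1000,
 * the page that actually contains the item.
 */
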
384 /*
385 * Return the pool page header based on item address.
386 */
387 static inline struct pool_item_header *
388 pr_find_pagehead(struct pool *pp, void *v)
389 {
390 struct pool_item_header *ph, tmp;
391
392 if ((pp->pr_roflags & PR_NOALIGN) != 0) {
393 tmp.ph_page = (caddr_t)(uintptr_t)v;
394 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
395 if (ph == NULL) {
396 ph = SPLAY_ROOT(&pp->pr_phtree);
397 if (ph != NULL && phtree_compare(&tmp, ph) >= 0) {
398 ph = SPLAY_NEXT(phtree, &pp->pr_phtree, ph);
399 }
400 KASSERT(ph == NULL || phtree_compare(&tmp, ph) < 0);
401 }
402 } else {
403 caddr_t page =
404 (caddr_t)((uintptr_t)v & pp->pr_alloc->pa_pagemask);
405
406 if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
407 ph = (void *)(page + pp->pr_phoffset);
408 } else {
409 tmp.ph_page = page;
410 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
411 }
412 }
413
414 KASSERT(ph == NULL || ((pp->pr_roflags & PR_PHINPAGE) != 0) ||
415 (ph->ph_page <= (char *)v &&
416 (char *)v < ph->ph_page + pp->pr_alloc->pa_pagesz));
417 return ph;
418 }
419
420 static void
421 pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq)
422 {
423 struct pool_item_header *ph;
424 int s;
425
426 while ((ph = LIST_FIRST(pq)) != NULL) {
427 LIST_REMOVE(ph, ph_pagelist);
428 pool_allocator_free(pp, ph->ph_page);
429 if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
430 s = splvm();
431 pool_put(pp->pr_phpool, ph);
432 splx(s);
433 }
434 }
435 }
436
437 /*
438 * Remove a page from the pool.
439 */
440 static inline void
441 pr_rmpage(struct pool *pp, struct pool_item_header *ph,
442 struct pool_pagelist *pq)
443 {
444
445 LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
446
447 /*
448 * If the page was idle, decrement the idle page count.
449 */
450 if (ph->ph_nmissing == 0) {
451 #ifdef DIAGNOSTIC
452 if (pp->pr_nidle == 0)
453 panic("pr_rmpage: nidle inconsistent");
454 if (pp->pr_nitems < pp->pr_itemsperpage)
455 panic("pr_rmpage: nitems inconsistent");
456 #endif
457 pp->pr_nidle--;
458 }
459
460 pp->pr_nitems -= pp->pr_itemsperpage;
461
462 /*
463 * Unlink the page from the pool and queue it for release.
464 */
465 LIST_REMOVE(ph, ph_pagelist);
466 if ((pp->pr_roflags & PR_PHINPAGE) == 0)
467 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
468 LIST_INSERT_HEAD(pq, ph, ph_pagelist);
469
470 pp->pr_npages--;
471 pp->pr_npagefree++;
472
473 pool_update_curpage(pp);
474 }
475
476 static boolean_t
477 pa_starved_p(struct pool_allocator *pa)
478 {
479
480 if (pa->pa_backingmap != NULL) {
481 return vm_map_starved_p(pa->pa_backingmap);
482 }
483 return FALSE;
484 }
485
486 static int
487 pool_reclaim_callback(struct callback_entry *ce, void *obj, void *arg)
488 {
489 struct pool *pp = obj;
490 struct pool_allocator *pa = pp->pr_alloc;
491
492 KASSERT(&pp->pr_reclaimerentry == ce);
493 pool_reclaim(pp);
494 if (!pa_starved_p(pa)) {
495 return CALLBACK_CHAIN_ABORT;
496 }
497 return CALLBACK_CHAIN_CONTINUE;
498 }
499
500 static void
501 pool_reclaim_register(struct pool *pp)
502 {
503 struct vm_map *map = pp->pr_alloc->pa_backingmap;
504 int s;
505
506 if (map == NULL) {
507 return;
508 }
509
510 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */
511 callback_register(&vm_map_to_kernel(map)->vmk_reclaim_callback,
512 &pp->pr_reclaimerentry, pp, pool_reclaim_callback);
513 splx(s);
514 }
515
516 static void
517 pool_reclaim_unregister(struct pool *pp)
518 {
519 struct vm_map *map = pp->pr_alloc->pa_backingmap;
520 int s;
521
522 if (map == NULL) {
523 return;
524 }
525
526 s = splvm(); /* not necessary for INTRSAFE maps, but don't care. */
527 callback_unregister(&vm_map_to_kernel(map)->vmk_reclaim_callback,
528 &pp->pr_reclaimerentry);
529 splx(s);
530 }
531
532 static void
533 pa_reclaim_register(struct pool_allocator *pa)
534 {
535 struct vm_map *map = *pa->pa_backingmapptr;
536 struct pool *pp;
537
538 KASSERT(pa->pa_backingmap == NULL);
539 if (map == NULL) {
540 SLIST_INSERT_HEAD(&pa_deferinitq, pa, pa_q);
541 return;
542 }
543 pa->pa_backingmap = map;
544 TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
545 pool_reclaim_register(pp);
546 }
547 }
548
549 /*
550 * Initialize all the pools listed in the "pools" link set.
551 */
552 void
553 pool_subsystem_init(void)
554 {
555 struct pool_allocator *pa;
556 __link_set_decl(pools, struct link_pool_init);
557 struct link_pool_init * const *pi;
558
559 __link_set_foreach(pi, pools)
560 pool_init((*pi)->pp, (*pi)->size, (*pi)->align,
561 (*pi)->align_offset, (*pi)->flags, (*pi)->wchan,
562 (*pi)->palloc);
563
564 while ((pa = SLIST_FIRST(&pa_deferinitq)) != NULL) {
565 KASSERT(pa->pa_backingmapptr != NULL);
566 KASSERT(*pa->pa_backingmapptr != NULL);
567 SLIST_REMOVE_HEAD(&pa_deferinitq, pa_q);
568 pa_reclaim_register(pa);
569 }
570 }
571
572 /*
573 * Initialize the given pool resource structure.
574 *
575 * We export this routine to allow other kernel parts to declare
576 * static pools that must be initialized before malloc() is available.
577 */
578 void
579 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
580 const char *wchan, struct pool_allocator *palloc)
581 {
582 #ifdef DEBUG
583 struct pool *pp1;
584 #endif
585 size_t trysize, phsize;
586 int off, slack, s;
587
588 KASSERT((1UL << (CHAR_BIT * sizeof(pool_item_freelist_t))) - 2 >=
589 PHPOOL_FREELIST_NELEM(PHPOOL_MAX - 1));
590
591 #ifdef DEBUG
592 /*
593 * Check that the pool hasn't already been initialised and
594 * added to the list of all pools.
595 */
596 LIST_FOREACH(pp1, &pool_head, pr_poollist) {
597 if (pp == pp1)
598 panic("pool_init: pool %s already initialised",
599 wchan);
600 }
601 #endif
602
603 #ifdef POOL_DIAGNOSTIC
604 /*
605 * Always log if POOL_DIAGNOSTIC is defined.
606 */
607 if (pool_logsize != 0)
608 flags |= PR_LOGGING;
609 #endif
610
611 if (palloc == NULL)
612 palloc = &pool_allocator_kmem;
613 #ifdef POOL_SUBPAGE
614 if (size > palloc->pa_pagesz) {
615 if (palloc == &pool_allocator_kmem)
616 palloc = &pool_allocator_kmem_fullpage;
617 else if (palloc == &pool_allocator_nointr)
618 palloc = &pool_allocator_nointr_fullpage;
619 }
620 #endif /* POOL_SUBPAGE */
621 if ((palloc->pa_flags & PA_INITIALIZED) == 0) {
622 if (palloc->pa_pagesz == 0)
623 palloc->pa_pagesz = PAGE_SIZE;
624
625 TAILQ_INIT(&palloc->pa_list);
626
627 simple_lock_init(&palloc->pa_slock);
628 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
629 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
630
631 if (palloc->pa_backingmapptr != NULL) {
632 pa_reclaim_register(palloc);
633 }
634 palloc->pa_flags |= PA_INITIALIZED;
635 }
636
637 if (align == 0)
638 align = ALIGN(1);
639
640 if ((flags & PR_NOTOUCH) == 0 && size < sizeof(struct pool_item))
641 size = sizeof(struct pool_item);
642
643 size = roundup(size, align);
644 #ifdef DIAGNOSTIC
645 if (size > palloc->pa_pagesz)
646 panic("pool_init: pool item size (%zu) too large", size);
647 #endif
648
649 /*
650 * Initialize the pool structure.
651 */
652 LIST_INIT(&pp->pr_emptypages);
653 LIST_INIT(&pp->pr_fullpages);
654 LIST_INIT(&pp->pr_partpages);
655 LIST_INIT(&pp->pr_cachelist);
656 pp->pr_curpage = NULL;
657 pp->pr_npages = 0;
658 pp->pr_minitems = 0;
659 pp->pr_minpages = 0;
660 pp->pr_maxpages = UINT_MAX;
661 pp->pr_roflags = flags;
662 pp->pr_flags = 0;
663 pp->pr_size = size;
664 pp->pr_align = align;
665 pp->pr_wchan = wchan;
666 pp->pr_alloc = palloc;
667 pp->pr_nitems = 0;
668 pp->pr_nout = 0;
669 pp->pr_hardlimit = UINT_MAX;
670 pp->pr_hardlimit_warning = NULL;
671 pp->pr_hardlimit_ratecap.tv_sec = 0;
672 pp->pr_hardlimit_ratecap.tv_usec = 0;
673 pp->pr_hardlimit_warning_last.tv_sec = 0;
674 pp->pr_hardlimit_warning_last.tv_usec = 0;
675 pp->pr_drain_hook = NULL;
676 pp->pr_drain_hook_arg = NULL;
677
678 /*
679  * Decide whether to put the page header off-page, to avoid wasting
680  * too large a part of the page on too big an item.  Off-page page
681  * headers go into a splay tree, so we can match a returned item with
682  * its header based on the page address.  We put the header in the
683  * page when the item size is below both 1/16 of the page size and
684  * 8 times the header size (XXX: tune).
685 *
686 * However, we'll put the header into the page if we can put
687 * it without wasting any items.
688 *
689 * Silently enforce `0 <= ioff < align'.
690 */
691 pp->pr_itemoffset = ioff %= align;
692 /* See the comment below about reserved bytes. */
693 trysize = palloc->pa_pagesz - ((align - ioff) % align);
694 phsize = ALIGN(sizeof(struct pool_item_header));
695 if ((pp->pr_roflags & (PR_NOTOUCH | PR_NOALIGN)) == 0 &&
696 (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) ||
697 trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) {
698 /* Use the end of the page for the page header */
699 pp->pr_roflags |= PR_PHINPAGE;
700 pp->pr_phoffset = off = palloc->pa_pagesz - phsize;
701 } else {
702 /* The page header will be taken from our page header pool */
703 pp->pr_phoffset = 0;
704 off = palloc->pa_pagesz;
705 SPLAY_INIT(&pp->pr_phtree);
706 }
707
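/*
 * Worked example of the above (numbers invented; the real header size
 * is platform-dependent): with 4096-byte pages, ioff = 0 and a 40-byte
 * aligned header, a 64-byte item is "small" (64 < MIN(4096/16, 40 << 3)
 * = 256), so the header sits in-page at offset 4056.  A 448-byte item
 * fails that test, but 4096/448 == (4096 - 40)/448 == 9, i.e. the
 * header fits in the tail slack without displacing an item, so it too
 * gets PR_PHINPAGE.  A 512-byte item (4096/512 = 8, (4096 - 40)/512
 * = 7) takes its header from phpool instead.
 */
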
708 /*
709 * Alignment is to take place at `ioff' within the item. This means
710 * we must reserve up to `align - 1' bytes on the page to allow
711 * appropriate positioning of each item.
712 */
713 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
714 KASSERT(pp->pr_itemsperpage != 0);
715 if ((pp->pr_roflags & PR_NOTOUCH)) {
716 int idx;
717
718 for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx);
719 idx++) {
720 /* nothing */
721 }
722 if (idx >= PHPOOL_MAX) {
723 /*
724  * if you see this panic, consider tweaking
725 * PHPOOL_MAX and PHPOOL_FREELIST_NELEM.
726 */
727 panic("%s: too large itemsperpage(%d) for PR_NOTOUCH",
728 pp->pr_wchan, pp->pr_itemsperpage);
729 }
730 pp->pr_phpool = &phpool[idx];
731 } else if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
732 pp->pr_phpool = &phpool[0];
733 }
734 #if defined(DIAGNOSTIC)
735 else {
736 pp->pr_phpool = NULL;
737 }
738 #endif
739
740 /*
741 * Use the slack between the chunks and the page header
742 * for "cache coloring".
743 */
744 slack = off - pp->pr_itemsperpage * pp->pr_size;
745 pp->pr_maxcolor = (slack / align) * align;
746 pp->pr_curcolor = 0;
747
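	/*
	 * For example, with align = 64 and 192 bytes of slack, successive
	 * pages start their items at offsets 0, 64, 128, 192, 0, ... so
	 * that the item at a given index on different pages maps to
	 * different CPU cache sets instead of always competing for the
	 * same lines.
	 */
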
748 pp->pr_nget = 0;
749 pp->pr_nfail = 0;
750 pp->pr_nput = 0;
751 pp->pr_npagealloc = 0;
752 pp->pr_npagefree = 0;
753 pp->pr_hiwat = 0;
754 pp->pr_nidle = 0;
755
756 #ifdef POOL_DIAGNOSTIC
757 if (flags & PR_LOGGING) {
758 if (kmem_map == NULL ||
759 (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
760 M_TEMP, M_NOWAIT)) == NULL)
761 pp->pr_roflags &= ~PR_LOGGING;
762 pp->pr_curlogentry = 0;
763 pp->pr_logsize = pool_logsize;
764 }
765 #endif
766
767 pp->pr_entered_file = NULL;
768 pp->pr_entered_line = 0;
769
770 simple_lock_init(&pp->pr_slock);
771
772 /*
773 * Initialize private page header pool and cache magazine pool if we
774 * haven't done so yet.
775 * XXX LOCKING.
776 */
777 if (phpool[0].pr_size == 0) {
778 int idx;
779 for (idx = 0; idx < PHPOOL_MAX; idx++) {
780 static char phpool_names[PHPOOL_MAX][6+1+6+1];
781 int nelem;
782 size_t sz;
783
784 nelem = PHPOOL_FREELIST_NELEM(idx);
785 snprintf(phpool_names[idx], sizeof(phpool_names[idx]),
786 "phpool-%d", nelem);
787 sz = sizeof(struct pool_item_header);
788 if (nelem) {
789 sz = PR_FREELIST_ALIGN(sz)
790 + nelem * sizeof(pool_item_freelist_t);
791 }
792 pool_init(&phpool[idx], sz, 0, 0, 0,
793 phpool_names[idx], &pool_allocator_meta);
794 }
795 #ifdef POOL_SUBPAGE
796 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0,
797 PR_RECURSIVE, "psppool", &pool_allocator_meta);
798 #endif
799 pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
800 0, "pcgpool", &pool_allocator_meta);
801 }
802
803 /* Insert into the list of all pools. */
804 simple_lock(&pool_head_slock);
805 LIST_INSERT_HEAD(&pool_head, pp, pr_poollist);
806 simple_unlock(&pool_head_slock);
807
808 /* Insert this into the list of pools using this allocator. */
809 s = splvm();
810 simple_lock(&palloc->pa_slock);
811 TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
812 simple_unlock(&palloc->pa_slock);
813 splx(s);
814 pool_reclaim_register(pp);
815 }
816
817 /*
818  * Decommission a pool resource.
819 */
820 void
821 pool_destroy(struct pool *pp)
822 {
823 struct pool_pagelist pq;
824 struct pool_item_header *ph;
825 int s;
826
827 /* Remove from global pool list */
828 simple_lock(&pool_head_slock);
829 LIST_REMOVE(pp, pr_poollist);
830 if (drainpp == pp)
831 drainpp = NULL;
832 simple_unlock(&pool_head_slock);
833
834 /* Remove this pool from its allocator's list of pools. */
835 pool_reclaim_unregister(pp);
836 s = splvm();
837 simple_lock(&pp->pr_alloc->pa_slock);
838 TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
839 simple_unlock(&pp->pr_alloc->pa_slock);
840 splx(s);
841
842 s = splvm();
843 simple_lock(&pp->pr_slock);
844
845 KASSERT(LIST_EMPTY(&pp->pr_cachelist));
846
847 #ifdef DIAGNOSTIC
848 if (pp->pr_nout != 0) {
849 pr_printlog(pp, NULL, printf);
850 panic("pool_destroy: pool busy: still out: %u",
851 pp->pr_nout);
852 }
853 #endif
854
855 KASSERT(LIST_EMPTY(&pp->pr_fullpages));
856 KASSERT(LIST_EMPTY(&pp->pr_partpages));
857
858 /* Remove all pages */
859 LIST_INIT(&pq);
860 while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
861 pr_rmpage(pp, ph, &pq);
862
863 simple_unlock(&pp->pr_slock);
864 splx(s);
865
866 pr_pagelist_free(pp, &pq);
867
868 #ifdef POOL_DIAGNOSTIC
869 if ((pp->pr_roflags & PR_LOGGING) != 0)
870 free(pp->pr_log, M_TEMP);
871 #endif
872 }
873
874 void
875 pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
876 {
877
878 /* XXX no locking -- must be used just after pool_init() */
879 #ifdef DIAGNOSTIC
880 if (pp->pr_drain_hook != NULL)
881 panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
882 #endif
883 pp->pr_drain_hook = fn;
884 pp->pr_drain_hook_arg = arg;
885 }
886
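/*
 * Illustrative sketch, not part of the original file: a subsystem that
 * keeps a private reserve of objects can register a drain hook so that
 * pool_get() (at the hard limit) and pool_reclaim() can ask it to give
 * memory back.  `foo_drain' and `foo_pool' are invented names; the
 * hook signature matches the pr_drain_hook calls in this file.
 */
#if 0	/* example only */
static void
foo_drain(void *arg, int flags)
{

	/*
	 * Called with the pool unlocked; flags is PR_NOWAIT or
	 * PR_WAITOK.  Return private reserves with pool_put().
	 */
}

static void
foo_attach(void)
{

	pool_set_drain_hook(&foo_pool, foo_drain, NULL);
}
#endif	/* example only */
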
887 static struct pool_item_header *
888 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
889 {
890 struct pool_item_header *ph;
891 int s;
892
893 LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);
894
895 if ((pp->pr_roflags & PR_PHINPAGE) != 0)
896 ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
897 else {
898 s = splvm();
899 ph = pool_get(pp->pr_phpool, flags);
900 splx(s);
901 }
902
903 return (ph);
904 }
905
906 /*
907 * Grab an item from the pool; must be called at appropriate spl level
908 */
909 void *
910 #ifdef POOL_DIAGNOSTIC
911 _pool_get(struct pool *pp, int flags, const char *file, long line)
912 #else
913 pool_get(struct pool *pp, int flags)
914 #endif
915 {
916 struct pool_item *pi;
917 struct pool_item_header *ph;
918 void *v;
919
920 #ifdef DIAGNOSTIC
921 if (__predict_false(pp->pr_itemsperpage == 0))
922 panic("pool_get: pool %p: pr_itemsperpage is zero, "
923 "pool not initialized?", pp);
924 if (__predict_false(curlwp == NULL && doing_shutdown == 0 &&
925 (flags & PR_WAITOK) != 0))
926 panic("pool_get: %s: must have NOWAIT", pp->pr_wchan);
927
928 #endif /* DIAGNOSTIC */
929 #ifdef LOCKDEBUG
930 if (flags & PR_WAITOK)
931 ASSERT_SLEEPABLE(NULL, "pool_get(PR_WAITOK)");
932 SCHED_ASSERT_UNLOCKED();
933 #endif
934
935 simple_lock(&pp->pr_slock);
936 pr_enter(pp, file, line);
937
938 startover:
939 /*
940 * Check to see if we've reached the hard limit. If we have,
941 * and we can wait, then wait until an item has been returned to
942 * the pool.
943 */
944 #ifdef DIAGNOSTIC
945 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
946 pr_leave(pp);
947 simple_unlock(&pp->pr_slock);
948 panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
949 }
950 #endif
951 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
952 if (pp->pr_drain_hook != NULL) {
953 /*
954 * Since the drain hook is going to free things
955 * back to the pool, unlock, call the hook, re-lock,
956 * and check the hardlimit condition again.
957 */
958 pr_leave(pp);
959 simple_unlock(&pp->pr_slock);
960 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
961 simple_lock(&pp->pr_slock);
962 pr_enter(pp, file, line);
963 if (pp->pr_nout < pp->pr_hardlimit)
964 goto startover;
965 }
966
967 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
968 /*
969 * XXX: A warning isn't logged in this case. Should
970 * it be?
971 */
972 pp->pr_flags |= PR_WANTED;
973 pr_leave(pp);
974 ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
975 pr_enter(pp, file, line);
976 goto startover;
977 }
978
979 /*
980 * Log a message that the hard limit has been hit.
981 */
982 if (pp->pr_hardlimit_warning != NULL &&
983 ratecheck(&pp->pr_hardlimit_warning_last,
984 &pp->pr_hardlimit_ratecap))
985 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
986
987 pp->pr_nfail++;
988
989 pr_leave(pp);
990 simple_unlock(&pp->pr_slock);
991 return (NULL);
992 }
993
994 /*
995 * The convention we use is that if `curpage' is not NULL, then
996 * it points at a non-empty bucket. In particular, `curpage'
997 * never points at a page header which has PR_PHINPAGE set and
998 * has no items in its bucket.
999 */
1000 if ((ph = pp->pr_curpage) == NULL) {
1001 int error;
1002
1003 #ifdef DIAGNOSTIC
1004 if (pp->pr_nitems != 0) {
1005 simple_unlock(&pp->pr_slock);
1006 printf("pool_get: %s: curpage NULL, nitems %u\n",
1007 pp->pr_wchan, pp->pr_nitems);
1008 panic("pool_get: nitems inconsistent");
1009 }
1010 #endif
1011
1012 /*
1013 * Call the back-end page allocator for more memory.
1014 * Release the pool lock, as the back-end page allocator
1015 * may block.
1016 */
1017 pr_leave(pp);
1018 error = pool_grow(pp, flags);
1019 pr_enter(pp, file, line);
1020 if (error != 0) {
1021 /*
1022 * We were unable to allocate a page or item
1023 * header, but we released the lock during
1024 * allocation, so perhaps items were freed
1025 * back to the pool. Check for this case.
1026 */
1027 if (pp->pr_curpage != NULL)
1028 goto startover;
1029
1030 pp->pr_nfail++;
1031 pr_leave(pp);
1032 simple_unlock(&pp->pr_slock);
1033 return (NULL);
1034 }
1035
1036 /* Start the allocation process over. */
1037 goto startover;
1038 }
1039 if (pp->pr_roflags & PR_NOTOUCH) {
1040 #ifdef DIAGNOSTIC
1041 if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) {
1042 pr_leave(pp);
1043 simple_unlock(&pp->pr_slock);
1044 panic("pool_get: %s: page empty", pp->pr_wchan);
1045 }
1046 #endif
1047 v = pr_item_notouch_get(pp, ph);
1048 #ifdef POOL_DIAGNOSTIC
1049 pr_log(pp, v, PRLOG_GET, file, line);
1050 #endif
1051 } else {
1052 v = pi = LIST_FIRST(&ph->ph_itemlist);
1053 if (__predict_false(v == NULL)) {
1054 pr_leave(pp);
1055 simple_unlock(&pp->pr_slock);
1056 panic("pool_get: %s: page empty", pp->pr_wchan);
1057 }
1058 #ifdef DIAGNOSTIC
1059 if (__predict_false(pp->pr_nitems == 0)) {
1060 pr_leave(pp);
1061 simple_unlock(&pp->pr_slock);
1062 printf("pool_get: %s: items on itemlist, nitems %u\n",
1063 pp->pr_wchan, pp->pr_nitems);
1064 panic("pool_get: nitems inconsistent");
1065 }
1066 #endif
1067
1068 #ifdef POOL_DIAGNOSTIC
1069 pr_log(pp, v, PRLOG_GET, file, line);
1070 #endif
1071
1072 #ifdef DIAGNOSTIC
1073 if (__predict_false(pi->pi_magic != PI_MAGIC)) {
1074 pr_printlog(pp, pi, printf);
1075 panic("pool_get(%s): free list modified: "
1076 "magic=%x; page %p; item addr %p\n",
1077 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
1078 }
1079 #endif
1080
1081 /*
1082 * Remove from item list.
1083 */
1084 LIST_REMOVE(pi, pi_list);
1085 }
1086 pp->pr_nitems--;
1087 pp->pr_nout++;
1088 if (ph->ph_nmissing == 0) {
1089 #ifdef DIAGNOSTIC
1090 if (__predict_false(pp->pr_nidle == 0))
1091 panic("pool_get: nidle inconsistent");
1092 #endif
1093 pp->pr_nidle--;
1094
1095 /*
1096 * This page was previously empty. Move it to the list of
1097 * partially-full pages. This page is already curpage.
1098 */
1099 LIST_REMOVE(ph, ph_pagelist);
1100 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
1101 }
1102 ph->ph_nmissing++;
1103 if (ph->ph_nmissing == pp->pr_itemsperpage) {
1104 #ifdef DIAGNOSTIC
1105 if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 &&
1106 !LIST_EMPTY(&ph->ph_itemlist))) {
1107 pr_leave(pp);
1108 simple_unlock(&pp->pr_slock);
1109 panic("pool_get: %s: nmissing inconsistent",
1110 pp->pr_wchan);
1111 }
1112 #endif
1113 /*
1114 * This page is now full. Move it to the full list
1115 * and select a new current page.
1116 */
1117 LIST_REMOVE(ph, ph_pagelist);
1118 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
1119 pool_update_curpage(pp);
1120 }
1121
1122 pp->pr_nget++;
1123 pr_leave(pp);
1124
1125 /*
1126 * If we have a low water mark and we are now below that low
1127 * water mark, add more items to the pool.
1128 */
1129 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
1130 /*
1131 * XXX: Should we log a warning? Should we set up a timeout
1132 * to try again in a second or so? The latter could break
1133 * a caller's assumptions about interrupt protection, etc.
1134 */
1135 }
1136
1137 simple_unlock(&pp->pr_slock);
1138 return (v);
1139 }
1140
1141 /*
1142 * Internal version of pool_put(). Pool is already locked/entered.
1143 */
1144 static void
1145 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq)
1146 {
1147 struct pool_item *pi = v;
1148 struct pool_item_header *ph;
1149
1150 LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
1151 SCHED_ASSERT_UNLOCKED();
1152
1153 #ifdef DIAGNOSTIC
1154 if (__predict_false(pp->pr_nout == 0)) {
1155 printf("pool %s: putting with none out\n",
1156 pp->pr_wchan);
1157 panic("pool_put");
1158 }
1159 #endif
1160
1161 if (__predict_false((ph = pr_find_pagehead(pp, v)) == NULL)) {
1162 pr_printlog(pp, NULL, printf);
1163 panic("pool_put: %s: page header missing", pp->pr_wchan);
1164 }
1165
1166 #ifdef LOCKDEBUG
1167 /*
1168 * Check if we're freeing a locked simple lock.
1169 */
1170 simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
1171 #endif
1172
1173 /*
1174 * Return to item list.
1175 */
1176 if (pp->pr_roflags & PR_NOTOUCH) {
1177 pr_item_notouch_put(pp, ph, v);
1178 } else {
1179 #ifdef DIAGNOSTIC
1180 pi->pi_magic = PI_MAGIC;
1181 #endif
1182 #ifdef DEBUG
1183 {
1184 int i, *ip = v;
1185
1186 for (i = 0; i < pp->pr_size / sizeof(int); i++) {
1187 *ip++ = PI_MAGIC;
1188 }
1189 }
1190 #endif
1191
1192 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
1193 }
1194 KDASSERT(ph->ph_nmissing != 0);
1195 ph->ph_nmissing--;
1196 pp->pr_nput++;
1197 pp->pr_nitems++;
1198 pp->pr_nout--;
1199
1200 /* Cancel "pool empty" condition if it exists */
1201 if (pp->pr_curpage == NULL)
1202 pp->pr_curpage = ph;
1203
1204 if (pp->pr_flags & PR_WANTED) {
1205 pp->pr_flags &= ~PR_WANTED;
1206 if (ph->ph_nmissing == 0)
1207 pp->pr_nidle++;
1208 wakeup((caddr_t)pp);
1209 return;
1210 }
1211
1212 /*
1213 * If this page is now empty, do one of two things:
1214 *
1215 * (1) If we have more pages than the page high water mark,
1216 * free the page back to the system. ONLY CONSIDER
1217 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE
1218 * CLAIM.
1219 *
1220 * (2) Otherwise, move the page to the empty page list.
1221 *
1222 * Either way, select a new current page (so we use a partially-full
1223 * page if one is available).
1224 */
1225 if (ph->ph_nmissing == 0) {
1226 pp->pr_nidle++;
1227 if (pp->pr_npages > pp->pr_minpages &&
1228 (pp->pr_npages > pp->pr_maxpages ||
1229 pa_starved_p(pp->pr_alloc))) {
1230 pr_rmpage(pp, ph, pq);
1231 } else {
1232 LIST_REMOVE(ph, ph_pagelist);
1233 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
1234
1235 /*
1236 * Update the timestamp on the page. A page must
1237 * be idle for some period of time before it can
1238 * be reclaimed by the pagedaemon. This minimizes
1239 * ping-pong'ing for memory.
1240 */
1241 getmicrotime(&ph->ph_time);
1242 }
1243 pool_update_curpage(pp);
1244 }
1245
1246 /*
1247 * If the page was previously completely full, move it to the
1248 * partially-full list and make it the current page. The next
1249 * allocation will get the item from this page, instead of
1250 * further fragmenting the pool.
1251 */
1252 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
1253 LIST_REMOVE(ph, ph_pagelist);
1254 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
1255 pp->pr_curpage = ph;
1256 }
1257 }
1258
1259 /*
1260 * Return resource to the pool; must be called at appropriate spl level
1261 */
1262 #ifdef POOL_DIAGNOSTIC
1263 void
1264 _pool_put(struct pool *pp, void *v, const char *file, long line)
1265 {
1266 struct pool_pagelist pq;
1267
1268 LIST_INIT(&pq);
1269
1270 simple_lock(&pp->pr_slock);
1271 pr_enter(pp, file, line);
1272
1273 pr_log(pp, v, PRLOG_PUT, file, line);
1274
1275 pool_do_put(pp, v, &pq);
1276
1277 pr_leave(pp);
1278 simple_unlock(&pp->pr_slock);
1279
1280 pr_pagelist_free(pp, &pq);
1281 }
1282 #undef pool_put
1283 #endif /* POOL_DIAGNOSTIC */
1284
1285 void
1286 pool_put(struct pool *pp, void *v)
1287 {
1288 struct pool_pagelist pq;
1289
1290 LIST_INIT(&pq);
1291
1292 simple_lock(&pp->pr_slock);
1293 pool_do_put(pp, v, &pq);
1294 simple_unlock(&pp->pr_slock);
1295
1296 pr_pagelist_free(pp, &pq);
1297 }
1298
1299 #ifdef POOL_DIAGNOSTIC
1300 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__)
1301 #endif
1302
1303 /*
1304 * pool_grow: grow a pool by a page.
1305 *
1306 * => called with pool locked.
1307 * => unlock and relock the pool.
1308 * => return with pool locked.
1309 */
1310
1311 static int
1312 pool_grow(struct pool *pp, int flags)
1313 {
1314 struct pool_item_header *ph = NULL;
1315 char *cp;
1316
1317 simple_unlock(&pp->pr_slock);
1318 cp = pool_allocator_alloc(pp, flags);
1319 if (__predict_true(cp != NULL)) {
1320 ph = pool_alloc_item_header(pp, cp, flags);
1321 }
1322 if (__predict_false(cp == NULL || ph == NULL)) {
1323 if (cp != NULL) {
1324 pool_allocator_free(pp, cp);
1325 }
1326 simple_lock(&pp->pr_slock);
1327 return ENOMEM;
1328 }
1329
1330 simple_lock(&pp->pr_slock);
1331 pool_prime_page(pp, cp, ph);
1332 pp->pr_npagealloc++;
1333 return 0;
1334 }
1335
1336 /*
1337 * Add N items to the pool.
1338 */
1339 int
1340 pool_prime(struct pool *pp, int n)
1341 {
1342 int newpages;
1343 int error = 0;
1344
1345 simple_lock(&pp->pr_slock);
1346
1347 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1348
1349 while (newpages-- > 0) {
1350 error = pool_grow(pp, PR_NOWAIT);
1351 if (error) {
1352 break;
1353 }
1354 pp->pr_minpages++;
1355 }
1356
1357 if (pp->pr_minpages >= pp->pr_maxpages)
1358 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */
1359
1360 simple_unlock(&pp->pr_slock);
1361 return error;
1362 }
1363
1364 /*
1365 * Add a page worth of items to the pool.
1366 *
1367 * Note, we must be called with the pool descriptor LOCKED.
1368 */
1369 static void
1370 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
1371 {
1372 struct pool_item *pi;
1373 caddr_t cp = storage;
1374 unsigned int align = pp->pr_align;
1375 unsigned int ioff = pp->pr_itemoffset;
1376 int n;
1377
1378 LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
1379
1380 #ifdef DIAGNOSTIC
1381 if ((pp->pr_roflags & PR_NOALIGN) == 0 &&
1382 ((uintptr_t)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
1383 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
1384 #endif
1385
1386 /*
1387 * Insert page header.
1388 */
1389 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
1390 LIST_INIT(&ph->ph_itemlist);
1391 ph->ph_page = storage;
1392 ph->ph_nmissing = 0;
1393 getmicrotime(&ph->ph_time);
1394 if ((pp->pr_roflags & PR_PHINPAGE) == 0)
1395 SPLAY_INSERT(phtree, &pp->pr_phtree, ph);
1396
1397 pp->pr_nidle++;
1398
1399 /*
1400 * Color this page.
1401 */
1402 cp = (caddr_t)(cp + pp->pr_curcolor);
1403 if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
1404 pp->pr_curcolor = 0;
1405
1406 /*
1407  * Adjust storage to apply alignment at `pr_itemoffset' within each item.
1408 */
1409 if (ioff != 0)
1410 cp = (caddr_t)(cp + (align - ioff));
1411
1412 /*
1413 * Insert remaining chunks on the bucket list.
1414 */
1415 n = pp->pr_itemsperpage;
1416 pp->pr_nitems += n;
1417
1418 if (pp->pr_roflags & PR_NOTOUCH) {
1419 pool_item_freelist_t *freelist = PR_FREELIST(ph);
1420 int i;
1421
1422 ph->ph_off = cp - storage;
1423 ph->ph_firstfree = 0;
1424 for (i = 0; i < n - 1; i++)
1425 freelist[i] = i + 1;
1426 freelist[n - 1] = PR_INDEX_EOL;
1427 } else {
1428 while (n--) {
1429 pi = (struct pool_item *)cp;
1430
1431 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);
1432
1433 /* Insert on page list */
1434 LIST_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
1435 #ifdef DIAGNOSTIC
1436 pi->pi_magic = PI_MAGIC;
1437 #endif
1438 cp = (caddr_t)(cp + pp->pr_size);
1439 }
1440 }
1441
1442 /*
1443 * If the pool was depleted, point at the new page.
1444 */
1445 if (pp->pr_curpage == NULL)
1446 pp->pr_curpage = ph;
1447
1448 if (++pp->pr_npages > pp->pr_hiwat)
1449 pp->pr_hiwat = pp->pr_npages;
1450 }
1451
1452 /*
1453  * Used by pool_get() when nitems drops below the low water mark, to
1454  * bring pr_nitems back up to that mark.
1455 *
1456 * Note 1, we never wait for memory here, we let the caller decide what to do.
1457 *
1458 * Note 2, we must be called with the pool already locked, and we return
1459 * with it locked.
1460 */
1461 static int
1462 pool_catchup(struct pool *pp)
1463 {
1464 int error = 0;
1465
1466 while (POOL_NEEDS_CATCHUP(pp)) {
1467 error = pool_grow(pp, PR_NOWAIT);
1468 if (error) {
1469 break;
1470 }
1471 }
1472 return error;
1473 }
1474
1475 static void
1476 pool_update_curpage(struct pool *pp)
1477 {
1478
1479 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
1480 if (pp->pr_curpage == NULL) {
1481 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
1482 }
1483 }
1484
1485 void
1486 pool_setlowat(struct pool *pp, int n)
1487 {
1488
1489 simple_lock(&pp->pr_slock);
1490
1491 pp->pr_minitems = n;
1492 pp->pr_minpages = (n == 0)
1493 ? 0
1494 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1495
1496 /* Make sure we're caught up with the newly-set low water mark. */
1497 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
1498 /*
1499 * XXX: Should we log a warning? Should we set up a timeout
1500 * to try again in a second or so? The latter could break
1501 * a caller's assumptions about interrupt protection, etc.
1502 */
1503 }
1504
1505 simple_unlock(&pp->pr_slock);
1506 }
1507
1508 void
1509 pool_sethiwat(struct pool *pp, int n)
1510 {
1511
1512 simple_lock(&pp->pr_slock);
1513
1514 pp->pr_maxpages = (n == 0)
1515 ? 0
1516 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1517
1518 simple_unlock(&pp->pr_slock);
1519 }
1520
1521 void
1522 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
1523 {
1524
1525 simple_lock(&pp->pr_slock);
1526
1527 pp->pr_hardlimit = n;
1528 pp->pr_hardlimit_warning = warnmess;
1529 pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1530 pp->pr_hardlimit_warning_last.tv_sec = 0;
1531 pp->pr_hardlimit_warning_last.tv_usec = 0;
1532
1533 /*
1534 * In-line version of pool_sethiwat(), because we don't want to
1535 * release the lock.
1536 */
1537 pp->pr_maxpages = (n == 0)
1538 ? 0
1539 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1540
1541 simple_unlock(&pp->pr_slock);
1542 }
1543
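/*
 * Illustrative sketch, not part of the original file: typical tuning
 * right after pool_init().  `foo_pool' and the numbers are invented;
 * the hard-limit warning is rate-limited to one report per 60 seconds.
 */
#if 0	/* example only */
static void
foo_tune(void)
{

	pool_setlowat(&foo_pool, 64);	/* keep >= 64 items primed */
	pool_sethiwat(&foo_pool, 1024);	/* reclaim idle pages beyond this */
	pool_sethardlimit(&foo_pool, 2048,
	    "WARNING: foo_pool hard limit reached", 60);
}
#endif	/* example only */
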
1544 /*
1545 * Release all complete pages that have not been used recently.
1546 */
1547 int
1548 #ifdef POOL_DIAGNOSTIC
1549 _pool_reclaim(struct pool *pp, const char *file, long line)
1550 #else
1551 pool_reclaim(struct pool *pp)
1552 #endif
1553 {
1554 struct pool_item_header *ph, *phnext;
1555 struct pool_cache *pc;
1556 struct pool_pagelist pq;
1557 struct pool_cache_grouplist pcgl;
1558 struct timeval curtime, diff;
1559
1560 if (pp->pr_drain_hook != NULL) {
1561 /*
1562 * The drain hook must be called with the pool unlocked.
1563 */
1564 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT);
1565 }
1566
1567 if (simple_lock_try(&pp->pr_slock) == 0)
1568 return (0);
1569 pr_enter(pp, file, line);
1570
1571 LIST_INIT(&pq);
1572 LIST_INIT(&pcgl);
1573
1574 /*
1575 * Reclaim items from the pool's caches.
1576 */
1577 LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
1578 pool_cache_reclaim(pc, &pq, &pcgl);
1579
1580 getmicrotime(&curtime);
1581
1582 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1583 phnext = LIST_NEXT(ph, ph_pagelist);
1584
1585 /* Check our minimum page claim */
1586 if (pp->pr_npages <= pp->pr_minpages)
1587 break;
1588
1589 KASSERT(ph->ph_nmissing == 0);
1590 timersub(&curtime, &ph->ph_time, &diff);
1591 if (diff.tv_sec < pool_inactive_time
1592 && !pa_starved_p(pp->pr_alloc))
1593 continue;
1594
1595 /*
1596 * If freeing this page would put us below
1597 * the low water mark, stop now.
1598 */
1599 if ((pp->pr_nitems - pp->pr_itemsperpage) <
1600 pp->pr_minitems)
1601 break;
1602
1603 pr_rmpage(pp, ph, &pq);
1604 }
1605
1606 pr_leave(pp);
1607 simple_unlock(&pp->pr_slock);
1608 if (LIST_EMPTY(&pq) && LIST_EMPTY(&pcgl))
1609 return 0;
1610
1611 pr_pagelist_free(pp, &pq);
1612 pcg_grouplist_free(&pcgl);
1613 return (1);
1614 }
1615
1616 /*
1617 * Drain pools, one at a time.
1618 *
1619 * Note, we must never be called from an interrupt context.
1620 */
1621 void
1622 pool_drain(void *arg)
1623 {
1624 struct pool *pp;
1625 int s;
1626
1627 pp = NULL;
1628 s = splvm();
1629 simple_lock(&pool_head_slock);
1630 if (drainpp == NULL) {
1631 drainpp = LIST_FIRST(&pool_head);
1632 }
1633 if (drainpp) {
1634 pp = drainpp;
1635 drainpp = LIST_NEXT(pp, pr_poollist);
1636 }
1637 simple_unlock(&pool_head_slock);
1638 if (pp)
1639 pool_reclaim(pp);
1640 splx(s);
1641 }
1642
1643 /*
1644 * Diagnostic helpers.
1645 */
1646 void
1647 pool_print(struct pool *pp, const char *modif)
1648 {
1649 int s;
1650
1651 s = splvm();
1652 if (simple_lock_try(&pp->pr_slock) == 0) {
1653 printf("pool %s is locked; try again later\n",
1654 pp->pr_wchan);
1655 splx(s);
1656 return;
1657 }
1658 pool_print1(pp, modif, printf);
1659 simple_unlock(&pp->pr_slock);
1660 splx(s);
1661 }
1662
1663 void
1664 pool_printall(const char *modif, void (*pr)(const char *, ...))
1665 {
1666 struct pool *pp;
1667
1668 if (simple_lock_try(&pool_head_slock) == 0) {
1669 (*pr)("WARNING: pool_head_slock is locked\n");
1670 } else {
1671 simple_unlock(&pool_head_slock);
1672 }
1673
1674 LIST_FOREACH(pp, &pool_head, pr_poollist) {
1675 pool_printit(pp, modif, pr);
1676 }
1677 }
1678
1679 void
1680 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
1681 {
1682
1683 if (pp == NULL) {
1684 (*pr)("Must specify a pool to print.\n");
1685 return;
1686 }
1687
1688 /*
1689 * Called from DDB; interrupts should be blocked, and all
1690 * other processors should be paused. We can skip locking
1691 * the pool in this case.
1692 *
1693 * We do a simple_lock_try() just to print the lock
1694 * status, however.
1695 */
1696
1697 if (simple_lock_try(&pp->pr_slock) == 0)
1698 (*pr)("WARNING: pool %s is locked\n", pp->pr_wchan);
1699 else
1700 simple_unlock(&pp->pr_slock);
1701
1702 pool_print1(pp, modif, pr);
1703 }
1704
1705 static void
1706 pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl,
1707 void (*pr)(const char *, ...))
1708 {
1709 struct pool_item_header *ph;
1710 #ifdef DIAGNOSTIC
1711 struct pool_item *pi;
1712 #endif
1713
1714 LIST_FOREACH(ph, pl, ph_pagelist) {
1715 (*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
1716 ph->ph_page, ph->ph_nmissing,
1717 (u_long)ph->ph_time.tv_sec,
1718 (u_long)ph->ph_time.tv_usec);
1719 #ifdef DIAGNOSTIC
1720 if (!(pp->pr_roflags & PR_NOTOUCH)) {
1721 LIST_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1722 if (pi->pi_magic != PI_MAGIC) {
1723 (*pr)("\t\t\titem %p, magic 0x%x\n",
1724 pi, pi->pi_magic);
1725 }
1726 }
1727 }
1728 #endif
1729 }
1730 }
1731
1732 static void
1733 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
1734 {
1735 struct pool_item_header *ph;
1736 struct pool_cache *pc;
1737 struct pool_cache_group *pcg;
1738 int i, print_log = 0, print_pagelist = 0, print_cache = 0;
1739 char c;
1740
1741 while ((c = *modif++) != '\0') {
1742 if (c == 'l')
1743 print_log = 1;
1744 if (c == 'p')
1745 print_pagelist = 1;
1746 if (c == 'c')
1747 print_cache = 1;
1748 }
1749
1750 (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
1751 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
1752 pp->pr_roflags);
1753 (*pr)("\talloc %p\n", pp->pr_alloc);
1754 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1755 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1756 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1757 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1758
1759 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1760 pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1761 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1762 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1763
1764 if (print_pagelist == 0)
1765 goto skip_pagelist;
1766
1767 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
1768 (*pr)("\n\tempty page list:\n");
1769 pool_print_pagelist(pp, &pp->pr_emptypages, pr);
1770 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
1771 (*pr)("\n\tfull page list:\n");
1772 pool_print_pagelist(pp, &pp->pr_fullpages, pr);
1773 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
1774 (*pr)("\n\tpartial-page list:\n");
1775 pool_print_pagelist(pp, &pp->pr_partpages, pr);
1776
1777 if (pp->pr_curpage == NULL)
1778 (*pr)("\tno current page\n");
1779 else
1780 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1781
1782 skip_pagelist:
1783 if (print_log == 0)
1784 goto skip_log;
1785
1786 (*pr)("\n");
1787 if ((pp->pr_roflags & PR_LOGGING) == 0)
1788 (*pr)("\tno log\n");
1789 else {
1790 pr_printlog(pp, NULL, pr);
1791 }
1792
1793 skip_log:
1794 if (print_cache == 0)
1795 goto skip_cache;
1796
1797 #define PR_GROUPLIST(pcg) \
1798 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); \
1799 for (i = 0; i < PCG_NOBJECTS; i++) { \
1800 if (pcg->pcg_objects[i].pcgo_pa != \
1801 POOL_PADDR_INVALID) { \
1802 (*pr)("\t\t\t%p, 0x%llx\n", \
1803 pcg->pcg_objects[i].pcgo_va, \
1804 (unsigned long long) \
1805 pcg->pcg_objects[i].pcgo_pa); \
1806 } else { \
1807 (*pr)("\t\t\t%p\n", \
1808 pcg->pcg_objects[i].pcgo_va); \
1809 } \
1810 }
1811
1812 LIST_FOREACH(pc, &pp->pr_cachelist, pc_poollist) {
1813 (*pr)("\tcache %p\n", pc);
1814 (*pr)("\t hits %lu misses %lu ngroups %lu nitems %lu\n",
1815 pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
1816 (*pr)("\t full groups:\n");
1817 LIST_FOREACH(pcg, &pc->pc_fullgroups, pcg_list) {
1818 PR_GROUPLIST(pcg);
1819 }
1820 (*pr)("\t partial groups:\n");
1821 LIST_FOREACH(pcg, &pc->pc_partgroups, pcg_list) {
1822 PR_GROUPLIST(pcg);
1823 }
1824 (*pr)("\t empty groups:\n");
1825 LIST_FOREACH(pcg, &pc->pc_emptygroups, pcg_list) {
1826 PR_GROUPLIST(pcg);
1827 }
1828 }
1829 #undef PR_GROUPLIST
1830
1831 skip_cache:
1832 pr_enter_check(pp, pr);
1833 }
1834
1835 static int
1836 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
1837 {
1838 struct pool_item *pi;
1839 caddr_t page;
1840 int n;
1841
1842 if ((pp->pr_roflags & PR_NOALIGN) == 0) {
1843 page = (caddr_t)((uintptr_t)ph & pp->pr_alloc->pa_pagemask);
1844 if (page != ph->ph_page &&
1845 (pp->pr_roflags & PR_PHINPAGE) != 0) {
1846 if (label != NULL)
1847 printf("%s: ", label);
1848 printf("pool(%p:%s): page inconsistency: page %p;"
1849 " at page head addr %p (p %p)\n", pp,
1850 pp->pr_wchan, ph->ph_page,
1851 ph, page);
1852 return 1;
1853 }
1854 }
1855
1856 if ((pp->pr_roflags & PR_NOTOUCH) != 0)
1857 return 0;
1858
1859 for (pi = LIST_FIRST(&ph->ph_itemlist), n = 0;
1860 pi != NULL;
1861 pi = LIST_NEXT(pi,pi_list), n++) {
1862
1863 #ifdef DIAGNOSTIC
1864 if (pi->pi_magic != PI_MAGIC) {
1865 if (label != NULL)
1866 printf("%s: ", label);
1867 printf("pool(%s): free list modified: magic=%x;"
1868 " page %p; item ordinal %d; addr %p\n",
1869 pp->pr_wchan, pi->pi_magic, ph->ph_page,
1870 n, pi);
1871 panic("pool");
1872 }
1873 #endif
1874 if ((pp->pr_roflags & PR_NOALIGN) != 0) {
1875 continue;
1876 }
1877 page = (caddr_t)((uintptr_t)pi & pp->pr_alloc->pa_pagemask);
1878 if (page == ph->ph_page)
1879 continue;
1880
1881 if (label != NULL)
1882 printf("%s: ", label);
1883 printf("pool(%p:%s): page inconsistency: page %p;"
1884 " item ordinal %d; addr %p (p %p)\n", pp,
1885 pp->pr_wchan, ph->ph_page,
1886 n, pi, page);
1887 return 1;
1888 }
1889 return 0;
1890 }
1891
1892
1893 int
1894 pool_chk(struct pool *pp, const char *label)
1895 {
1896 struct pool_item_header *ph;
1897 int r = 0;
1898
1899 simple_lock(&pp->pr_slock);
1900 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) {
1901 r = pool_chk_page(pp, label, ph);
1902 if (r) {
1903 goto out;
1904 }
1905 }
1906 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
1907 r = pool_chk_page(pp, label, ph);
1908 if (r) {
1909 goto out;
1910 }
1911 }
1912 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
1913 r = pool_chk_page(pp, label, ph);
1914 if (r) {
1915 goto out;
1916 }
1917 }
1918
1919 out:
1920 simple_unlock(&pp->pr_slock);
1921 return (r);
1922 }
1923
1924 /*
1925 * pool_cache_init:
1926 *
1927 * Initialize a pool cache.
1928 *
1929 * NOTE: If the pool must be protected from interrupts, we expect
1930 * to be called at the appropriate interrupt priority level.
1931 */
1932 void
1933 pool_cache_init(struct pool_cache *pc, struct pool *pp,
1934 int (*ctor)(void *, void *, int),
1935 void (*dtor)(void *, void *),
1936 void *arg)
1937 {
1938
1939 LIST_INIT(&pc->pc_emptygroups);
1940 LIST_INIT(&pc->pc_fullgroups);
1941 LIST_INIT(&pc->pc_partgroups);
1942 simple_lock_init(&pc->pc_slock);
1943
1944 pc->pc_pool = pp;
1945
1946 pc->pc_ctor = ctor;
1947 pc->pc_dtor = dtor;
1948 pc->pc_arg = arg;
1949
1950 pc->pc_hits = 0;
1951 pc->pc_misses = 0;
1952
1953 pc->pc_ngroups = 0;
1954
1955 pc->pc_nitems = 0;
1956
1957 simple_lock(&pp->pr_slock);
1958 LIST_INSERT_HEAD(&pp->pr_cachelist, pc, pc_poollist);
1959 simple_unlock(&pp->pr_slock);
1960 }
1961
1962 /*
1963 * pool_cache_destroy:
1964 *
1965 * Destroy a pool cache.
1966 */
1967 void
1968 pool_cache_destroy(struct pool_cache *pc)
1969 {
1970 struct pool *pp = pc->pc_pool;
1971
1972 /* First, invalidate the entire cache. */
1973 pool_cache_invalidate(pc);
1974
1975 /* ...and remove it from the pool's cache list. */
1976 simple_lock(&pp->pr_slock);
1977 LIST_REMOVE(pc, pc_poollist);
1978 simple_unlock(&pp->pr_slock);
1979 }
1980
1981 static inline void *
1982 pcg_get(struct pool_cache_group *pcg, paddr_t *pap)
1983 {
1984 void *object;
1985 u_int idx;
1986
1987 KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
1988 KASSERT(pcg->pcg_avail != 0);
1989 idx = --pcg->pcg_avail;
1990
1991 KASSERT(pcg->pcg_objects[idx].pcgo_va != NULL);
1992 object = pcg->pcg_objects[idx].pcgo_va;
1993 if (pap != NULL)
1994 *pap = pcg->pcg_objects[idx].pcgo_pa;
1995 pcg->pcg_objects[idx].pcgo_va = NULL;
1996
1997 return (object);
1998 }
1999
2000 static inline void
2001 pcg_put(struct pool_cache_group *pcg, void *object, paddr_t pa)
2002 {
2003 u_int idx;
2004
2005 KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
2006 idx = pcg->pcg_avail++;
2007
2008 KASSERT(pcg->pcg_objects[idx].pcgo_va == NULL);
2009 pcg->pcg_objects[idx].pcgo_va = object;
2010 pcg->pcg_objects[idx].pcgo_pa = pa;
2011 }
2012
2013 static void
2014 pcg_grouplist_free(struct pool_cache_grouplist *pcgl)
2015 {
2016 struct pool_cache_group *pcg;
2017 int s;
2018
2019 s = splvm();
2020 while ((pcg = LIST_FIRST(pcgl)) != NULL) {
2021 LIST_REMOVE(pcg, pcg_list);
2022 pool_put(&pcgpool, pcg);
2023 }
2024 splx(s);
2025 }
2026
2027 /*
2028 * pool_cache_get{,_paddr}:
2029 *
2030 * Get an object from a pool cache (optionally returning
2031 * the physical address of the object).
2032 */
2033 void *
2034 pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap)
2035 {
2036 struct pool_cache_group *pcg;
2037 void *object;
2038
2039 #ifdef LOCKDEBUG
2040 if (flags & PR_WAITOK)
2041 ASSERT_SLEEPABLE(NULL, "pool_cache_get(PR_WAITOK)");
2042 #endif
2043
2044 simple_lock(&pc->pc_slock);
2045
2046 pcg = LIST_FIRST(&pc->pc_partgroups);
2047 if (pcg == NULL) {
2048 pcg = LIST_FIRST(&pc->pc_fullgroups);
2049 if (pcg != NULL) {
2050 LIST_REMOVE(pcg, pcg_list);
2051 LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
2052 }
2053 }
2054 if (pcg == NULL) {
2055
2056 /*
2057 * No groups with any available objects. Allocate
2058 * a new object, construct it, and return it to
2059 * the caller. We will allocate a group, if necessary,
2060 * when the object is freed back to the cache.
2061 */
2062 pc->pc_misses++;
2063 simple_unlock(&pc->pc_slock);
2064 object = pool_get(pc->pc_pool, flags);
2065 if (object != NULL && pc->pc_ctor != NULL) {
2066 if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
2067 pool_put(pc->pc_pool, object);
2068 return (NULL);
2069 }
2070 }
2071 if (object != NULL && pap != NULL) {
2072 #ifdef POOL_VTOPHYS
2073 *pap = POOL_VTOPHYS(object);
2074 #else
2075 *pap = POOL_PADDR_INVALID;
2076 #endif
2077 }
2078 return (object);
2079 }
2080
2081 pc->pc_hits++;
2082 pc->pc_nitems--;
2083 object = pcg_get(pcg, pap);
2084
2085 if (pcg->pcg_avail == 0) {
2086 LIST_REMOVE(pcg, pcg_list);
2087 LIST_INSERT_HEAD(&pc->pc_emptygroups, pcg, pcg_list);
2088 }
2089 simple_unlock(&pc->pc_slock);
2090
2091 return (object);
2092 }
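
/*
 * Editor's sketch (not part of the original source): allocating from
 * the cache, with and without the physical address.  pool_cache_get()
 * is the <sys/pool.h> wrapper that passes a NULL paddr_t pointer.
 *
 *	struct foo *f;
 *	paddr_t pa;
 *
 *	f = pool_cache_get(&foo_cache, PR_WAITOK);
 *	f = pool_cache_get_paddr(&foo_cache, PR_NOWAIT, &pa);
 *	if (f == NULL)
 *		return (ENOMEM);	(PR_NOWAIT may fail)
 */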
2093
2094 /*
2095 * pool_cache_put{,_paddr}:
2096 *
2097 * Put an object back to the pool cache (optionally caching the
2098 * physical address of the object).
2099 */
2100 void
2101 pool_cache_put_paddr(struct pool_cache *pc, void *object, paddr_t pa)
2102 {
2103 struct pool_cache_group *pcg;
2104 int s;
2105
2106 if (__predict_false((pc->pc_pool->pr_flags & PR_WANTED) != 0)) {
2107 goto destruct;
2108 }
2109
2110 simple_lock(&pc->pc_slock);
2111
2112 pcg = LIST_FIRST(&pc->pc_partgroups);
2113 if (pcg == NULL) {
2114 pcg = LIST_FIRST(&pc->pc_emptygroups);
2115 if (pcg != NULL) {
2116 LIST_REMOVE(pcg, pcg_list);
2117 LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
2118 }
2119 }
2120 if (pcg == NULL) {
2121
2122 		/*
2123 		 * No partially-filled or empty group to receive the
2124 		 * object.  Attempt to allocate a new group.
2125 		 */
2126 simple_unlock(&pc->pc_slock);
2127 s = splvm();
2128 pcg = pool_get(&pcgpool, PR_NOWAIT);
2129 splx(s);
2130 if (pcg == NULL) {
2131 destruct:
2132
2133 		/*
2134 		 * No group available (or the pool has waiters, per PR_WANTED
2135 		 * above); destruct the object and free it back to the pool.
2136 		 */
2137 pool_cache_destruct_object(pc, object);
2138 return;
2139 }
2140 memset(pcg, 0, sizeof(*pcg));
2141 simple_lock(&pc->pc_slock);
2142 pc->pc_ngroups++;
2143 LIST_INSERT_HEAD(&pc->pc_partgroups, pcg, pcg_list);
2144 }
2145
2146 pc->pc_nitems++;
2147 pcg_put(pcg, object, pa);
2148
2149 if (pcg->pcg_avail == PCG_NOBJECTS) {
2150 LIST_REMOVE(pcg, pcg_list);
2151 LIST_INSERT_HEAD(&pc->pc_fullgroups, pcg, pcg_list);
2152 }
2153 simple_unlock(&pc->pc_slock);
2154 }
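
/*
 * Editor's sketch (not part of the original source): returning an
 * object to the cache.  pool_cache_put() is the <sys/pool.h> wrapper
 * that passes POOL_PADDR_INVALID when the physical address is not
 * being tracked.
 *
 *	pool_cache_put(&foo_cache, f);
 *	pool_cache_put_paddr(&foo_cache, f, pa);
 */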
2155
2156 /*
2157 * pool_cache_destruct_object:
2158 *
2159 * Force destruction of an object and its release back into
2160 * the pool.
2161 */
2162 void
2163 pool_cache_destruct_object(struct pool_cache *pc, void *object)
2164 {
2165
2166 if (pc->pc_dtor != NULL)
2167 (*pc->pc_dtor)(pc->pc_arg, object);
2168 pool_put(pc->pc_pool, object);
2169 }
2170
2171 static void
2172 pool_do_cache_invalidate_grouplist(struct pool_cache_grouplist *pcgsl,
2173 struct pool_cache *pc, struct pool_pagelist *pq,
2174 struct pool_cache_grouplist *pcgdl)
2175 {
2176 struct pool_cache_group *pcg, *npcg;
2177 void *object;
2178
2179 for (pcg = LIST_FIRST(pcgsl); pcg != NULL; pcg = npcg) {
2180 npcg = LIST_NEXT(pcg, pcg_list);
2181 while (pcg->pcg_avail != 0) {
2182 pc->pc_nitems--;
2183 object = pcg_get(pcg, NULL);
2184 if (pc->pc_dtor != NULL)
2185 (*pc->pc_dtor)(pc->pc_arg, object);
2186 pool_do_put(pc->pc_pool, object, pq);
2187 }
2188 pc->pc_ngroups--;
2189 LIST_REMOVE(pcg, pcg_list);
2190 LIST_INSERT_HEAD(pcgdl, pcg, pcg_list);
2191 }
2192 }
2193
2194 static void
2195 pool_do_cache_invalidate(struct pool_cache *pc, struct pool_pagelist *pq,
2196 struct pool_cache_grouplist *pcgl)
2197 {
2198
2199 LOCK_ASSERT(simple_lock_held(&pc->pc_slock));
2200 LOCK_ASSERT(simple_lock_held(&pc->pc_pool->pr_slock));
2201
2202 pool_do_cache_invalidate_grouplist(&pc->pc_fullgroups, pc, pq, pcgl);
2203 pool_do_cache_invalidate_grouplist(&pc->pc_partgroups, pc, pq, pcgl);
2204
2205 KASSERT(LIST_EMPTY(&pc->pc_partgroups));
2206 KASSERT(LIST_EMPTY(&pc->pc_fullgroups));
2207 KASSERT(pc->pc_nitems == 0);
2208 }
2209
2210 /*
2211 * pool_cache_invalidate:
2212 *
2213 * Invalidate a pool cache (destruct and release all of the
2214 * cached objects).
2215 */
2216 void
2217 pool_cache_invalidate(struct pool_cache *pc)
2218 {
2219 struct pool_pagelist pq;
2220 struct pool_cache_grouplist pcgl;
2221
2222 LIST_INIT(&pq);
2223 LIST_INIT(&pcgl);
2224
2225 simple_lock(&pc->pc_slock);
2226 simple_lock(&pc->pc_pool->pr_slock);
2227
2228 pool_do_cache_invalidate(pc, &pq, &pcgl);
2229
2230 simple_unlock(&pc->pc_pool->pr_slock);
2231 simple_unlock(&pc->pc_slock);
2232
2233 pr_pagelist_free(pc->pc_pool, &pq);
2234 pcg_grouplist_free(&pcgl);
2235 }
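
/*
 * Editor's note (sketch, not in the original source): invalidation
 * flushes all constructed objects without tearing the cache down,
 * e.g. when global state baked in by the constructor goes stale:
 *
 *	pool_cache_invalidate(&foo_cache);
 */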
2236
2237 /*
2238 * pool_cache_reclaim:
2239 *
2240 * Reclaim a pool cache for pool_reclaim().
2241 */
2242 static void
2243 pool_cache_reclaim(struct pool_cache *pc, struct pool_pagelist *pq,
2244 struct pool_cache_grouplist *pcgl)
2245 {
2246
2247 /*
2248 * We're locking in the wrong order (normally pool_cache -> pool,
2249 * but the pool is already locked when we get here), so we have
2250 	 * to use trylock.  If we can't lock the pool_cache we simply
2251 	 * skip it; missing one reclaim pass is harmless.
2252 	 */
2253 if (simple_lock_try(&pc->pc_slock) == 0)
2254 return;
2255
2256 pool_do_cache_invalidate(pc, pq, pcgl);
2257
2258 simple_unlock(&pc->pc_slock);
2259 }
2260
2261 /*
2262 * Pool backend allocators.
2263 *
2264 * Each pool has a backend allocator that handles allocation, deallocation,
2265 * and any additional draining that might be needed.
2266 *
2267 * We provide two standard allocators:
2268 *
2269 * pool_allocator_kmem - the default when no allocator is specified
2270 *
2271 * pool_allocator_nointr - used for pools that will not be accessed
2272 * in interrupt context.
2273 */
2274 void *pool_page_alloc(struct pool *, int);
2275 void pool_page_free(struct pool *, void *);
2276
2277 #ifdef POOL_SUBPAGE
2278 struct pool_allocator pool_allocator_kmem_fullpage = {
2279 pool_page_alloc, pool_page_free, 0,
2280 .pa_backingmapptr = &kmem_map,
2281 };
2282 #else
2283 struct pool_allocator pool_allocator_kmem = {
2284 pool_page_alloc, pool_page_free, 0,
2285 .pa_backingmapptr = &kmem_map,
2286 };
2287 #endif
2288
2289 void *pool_page_alloc_nointr(struct pool *, int);
2290 void pool_page_free_nointr(struct pool *, void *);
2291
2292 #ifdef POOL_SUBPAGE
2293 struct pool_allocator pool_allocator_nointr_fullpage = {
2294 pool_page_alloc_nointr, pool_page_free_nointr, 0,
2295 .pa_backingmapptr = &kernel_map,
2296 };
2297 #else
2298 struct pool_allocator pool_allocator_nointr = {
2299 pool_page_alloc_nointr, pool_page_free_nointr, 0,
2300 .pa_backingmapptr = &kernel_map,
2301 };
2302 #endif
2303
2304 #ifdef POOL_SUBPAGE
2305 void *pool_subpage_alloc(struct pool *, int);
2306 void pool_subpage_free(struct pool *, void *);
2307
2308 struct pool_allocator pool_allocator_kmem = {
2309 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
2310 .pa_backingmapptr = &kmem_map,
2311 };
2312
2313 void *pool_subpage_alloc_nointr(struct pool *, int);
2314 void pool_subpage_free_nointr(struct pool *, void *);
2315
2316 struct pool_allocator pool_allocator_nointr = {
2317 pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
2318 .pa_backingmapptr = &kmem_map,
2319 };
2320 #endif /* POOL_SUBPAGE */
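
/*
 * Editor's sketch (not part of the original source): a pool may also
 * supply its own backend by filling in a struct pool_allocator.
 * `my_map', `my_page_alloc' and `my_page_free' are hypothetical; a
 * pa_pagesz of 0 selects the default page size.
 *
 *	static struct pool_allocator my_allocator = {
 *		my_page_alloc, my_page_free, 0,
 *		.pa_backingmapptr = &my_map,
 *	};
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    &my_allocator);
 */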
2321
2322 static void *
2323 pool_allocator_alloc(struct pool *pp, int flags)
2324 {
2325 struct pool_allocator *pa = pp->pr_alloc;
2326 void *res;
2327
2328 LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));
2329
2330 res = (*pa->pa_alloc)(pp, flags);
2331 if (res == NULL && (flags & PR_WAITOK) == 0) {
2332 /*
2333 * We only run the drain hook here if PR_NOWAIT.
2334 * In other cases, the hook will be run in
2335 * pool_reclaim().
2336 */
2337 if (pp->pr_drain_hook != NULL) {
2338 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
2339 res = (*pa->pa_alloc)(pp, flags);
2340 }
2341 }
2342 return res;
2343 }
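
/*
 * Editor's sketch (not part of the original source): the drain hook
 * run above is registered with pool_set_drain_hook().  `foo_drain' is
 * a hypothetical callback expected to release idle objects back to
 * the pool when memory is tight.
 *
 *	static void
 *	foo_drain(void *arg, int flags)
 *	{
 *		release cached foo objects here
 *	}
 *
 *	pool_set_drain_hook(&foo_pool, foo_drain, NULL);
 */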
2344
2345 static void
2346 pool_allocator_free(struct pool *pp, void *v)
2347 {
2348 struct pool_allocator *pa = pp->pr_alloc;
2349
2350 LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));
2351
2352 (*pa->pa_free)(pp, v);
2353 }
2354
2355 void *
2356 pool_page_alloc(struct pool *pp, int flags)
2357 {
2358 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
2359
2360 return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, waitok));
2361 }
2362
2363 void
2364 pool_page_free(struct pool *pp, void *v)
2365 {
2366
2367 uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v);
2368 }
2369
2370 static void *
2371 pool_page_alloc_meta(struct pool *pp, int flags)
2372 {
2373 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
2374
2375 return ((void *) uvm_km_alloc_poolpage(kmem_map, waitok));
2376 }
2377
2378 static void
2379 pool_page_free_meta(struct pool *pp, void *v)
2380 {
2381
2382 uvm_km_free_poolpage(kmem_map, (vaddr_t) v);
2383 }
2384
2385 #ifdef POOL_SUBPAGE
2386 /* Sub-page allocator, for machines with large hardware pages. */
2387 void *
2388 pool_subpage_alloc(struct pool *pp, int flags)
2389 {
2390 void *v;
2391 int s;
2392 s = splvm();
2393 v = pool_get(&psppool, flags);
2394 splx(s);
2395 return v;
2396 }
2397
2398 void
2399 pool_subpage_free(struct pool *pp, void *v)
2400 {
2401 int s;
2402 s = splvm();
2403 pool_put(&psppool, v);
2404 splx(s);
2405 }
2406
2407 /* We don't provide a real nointr allocator yet; the interrupt-safe sub-page routines are reused below. */
2408 void *
2409 pool_subpage_alloc_nointr(struct pool *pp, int flags)
2410 {
2411
2412 return (pool_subpage_alloc(pp, flags));
2413 }
2414
2415 void
2416 pool_subpage_free_nointr(struct pool *pp, void *v)
2417 {
2418
2419 pool_subpage_free(pp, v);
2420 }
2421 #endif /* POOL_SUBPAGE */
2422 void *
2423 pool_page_alloc_nointr(struct pool *pp, int flags)
2424 {
2425 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
2426
2427 return ((void *) uvm_km_alloc_poolpage_cache(kernel_map, waitok));
2428 }
2429
2430 void
2431 pool_page_free_nointr(struct pool *pp, void *v)
2432 {
2433
2434 uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v);
2435 }
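
/*
 * Editor's note (not in the original source): the _nointr page
 * routines differ from pool_page_alloc()/pool_page_free() only in the
 * backing map, drawing from kernel_map instead of the interrupt-safe
 * kmem_map, which is why they must not be used from interrupt context.
 */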