FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_pool.c
1 /* $NetBSD: subr_pool.c,v 1.93.2.1 2004/06/22 08:58:42 tron Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
9 * Simulation Facility, NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.93.2.1 2004/06/22 08:58:42 tron Exp $");
42
43 #include "opt_pool.h"
44 #include "opt_poollog.h"
45 #include "opt_lockdebug.h"
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/proc.h>
50 #include <sys/errno.h>
51 #include <sys/kernel.h>
52 #include <sys/malloc.h>
53 #include <sys/lock.h>
54 #include <sys/pool.h>
55 #include <sys/syslog.h>
56
57 #include <uvm/uvm.h>
58
59 /*
60 * Pool resource management utility.
61 *
62 * Memory is allocated in pages which are split into pieces according to
63 * the pool item size. Each page is kept on one of three lists in the
64 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
65 * for empty, full and partially-full pages respectively. The individual
66 * pool items are on a linked list headed by `ph_itemlist' in each page
67 * header. The memory for building the page list is either taken from
68 * the allocated pages themselves (for small pool items) or taken from
69 * an internal pool of page headers (`phpool').
70 */
71
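/*
 * Illustrative sketch, added for exposition and not part of the original
 * file: how a typical consumer declares and uses a pool.  `struct foo',
 * foo_pool and foo_example() are hypothetical names for this example only.
 */
#if 0
static struct pool foo_pool;

static void
foo_example(void)
{
	struct foo *f;

	/* One-time setup; a NULL allocator selects the default back-end. */
	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);

	f = pool_get(&foo_pool, PR_WAITOK);	/* may sleep for memory */
	/* ... use f ... */
	pool_put(&foo_pool, f);			/* item returns to its page */
}
#endif	/* illustrative sketch */
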
72 /* List of all pools */
73 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
74
75 /* Private pool for page header structures */
76 static struct pool phpool;
77
78 #ifdef POOL_SUBPAGE
79 /* Pool of subpages for use by normal pools. */
80 static struct pool psppool;
81 #endif
82
83 /* # of seconds to retain page after last use */
84 int pool_inactive_time = 10;
85
86 /* Next candidate for drainage (see pool_drain()) */
87 static struct pool *drainpp;
88
89 /* This spin lock protects both pool_head and drainpp. */
90 struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;
91
92 struct pool_item_header {
93 /* Page headers */
94 LIST_ENTRY(pool_item_header)
95 ph_pagelist; /* pool page list */
96 TAILQ_HEAD(,pool_item) ph_itemlist; /* chunk list for this page */
97 SPLAY_ENTRY(pool_item_header)
98 ph_node; /* Off-page page headers */
99 unsigned int ph_nmissing; /* # of chunks in use */
100 caddr_t ph_page; /* this page's address */
101 struct timeval ph_time; /* last referenced */
102 };
103
104 struct pool_item {
105 #ifdef DIAGNOSTIC
106 u_int pi_magic;
107 #endif
108 #define PI_MAGIC 0xdeadbeefU
109 /* Other entries use only this list entry */
110 TAILQ_ENTRY(pool_item) pi_list;
111 };
112
113 #define POOL_NEEDS_CATCHUP(pp) \
114 ((pp)->pr_nitems < (pp)->pr_minitems)
115
116 /*
117 * Pool cache management.
118 *
119 * Pool caches provide a way for constructed objects to be cached by the
120 * pool subsystem. This can lead to performance improvements by avoiding
121 * needless object construction/destruction; it is deferred until absolutely
122 * necessary.
123 *
124 * Caches are grouped into cache groups. Each cache group references
125 * up to 16 constructed objects. When a cache allocates an object
126 * from the pool, it calls the object's constructor and places it into
127 * a cache group. When a cache group frees an object back to the pool,
128 * it first calls the object's destructor. This allows the object to
129 * persist in constructed form while freed to the cache.
130 *
131 * Multiple caches may exist for each pool. This allows a single
132 * object type to have multiple constructed forms. The pool references
133 * each cache, so that when a pool is drained by the pagedaemon, it can
134 * drain each individual cache as well. Each time a cache is drained,
135 * the most idle cache group is freed to the pool in its entirety.
136 *
137 * Pool caches are laid on top of pools. By layering them, we can avoid
138 * the complexity of cache management for pools which would not benefit
139 * from it.
140 */
141
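/*
 * Illustrative sketch, not part of the original file: layering a cache of
 * constructed objects on top of an existing pool.  foo_cache, foo_pool,
 * foo_ctor() and foo_dtor() are hypothetical names; the *_paddr variants
 * are used because those are the functions defined in this file.
 */
#if 0
static struct pool_cache foo_cache;

static void
foo_cache_example(void)
{
	void *obj;

	/* ctor/dtor run only when objects cross the pool/cache boundary. */
	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);

	obj = pool_cache_get_paddr(&foo_cache, PR_WAITOK, NULL);
	/* ... use the constructed object ... */
	pool_cache_put_paddr(&foo_cache, obj, POOL_PADDR_INVALID);
}
#endif	/* illustrative sketch */
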
142 /* The cache group pool. */
143 static struct pool pcgpool;
144
145 static void pool_cache_reclaim(struct pool_cache *);
146
147 static int pool_catchup(struct pool *);
148 static void pool_prime_page(struct pool *, caddr_t,
149 struct pool_item_header *);
150 static void pool_update_curpage(struct pool *);
151
152 void *pool_allocator_alloc(struct pool *, int);
153 void pool_allocator_free(struct pool *, void *);
154
155 static void pool_print_pagelist(struct pool_pagelist *,
156 void (*)(const char *, ...));
157 static void pool_print1(struct pool *, const char *,
158 void (*)(const char *, ...));
159
160 static int pool_chk_page(struct pool *, const char *,
161 struct pool_item_header *);
162
163 /*
164 * Pool log entry. An array of these is allocated in pool_init().
165 */
166 struct pool_log {
167 const char *pl_file;
168 long pl_line;
169 int pl_action;
170 #define PRLOG_GET 1
171 #define PRLOG_PUT 2
172 void *pl_addr;
173 };
174
175 #ifdef POOL_DIAGNOSTIC
176 /* Number of entries in pool log buffers */
177 #ifndef POOL_LOGSIZE
178 #define POOL_LOGSIZE 10
179 #endif
180
181 int pool_logsize = POOL_LOGSIZE;
182
183 static __inline void
184 pr_log(struct pool *pp, void *v, int action, const char *file, long line)
185 {
186 int n = pp->pr_curlogentry;
187 struct pool_log *pl;
188
189 if ((pp->pr_roflags & PR_LOGGING) == 0)
190 return;
191
192 /*
193 * Fill in the current entry. Wrap around and overwrite
194 * the oldest entry if necessary.
195 */
196 pl = &pp->pr_log[n];
197 pl->pl_file = file;
198 pl->pl_line = line;
199 pl->pl_action = action;
200 pl->pl_addr = v;
201 if (++n >= pp->pr_logsize)
202 n = 0;
203 pp->pr_curlogentry = n;
204 }
205
206 static void
207 pr_printlog(struct pool *pp, struct pool_item *pi,
208 void (*pr)(const char *, ...))
209 {
210 int i = pp->pr_logsize;
211 int n = pp->pr_curlogentry;
212
213 if ((pp->pr_roflags & PR_LOGGING) == 0)
214 return;
215
216 /*
217 * Print all entries in this pool's log.
218 */
219 while (i-- > 0) {
220 struct pool_log *pl = &pp->pr_log[n];
221 if (pl->pl_action != 0) {
222 if (pi == NULL || pi == pl->pl_addr) {
223 (*pr)("\tlog entry %d:\n", i);
224 (*pr)("\t\taction = %s, addr = %p\n",
225 pl->pl_action == PRLOG_GET ? "get" : "put",
226 pl->pl_addr);
227 (*pr)("\t\tfile: %s at line %lu\n",
228 pl->pl_file, pl->pl_line);
229 }
230 }
231 if (++n >= pp->pr_logsize)
232 n = 0;
233 }
234 }
235
236 static __inline void
237 pr_enter(struct pool *pp, const char *file, long line)
238 {
239
240 if (__predict_false(pp->pr_entered_file != NULL)) {
241 printf("pool %s: reentrancy at file %s line %ld\n",
242 pp->pr_wchan, file, line);
243 printf(" previous entry at file %s line %ld\n",
244 pp->pr_entered_file, pp->pr_entered_line);
245 panic("pr_enter");
246 }
247
248 pp->pr_entered_file = file;
249 pp->pr_entered_line = line;
250 }
251
252 static __inline void
253 pr_leave(struct pool *pp)
254 {
255
256 if (__predict_false(pp->pr_entered_file == NULL)) {
257 printf("pool %s not entered?\n", pp->pr_wchan);
258 panic("pr_leave");
259 }
260
261 pp->pr_entered_file = NULL;
262 pp->pr_entered_line = 0;
263 }
264
265 static __inline void
266 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...))
267 {
268
269 if (pp->pr_entered_file != NULL)
270 (*pr)("\n\tcurrently entered from file %s line %ld\n",
271 pp->pr_entered_file, pp->pr_entered_line);
272 }
273 #else
274 #define pr_log(pp, v, action, file, line)
275 #define pr_printlog(pp, pi, pr)
276 #define pr_enter(pp, file, line)
277 #define pr_leave(pp)
278 #define pr_enter_check(pp, pr)
279 #endif /* POOL_DIAGNOSTIC */
280
281 static __inline int
282 phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
283 {
284 if (a->ph_page < b->ph_page)
285 return (-1);
286 else if (a->ph_page > b->ph_page)
287 return (1);
288 else
289 return (0);
290 }
291
292 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
293 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
294
295 /*
296 * Return the pool page header based on page address.
297 */
298 static __inline struct pool_item_header *
299 pr_find_pagehead(struct pool *pp, caddr_t page)
300 {
301 struct pool_item_header *ph, tmp;
302
303 if ((pp->pr_roflags & PR_PHINPAGE) != 0)
304 return ((struct pool_item_header *)(page + pp->pr_phoffset));
305
306 tmp.ph_page = page;
307 ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
308 return ph;
309 }
310
311 /*
312 * Remove a page from the pool.
313 */
314 static __inline void
315 pr_rmpage(struct pool *pp, struct pool_item_header *ph,
316 struct pool_pagelist *pq)
317 {
318 int s;
319
320 LOCK_ASSERT(!simple_lock_held(&pp->pr_slock) || pq != NULL);
321
322 /*
323 * If the page was idle, decrement the idle page count.
324 */
325 if (ph->ph_nmissing == 0) {
326 #ifdef DIAGNOSTIC
327 if (pp->pr_nidle == 0)
328 panic("pr_rmpage: nidle inconsistent");
329 if (pp->pr_nitems < pp->pr_itemsperpage)
330 panic("pr_rmpage: nitems inconsistent");
331 #endif
332 pp->pr_nidle--;
333 }
334
335 pp->pr_nitems -= pp->pr_itemsperpage;
336
337 /*
338 * Unlink a page from the pool and release it (or queue it for release).
339 */
340 LIST_REMOVE(ph, ph_pagelist);
341 if ((pp->pr_roflags & PR_PHINPAGE) == 0)
342 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
343 if (pq) {
344 LIST_INSERT_HEAD(pq, ph, ph_pagelist);
345 } else {
346 pool_allocator_free(pp, ph->ph_page);
347 if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
348 s = splvm();
349 pool_put(&phpool, ph);
350 splx(s);
351 }
352 }
353 pp->pr_npages--;
354 pp->pr_npagefree++;
355
356 pool_update_curpage(pp);
357 }
358
359 /*
360 * Initialize the given pool resource structure.
361 *
362 * We export this routine to allow other kernel parts to declare
363 * static pools that must be initialized before malloc() is available.
364 */
365 void
366 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
367 const char *wchan, struct pool_allocator *palloc)
368 {
369 int off, slack;
370 size_t trysize, phsize;
371 int s;
372
373 #ifdef POOL_DIAGNOSTIC
374 /*
375 * Always log if POOL_DIAGNOSTIC is defined.
376 */
377 if (pool_logsize != 0)
378 flags |= PR_LOGGING;
379 #endif
380
381 #ifdef POOL_SUBPAGE
382 /*
383 * XXX We don't provide a real `nointr' back-end
384 * yet; all sub-pages come from a kmem back-end.
385 * Maybe some day...
386 */
387 if (palloc == NULL) {
388 extern struct pool_allocator pool_allocator_kmem_subpage;
389 palloc = &pool_allocator_kmem_subpage;
390 }
391 /*
392 * We'll assume any user-specified back-end allocator
393 * will deal with sub-pages, or simply doesn't care.
394 */
395 #else
396 if (palloc == NULL)
397 palloc = &pool_allocator_kmem;
398 #endif /* POOL_SUBPAGE */
399 if ((palloc->pa_flags & PA_INITIALIZED) == 0) {
400 if (palloc->pa_pagesz == 0) {
401 #ifdef POOL_SUBPAGE
402 if (palloc == &pool_allocator_kmem)
403 palloc->pa_pagesz = PAGE_SIZE;
404 else
405 palloc->pa_pagesz = POOL_SUBPAGE;
406 #else
407 palloc->pa_pagesz = PAGE_SIZE;
408 #endif /* POOL_SUBPAGE */
409 }
410
411 TAILQ_INIT(&palloc->pa_list);
412
413 simple_lock_init(&palloc->pa_slock);
414 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
415 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
416 palloc->pa_flags |= PA_INITIALIZED;
417 }
418
419 if (align == 0)
420 align = ALIGN(1);
421
422 if (size < sizeof(struct pool_item))
423 size = sizeof(struct pool_item);
424
425 size = roundup(size, align);
426 #ifdef DIAGNOSTIC
427 if (size > palloc->pa_pagesz)
428 panic("pool_init: pool item size (%lu) too large",
429 (u_long)size);
430 #endif
431
432 /*
433 * Initialize the pool structure.
434 */
435 LIST_INIT(&pp->pr_emptypages);
436 LIST_INIT(&pp->pr_fullpages);
437 LIST_INIT(&pp->pr_partpages);
438 TAILQ_INIT(&pp->pr_cachelist);
439 pp->pr_curpage = NULL;
440 pp->pr_npages = 0;
441 pp->pr_minitems = 0;
442 pp->pr_minpages = 0;
443 pp->pr_maxpages = UINT_MAX;
444 pp->pr_roflags = flags;
445 pp->pr_flags = 0;
446 pp->pr_size = size;
447 pp->pr_align = align;
448 pp->pr_wchan = wchan;
449 pp->pr_alloc = palloc;
450 pp->pr_nitems = 0;
451 pp->pr_nout = 0;
452 pp->pr_hardlimit = UINT_MAX;
453 pp->pr_hardlimit_warning = NULL;
454 pp->pr_hardlimit_ratecap.tv_sec = 0;
455 pp->pr_hardlimit_ratecap.tv_usec = 0;
456 pp->pr_hardlimit_warning_last.tv_sec = 0;
457 pp->pr_hardlimit_warning_last.tv_usec = 0;
458 pp->pr_drain_hook = NULL;
459 pp->pr_drain_hook_arg = NULL;
460
461 /*
462 * Decide whether to put the page header off-page, to avoid
463 * wasting too large a part of the page, or because the item is too big.
464 * Off-page page headers are kept in a splay tree, so we can match
465 * a returned item with its header based on the page address.
466 * We use 1/16 of the page size and about 8 times the item
467 * size as the threshold (XXX: tune).
468 *
469 * However, we'll put the header into the page if we can put
470 * it without wasting any items.
471 *
472 * Silently enforce `0 <= ioff < align'.
473 */
474 pp->pr_itemoffset = ioff %= align;
475 /* See the comment below about reserved bytes. */
476 trysize = palloc->pa_pagesz - ((align - ioff) % align);
477 phsize = ALIGN(sizeof(struct pool_item_header));
478 if (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) ||
479 trysize / pp->pr_size == (trysize - phsize) / pp->pr_size) {
480 /* Use the end of the page for the page header */
481 pp->pr_roflags |= PR_PHINPAGE;
482 pp->pr_phoffset = off = palloc->pa_pagesz - phsize;
483 } else {
484 /* The page header will be taken from our page header pool */
485 pp->pr_phoffset = 0;
486 off = palloc->pa_pagesz;
487 SPLAY_INIT(&pp->pr_phtree);
488 }
489
490 /*
491 * Alignment is to take place at `ioff' within the item. This means
492 * we must reserve up to `align - 1' bytes on the page to allow
493 * appropriate positioning of each item.
494 */
495 pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
496 KASSERT(pp->pr_itemsperpage != 0);
497
498 /*
499 * Use the slack between the chunks and the page header
500 * for "cache coloring".
501 */
502 slack = off - pp->pr_itemsperpage * pp->pr_size;
503 pp->pr_maxcolor = (slack / align) * align;
504 pp->pr_curcolor = 0;
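	/*
	 * Worked example with made-up numbers, added for exposition: on a
	 * hypothetical 4096-byte page with align = 8, ioff = 0,
	 * pr_size = 200 and phsize = 64, the item is smaller than
	 * MIN(4096 / 16, 64 << 3) = 256, so the header lives in the page:
	 * off = 4096 - 64 = 4032, pr_itemsperpage = 4032 / 200 = 20,
	 * slack = 4032 - 20 * 200 = 32, and pr_maxcolor = (32 / 8) * 8 = 32.
	 */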
505
506 pp->pr_nget = 0;
507 pp->pr_nfail = 0;
508 pp->pr_nput = 0;
509 pp->pr_npagealloc = 0;
510 pp->pr_npagefree = 0;
511 pp->pr_hiwat = 0;
512 pp->pr_nidle = 0;
513
514 #ifdef POOL_DIAGNOSTIC
515 if (flags & PR_LOGGING) {
516 if (kmem_map == NULL ||
517 (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
518 M_TEMP, M_NOWAIT)) == NULL)
519 pp->pr_roflags &= ~PR_LOGGING;
520 pp->pr_curlogentry = 0;
521 pp->pr_logsize = pool_logsize;
522 }
523 #endif
524
525 pp->pr_entered_file = NULL;
526 pp->pr_entered_line = 0;
527
528 simple_lock_init(&pp->pr_slock);
529
530 /*
531 * Initialize private page header pool and cache magazine pool if we
532 * haven't done so yet.
533 * XXX LOCKING.
534 */
535 if (phpool.pr_size == 0) {
536 #ifdef POOL_SUBPAGE
537 pool_init(&phpool, sizeof(struct pool_item_header), 0, 0, 0,
538 "phpool", &pool_allocator_kmem);
539 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0,
540 PR_RECURSIVE, "psppool", &pool_allocator_kmem);
541 #else
542 pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
543 0, "phpool", NULL);
544 #endif
545 pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
546 0, "pcgpool", NULL);
547 }
548
549 /* Insert into the list of all pools. */
550 simple_lock(&pool_head_slock);
551 TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
552 simple_unlock(&pool_head_slock);
553
554 /* Insert this into the list of pools using this allocator. */
555 s = splvm();
556 simple_lock(&palloc->pa_slock);
557 TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
558 simple_unlock(&palloc->pa_slock);
559 splx(s);
560 }
561
562 /*
563 * De-commission a pool resource.
564 */
565 void
566 pool_destroy(struct pool *pp)
567 {
568 struct pool_item_header *ph;
569 struct pool_cache *pc;
570 int s;
571
572 /* Locking order: pool_allocator -> pool */
573 s = splvm();
574 simple_lock(&pp->pr_alloc->pa_slock);
575 TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
576 simple_unlock(&pp->pr_alloc->pa_slock);
577 splx(s);
578
579 /* Destroy all caches for this pool. */
580 while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL)
581 pool_cache_destroy(pc);
582
583 #ifdef DIAGNOSTIC
584 if (pp->pr_nout != 0) {
585 pr_printlog(pp, NULL, printf);
586 panic("pool_destroy: pool busy: still out: %u",
587 pp->pr_nout);
588 }
589 #endif
590
591 /* Remove all pages */
592 while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
593 pr_rmpage(pp, ph, NULL);
594 KASSERT(LIST_EMPTY(&pp->pr_fullpages));
595 KASSERT(LIST_EMPTY(&pp->pr_partpages));
596
597 /* Remove from global pool list */
598 simple_lock(&pool_head_slock);
599 TAILQ_REMOVE(&pool_head, pp, pr_poollist);
600 if (drainpp == pp) {
601 drainpp = NULL;
602 }
603 simple_unlock(&pool_head_slock);
604
605 #ifdef POOL_DIAGNOSTIC
606 if ((pp->pr_roflags & PR_LOGGING) != 0)
607 free(pp->pr_log, M_TEMP);
608 #endif
609 }
610
611 void
612 pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
613 {
614
615 /* XXX no locking -- must be used just after pool_init() */
616 #ifdef DIAGNOSTIC
617 if (pp->pr_drain_hook != NULL)
618 panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
619 #endif
620 pp->pr_drain_hook = fn;
621 pp->pr_drain_hook_arg = arg;
622 }
623
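/*
 * Illustrative sketch, not part of the original file: a subsystem that
 * keeps a private reserve of objects might register a drain hook so the
 * pool layer can ask for memory back under pressure.  foo_drain(),
 * foo_release_reserve() and foo_pool are hypothetical names.
 */
#if 0
static void
foo_drain(void *arg, int flags)
{

	/* Called with the pool unlocked; give back whatever we can spare. */
	foo_release_reserve();
}

/*
 * Registered once, right after pool_init():
 *	pool_set_drain_hook(&foo_pool, foo_drain, NULL);
 */
#endif	/* illustrative sketch */
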
624 static struct pool_item_header *
625 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
626 {
627 struct pool_item_header *ph;
628 int s;
629
630 LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);
631
632 if ((pp->pr_roflags & PR_PHINPAGE) != 0)
633 ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
634 else {
635 s = splvm();
636 ph = pool_get(&phpool, flags);
637 splx(s);
638 }
639
640 return (ph);
641 }
642
643 /*
644 * Grab an item from the pool; must be called at appropriate spl level
645 */
646 void *
647 #ifdef POOL_DIAGNOSTIC
648 _pool_get(struct pool *pp, int flags, const char *file, long line)
649 #else
650 pool_get(struct pool *pp, int flags)
651 #endif
652 {
653 struct pool_item *pi;
654 struct pool_item_header *ph;
655 void *v;
656
657 #ifdef DIAGNOSTIC
658 if (__predict_false(curlwp == NULL && doing_shutdown == 0 &&
659 (flags & PR_WAITOK) != 0))
660 panic("pool_get: %s: must have NOWAIT", pp->pr_wchan);
661
662 #ifdef LOCKDEBUG
663 if (flags & PR_WAITOK)
664 simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
665 #endif
666 #endif /* DIAGNOSTIC */
667
668 simple_lock(&pp->pr_slock);
669 pr_enter(pp, file, line);
670
671 startover:
672 /*
673 * Check to see if we've reached the hard limit. If we have,
674 * and we can wait, then wait until an item has been returned to
675 * the pool.
676 */
677 #ifdef DIAGNOSTIC
678 if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
679 pr_leave(pp);
680 simple_unlock(&pp->pr_slock);
681 panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
682 }
683 #endif
684 if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
685 if (pp->pr_drain_hook != NULL) {
686 /*
687 * Since the drain hook is going to free things
688 * back to the pool, unlock, call the hook, re-lock,
689 * and check the hardlimit condition again.
690 */
691 pr_leave(pp);
692 simple_unlock(&pp->pr_slock);
693 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
694 simple_lock(&pp->pr_slock);
695 pr_enter(pp, file, line);
696 if (pp->pr_nout < pp->pr_hardlimit)
697 goto startover;
698 }
699
700 if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
701 /*
702 * XXX: A warning isn't logged in this case. Should
703 * it be?
704 */
705 pp->pr_flags |= PR_WANTED;
706 pr_leave(pp);
707 ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
708 pr_enter(pp, file, line);
709 goto startover;
710 }
711
712 /*
713 * Log a message that the hard limit has been hit.
714 */
715 if (pp->pr_hardlimit_warning != NULL &&
716 ratecheck(&pp->pr_hardlimit_warning_last,
717 &pp->pr_hardlimit_ratecap))
718 log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);
719
720 pp->pr_nfail++;
721
722 pr_leave(pp);
723 simple_unlock(&pp->pr_slock);
724 return (NULL);
725 }
726
727 /*
728 * The convention we use is that if `curpage' is not NULL, then
729 * it points at a non-empty bucket. In particular, `curpage'
730 * never points at a page header which has PR_PHINPAGE set and
731 * has no items in its bucket.
732 */
733 if ((ph = pp->pr_curpage) == NULL) {
734 #ifdef DIAGNOSTIC
735 if (pp->pr_nitems != 0) {
736 simple_unlock(&pp->pr_slock);
737 printf("pool_get: %s: curpage NULL, nitems %u\n",
738 pp->pr_wchan, pp->pr_nitems);
739 panic("pool_get: nitems inconsistent");
740 }
741 #endif
742
743 /*
744 * Call the back-end page allocator for more memory.
745 * Release the pool lock, as the back-end page allocator
746 * may block.
747 */
748 pr_leave(pp);
749 simple_unlock(&pp->pr_slock);
750 v = pool_allocator_alloc(pp, flags);
751 if (__predict_true(v != NULL))
752 ph = pool_alloc_item_header(pp, v, flags);
753
754 if (__predict_false(v == NULL || ph == NULL)) {
755 if (v != NULL)
756 pool_allocator_free(pp, v);
757
758 simple_lock(&pp->pr_slock);
759 pr_enter(pp, file, line);
760
761 /*
762 * We were unable to allocate a page or item
763 * header, but we released the lock during
764 * allocation, so perhaps items were freed
765 * back to the pool. Check for this case.
766 */
767 if (pp->pr_curpage != NULL)
768 goto startover;
769
770 if ((flags & PR_WAITOK) == 0) {
771 pp->pr_nfail++;
772 pr_leave(pp);
773 simple_unlock(&pp->pr_slock);
774 return (NULL);
775 }
776
777 /*
778 * Wait for items to be returned to this pool.
779 *
780 * XXX: maybe we should wake up once a second and
781 * try again?
782 */
783 pp->pr_flags |= PR_WANTED;
784 /* PA_WANTED is already set on the allocator. */
785 pr_leave(pp);
786 ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
787 pr_enter(pp, file, line);
788 goto startover;
789 }
790
791 /* We have more memory; add it to the pool */
792 simple_lock(&pp->pr_slock);
793 pr_enter(pp, file, line);
794 pool_prime_page(pp, v, ph);
795 pp->pr_npagealloc++;
796
797 /* Start the allocation process over. */
798 goto startover;
799 }
800 if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
801 pr_leave(pp);
802 simple_unlock(&pp->pr_slock);
803 panic("pool_get: %s: page empty", pp->pr_wchan);
804 }
805 #ifdef DIAGNOSTIC
806 if (__predict_false(pp->pr_nitems == 0)) {
807 pr_leave(pp);
808 simple_unlock(&pp->pr_slock);
809 printf("pool_get: %s: items on itemlist, nitems %u\n",
810 pp->pr_wchan, pp->pr_nitems);
811 panic("pool_get: nitems inconsistent");
812 }
813 #endif
814
815 #ifdef POOL_DIAGNOSTIC
816 pr_log(pp, v, PRLOG_GET, file, line);
817 #endif
818
819 #ifdef DIAGNOSTIC
820 if (__predict_false(pi->pi_magic != PI_MAGIC)) {
821 pr_printlog(pp, pi, printf);
822 panic("pool_get(%s): free list modified: magic=%x; page %p;"
823 " item addr %p\n",
824 pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
825 }
826 #endif
827
828 /*
829 * Remove from item list.
830 */
831 TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
832 pp->pr_nitems--;
833 pp->pr_nout++;
834 if (ph->ph_nmissing == 0) {
835 #ifdef DIAGNOSTIC
836 if (__predict_false(pp->pr_nidle == 0))
837 panic("pool_get: nidle inconsistent");
838 #endif
839 pp->pr_nidle--;
840
841 /*
842 * This page was previously empty. Move it to the list of
843 * partially-full pages. This page is already curpage.
844 */
845 LIST_REMOVE(ph, ph_pagelist);
846 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
847 }
848 ph->ph_nmissing++;
849 if (TAILQ_EMPTY(&ph->ph_itemlist)) {
850 #ifdef DIAGNOSTIC
851 if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
852 pr_leave(pp);
853 simple_unlock(&pp->pr_slock);
854 panic("pool_get: %s: nmissing inconsistent",
855 pp->pr_wchan);
856 }
857 #endif
858 /*
859 * This page is now full. Move it to the full list
860 * and select a new current page.
861 */
862 LIST_REMOVE(ph, ph_pagelist);
863 LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
864 pool_update_curpage(pp);
865 }
866
867 pp->pr_nget++;
868
869 /*
870 * If we have a low water mark and we are now below that low
871 * water mark, add more items to the pool.
872 */
873 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
874 /*
875 * XXX: Should we log a warning? Should we set up a timeout
876 * to try again in a second or so? The latter could break
877 * a caller's assumptions about interrupt protection, etc.
878 */
879 }
880
881 pr_leave(pp);
882 simple_unlock(&pp->pr_slock);
883 return (v);
884 }
885
886 /*
887 * Internal version of pool_put(). Pool is already locked/entered.
888 */
889 static void
890 pool_do_put(struct pool *pp, void *v)
891 {
892 struct pool_item *pi = v;
893 struct pool_item_header *ph;
894 caddr_t page;
895 int s;
896
897 LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
898
899 page = (caddr_t)((u_long)v & pp->pr_alloc->pa_pagemask);
900
901 #ifdef DIAGNOSTIC
902 if (__predict_false(pp->pr_nout == 0)) {
903 printf("pool %s: putting with none out\n",
904 pp->pr_wchan);
905 panic("pool_put");
906 }
907 #endif
908
909 if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
910 pr_printlog(pp, NULL, printf);
911 panic("pool_put: %s: page header missing", pp->pr_wchan);
912 }
913
914 #ifdef LOCKDEBUG
915 /*
916 * Check if we're freeing a locked simple lock.
917 */
918 simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
919 #endif
920
921 /*
922 * Return to item list.
923 */
924 #ifdef DIAGNOSTIC
925 pi->pi_magic = PI_MAGIC;
926 #endif
927 #ifdef DEBUG
928 {
929 int i, *ip = v;
930
931 for (i = 0; i < pp->pr_size / sizeof(int); i++) {
932 *ip++ = PI_MAGIC;
933 }
934 }
935 #endif
936
937 TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
938 KDASSERT(ph->ph_nmissing != 0);
939 ph->ph_nmissing--;
940 pp->pr_nput++;
941 pp->pr_nitems++;
942 pp->pr_nout--;
943
944 /* Cancel "pool empty" condition if it exists */
945 if (pp->pr_curpage == NULL)
946 pp->pr_curpage = ph;
947
948 if (pp->pr_flags & PR_WANTED) {
949 pp->pr_flags &= ~PR_WANTED;
950 if (ph->ph_nmissing == 0)
951 pp->pr_nidle++;
952 wakeup((caddr_t)pp);
953 return;
954 }
955
956 /*
957 * If this page is now empty, do one of two things:
958 *
959 * (1) If we have more pages than the page high water mark,
960 * free the page back to the system. ONLY CONSIDER
961 * FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE
962 * CLAIM.
963 *
964 * (2) Otherwise, move the page to the empty page list.
965 *
966 * Either way, select a new current page (so we use a partially-full
967 * page if one is available).
968 */
969 if (ph->ph_nmissing == 0) {
970 pp->pr_nidle++;
971 if (pp->pr_npages > pp->pr_minpages &&
972 (pp->pr_npages > pp->pr_maxpages ||
973 (pp->pr_alloc->pa_flags & PA_WANT) != 0)) {
974 simple_unlock(&pp->pr_slock);
975 pr_rmpage(pp, ph, NULL);
976 simple_lock(&pp->pr_slock);
977 } else {
978 LIST_REMOVE(ph, ph_pagelist);
979 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
980
981 /*
982 * Update the timestamp on the page. A page must
983 * be idle for some period of time before it can
984 * be reclaimed by the pagedaemon. This minimizes
985 * ping-pong'ing for memory.
986 */
987 s = splclock();
988 ph->ph_time = mono_time;
989 splx(s);
990 }
991 pool_update_curpage(pp);
992 }
993
994 /*
995 * If the page was previously completely full, move it to the
996 * partially-full list and make it the current page. The next
997 * allocation will get the item from this page, instead of
998 * further fragmenting the pool.
999 */
1000 else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
1001 LIST_REMOVE(ph, ph_pagelist);
1002 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
1003 pp->pr_curpage = ph;
1004 }
1005 }
1006
1007 /*
1008 * Return resource to the pool; must be called at appropriate spl level
1009 */
1010 #ifdef POOL_DIAGNOSTIC
1011 void
1012 _pool_put(struct pool *pp, void *v, const char *file, long line)
1013 {
1014
1015 simple_lock(&pp->pr_slock);
1016 pr_enter(pp, file, line);
1017
1018 pr_log(pp, v, PRLOG_PUT, file, line);
1019
1020 pool_do_put(pp, v);
1021
1022 pr_leave(pp);
1023 simple_unlock(&pp->pr_slock);
1024 }
1025 #undef pool_put
1026 #endif /* POOL_DIAGNOSTIC */
1027
1028 void
1029 pool_put(struct pool *pp, void *v)
1030 {
1031
1032 simple_lock(&pp->pr_slock);
1033
1034 pool_do_put(pp, v);
1035
1036 simple_unlock(&pp->pr_slock);
1037 }
1038
1039 #ifdef POOL_DIAGNOSTIC
1040 #define pool_put(h, v) _pool_put((h), (v), __FILE__, __LINE__)
1041 #endif
1042
1043 /*
1044 * Add N items to the pool.
1045 */
1046 int
1047 pool_prime(struct pool *pp, int n)
1048 {
1049 struct pool_item_header *ph = NULL;
1050 caddr_t cp;
1051 int newpages;
1052
1053 simple_lock(&pp->pr_slock);
1054
1055 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1056
1057 while (newpages-- > 0) {
1058 simple_unlock(&pp->pr_slock);
1059 cp = pool_allocator_alloc(pp, PR_NOWAIT);
1060 if (__predict_true(cp != NULL))
1061 ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
1062
1063 if (__predict_false(cp == NULL || ph == NULL)) {
1064 if (cp != NULL)
1065 pool_allocator_free(pp, cp);
1066 simple_lock(&pp->pr_slock);
1067 break;
1068 }
1069
1070 simple_lock(&pp->pr_slock);
1071 pool_prime_page(pp, cp, ph);
1072 pp->pr_npagealloc++;
1073 pp->pr_minpages++;
1074 }
1075
1076 if (pp->pr_minpages >= pp->pr_maxpages)
1077 pp->pr_maxpages = pp->pr_minpages + 1; /* XXX */
1078
1079 simple_unlock(&pp->pr_slock);
1080 return (0);
1081 }
1082
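/*
 * Illustrative note, not part of the original file: a driver that must
 * allocate with PR_NOWAIT from interrupt context would typically prime
 * the pool at attach time, e.g. pool_prime(&foo_pool, 32) with a
 * hypothetical foo_pool, so enough pages are already resident before
 * interrupts start arriving.
 */
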
1083 /*
1084 * Add a page worth of items to the pool.
1085 *
1086 * Note, we must be called with the pool descriptor LOCKED.
1087 */
1088 static void
1089 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
1090 {
1091 struct pool_item *pi;
1092 caddr_t cp = storage;
1093 unsigned int align = pp->pr_align;
1094 unsigned int ioff = pp->pr_itemoffset;
1095 int n;
1096 int s;
1097
1098 LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
1099
1100 #ifdef DIAGNOSTIC
1101 if (((u_long)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
1102 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
1103 #endif
1104
1105 /*
1106 * Insert page header.
1107 */
1108 LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
1109 TAILQ_INIT(&ph->ph_itemlist);
1110 ph->ph_page = storage;
1111 ph->ph_nmissing = 0;
1112 s = splclock();
1113 ph->ph_time = mono_time;
1114 splx(s);
1115 if ((pp->pr_roflags & PR_PHINPAGE) == 0)
1116 SPLAY_INSERT(phtree, &pp->pr_phtree, ph);
1117
1118 pp->pr_nidle++;
1119
1120 /*
1121 * Color this page.
1122 */
1123 cp = (caddr_t)(cp + pp->pr_curcolor);
1124 if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
1125 pp->pr_curcolor = 0;
1126
1127 /*
1128 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
1129 */
1130 if (ioff != 0)
1131 cp = (caddr_t)(cp + (align - ioff));
1132
1133 /*
1134 * Insert remaining chunks on the bucket list.
1135 */
1136 n = pp->pr_itemsperpage;
1137 pp->pr_nitems += n;
1138
1139 while (n--) {
1140 pi = (struct pool_item *)cp;
1141
1142 KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);
1143
1144 /* Insert on page list */
1145 TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
1146 #ifdef DIAGNOSTIC
1147 pi->pi_magic = PI_MAGIC;
1148 #endif
1149 cp = (caddr_t)(cp + pp->pr_size);
1150 }
1151
1152 /*
1153 * If the pool was depleted, point at the new page.
1154 */
1155 if (pp->pr_curpage == NULL)
1156 pp->pr_curpage = ph;
1157
1158 if (++pp->pr_npages > pp->pr_hiwat)
1159 pp->pr_hiwat = pp->pr_npages;
1160 }
1161
1162 /*
1162 * Used by pool_get() when nitems drops below the low water mark; this
1163 * brings pr_nitems back up to the low water mark.
1165 *
1166 * Note 1, we never wait for memory here, we let the caller decide what to do.
1167 *
1168 * Note 2, we must be called with the pool already locked, and we return
1169 * with it locked.
1170 */
1171 static int
1172 pool_catchup(struct pool *pp)
1173 {
1174 struct pool_item_header *ph = NULL;
1175 caddr_t cp;
1176 int error = 0;
1177
1178 while (POOL_NEEDS_CATCHUP(pp)) {
1179 /*
1180 * Call the page back-end allocator for more memory.
1181 *
1182 * XXX: We never wait, so should we bother unlocking
1183 * the pool descriptor?
1184 */
1185 simple_unlock(&pp->pr_slock);
1186 cp = pool_allocator_alloc(pp, PR_NOWAIT);
1187 if (__predict_true(cp != NULL))
1188 ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
1189 if (__predict_false(cp == NULL || ph == NULL)) {
1190 if (cp != NULL)
1191 pool_allocator_free(pp, cp);
1192 error = ENOMEM;
1193 simple_lock(&pp->pr_slock);
1194 break;
1195 }
1196 simple_lock(&pp->pr_slock);
1197 pool_prime_page(pp, cp, ph);
1198 pp->pr_npagealloc++;
1199 }
1200
1201 return (error);
1202 }
1203
1204 static void
1205 pool_update_curpage(struct pool *pp)
1206 {
1207
1208 pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
1209 if (pp->pr_curpage == NULL) {
1210 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
1211 }
1212 }
1213
1214 void
1215 pool_setlowat(struct pool *pp, int n)
1216 {
1217
1218 simple_lock(&pp->pr_slock);
1219
1220 pp->pr_minitems = n;
1221 pp->pr_minpages = (n == 0)
1222 ? 0
1223 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1224
1225 /* Make sure we're caught up with the newly-set low water mark. */
1226 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
1227 /*
1228 * XXX: Should we log a warning? Should we set up a timeout
1229 * to try again in a second or so? The latter could break
1230 * a caller's assumptions about interrupt protection, etc.
1231 */
1232 }
1233
1234 simple_unlock(&pp->pr_slock);
1235 }
1236
1237 void
1238 pool_sethiwat(struct pool *pp, int n)
1239 {
1240
1241 simple_lock(&pp->pr_slock);
1242
1243 pp->pr_maxpages = (n == 0)
1244 ? 0
1245 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1246
1247 simple_unlock(&pp->pr_slock);
1248 }
1249
1250 void
1251 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
1252 {
1253
1254 simple_lock(&pp->pr_slock);
1255
1256 pp->pr_hardlimit = n;
1257 pp->pr_hardlimit_warning = warnmess;
1258 pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1259 pp->pr_hardlimit_warning_last.tv_sec = 0;
1260 pp->pr_hardlimit_warning_last.tv_usec = 0;
1261
1262 /*
1263 * In-line version of pool_sethiwat(), because we don't want to
1264 * release the lock.
1265 */
1266 pp->pr_maxpages = (n == 0)
1267 ? 0
1268 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1269
1270 simple_unlock(&pp->pr_slock);
1271 }
1272
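/*
 * Illustrative sketch, not part of the original file: tuning a pool after
 * initialization.  foo_pool and the numbers are hypothetical.
 */
#if 0
	pool_setlowat(&foo_pool, 16);	/* try to keep >= 16 items on hand */
	pool_sethiwat(&foo_pool, 1024);	/* trim idle pages beyond this many items */
	pool_sethardlimit(&foo_pool, 2048,	/* refuse/block gets past 2048 */
	    "WARNING: foo_pool limit reached", 60);	/* warn <= once a minute */
#endif	/* illustrative sketch */
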
1273 /*
1274 * Release all complete pages that have not been used recently.
1275 */
1276 int
1277 #ifdef POOL_DIAGNOSTIC
1278 _pool_reclaim(struct pool *pp, const char *file, long line)
1279 #else
1280 pool_reclaim(struct pool *pp)
1281 #endif
1282 {
1283 struct pool_item_header *ph, *phnext;
1284 struct pool_cache *pc;
1285 struct timeval curtime;
1286 struct pool_pagelist pq;
1287 struct timeval diff;
1288 int s;
1289
1290 if (pp->pr_drain_hook != NULL) {
1291 /*
1292 * The drain hook must be called with the pool unlocked.
1293 */
1294 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT);
1295 }
1296
1297 if (simple_lock_try(&pp->pr_slock) == 0)
1298 return (0);
1299 pr_enter(pp, file, line);
1300
1301 LIST_INIT(&pq);
1302
1303 /*
1304 * Reclaim items from the pool's caches.
1305 */
1306 TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
1307 pool_cache_reclaim(pc);
1308
1309 s = splclock();
1310 curtime = mono_time;
1311 splx(s);
1312
1313 for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1314 phnext = LIST_NEXT(ph, ph_pagelist);
1315
1316 /* Check our minimum page claim */
1317 if (pp->pr_npages <= pp->pr_minpages)
1318 break;
1319
1320 KASSERT(ph->ph_nmissing == 0);
1321 timersub(&curtime, &ph->ph_time, &diff);
1322 if (diff.tv_sec < pool_inactive_time)
1323 continue;
1324
1325 /*
1326 * If freeing this page would put us below
1327 * the low water mark, stop now.
1328 */
1329 if ((pp->pr_nitems - pp->pr_itemsperpage) <
1330 pp->pr_minitems)
1331 break;
1332
1333 pr_rmpage(pp, ph, &pq);
1334 }
1335
1336 pr_leave(pp);
1337 simple_unlock(&pp->pr_slock);
1338 if (LIST_EMPTY(&pq))
1339 return (0);
1340
1341 while ((ph = LIST_FIRST(&pq)) != NULL) {
1342 LIST_REMOVE(ph, ph_pagelist);
1343 pool_allocator_free(pp, ph->ph_page);
1344 if (pp->pr_roflags & PR_PHINPAGE) {
1345 continue;
1346 }
1347 s = splvm();
1348 pool_put(&phpool, ph);
1349 splx(s);
1350 }
1351
1352 return (1);
1353 }
1354
1355 /*
1356 * Drain pools, one at a time.
1357 *
1358 * Note, we must never be called from an interrupt context.
1359 */
1360 void
1361 pool_drain(void *arg)
1362 {
1363 struct pool *pp;
1364 int s;
1365
1366 pp = NULL;
1367 s = splvm();
1368 simple_lock(&pool_head_slock);
1369 if (drainpp == NULL) {
1370 drainpp = TAILQ_FIRST(&pool_head);
1371 }
1372 if (drainpp) {
1373 pp = drainpp;
1374 drainpp = TAILQ_NEXT(pp, pr_poollist);
1375 }
1376 simple_unlock(&pool_head_slock);
1377 pool_reclaim(pp);
1378 splx(s);
1379 }
1380
1381 /*
1382 * Diagnostic helpers.
1383 */
1384 void
1385 pool_print(struct pool *pp, const char *modif)
1386 {
1387 int s;
1388
1389 s = splvm();
1390 if (simple_lock_try(&pp->pr_slock) == 0) {
1391 printf("pool %s is locked; try again later\n",
1392 pp->pr_wchan);
1393 splx(s);
1394 return;
1395 }
1396 pool_print1(pp, modif, printf);
1397 simple_unlock(&pp->pr_slock);
1398 splx(s);
1399 }
1400
1401 void
1402 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
1403 {
1404 int didlock = 0;
1405
1406 if (pp == NULL) {
1407 (*pr)("Must specify a pool to print.\n");
1408 return;
1409 }
1410
1411 /*
1412 * Called from DDB; interrupts should be blocked, and all
1413 * other processors should be paused. We can skip locking
1414 * the pool in this case.
1415 *
1416 * We do a simple_lock_try() just to print the lock
1417 * status, however.
1418 */
1419
1420 if (simple_lock_try(&pp->pr_slock) == 0)
1421 (*pr)("WARNING: pool %s is locked\n", pp->pr_wchan);
1422 else
1423 didlock = 1;
1424
1425 pool_print1(pp, modif, pr);
1426
1427 if (didlock)
1428 simple_unlock(&pp->pr_slock);
1429 }
1430
1431 static void
1432 pool_print_pagelist(struct pool_pagelist *pl, void (*pr)(const char *, ...))
1433 {
1434 struct pool_item_header *ph;
1435 #ifdef DIAGNOSTIC
1436 struct pool_item *pi;
1437 #endif
1438
1439 LIST_FOREACH(ph, pl, ph_pagelist) {
1440 (*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
1441 ph->ph_page, ph->ph_nmissing,
1442 (u_long)ph->ph_time.tv_sec,
1443 (u_long)ph->ph_time.tv_usec);
1444 #ifdef DIAGNOSTIC
1445 TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
1446 if (pi->pi_magic != PI_MAGIC) {
1447 (*pr)("\t\t\titem %p, magic 0x%x\n",
1448 pi, pi->pi_magic);
1449 }
1450 }
1451 #endif
1452 }
1453 }
1454
1455 static void
1456 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
1457 {
1458 struct pool_item_header *ph;
1459 struct pool_cache *pc;
1460 struct pool_cache_group *pcg;
1461 int i, print_log = 0, print_pagelist = 0, print_cache = 0;
1462 char c;
1463
1464 while ((c = *modif++) != '\0') {
1465 if (c == 'l')
1466 print_log = 1;
1467 if (c == 'p')
1468 print_pagelist = 1;
1469 if (c == 'c')
1470 print_cache = 1;
1471 }
1472
1473 (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
1474 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
1475 pp->pr_roflags);
1476 (*pr)("\talloc %p\n", pp->pr_alloc);
1477 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1478 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1479 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1480 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1481
1482 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1483 pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1484 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1485 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1486
1487 if (print_pagelist == 0)
1488 goto skip_pagelist;
1489
1490 if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
1491 (*pr)("\n\tempty page list:\n");
1492 pool_print_pagelist(&pp->pr_emptypages, pr);
1493 if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
1494 (*pr)("\n\tfull page list:\n");
1495 pool_print_pagelist(&pp->pr_fullpages, pr);
1496 if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
1497 (*pr)("\n\tpartial-page list:\n");
1498 pool_print_pagelist(&pp->pr_partpages, pr);
1499
1500 if (pp->pr_curpage == NULL)
1501 (*pr)("\tno current page\n");
1502 else
1503 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1504
1505 skip_pagelist:
1506 if (print_log == 0)
1507 goto skip_log;
1508
1509 (*pr)("\n");
1510 if ((pp->pr_roflags & PR_LOGGING) == 0)
1511 (*pr)("\tno log\n");
1512 else
1513 pr_printlog(pp, NULL, pr);
1514
1515 skip_log:
1516 if (print_cache == 0)
1517 goto skip_cache;
1518
1519 TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist) {
1520 (*pr)("\tcache %p: allocfrom %p freeto %p\n", pc,
1521 pc->pc_allocfrom, pc->pc_freeto);
1522 (*pr)("\t hits %lu misses %lu ngroups %lu nitems %lu\n",
1523 pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
1524 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
1525 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);
1526 for (i = 0; i < PCG_NOBJECTS; i++) {
1527 if (pcg->pcg_objects[i].pcgo_pa !=
1528 POOL_PADDR_INVALID) {
1529 (*pr)("\t\t\t%p, 0x%llx\n",
1530 pcg->pcg_objects[i].pcgo_va,
1531 (unsigned long long)
1532 pcg->pcg_objects[i].pcgo_pa);
1533 } else {
1534 (*pr)("\t\t\t%p\n",
1535 pcg->pcg_objects[i].pcgo_va);
1536 }
1537 }
1538 }
1539 }
1540
1541 skip_cache:
1542 pr_enter_check(pp, pr);
1543 }
1544
1545 static int
1546 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
1547 {
1548 struct pool_item *pi;
1549 caddr_t page;
1550 int n;
1551
1552 page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
1553 if (page != ph->ph_page &&
1554 (pp->pr_roflags & PR_PHINPAGE) != 0) {
1555 if (label != NULL)
1556 printf("%s: ", label);
1557 printf("pool(%p:%s): page inconsistency: page %p;"
1558 " at page head addr %p (p %p)\n", pp,
1559 pp->pr_wchan, ph->ph_page,
1560 ph, page);
1561 return 1;
1562 }
1563
1564 for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
1565 pi != NULL;
1566 pi = TAILQ_NEXT(pi,pi_list), n++) {
1567
1568 #ifdef DIAGNOSTIC
1569 if (pi->pi_magic != PI_MAGIC) {
1570 if (label != NULL)
1571 printf("%s: ", label);
1572 printf("pool(%s): free list modified: magic=%x;"
1573 " page %p; item ordinal %d;"
1574 " addr %p (p %p)\n",
1575 pp->pr_wchan, pi->pi_magic, ph->ph_page,
1576 n, pi, page);
1577 panic("pool");
1578 }
1579 #endif
1580 page =
1581 (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
1582 if (page == ph->ph_page)
1583 continue;
1584
1585 if (label != NULL)
1586 printf("%s: ", label);
1587 printf("pool(%p:%s): page inconsistency: page %p;"
1588 " item ordinal %d; addr %p (p %p)\n", pp,
1589 pp->pr_wchan, ph->ph_page,
1590 n, pi, page);
1591 return 1;
1592 }
1593 return 0;
1594 }
1595
1596
1597 int
1598 pool_chk(struct pool *pp, const char *label)
1599 {
1600 struct pool_item_header *ph;
1601 int r = 0;
1602
1603 simple_lock(&pp->pr_slock);
1604 LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) {
1605 r = pool_chk_page(pp, label, ph);
1606 if (r) {
1607 goto out;
1608 }
1609 }
1610 LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
1611 r = pool_chk_page(pp, label, ph);
1612 if (r) {
1613 goto out;
1614 }
1615 }
1616 LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
1617 r = pool_chk_page(pp, label, ph);
1618 if (r) {
1619 goto out;
1620 }
1621 }
1622
1623 out:
1624 simple_unlock(&pp->pr_slock);
1625 return (r);
1626 }
1627
1628 /*
1629 * pool_cache_init:
1630 *
1631 * Initialize a pool cache.
1632 *
1633 * NOTE: If the pool must be protected from interrupts, we expect
1634 * to be called at the appropriate interrupt priority level.
1635 */
1636 void
1637 pool_cache_init(struct pool_cache *pc, struct pool *pp,
1638 int (*ctor)(void *, void *, int),
1639 void (*dtor)(void *, void *),
1640 void *arg)
1641 {
1642
1643 TAILQ_INIT(&pc->pc_grouplist);
1644 simple_lock_init(&pc->pc_slock);
1645
1646 pc->pc_allocfrom = NULL;
1647 pc->pc_freeto = NULL;
1648 pc->pc_pool = pp;
1649
1650 pc->pc_ctor = ctor;
1651 pc->pc_dtor = dtor;
1652 pc->pc_arg = arg;
1653
1654 pc->pc_hits = 0;
1655 pc->pc_misses = 0;
1656
1657 pc->pc_ngroups = 0;
1658
1659 pc->pc_nitems = 0;
1660
1661 simple_lock(&pp->pr_slock);
1662 TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist);
1663 simple_unlock(&pp->pr_slock);
1664 }
1665
1666 /*
1667 * pool_cache_destroy:
1668 *
1669 * Destroy a pool cache.
1670 */
1671 void
1672 pool_cache_destroy(struct pool_cache *pc)
1673 {
1674 struct pool *pp = pc->pc_pool;
1675
1676 /* First, invalidate the entire cache. */
1677 pool_cache_invalidate(pc);
1678
1679 /* ...and remove it from the pool's cache list. */
1680 simple_lock(&pp->pr_slock);
1681 TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist);
1682 simple_unlock(&pp->pr_slock);
1683 }
1684
1685 static __inline void *
1686 pcg_get(struct pool_cache_group *pcg, paddr_t *pap)
1687 {
1688 void *object;
1689 u_int idx;
1690
1691 KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
1692 KASSERT(pcg->pcg_avail != 0);
1693 idx = --pcg->pcg_avail;
1694
1695 KASSERT(pcg->pcg_objects[idx].pcgo_va != NULL);
1696 object = pcg->pcg_objects[idx].pcgo_va;
1697 if (pap != NULL)
1698 *pap = pcg->pcg_objects[idx].pcgo_pa;
1699 pcg->pcg_objects[idx].pcgo_va = NULL;
1700
1701 return (object);
1702 }
1703
1704 static __inline void
1705 pcg_put(struct pool_cache_group *pcg, void *object, paddr_t pa)
1706 {
1707 u_int idx;
1708
1709 KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
1710 idx = pcg->pcg_avail++;
1711
1712 KASSERT(pcg->pcg_objects[idx].pcgo_va == NULL);
1713 pcg->pcg_objects[idx].pcgo_va = object;
1714 pcg->pcg_objects[idx].pcgo_pa = pa;
1715 }
1716
1717 /*
1718 * pool_cache_get{,_paddr}:
1719 *
1720 * Get an object from a pool cache (optionally returning
1721 * the physical address of the object).
1722 */
1723 void *
1724 pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap)
1725 {
1726 struct pool_cache_group *pcg;
1727 void *object;
1728
1729 #ifdef LOCKDEBUG
1730 if (flags & PR_WAITOK)
1731 simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
1732 #endif
1733
1734 simple_lock(&pc->pc_slock);
1735
1736 if ((pcg = pc->pc_allocfrom) == NULL) {
1737 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
1738 if (pcg->pcg_avail != 0) {
1739 pc->pc_allocfrom = pcg;
1740 goto have_group;
1741 }
1742 }
1743
1744 /*
1745 * No groups with any available objects. Allocate
1746 * a new object, construct it, and return it to
1747 * the caller. We will allocate a group, if necessary,
1748 * when the object is freed back to the cache.
1749 */
1750 pc->pc_misses++;
1751 simple_unlock(&pc->pc_slock);
1752 object = pool_get(pc->pc_pool, flags);
1753 if (object != NULL && pc->pc_ctor != NULL) {
1754 if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
1755 pool_put(pc->pc_pool, object);
1756 return (NULL);
1757 }
1758 }
1759 if (object != NULL && pap != NULL) {
1760 #ifdef POOL_VTOPHYS
1761 *pap = POOL_VTOPHYS(object);
1762 #else
1763 *pap = POOL_PADDR_INVALID;
1764 #endif
1765 }
1766 return (object);
1767 }
1768
1769 have_group:
1770 pc->pc_hits++;
1771 pc->pc_nitems--;
1772 object = pcg_get(pcg, pap);
1773
1774 if (pcg->pcg_avail == 0)
1775 pc->pc_allocfrom = NULL;
1776
1777 simple_unlock(&pc->pc_slock);
1778
1779 return (object);
1780 }
1781
1782 /*
1783 * pool_cache_put{,_paddr}:
1784 *
1785 * Put an object back to the pool cache (optionally caching the
1786 * physical address of the object).
1787 */
1788 void
1789 pool_cache_put_paddr(struct pool_cache *pc, void *object, paddr_t pa)
1790 {
1791 struct pool_cache_group *pcg;
1792 int s;
1793
1794 simple_lock(&pc->pc_slock);
1795
1796 if ((pcg = pc->pc_freeto) == NULL) {
1797 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
1798 if (pcg->pcg_avail != PCG_NOBJECTS) {
1799 pc->pc_freeto = pcg;
1800 goto have_group;
1801 }
1802 }
1803
1804 /*
1805 * No empty groups to free the object to. Attempt to
1806 * allocate one.
1807 */
1808 simple_unlock(&pc->pc_slock);
1809 s = splvm();
1810 pcg = pool_get(&pcgpool, PR_NOWAIT);
1811 splx(s);
1812 if (pcg != NULL) {
1813 memset(pcg, 0, sizeof(*pcg));
1814 simple_lock(&pc->pc_slock);
1815 pc->pc_ngroups++;
1816 TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list);
1817 if (pc->pc_freeto == NULL)
1818 pc->pc_freeto = pcg;
1819 goto have_group;
1820 }
1821
1822 /*
1823 * Unable to allocate a cache group; destruct the object
1824 * and free it back to the pool.
1825 */
1826 pool_cache_destruct_object(pc, object);
1827 return;
1828 }
1829
1830 have_group:
1831 pc->pc_nitems++;
1832 pcg_put(pcg, object, pa);
1833
1834 if (pcg->pcg_avail == PCG_NOBJECTS)
1835 pc->pc_freeto = NULL;
1836
1837 simple_unlock(&pc->pc_slock);
1838 }
1839
1840 /*
1841 * pool_cache_destruct_object:
1842 *
1843 * Force destruction of an object and its release back into
1844 * the pool.
1845 */
1846 void
1847 pool_cache_destruct_object(struct pool_cache *pc, void *object)
1848 {
1849
1850 if (pc->pc_dtor != NULL)
1851 (*pc->pc_dtor)(pc->pc_arg, object);
1852 pool_put(pc->pc_pool, object);
1853 }
1854
1855 /*
1856 * pool_cache_do_invalidate:
1857 *
1858 * This internal function implements pool_cache_invalidate() and
1859 * pool_cache_reclaim().
1860 */
1861 static void
1862 pool_cache_do_invalidate(struct pool_cache *pc, int free_groups,
1863 void (*putit)(struct pool *, void *))
1864 {
1865 struct pool_cache_group *pcg, *npcg;
1866 void *object;
1867 int s;
1868
1869 for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
1870 pcg = npcg) {
1871 npcg = TAILQ_NEXT(pcg, pcg_list);
1872 while (pcg->pcg_avail != 0) {
1873 pc->pc_nitems--;
1874 object = pcg_get(pcg, NULL);
1875 if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
1876 pc->pc_allocfrom = NULL;
1877 if (pc->pc_dtor != NULL)
1878 (*pc->pc_dtor)(pc->pc_arg, object);
1879 (*putit)(pc->pc_pool, object);
1880 }
1881 if (free_groups) {
1882 pc->pc_ngroups--;
1883 TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
1884 if (pc->pc_freeto == pcg)
1885 pc->pc_freeto = NULL;
1886 s = splvm();
1887 pool_put(&pcgpool, pcg);
1888 splx(s);
1889 }
1890 }
1891 }
1892
1893 /*
1894 * pool_cache_invalidate:
1895 *
1896 * Invalidate a pool cache (destruct and release all of the
1897 * cached objects).
1898 */
1899 void
1900 pool_cache_invalidate(struct pool_cache *pc)
1901 {
1902
1903 simple_lock(&pc->pc_slock);
1904 pool_cache_do_invalidate(pc, 0, pool_put);
1905 simple_unlock(&pc->pc_slock);
1906 }
1907
1908 /*
1909 * pool_cache_reclaim:
1910 *
1911 * Reclaim a pool cache for pool_reclaim().
1912 */
1913 static void
1914 pool_cache_reclaim(struct pool_cache *pc)
1915 {
1916
1917 simple_lock(&pc->pc_slock);
1918 pool_cache_do_invalidate(pc, 1, pool_do_put);
1919 simple_unlock(&pc->pc_slock);
1920 }
1921
1922 /*
1923 * Pool backend allocators.
1924 *
1925 * Each pool has a backend allocator that handles allocation, deallocation,
1926 * and any additional draining that might be needed.
1927 *
1928 * We provide two standard allocators:
1929 *
1930 * pool_allocator_kmem - the default when no allocator is specified
1931 *
1932 * pool_allocator_nointr - used for pools that will not be accessed
1933 * in interrupt context.
1934 */
1935 void *pool_page_alloc(struct pool *, int);
1936 void pool_page_free(struct pool *, void *);
1937
1938 struct pool_allocator pool_allocator_kmem = {
1939 pool_page_alloc, pool_page_free, 0,
1940 };
1941
1942 void *pool_page_alloc_nointr(struct pool *, int);
1943 void pool_page_free_nointr(struct pool *, void *);
1944
1945 struct pool_allocator pool_allocator_nointr = {
1946 pool_page_alloc_nointr, pool_page_free_nointr, 0,
1947 };
1948
1949 #ifdef POOL_SUBPAGE
1950 void *pool_subpage_alloc(struct pool *, int);
1951 void pool_subpage_free(struct pool *, void *);
1952
1953 struct pool_allocator pool_allocator_kmem_subpage = {
1954 pool_subpage_alloc, pool_subpage_free, 0,
1955 };
1956 #endif /* POOL_SUBPAGE */
1957
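/*
 * Illustrative sketch, not part of the original file: a subsystem can
 * supply its own back-end instead of the standard allocators above.
 * foo_page_alloc(), foo_page_free() and foo_allocator are hypothetical;
 * a pa_pagesz of 0 lets pool_init() fill in the default page size.
 */
#if 0
static void *foo_page_alloc(struct pool *, int);
static void foo_page_free(struct pool *, void *);

struct pool_allocator foo_allocator = {
	foo_page_alloc, foo_page_free, 0,
};

/*
 * Passed as the last argument to pool_init():
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    &foo_allocator);
 */
#endif	/* illustrative sketch */
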
1958 /*
1959 * We have at least three different resources for the same allocation and
1960 * each resource can be depleted. First, we have the ready elements in the
1961 * pool. Then we have the resource (typically a vm_map) for this allocator.
1962 * Finally, we have physical memory. Waiting for any of these can be
1963 * unnecessary when any other is freed, but the kernel doesn't support
1964 * sleeping on multiple wait channels, so we have to employ another strategy.
1965 *
1966 * The caller sleeps on the pool (so that it can be awakened when an item
1967 * is returned to the pool), but we set PA_WANT on the allocator. When a
1968 * page is returned to the allocator and PA_WANT is set, pool_allocator_free
1969 * will wake up all sleeping pools belonging to this allocator.
1970 *
1971 * XXX Thundering herd.
1972 */
1973 void *
1974 pool_allocator_alloc(struct pool *org, int flags)
1975 {
1976 struct pool_allocator *pa = org->pr_alloc;
1977 struct pool *pp, *start;
1978 int s, freed;
1979 void *res;
1980
1981 LOCK_ASSERT(!simple_lock_held(&org->pr_slock));
1982
1983 do {
1984 if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
1985 return (res);
1986 if ((flags & PR_WAITOK) == 0) {
1987 /*
1988 * We only run the drain hook here if PR_NOWAIT.
1989 * In other cases, the hook will be run in
1990 * pool_reclaim().
1991 */
1992 if (org->pr_drain_hook != NULL) {
1993 (*org->pr_drain_hook)(org->pr_drain_hook_arg,
1994 flags);
1995 if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
1996 return (res);
1997 }
1998 break;
1999 }
2000
2001 /*
2002 * Drain all pools, except "org", that use this
2003 * allocator. We do this to reclaim VA space.
2004 * pa_alloc is responsible for waiting for
2005 * physical memory.
2006 *
2007 * XXX We risk looping forever if someone
2008 * calls pool_destroy on "start". But there is no
2009 * other way to have potentially sleeping pool_reclaim,
2010 * non-sleeping locks on pool_allocator, and some
2011 * stirring of drained pools in the allocator.
2012 *
2013 * XXX Maybe we should use pool_head_slock for locking
2014 * the allocators?
2015 */
2016 freed = 0;
2017
2018 s = splvm();
2019 simple_lock(&pa->pa_slock);
2020 pp = start = TAILQ_FIRST(&pa->pa_list);
2021 do {
2022 TAILQ_REMOVE(&pa->pa_list, pp, pr_alloc_list);
2023 TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list);
2024 if (pp == org)
2025 continue;
2026 simple_unlock(&pa->pa_slock);
2027 freed = pool_reclaim(pp);
2028 simple_lock(&pa->pa_slock);
2029 } while ((pp = TAILQ_FIRST(&pa->pa_list)) != start &&
2030 freed == 0);
2031
2032 if (freed == 0) {
2033 /*
2034 * We set PA_WANT here, the caller will most likely
2035 * sleep waiting for pages (if not, this won't hurt
2036 * that much), and there is no way to set this in
2037 * the caller without violating locking order.
2038 */
2039 pa->pa_flags |= PA_WANT;
2040 }
2041 simple_unlock(&pa->pa_slock);
2042 splx(s);
2043 } while (freed);
2044 return (NULL);
2045 }
2046
2047 void
2048 pool_allocator_free(struct pool *pp, void *v)
2049 {
2050 struct pool_allocator *pa = pp->pr_alloc;
2051 int s;
2052
2053 LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));
2054
2055 (*pa->pa_free)(pp, v);
2056
2057 s = splvm();
2058 simple_lock(&pa->pa_slock);
2059 if ((pa->pa_flags & PA_WANT) == 0) {
2060 simple_unlock(&pa->pa_slock);
2061 splx(s);
2062 return;
2063 }
2064
2065 TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
2066 simple_lock(&pp->pr_slock);
2067 if ((pp->pr_flags & PR_WANTED) != 0) {
2068 pp->pr_flags &= ~PR_WANTED;
2069 wakeup(pp);
2070 }
2071 simple_unlock(&pp->pr_slock);
2072 }
2073 pa->pa_flags &= ~PA_WANT;
2074 simple_unlock(&pa->pa_slock);
2075 splx(s);
2076 }
2077
2078 void *
2079 pool_page_alloc(struct pool *pp, int flags)
2080 {
2081 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
2082
2083 return ((void *) uvm_km_alloc_poolpage(waitok));
2084 }
2085
2086 void
2087 pool_page_free(struct pool *pp, void *v)
2088 {
2089
2090 uvm_km_free_poolpage((vaddr_t) v);
2091 }
2092
2093 #ifdef POOL_SUBPAGE
2094 /* Sub-page allocator, for machines with large hardware pages. */
2095 void *
2096 pool_subpage_alloc(struct pool *pp, int flags)
2097 {
2098 void *v;
2099 int s;
2100 s = splvm();
2101 v = pool_get(&psppool, flags);
2102 splx(s);
2103 return v;
2104 }
2105
2106 void
2107 pool_subpage_free(struct pool *pp, void *v)
2108 {
2109 int s;
2110 s = splvm();
2111 pool_put(&psppool, v);
2112 splx(s);
2113 }
2114
2115 /* We don't provide a real nointr allocator. Maybe later. */
2116 void *
2117 pool_page_alloc_nointr(struct pool *pp, int flags)
2118 {
2119
2120 return (pool_subpage_alloc(pp, flags));
2121 }
2122
2123 void
2124 pool_page_free_nointr(struct pool *pp, void *v)
2125 {
2126
2127 pool_subpage_free(pp, v);
2128 }
2129 #else
2130 void *
2131 pool_page_alloc_nointr(struct pool *pp, int flags)
2132 {
2133 boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
2134
2135 return ((void *) uvm_km_alloc_poolpage1(kernel_map,
2136 uvm.kernel_object, waitok));
2137 }
2138
2139 void
2140 pool_page_free_nointr(struct pool *pp, void *v)
2141 {
2142
2143 uvm_km_free_poolpage1(kernel_map, (vaddr_t) v);
2144 }
2145 #endif /* POOL_SUBPAGE */