The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_pool.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: subr_pool.c,v 1.99.8.2 2006/03/10 13:19:42 tron Exp $  */
    2 
    3 /*-
    4  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
    9  * Simulation Facility, NASA Ames Research Center.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 3. All advertising materials mentioning features or use of this software
   20  *    must display the following acknowledgement:
   21  *      This product includes software developed by the NetBSD
   22  *      Foundation, Inc. and its contributors.
   23  * 4. Neither the name of The NetBSD Foundation nor the names of its
   24  *    contributors may be used to endorse or promote products derived
   25  *    from this software without specific prior written permission.
   26  *
   27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   37  * POSSIBILITY OF SUCH DAMAGE.
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.99.8.2 2006/03/10 13:19:42 tron Exp $");
   42 
   43 #include "opt_pool.h"
   44 #include "opt_poollog.h"
   45 #include "opt_lockdebug.h"
   46 
   47 #include <sys/param.h>
   48 #include <sys/systm.h>
   49 #include <sys/proc.h>
   50 #include <sys/errno.h>
   51 #include <sys/kernel.h>
   52 #include <sys/malloc.h>
   53 #include <sys/lock.h>
   54 #include <sys/pool.h>
   55 #include <sys/syslog.h>
   56 
   57 #include <uvm/uvm.h>
   58 
   59 /*
   60  * Pool resource management utility.
   61  *
   62  * Memory is allocated in pages which are split into pieces according to
   63  * the pool item size. Each page is kept on one of three lists in the
   64  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
   65  * for empty, full and partially-full pages respectively. The individual
   66  * pool items are on a linked list headed by `ph_itemlist' in each page
   67  * header. The memory for building the page list is either taken from
   68  * the allocated pages themselves (for small pool items) or taken from
   69  * an internal pool of page headers (`phpool').
   70  */
   71 
   72 /* List of all pools */
   73 TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);
   74 
   75 /* Private pool for page header structures */
   76 #define PHPOOL_MAX      8
   77 static struct pool phpool[PHPOOL_MAX];
   78 #define PHPOOL_FREELIST_NELEM(idx)      (((idx) == 0) ? 0 : (1 << (idx)))
   79 
   80 #ifdef POOL_SUBPAGE
   81 /* Pool of subpages for use by normal pools. */
   82 static struct pool psppool;
   83 #endif
   84 
   85 static void *pool_page_alloc_meta(struct pool *, int);
   86 static void pool_page_free_meta(struct pool *, void *);
   87 
   88 /* allocator for pool metadata */
   89 static struct pool_allocator pool_allocator_meta = {
   90         pool_page_alloc_meta, pool_page_free_meta
   91 };
   92 
   93 /* # of seconds to retain page after last use */
   94 int pool_inactive_time = 10;
   95 
   96 /* Next candidate for drainage (see pool_drain()) */
   97 static struct pool      *drainpp;
   98 
   99 /* This spin lock protects both pool_head and drainpp. */
  100 struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;
  101 
  102 typedef uint8_t pool_item_freelist_t;
  103 
  104 struct pool_item_header {
  105         /* Page headers */
  106         LIST_ENTRY(pool_item_header)
  107                                 ph_pagelist;    /* pool page list */
  108         SPLAY_ENTRY(pool_item_header)
  109                                 ph_node;        /* Off-page page headers */
  110         caddr_t                 ph_page;        /* this page's address */
  111         struct timeval          ph_time;        /* last referenced */
  112         union {
  113                 /* !PR_NOTOUCH */
  114                 struct {
  115                         TAILQ_HEAD(, pool_item)
  116                                 phu_itemlist;   /* chunk list for this page */
  117                 } phu_normal;
  118                 /* PR_NOTOUCH */
  119                 struct {
  120                         uint16_t
  121                                 phu_off;        /* start offset in page */
  122                         pool_item_freelist_t
  123                                 phu_firstfree;  /* first free item */
  124                         /*
  125                          * XXX it might be better to use
  126                          * a simple bitmap and ffs(3)
  127                          */
  128                 } phu_notouch;
  129         } ph_u;
  130         uint16_t                ph_nmissing;    /* # of chunks in use */
  131 };
  132 #define ph_itemlist     ph_u.phu_normal.phu_itemlist
  133 #define ph_off          ph_u.phu_notouch.phu_off
  134 #define ph_firstfree    ph_u.phu_notouch.phu_firstfree
  135 
  136 struct pool_item {
  137 #ifdef DIAGNOSTIC
  138         u_int pi_magic;
  139 #endif
  140 #define PI_MAGIC 0xdeadbeefU
  141         /* Other entries use only this list entry */
  142         TAILQ_ENTRY(pool_item)  pi_list;
  143 };
  144 
  145 #define POOL_NEEDS_CATCHUP(pp)                                          \
  146         ((pp)->pr_nitems < (pp)->pr_minitems)
  147 
  148 /*
  149  * Pool cache management.
  150  *
  151  * Pool caches provide a way for constructed objects to be cached by the
  152  * pool subsystem.  This can lead to performance improvements by avoiding
  153  * needless object construction/destruction; it is deferred until absolutely
  154  * necessary.
  155  *
  156  * Caches are grouped into cache groups.  Each cache group references
  157  * up to 16 constructed objects.  When a cache allocates an object
  158  * from the pool, it calls the object's constructor and places it into
  159  * a cache group.  When a cache group frees an object back to the pool,
  160  * it first calls the object's destructor.  This allows the object to
  161  * persist in constructed form while freed to the cache.
  162  *
  163  * Multiple caches may exist for each pool.  This allows a single
  164  * object type to have multiple constructed forms.  The pool references
  165  * each cache, so that when a pool is drained by the pagedaemon, it can
  166  * drain each individual cache as well.  Each time a cache is drained,
  167  * the most idle cache group is freed to the pool in its entirety.
  168  *
  169  * Pool caches are layered on top of pools.  By layering them, we can avoid
  170  * the complexity of cache management for pools which would not benefit
  171  * from it.
  172  */
  173 
  174 /* The cache group pool. */
  175 static struct pool pcgpool;
  176 
  177 static void     pool_cache_reclaim(struct pool_cache *, struct pool_pagelist *);
  178 
  179 static int      pool_catchup(struct pool *);
  180 static void     pool_prime_page(struct pool *, caddr_t,
  181                     struct pool_item_header *);
  182 static void     pool_update_curpage(struct pool *);
  183 
  184 void            *pool_allocator_alloc(struct pool *, int);
  185 void            pool_allocator_free(struct pool *, void *);
  186 
  187 static void pool_print_pagelist(struct pool *, struct pool_pagelist *,
  188         void (*)(const char *, ...));
  189 static void pool_print1(struct pool *, const char *,
  190         void (*)(const char *, ...));
  191 
  192 static int pool_chk_page(struct pool *, const char *,
  193                          struct pool_item_header *);
  194 
  195 /*
  196  * Pool log entry. An array of these is allocated in pool_init().
  197  */
  198 struct pool_log {
  199         const char      *pl_file;
  200         long            pl_line;
  201         int             pl_action;
  202 #define PRLOG_GET       1
  203 #define PRLOG_PUT       2
  204         void            *pl_addr;
  205 };
  206 
  207 #ifdef POOL_DIAGNOSTIC
  208 /* Number of entries in pool log buffers */
  209 #ifndef POOL_LOGSIZE
  210 #define POOL_LOGSIZE    10
  211 #endif
  212 
  213 int pool_logsize = POOL_LOGSIZE;
  214 
  215 static __inline void
  216 pr_log(struct pool *pp, void *v, int action, const char *file, long line)
  217 {
  218         int n = pp->pr_curlogentry;
  219         struct pool_log *pl;
  220 
  221         if ((pp->pr_roflags & PR_LOGGING) == 0)
  222                 return;
  223 
  224         /*
  225          * Fill in the current entry. Wrap around and overwrite
  226          * the oldest entry if necessary.
  227          */
  228         pl = &pp->pr_log[n];
  229         pl->pl_file = file;
  230         pl->pl_line = line;
  231         pl->pl_action = action;
  232         pl->pl_addr = v;
  233         if (++n >= pp->pr_logsize)
  234                 n = 0;
  235         pp->pr_curlogentry = n;
  236 }
  237 
  238 static void
  239 pr_printlog(struct pool *pp, struct pool_item *pi,
  240     void (*pr)(const char *, ...))
  241 {
  242         int i = pp->pr_logsize;
  243         int n = pp->pr_curlogentry;
  244 
  245         if ((pp->pr_roflags & PR_LOGGING) == 0)
  246                 return;
  247 
  248         /*
  249          * Print all entries in this pool's log.
  250          */
  251         while (i-- > 0) {
  252                 struct pool_log *pl = &pp->pr_log[n];
  253                 if (pl->pl_action != 0) {
  254                         if (pi == NULL || pi == pl->pl_addr) {
  255                                 (*pr)("\tlog entry %d:\n", i);
  256                                 (*pr)("\t\taction = %s, addr = %p\n",
  257                                     pl->pl_action == PRLOG_GET ? "get" : "put",
  258                                     pl->pl_addr);
  259                                 (*pr)("\t\tfile: %s at line %lu\n",
  260                                     pl->pl_file, pl->pl_line);
  261                         }
  262                 }
  263                 if (++n >= pp->pr_logsize)
  264                         n = 0;
  265         }
  266 }
  267 
  268 static __inline void
  269 pr_enter(struct pool *pp, const char *file, long line)
  270 {
  271 
  272         if (__predict_false(pp->pr_entered_file != NULL)) {
  273                 printf("pool %s: reentrancy at file %s line %ld\n",
  274                     pp->pr_wchan, file, line);
  275                 printf("         previous entry at file %s line %ld\n",
  276                     pp->pr_entered_file, pp->pr_entered_line);
  277                 panic("pr_enter");
  278         }
  279 
  280         pp->pr_entered_file = file;
  281         pp->pr_entered_line = line;
  282 }
  283 
  284 static __inline void
  285 pr_leave(struct pool *pp)
  286 {
  287 
  288         if (__predict_false(pp->pr_entered_file == NULL)) {
  289                 printf("pool %s not entered?\n", pp->pr_wchan);
  290                 panic("pr_leave");
  291         }
  292 
  293         pp->pr_entered_file = NULL;
  294         pp->pr_entered_line = 0;
  295 }
  296 
  297 static __inline void
  298 pr_enter_check(struct pool *pp, void (*pr)(const char *, ...))
  299 {
  300 
  301         if (pp->pr_entered_file != NULL)
  302                 (*pr)("\n\tcurrently entered from file %s line %ld\n",
  303                     pp->pr_entered_file, pp->pr_entered_line);
  304 }
  305 #else
  306 #define pr_log(pp, v, action, file, line)
  307 #define pr_printlog(pp, pi, pr)
  308 #define pr_enter(pp, file, line)
  309 #define pr_leave(pp)
  310 #define pr_enter_check(pp, pr)
  311 #endif /* POOL_DIAGNOSTIC */
  312 
  313 static __inline int
  314 pr_item_notouch_index(const struct pool *pp, const struct pool_item_header *ph,
  315     const void *v)
  316 {
  317         const char *cp = v;
  318         int idx;
  319 
  320         KASSERT(pp->pr_roflags & PR_NOTOUCH);
  321         idx = (cp - ph->ph_page - ph->ph_off) / pp->pr_size;
  322         KASSERT(idx < pp->pr_itemsperpage);
  323         return idx;
  324 }
  325 
  326 #define PR_FREELIST_ALIGN(p) \
  327         roundup((uintptr_t)(p), sizeof(pool_item_freelist_t))
  328 #define PR_FREELIST(ph) ((pool_item_freelist_t *)PR_FREELIST_ALIGN((ph) + 1))
  329 #define PR_INDEX_USED   ((pool_item_freelist_t)-1)
  330 #define PR_INDEX_EOL    ((pool_item_freelist_t)-2)
  331 
  332 static __inline void
  333 pr_item_notouch_put(const struct pool *pp, struct pool_item_header *ph,
  334     void *obj)
  335 {
  336         int idx = pr_item_notouch_index(pp, ph, obj);
  337         pool_item_freelist_t *freelist = PR_FREELIST(ph);
  338 
  339         KASSERT(freelist[idx] == PR_INDEX_USED);
  340         freelist[idx] = ph->ph_firstfree;
  341         ph->ph_firstfree = idx;
  342 }
  343 
  344 static __inline void *
  345 pr_item_notouch_get(const struct pool *pp, struct pool_item_header *ph)
  346 {
  347         int idx = ph->ph_firstfree;
  348         pool_item_freelist_t *freelist = PR_FREELIST(ph);
  349 
  350         KASSERT(freelist[idx] != PR_INDEX_USED);
  351         ph->ph_firstfree = freelist[idx];
  352         freelist[idx] = PR_INDEX_USED;
  353 
  354         return ph->ph_page + ph->ph_off + idx * pp->pr_size;
  355 }
  356 
  357 static __inline int
  358 phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
  359 {
  360         if (a->ph_page < b->ph_page)
  361                 return (-1);
  362         else if (a->ph_page > b->ph_page)
  363                 return (1);
  364         else
  365                 return (0);
  366 }
  367 
  368 SPLAY_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
  369 SPLAY_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);
  370 
  371 /*
  372  * Return the pool page header based on page address.
  373  */
  374 static __inline struct pool_item_header *
  375 pr_find_pagehead(struct pool *pp, caddr_t page)
  376 {
  377         struct pool_item_header *ph, tmp;
  378 
  379         if ((pp->pr_roflags & PR_PHINPAGE) != 0)
  380                 return ((struct pool_item_header *)(page + pp->pr_phoffset));
  381 
  382         tmp.ph_page = page;
  383         ph = SPLAY_FIND(phtree, &pp->pr_phtree, &tmp);
  384         return ph;
  385 }
  386 
  387 static void
  388 pr_pagelist_free(struct pool *pp, struct pool_pagelist *pq)
  389 {
  390         struct pool_item_header *ph;
  391         int s;
  392 
  393         while ((ph = LIST_FIRST(pq)) != NULL) {
  394                 LIST_REMOVE(ph, ph_pagelist);
  395                 pool_allocator_free(pp, ph->ph_page);
  396                 if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
  397                         s = splvm();
  398                         pool_put(pp->pr_phpool, ph);
  399                         splx(s);
  400                 }
  401         }
  402 }
  403 
  404 /*
  405  * Remove a page from the pool.
  406  */
  407 static __inline void
  408 pr_rmpage(struct pool *pp, struct pool_item_header *ph,
  409      struct pool_pagelist *pq)
  410 {
  411 
  412         LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
  413 
  414         /*
  415          * If the page was idle, decrement the idle page count.
  416          */
  417         if (ph->ph_nmissing == 0) {
  418 #ifdef DIAGNOSTIC
  419                 if (pp->pr_nidle == 0)
  420                         panic("pr_rmpage: nidle inconsistent");
  421                 if (pp->pr_nitems < pp->pr_itemsperpage)
  422                         panic("pr_rmpage: nitems inconsistent");
  423 #endif
  424                 pp->pr_nidle--;
  425         }
  426 
  427         pp->pr_nitems -= pp->pr_itemsperpage;
  428 
  429         /*
  430          * Unlink the page from the pool and queue it for release.
  431          */
  432         LIST_REMOVE(ph, ph_pagelist);
  433         if ((pp->pr_roflags & PR_PHINPAGE) == 0)
  434                 SPLAY_REMOVE(phtree, &pp->pr_phtree, ph);
  435         LIST_INSERT_HEAD(pq, ph, ph_pagelist);
  436 
  437         pp->pr_npages--;
  438         pp->pr_npagefree++;
  439 
  440         pool_update_curpage(pp);
  441 }
  442 
  443 /*
  444  * Initialize all the pools listed in the "pools" link set.
  445  */
  446 void
  447 link_pool_init(void)
  448 {
  449         __link_set_decl(pools, struct link_pool_init);
  450         struct link_pool_init * const *pi;
  451 
  452         __link_set_foreach(pi, pools)
  453                 pool_init((*pi)->pp, (*pi)->size, (*pi)->align,
  454                     (*pi)->align_offset, (*pi)->flags, (*pi)->wchan,
  455                     (*pi)->palloc);
  456 }
  457 
  458 /*
  459  * Initialize the given pool resource structure.
  460  *
  461  * We export this routine to allow other kernel parts to declare
  462  * static pools that must be initialized before malloc() is available.
  463  */
  464 void
  465 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
  466     const char *wchan, struct pool_allocator *palloc)
  467 {
  468         int off, slack;
  469         size_t trysize, phsize;
  470         int s;
  471 
  472         KASSERT((1UL << (CHAR_BIT * sizeof(pool_item_freelist_t))) - 2 >=
  473             PHPOOL_FREELIST_NELEM(PHPOOL_MAX - 1));
  474 
  475 #ifdef POOL_DIAGNOSTIC
  476         /*
  477          * Always log if POOL_DIAGNOSTIC is defined.
  478          */
  479         if (pool_logsize != 0)
  480                 flags |= PR_LOGGING;
  481 #endif
  482 
  483         if (palloc == NULL)
  484                 palloc = &pool_allocator_kmem;
  485 #ifdef POOL_SUBPAGE
  486         if (size > palloc->pa_pagesz) {
  487                 if (palloc == &pool_allocator_kmem)
  488                         palloc = &pool_allocator_kmem_fullpage;
  489                 else if (palloc == &pool_allocator_nointr)
  490                         palloc = &pool_allocator_nointr_fullpage;
  491         }               
  492 #endif /* POOL_SUBPAGE */
  493         if ((palloc->pa_flags & PA_INITIALIZED) == 0) {
  494                 if (palloc->pa_pagesz == 0)
  495                         palloc->pa_pagesz = PAGE_SIZE;
  496 
  497                 TAILQ_INIT(&palloc->pa_list);
  498 
  499                 simple_lock_init(&palloc->pa_slock);
  500                 palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
  501                 palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
  502                 palloc->pa_flags |= PA_INITIALIZED;
  503         }
  504 
  505         if (align == 0)
  506                 align = ALIGN(1);
  507 
  508         if (size < sizeof(struct pool_item))
  509                 size = sizeof(struct pool_item);
  510 
  511         size = roundup(size, align);
  512 #ifdef DIAGNOSTIC
  513         if (size > palloc->pa_pagesz)
  514                 panic("pool_init: pool item size (%lu) too large",
  515                       (u_long)size);
  516 #endif
  517 
  518         /*
  519          * Initialize the pool structure.
  520          */
  521         LIST_INIT(&pp->pr_emptypages);
  522         LIST_INIT(&pp->pr_fullpages);
  523         LIST_INIT(&pp->pr_partpages);
  524         TAILQ_INIT(&pp->pr_cachelist);
  525         pp->pr_curpage = NULL;
  526         pp->pr_npages = 0;
  527         pp->pr_minitems = 0;
  528         pp->pr_minpages = 0;
  529         pp->pr_maxpages = UINT_MAX;
  530         pp->pr_roflags = flags;
  531         pp->pr_flags = 0;
  532         pp->pr_size = size;
  533         pp->pr_align = align;
  534         pp->pr_wchan = wchan;
  535         pp->pr_alloc = palloc;
  536         pp->pr_nitems = 0;
  537         pp->pr_nout = 0;
  538         pp->pr_hardlimit = UINT_MAX;
  539         pp->pr_hardlimit_warning = NULL;
  540         pp->pr_hardlimit_ratecap.tv_sec = 0;
  541         pp->pr_hardlimit_ratecap.tv_usec = 0;
  542         pp->pr_hardlimit_warning_last.tv_sec = 0;
  543         pp->pr_hardlimit_warning_last.tv_usec = 0;
  544         pp->pr_drain_hook = NULL;
  545         pp->pr_drain_hook_arg = NULL;
  546 
  547         /*
  548          * Decide whether to put the page header off page to avoid
  549          * wasting too large a part of the page or too big item.
  550          * Off-page page headers go on a hash table, so we can match
  551          * a returned item with its header based on the page address.
  552          * We use 1/16 of the page size and about 8 times of the item
  553          * size as the threshold (XXX: tune)
  554          *
  555          * However, we'll put the header into the page if we can put
  556          * it without wasting any items.
  557          *
  558          * Silently enforce `0 <= ioff < align'.
  559          */
  560         pp->pr_itemoffset = ioff %= align;
  561         /* See the comment below about reserved bytes. */
  562         trysize = palloc->pa_pagesz - ((align - ioff) % align);
  563         phsize = ALIGN(sizeof(struct pool_item_header));
  564         if ((pp->pr_roflags & PR_NOTOUCH) == 0 &&
  565             (pp->pr_size < MIN(palloc->pa_pagesz / 16, phsize << 3) ||
  566             trysize / pp->pr_size == (trysize - phsize) / pp->pr_size)) {
  567                 /* Use the end of the page for the page header */
  568                 pp->pr_roflags |= PR_PHINPAGE;
  569                 pp->pr_phoffset = off = palloc->pa_pagesz - phsize;
  570         } else {
  571                 /* The page header will be taken from our page header pool */
  572                 pp->pr_phoffset = 0;
  573                 off = palloc->pa_pagesz;
  574                 SPLAY_INIT(&pp->pr_phtree);
  575         }
  576 
  577         /*
  578          * Alignment is to take place at `ioff' within the item. This means
  579          * we must reserve up to `align - 1' bytes on the page to allow
  580          * appropriate positioning of each item.
  581          */
  582         pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
  583         KASSERT(pp->pr_itemsperpage != 0);
  584         if ((pp->pr_roflags & PR_NOTOUCH)) {
  585                 int idx;
  586 
  587                 for (idx = 0; pp->pr_itemsperpage > PHPOOL_FREELIST_NELEM(idx);
  588                     idx++) {
  589                         /* nothing */
  590                 }
  591                 if (idx >= PHPOOL_MAX) {
  592                         /*
  593                          * if you see this panic, consider to tweak
  594                          * PHPOOL_MAX and PHPOOL_FREELIST_NELEM.
  595                          */
  596                         panic("%s: too large itemsperpage(%d) for PR_NOTOUCH",
  597                             pp->pr_wchan, pp->pr_itemsperpage);
  598                 }
  599                 pp->pr_phpool = &phpool[idx];
  600         } else if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
  601                 pp->pr_phpool = &phpool[0];
  602         }
  603 #if defined(DIAGNOSTIC)
  604         else {
  605                 pp->pr_phpool = NULL;
  606         }
  607 #endif
  608 
  609         /*
  610          * Use the slack between the chunks and the page header
  611          * for "cache coloring".
  612          */
  613         slack = off - pp->pr_itemsperpage * pp->pr_size;
  614         pp->pr_maxcolor = (slack / align) * align;
  615         pp->pr_curcolor = 0;
  616 
  617         pp->pr_nget = 0;
  618         pp->pr_nfail = 0;
  619         pp->pr_nput = 0;
  620         pp->pr_npagealloc = 0;
  621         pp->pr_npagefree = 0;
  622         pp->pr_hiwat = 0;
  623         pp->pr_nidle = 0;
  624 
  625 #ifdef POOL_DIAGNOSTIC
  626         if (flags & PR_LOGGING) {
  627                 if (kmem_map == NULL ||
  628                     (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
  629                      M_TEMP, M_NOWAIT)) == NULL)
  630                         pp->pr_roflags &= ~PR_LOGGING;
  631                 pp->pr_curlogentry = 0;
  632                 pp->pr_logsize = pool_logsize;
  633         }
  634 #endif
  635 
  636         pp->pr_entered_file = NULL;
  637         pp->pr_entered_line = 0;
  638 
  639         simple_lock_init(&pp->pr_slock);
  640 
  641         /*
  642          * Initialize private page header pool and cache magazine pool if we
  643          * haven't done so yet.
  644          * XXX LOCKING.
  645          */
  646         if (phpool[0].pr_size == 0) {
  647                 int idx;
  648                 for (idx = 0; idx < PHPOOL_MAX; idx++) {
  649                         static char phpool_names[PHPOOL_MAX][6+1+6+1];
  650                         int nelem;
  651                         size_t sz;
  652 
  653                         nelem = PHPOOL_FREELIST_NELEM(idx);
  654                         snprintf(phpool_names[idx], sizeof(phpool_names[idx]),
  655                             "phpool-%d", nelem);
  656                         sz = sizeof(struct pool_item_header);
  657                         if (nelem) {
  658                                 sz = PR_FREELIST_ALIGN(sz)
  659                                     + nelem * sizeof(pool_item_freelist_t);
  660                         }
  661                         pool_init(&phpool[idx], sz, 0, 0, 0,
  662                             phpool_names[idx], &pool_allocator_meta);
  663                 }
  664 #ifdef POOL_SUBPAGE
  665                 pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0,
  666                     PR_RECURSIVE, "psppool", &pool_allocator_meta);
  667 #endif
  668                 pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
  669                     0, "pcgpool", &pool_allocator_meta);
  670         }
  671 
  672         /* Insert into the list of all pools. */
  673         simple_lock(&pool_head_slock);
  674         TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
  675         simple_unlock(&pool_head_slock);
  676 
  677         /* Insert this into the list of pools using this allocator. */
  678         s = splvm();
  679         simple_lock(&palloc->pa_slock);
  680         TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
  681         simple_unlock(&palloc->pa_slock);
  682         splx(s);
  683 }
  684 
  685 /*
  686  * De-commision a pool resource.
  687  */
  688 void
  689 pool_destroy(struct pool *pp)
  690 {
  691         struct pool_pagelist pq;
  692         struct pool_item_header *ph;
  693         int s;
  694 
  695         /* Remove from global pool list */
  696         simple_lock(&pool_head_slock);
  697         TAILQ_REMOVE(&pool_head, pp, pr_poollist);
  698         if (drainpp == pp)
  699                 drainpp = NULL;
  700         simple_unlock(&pool_head_slock);
  701 
  702         /* Remove this pool from its allocator's list of pools. */
  703         s = splvm();
  704         simple_lock(&pp->pr_alloc->pa_slock);
  705         TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
  706         simple_unlock(&pp->pr_alloc->pa_slock);
  707         splx(s);
  708 
  709         s = splvm();
  710         simple_lock(&pp->pr_slock);
  711 
  712         KASSERT(TAILQ_EMPTY(&pp->pr_cachelist));
  713 
  714 #ifdef DIAGNOSTIC
  715         if (pp->pr_nout != 0) {
  716                 pr_printlog(pp, NULL, printf);
  717                 panic("pool_destroy: pool busy: still out: %u",
  718                     pp->pr_nout);
  719         }
  720 #endif
  721 
  722         KASSERT(LIST_EMPTY(&pp->pr_fullpages));
  723         KASSERT(LIST_EMPTY(&pp->pr_partpages));
  724 
  725         /* Remove all pages */
  726         LIST_INIT(&pq);
  727         while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
  728                 pr_rmpage(pp, ph, &pq);
  729 
  730         simple_unlock(&pp->pr_slock);
  731         splx(s);
  732 
  733         pr_pagelist_free(pp, &pq);
  734 
  735 #ifdef POOL_DIAGNOSTIC
  736         if ((pp->pr_roflags & PR_LOGGING) != 0)
  737                 free(pp->pr_log, M_TEMP);
  738 #endif
  739 }
  740 
  741 void
  742 pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
  743 {
  744 
  745         /* XXX no locking -- must be used just after pool_init() */
  746 #ifdef DIAGNOSTIC
  747         if (pp->pr_drain_hook != NULL)
  748                 panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
  749 #endif
  750         pp->pr_drain_hook = fn;
  751         pp->pr_drain_hook_arg = arg;
  752 }
  753 
  754 static struct pool_item_header *
  755 pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
  756 {
  757         struct pool_item_header *ph;
  758         int s;
  759 
  760         LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);
  761 
  762         if ((pp->pr_roflags & PR_PHINPAGE) != 0)
  763                 ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
  764         else {
  765                 s = splvm();
  766                 ph = pool_get(pp->pr_phpool, flags);
  767                 splx(s);
  768         }
  769 
  770         return (ph);
  771 }
  772 
/*
 * Grab an item from the pool; must be called at appropriate spl level.
 *
 * Returns a pointer to the item, or NULL when either the hard limit has
 * been reached or no backing page could be obtained, and the caller did
 * not ask to wait (no PR_WAITOK, or PR_LIMITFAIL for the hard-limit case).
 * With PR_WAITOK the call may sleep on the pool until pool_put() wakes it.
 *
 * The pool lock is taken here and is dropped around the drain hook and
 * around the back-end page allocation; after every such window the
 * function restarts from `startover' and re-evaluates the pool state.
 */
void *
#ifdef POOL_DIAGNOSTIC
_pool_get(struct pool *pp, int flags, const char *file, long line)
#else
pool_get(struct pool *pp, int flags)
#endif
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;

#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_itemsperpage == 0))
		panic("pool_get: pool %p: pr_itemsperpage is zero, "
		    "pool not initialized?", pp);
	/* A waiting allocation needs an LWP context (except at shutdown). */
	if (__predict_false(curlwp == NULL && doing_shutdown == 0 &&
			    (flags & PR_WAITOK) != 0))
		panic("pool_get: %s: must have NOWAIT", pp->pr_wchan);

#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
#endif
#endif /* DIAGNOSTIC */

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

 startover:
	/*
	 * Check to see if we've reached the hard limit.  If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
	}
#endif
	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
		if (pp->pr_drain_hook != NULL) {
			/*
			 * Since the drain hook is going to free things
			 * back to the pool, unlock, call the hook, re-lock,
			 * and check the hardlimit condition again.
			 */
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			(*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
			simple_lock(&pp->pr_slock);
			pr_enter(pp, file, line);
			if (pp->pr_nout < pp->pr_hardlimit)
				goto startover;
		}

		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case.  Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pr_leave(pp);
			/*
			 * pr_slock is handed to ltsleep() as the interlock;
			 * the code continues without re-locking, so it is
			 * presumably held again on return -- TODO confirm
			 * against ltsleep(9).
			 */
			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
			pr_enter(pp, file, line);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
			      &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;

		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
#ifdef DIAGNOSTIC
		/* No current page implies there must be no free items. */
		if (pp->pr_nitems != 0) {
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 * Release the pool lock, as the back-end page allocator
		 * may block.
		 */
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		v = pool_allocator_alloc(pp, flags);
		if (__predict_true(v != NULL))
			ph = pool_alloc_item_header(pp, v, flags);

		/*
		 * ph is NULL here whenever v is NULL (it was loaded from
		 * pr_curpage above), so the test below covers both a failed
		 * page allocation and a failed header allocation.
		 */
		if (__predict_false(v == NULL || ph == NULL)) {
			/* Got a page but no header: give the page back. */
			if (v != NULL)
				pool_allocator_free(pp, v);

			simple_lock(&pp->pr_slock);
			pr_enter(pp, file, line);

			/*
			 * We were unable to allocate a page or item
			 * header, but we released the lock during
			 * allocation, so perhaps items were freed
			 * back to the pool.  Check for this case.
			 */
			if (pp->pr_curpage != NULL)
				goto startover;

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				pr_leave(pp);
				simple_unlock(&pp->pr_slock);
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			pp->pr_flags |= PR_WANTED;
			/* PA_WANTED is already set on the allocator. */
			pr_leave(pp);
			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
			pr_enter(pp, file, line);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		simple_lock(&pp->pr_slock);
		pr_enter(pp, file, line);
		pool_prime_page(pp, v, ph);
		pp->pr_npagealloc++;

		/* Start the allocation process over. */
		goto startover;
	}
	/*
	 * Take one item off the current page.  PR_NOTOUCH pools go through
	 * pr_item_notouch_get(); all others take the head of the page's
	 * item list.
	 */
	if (pp->pr_roflags & PR_NOTOUCH) {
#ifdef DIAGNOSTIC
		if (__predict_false(ph->ph_nmissing == pp->pr_itemsperpage)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: page empty", pp->pr_wchan);
		}
#endif
		v = pr_item_notouch_get(pp, ph);
#ifdef POOL_DIAGNOSTIC
		pr_log(pp, v, PRLOG_GET, file, line);
#endif
	} else {
		v = pi = TAILQ_FIRST(&ph->ph_itemlist);
		if (__predict_false(v == NULL)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: page empty", pp->pr_wchan);
		}
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nitems == 0)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: items on itemlist, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent");
		}
#endif

#ifdef POOL_DIAGNOSTIC
		pr_log(pp, v, PRLOG_GET, file, line);
#endif

#ifdef DIAGNOSTIC
		/* A wrong magic means the free item was written to. */
		if (__predict_false(pi->pi_magic != PI_MAGIC)) {
			pr_printlog(pp, pi, printf);
			panic("pool_get(%s): free list modified: "
			    "magic=%x; page %p; item addr %p\n",
			    pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
		}
#endif

		/*
		 * Remove from item list.
		 */
		TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
	}
	/* Account for the item: one fewer free, one more outstanding. */
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nidle == 0))
			panic("pool_get: nidle inconsistent");
#endif
		pp->pr_nidle--;

		/*
		 * This page was previously empty.  Move it to the list of
		 * partially-full pages.  This page is already curpage.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
	ph->ph_nmissing++;
	if (ph->ph_nmissing == pp->pr_itemsperpage) {
#ifdef DIAGNOSTIC
		if (__predict_false((pp->pr_roflags & PR_NOTOUCH) == 0 &&
		    !TAILQ_EMPTY(&ph->ph_itemlist))) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * This page is now full.  Move it to the full list
		 * and select a new current page.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
	return (v);
}
 1034 
 1035 /*
 1036  * Internal version of pool_put().  Pool is already locked/entered.
 1037  */
 1038 static void
 1039 pool_do_put(struct pool *pp, void *v, struct pool_pagelist *pq)
 1040 {
 1041         struct pool_item *pi = v;
 1042         struct pool_item_header *ph;
 1043         caddr_t page;
 1044         int s;
 1045 
 1046         LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
 1047 
 1048         page = (caddr_t)((u_long)v & pp->pr_alloc->pa_pagemask);
 1049 
 1050 #ifdef DIAGNOSTIC
 1051         if (__predict_false(pp->pr_nout == 0)) {
 1052                 printf("pool %s: putting with none out\n",
 1053                     pp->pr_wchan);
 1054                 panic("pool_put");
 1055         }
 1056 #endif
 1057 
 1058         if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
 1059                 pr_printlog(pp, NULL, printf);
 1060                 panic("pool_put: %s: page header missing", pp->pr_wchan);
 1061         }
 1062 
 1063 #ifdef LOCKDEBUG
 1064         /*
 1065          * Check if we're freeing a locked simple lock.
 1066          */
 1067         simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
 1068 #endif
 1069 
 1070         /*
 1071          * Return to item list.
 1072          */
 1073         if (pp->pr_roflags & PR_NOTOUCH) {
 1074                 pr_item_notouch_put(pp, ph, v);
 1075         } else {
 1076 #ifdef DIAGNOSTIC
 1077                 pi->pi_magic = PI_MAGIC;
 1078 #endif
 1079 #ifdef DEBUG
 1080                 {
 1081                         int i, *ip = v;
 1082 
 1083                         for (i = 0; i < pp->pr_size / sizeof(int); i++) {
 1084                                 *ip++ = PI_MAGIC;
 1085                         }
 1086                 }
 1087 #endif
 1088 
 1089                 TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
 1090         }
 1091         KDASSERT(ph->ph_nmissing != 0);
 1092         ph->ph_nmissing--;
 1093         pp->pr_nput++;
 1094         pp->pr_nitems++;
 1095         pp->pr_nout--;
 1096 
 1097         /* Cancel "pool empty" condition if it exists */
 1098         if (pp->pr_curpage == NULL)
 1099                 pp->pr_curpage = ph;
 1100 
 1101         if (pp->pr_flags & PR_WANTED) {
 1102                 pp->pr_flags &= ~PR_WANTED;
 1103                 if (ph->ph_nmissing == 0)
 1104                         pp->pr_nidle++;
 1105                 wakeup((caddr_t)pp);
 1106                 return;
 1107         }
 1108 
 1109         /*
 1110          * If this page is now empty, do one of two things:
 1111          *
 1112          *      (1) If we have more pages than the page high water mark,
 1113          *          free the page back to the system.  ONLY CONSIDER
 1114          *          FREEING BACK A PAGE IF WE HAVE MORE THAN OUR MINIMUM PAGE
 1115          *          CLAIM.
 1116          *
 1117          *      (2) Otherwise, move the page to the empty page list.
 1118          *
 1119          * Either way, select a new current page (so we use a partially-full
 1120          * page if one is available).
 1121          */
 1122         if (ph->ph_nmissing == 0) {
 1123                 pp->pr_nidle++;
 1124                 if (pp->pr_npages > pp->pr_minpages &&
 1125                     (pp->pr_npages > pp->pr_maxpages ||
 1126                      (pp->pr_alloc->pa_flags & PA_WANT) != 0)) {
 1127                         pr_rmpage(pp, ph, pq);
 1128                 } else {
 1129                         LIST_REMOVE(ph, ph_pagelist);
 1130                         LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
 1131 
 1132                         /*
 1133                          * Update the timestamp on the page.  A page must
 1134                          * be idle for some period of time before it can
 1135                          * be reclaimed by the pagedaemon.  This minimizes
 1136                          * ping-pong'ing for memory.
 1137                          */
 1138                         s = splclock();
 1139                         ph->ph_time = mono_time;
 1140                         splx(s);
 1141                 }
 1142                 pool_update_curpage(pp);
 1143         }
 1144 
 1145         /*
 1146          * If the page was previously completely full, move it to the
 1147          * partially-full list and make it the current page.  The next
 1148          * allocation will get the item from this page, instead of
 1149          * further fragmenting the pool.
 1150          */
 1151         else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
 1152                 LIST_REMOVE(ph, ph_pagelist);
 1153                 LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
 1154                 pp->pr_curpage = ph;
 1155         }
 1156 }
 1157 
 1158 /*
 1159  * Return resource to the pool; must be called at appropriate spl level
 1160  */
 1161 #ifdef POOL_DIAGNOSTIC
 1162 void
 1163 _pool_put(struct pool *pp, void *v, const char *file, long line)
 1164 {
 1165         struct pool_pagelist pq;
 1166 
 1167         LIST_INIT(&pq);
 1168 
 1169         simple_lock(&pp->pr_slock);
 1170         pr_enter(pp, file, line);
 1171 
 1172         pr_log(pp, v, PRLOG_PUT, file, line);
 1173 
 1174         pool_do_put(pp, v, &pq);
 1175 
 1176         pr_leave(pp);
 1177         simple_unlock(&pp->pr_slock);
 1178 
 1179         if (! LIST_EMPTY(&pq))
 1180                 pr_pagelist_free(pp, &pq);
 1181 }
 1182 #undef pool_put
 1183 #endif /* POOL_DIAGNOSTIC */
 1184 
 1185 void
 1186 pool_put(struct pool *pp, void *v)
 1187 {
 1188         struct pool_pagelist pq;
 1189 
 1190         LIST_INIT(&pq);
 1191 
 1192         simple_lock(&pp->pr_slock);
 1193         pool_do_put(pp, v, &pq);
 1194         simple_unlock(&pp->pr_slock);
 1195 
 1196         if (! LIST_EMPTY(&pq))
 1197                 pr_pagelist_free(pp, &pq);
 1198 }
 1199 
 1200 #ifdef POOL_DIAGNOSTIC
 1201 #define         pool_put(h, v)  _pool_put((h), (v), __FILE__, __LINE__)
 1202 #endif
 1203 
 1204 /*
 1205  * Add N items to the pool.
 1206  */
 1207 int
 1208 pool_prime(struct pool *pp, int n)
 1209 {
 1210         struct pool_item_header *ph = NULL;
 1211         caddr_t cp;
 1212         int newpages;
 1213 
 1214         simple_lock(&pp->pr_slock);
 1215 
 1216         newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
 1217 
 1218         while (newpages-- > 0) {
 1219                 simple_unlock(&pp->pr_slock);
 1220                 cp = pool_allocator_alloc(pp, PR_NOWAIT);
 1221                 if (__predict_true(cp != NULL))
 1222                         ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
 1223 
 1224                 if (__predict_false(cp == NULL || ph == NULL)) {
 1225                         if (cp != NULL)
 1226                                 pool_allocator_free(pp, cp);
 1227                         simple_lock(&pp->pr_slock);
 1228                         break;
 1229                 }
 1230 
 1231                 simple_lock(&pp->pr_slock);
 1232                 pool_prime_page(pp, cp, ph);
 1233                 pp->pr_npagealloc++;
 1234                 pp->pr_minpages++;
 1235         }
 1236 
 1237         if (pp->pr_minpages >= pp->pr_maxpages)
 1238                 pp->pr_maxpages = pp->pr_minpages + 1;  /* XXX */
 1239 
 1240         simple_unlock(&pp->pr_slock);
 1241         return (0);
 1242 }
 1243 
 1244 /*
 1245  * Add a page worth of items to the pool.
 1246  *
 1247  * Note, we must be called with the pool descriptor LOCKED.
 1248  */
 1249 static void
 1250 pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
 1251 {
 1252         struct pool_item *pi;
 1253         caddr_t cp = storage;
 1254         unsigned int align = pp->pr_align;
 1255         unsigned int ioff = pp->pr_itemoffset;
 1256         int n;
 1257         int s;
 1258 
 1259         LOCK_ASSERT(simple_lock_held(&pp->pr_slock));
 1260 
 1261 #ifdef DIAGNOSTIC
 1262         if (((u_long)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
 1263                 panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
 1264 #endif
 1265 
 1266         /*
 1267          * Insert page header.
 1268          */
 1269         LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
 1270         TAILQ_INIT(&ph->ph_itemlist);
 1271         ph->ph_page = storage;
 1272         ph->ph_nmissing = 0;
 1273         s = splclock();
 1274         ph->ph_time = mono_time;
 1275         splx(s);
 1276         if ((pp->pr_roflags & PR_PHINPAGE) == 0)
 1277                 SPLAY_INSERT(phtree, &pp->pr_phtree, ph);
 1278 
 1279         pp->pr_nidle++;
 1280 
 1281         /*
 1282          * Color this page.
 1283          */
 1284         cp = (caddr_t)(cp + pp->pr_curcolor);
 1285         if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
 1286                 pp->pr_curcolor = 0;
 1287 
 1288         /*
 1289          * Adjust storage to apply aligment to `pr_itemoffset' in each item.
 1290          */
 1291         if (ioff != 0)
 1292                 cp = (caddr_t)(cp + (align - ioff));
 1293 
 1294         /*
 1295          * Insert remaining chunks on the bucket list.
 1296          */
 1297         n = pp->pr_itemsperpage;
 1298         pp->pr_nitems += n;
 1299 
 1300         if (pp->pr_roflags & PR_NOTOUCH) {
 1301                 pool_item_freelist_t *freelist = PR_FREELIST(ph);
 1302                 int i;
 1303 
 1304                 ph->ph_off = cp - storage;
 1305                 ph->ph_firstfree = 0;
 1306                 for (i = 0; i < n - 1; i++)
 1307                         freelist[i] = i + 1;
 1308                 freelist[n - 1] = PR_INDEX_EOL;
 1309         } else {
 1310                 while (n--) {
 1311                         pi = (struct pool_item *)cp;
 1312 
 1313                         KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);
 1314 
 1315                         /* Insert on page list */
 1316                         TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
 1317 #ifdef DIAGNOSTIC
 1318                         pi->pi_magic = PI_MAGIC;
 1319 #endif
 1320                         cp = (caddr_t)(cp + pp->pr_size);
 1321                 }
 1322         }
 1323 
 1324         /*
 1325          * If the pool was depleted, point at the new page.
 1326          */
 1327         if (pp->pr_curpage == NULL)
 1328                 pp->pr_curpage = ph;
 1329 
 1330         if (++pp->pr_npages > pp->pr_hiwat)
 1331                 pp->pr_hiwat = pp->pr_npages;
 1332 }
 1333 
 1334 /*
 1335  * Used by pool_get() when nitems drops below the low water mark.  This
 1336  * is used to catch up pr_nitems with the low water mark.
 1337  *
 1338  * Note 1, we never wait for memory here, we let the caller decide what to do.
 1339  *
 1340  * Note 2, we must be called with the pool already locked, and we return
 1341  * with it locked.
 1342  */
 1343 static int
 1344 pool_catchup(struct pool *pp)
 1345 {
 1346         struct pool_item_header *ph = NULL;
 1347         caddr_t cp;
 1348         int error = 0;
 1349 
 1350         while (POOL_NEEDS_CATCHUP(pp)) {
 1351                 /*
 1352                  * Call the page back-end allocator for more memory.
 1353                  *
 1354                  * XXX: We never wait, so should we bother unlocking
 1355                  * the pool descriptor?
 1356                  */
 1357                 simple_unlock(&pp->pr_slock);
 1358                 cp = pool_allocator_alloc(pp, PR_NOWAIT);
 1359                 if (__predict_true(cp != NULL))
 1360                         ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
 1361                 if (__predict_false(cp == NULL || ph == NULL)) {
 1362                         if (cp != NULL)
 1363                                 pool_allocator_free(pp, cp);
 1364                         error = ENOMEM;
 1365                         simple_lock(&pp->pr_slock);
 1366                         break;
 1367                 }
 1368                 simple_lock(&pp->pr_slock);
 1369                 pool_prime_page(pp, cp, ph);
 1370                 pp->pr_npagealloc++;
 1371         }
 1372 
 1373         return (error);
 1374 }
 1375 
 1376 static void
 1377 pool_update_curpage(struct pool *pp)
 1378 {
 1379 
 1380         pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
 1381         if (pp->pr_curpage == NULL) {
 1382                 pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
 1383         }
 1384 }
 1385 
 1386 void
 1387 pool_setlowat(struct pool *pp, int n)
 1388 {
 1389 
 1390         simple_lock(&pp->pr_slock);
 1391 
 1392         pp->pr_minitems = n;
 1393         pp->pr_minpages = (n == 0)
 1394                 ? 0
 1395                 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
 1396 
 1397         /* Make sure we're caught up with the newly-set low water mark. */
 1398         if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
 1399                 /*
 1400                  * XXX: Should we log a warning?  Should we set up a timeout
 1401                  * to try again in a second or so?  The latter could break
 1402                  * a caller's assumptions about interrupt protection, etc.
 1403                  */
 1404         }
 1405 
 1406         simple_unlock(&pp->pr_slock);
 1407 }
 1408 
 1409 void
 1410 pool_sethiwat(struct pool *pp, int n)
 1411 {
 1412 
 1413         simple_lock(&pp->pr_slock);
 1414 
 1415         pp->pr_maxpages = (n == 0)
 1416                 ? 0
 1417                 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
 1418 
 1419         simple_unlock(&pp->pr_slock);
 1420 }
 1421 
 1422 void
 1423 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
 1424 {
 1425 
 1426         simple_lock(&pp->pr_slock);
 1427 
 1428         pp->pr_hardlimit = n;
 1429         pp->pr_hardlimit_warning = warnmess;
 1430         pp->pr_hardlimit_ratecap.tv_sec = ratecap;
 1431         pp->pr_hardlimit_warning_last.tv_sec = 0;
 1432         pp->pr_hardlimit_warning_last.tv_usec = 0;
 1433 
 1434         /*
 1435          * In-line version of pool_sethiwat(), because we don't want to
 1436          * release the lock.
 1437          */
 1438         pp->pr_maxpages = (n == 0)
 1439                 ? 0
 1440                 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
 1441 
 1442         simple_unlock(&pp->pr_slock);
 1443 }
 1444 
 1445 /*
 1446  * Release all complete pages that have not been used recently.
 1447  */
 1448 int
 1449 #ifdef POOL_DIAGNOSTIC
 1450 _pool_reclaim(struct pool *pp, const char *file, long line)
 1451 #else
 1452 pool_reclaim(struct pool *pp)
 1453 #endif
 1454 {
 1455         struct pool_item_header *ph, *phnext;
 1456         struct pool_cache *pc;
 1457         struct timeval curtime;
 1458         struct pool_pagelist pq;
 1459         struct timeval diff;
 1460         int s;
 1461 
 1462         if (pp->pr_drain_hook != NULL) {
 1463                 /*
 1464                  * The drain hook must be called with the pool unlocked.
 1465                  */
 1466                 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT);
 1467         }
 1468 
 1469         if (simple_lock_try(&pp->pr_slock) == 0)
 1470                 return (0);
 1471         pr_enter(pp, file, line);
 1472 
 1473         LIST_INIT(&pq);
 1474 
 1475         /*
 1476          * Reclaim items from the pool's caches.
 1477          */
 1478         TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
 1479                 pool_cache_reclaim(pc, &pq);
 1480 
 1481         s = splclock();
 1482         curtime = mono_time;
 1483         splx(s);
 1484 
 1485         for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
 1486                 phnext = LIST_NEXT(ph, ph_pagelist);
 1487 
 1488                 /* Check our minimum page claim */
 1489                 if (pp->pr_npages <= pp->pr_minpages)
 1490                         break;
 1491 
 1492                 KASSERT(ph->ph_nmissing == 0);
 1493                 timersub(&curtime, &ph->ph_time, &diff);
 1494                 if (diff.tv_sec < pool_inactive_time)
 1495                         continue;
 1496 
 1497                 /*
 1498                  * If freeing this page would put us below
 1499                  * the low water mark, stop now.
 1500                  */
 1501                 if ((pp->pr_nitems - pp->pr_itemsperpage) <
 1502                     pp->pr_minitems)
 1503                         break;
 1504 
 1505                 pr_rmpage(pp, ph, &pq);
 1506         }
 1507 
 1508         pr_leave(pp);
 1509         simple_unlock(&pp->pr_slock);
 1510         if (LIST_EMPTY(&pq))
 1511                 return (0);
 1512 
 1513         pr_pagelist_free(pp, &pq);
 1514         return (1);
 1515 }
 1516 
 1517 /*
 1518  * Drain pools, one at a time.
 1519  *
 1520  * Note, we must never be called from an interrupt context.
 1521  */
 1522 void
 1523 pool_drain(void *arg)
 1524 {
 1525         struct pool *pp;
 1526         int s;
 1527 
 1528         pp = NULL;
 1529         s = splvm();
 1530         simple_lock(&pool_head_slock);
 1531         if (drainpp == NULL) {
 1532                 drainpp = TAILQ_FIRST(&pool_head);
 1533         }
 1534         if (drainpp) {
 1535                 pp = drainpp;
 1536                 drainpp = TAILQ_NEXT(pp, pr_poollist);
 1537         }
 1538         simple_unlock(&pool_head_slock);
 1539         pool_reclaim(pp);
 1540         splx(s);
 1541 }
 1542 
 1543 /*
 1544  * Diagnostic helpers.
 1545  */
 1546 void
 1547 pool_print(struct pool *pp, const char *modif)
 1548 {
 1549         int s;
 1550 
 1551         s = splvm();
 1552         if (simple_lock_try(&pp->pr_slock) == 0) {
 1553                 printf("pool %s is locked; try again later\n",
 1554                     pp->pr_wchan);
 1555                 splx(s);
 1556                 return;
 1557         }
 1558         pool_print1(pp, modif, printf);
 1559         simple_unlock(&pp->pr_slock);
 1560         splx(s);
 1561 }
 1562 
 1563 void
 1564 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
 1565 {
 1566         int didlock = 0;
 1567 
 1568         if (pp == NULL) {
 1569                 (*pr)("Must specify a pool to print.\n");
 1570                 return;
 1571         }
 1572 
 1573         /*
 1574          * Called from DDB; interrupts should be blocked, and all
 1575          * other processors should be paused.  We can skip locking
 1576          * the pool in this case.
 1577          *
 1578          * We do a simple_lock_try() just to print the lock
 1579          * status, however.
 1580          */
 1581 
 1582         if (simple_lock_try(&pp->pr_slock) == 0)
 1583                 (*pr)("WARNING: pool %s is locked\n", pp->pr_wchan);
 1584         else
 1585                 didlock = 1;
 1586 
 1587         pool_print1(pp, modif, pr);
 1588 
 1589         if (didlock)
 1590                 simple_unlock(&pp->pr_slock);
 1591 }
 1592 
 1593 static void
 1594 pool_print_pagelist(struct pool *pp, struct pool_pagelist *pl,
 1595     void (*pr)(const char *, ...))
 1596 {
 1597         struct pool_item_header *ph;
 1598 #ifdef DIAGNOSTIC
 1599         struct pool_item *pi;
 1600 #endif
 1601 
 1602         LIST_FOREACH(ph, pl, ph_pagelist) {
 1603                 (*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
 1604                     ph->ph_page, ph->ph_nmissing,
 1605                     (u_long)ph->ph_time.tv_sec,
 1606                     (u_long)ph->ph_time.tv_usec);
 1607 #ifdef DIAGNOSTIC
 1608                 if (!(pp->pr_roflags & PR_NOTOUCH)) {
 1609                         TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
 1610                                 if (pi->pi_magic != PI_MAGIC) {
 1611                                         (*pr)("\t\t\titem %p, magic 0x%x\n",
 1612                                             pi, pi->pi_magic);
 1613                                 }
 1614                         }
 1615                 }
 1616 #endif
 1617         }
 1618 }
 1619 
 1620 static void
 1621 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
 1622 {
              /*
               * Dump the state of pool `pp' through the printf-like callback
               * `pr'.  The basic parameters and counters are always printed;
               * each character of `modif' may enable an optional section:
               *      'l'     the pool log (via pr_printlog)
               *      'p'     the empty, full and partial page lists
               *      'c'     every pool cache attached to the pool
               * Other characters in `modif' are ignored.  This function takes
               * no locks itself.
               */
 1623         struct pool_item_header *ph;
 1624         struct pool_cache *pc;
 1625         struct pool_cache_group *pcg;
 1626         int i, print_log = 0, print_pagelist = 0, print_cache = 0;
 1627         char c;
 1628 
              /* Parse the modifier string. */
 1629         while ((c = *modif++) != '\0') {
 1630                 if (c == 'l')
 1631                         print_log = 1;
 1632                 if (c == 'p')
 1633                         print_pagelist = 1;
 1634                 if (c == 'c')
 1635                         print_cache = 1;
 1636         }
 1637 
 1638         (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
 1639             pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
 1640             pp->pr_roflags);
 1641         (*pr)("\talloc %p\n", pp->pr_alloc);
 1642         (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
 1643             pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
 1644         (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
 1645             pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
 1646 
 1647         (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
 1648             pp->pr_nget, pp->pr_nfail, pp->pr_nput);
 1649         (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
 1650             pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
 1651 
 1652         if (print_pagelist == 0)
 1653                 goto skip_pagelist;
 1654 
              /*
               * A list heading is printed only when the list is non-empty;
               * `ph' is used solely for that test, the walk itself happens
               * in pool_print_pagelist().
               */
 1655         if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
 1656                 (*pr)("\n\tempty page list:\n");
 1657         pool_print_pagelist(pp, &pp->pr_emptypages, pr);
 1658         if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
 1659                 (*pr)("\n\tfull page list:\n");
 1660         pool_print_pagelist(pp, &pp->pr_fullpages, pr);
 1661         if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
 1662                 (*pr)("\n\tpartial-page list:\n");
 1663         pool_print_pagelist(pp, &pp->pr_partpages, pr);
 1664 
 1665         if (pp->pr_curpage == NULL)
 1666                 (*pr)("\tno current page\n");
 1667         else
 1668                 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
 1669 
 1670  skip_pagelist:
 1671         if (print_log == 0)
 1672                 goto skip_log;
 1673 
 1674         (*pr)("\n");
 1675         if ((pp->pr_roflags & PR_LOGGING) == 0)
 1676                 (*pr)("\tno log\n");
 1677         else
 1678                 pr_printlog(pp, NULL, pr);
 1679 
 1680  skip_log:
 1681         if (print_cache == 0)
 1682                 goto skip_cache;
 1683 
              /*
               * For each cache, print its counters and then every slot of
               * every group; the physical address is shown only when it is
               * not POOL_PADDR_INVALID.
               */
 1684         TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist) {
 1685                 (*pr)("\tcache %p: allocfrom %p freeto %p\n", pc,
 1686                     pc->pc_allocfrom, pc->pc_freeto);
 1687                 (*pr)("\t    hits %lu misses %lu ngroups %lu nitems %lu\n",
 1688                     pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
 1689                 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
 1690                         (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);
 1691                         for (i = 0; i < PCG_NOBJECTS; i++) {
 1692                                 if (pcg->pcg_objects[i].pcgo_pa !=
 1693                                     POOL_PADDR_INVALID) {
 1694                                         (*pr)("\t\t\t%p, 0x%llx\n",
 1695                                             pcg->pcg_objects[i].pcgo_va,
 1696                                             (unsigned long long)
 1697                                             pcg->pcg_objects[i].pcgo_pa);
 1698                                 } else {
 1699                                         (*pr)("\t\t\t%p\n",
 1700                                             pcg->pcg_objects[i].pcgo_va);
 1701                                 }
 1702                         }
 1703                 }
 1704         }
 1705 
 1706  skip_cache:
              /* Always executed, whichever optional sections were selected. */
 1707         pr_enter_check(pp, pr);
 1708 }
 1709 
 1710 static int
 1711 pool_chk_page(struct pool *pp, const char *label, struct pool_item_header *ph)
 1712 {
              /*
               * Consistency-check a single page header `ph' of pool `pp'.
               *
               * Returns 0 if no problem was found and 1 after printing a
               * "page inconsistency" message.  If `label' is non-NULL it is
               * printed as a prefix of each message.  With DIAGNOSTIC, an
               * item whose pi_magic is not PI_MAGIC causes a panic instead
               * of a return.
               */
 1713         struct pool_item *pi;
 1714         caddr_t page;
 1715         int n;
 1716 
              /*
               * Mask the header's own address with the allocator's page mask;
               * a mismatch with ph_page is reported only when PR_PHINPAGE is
               * set in pr_roflags.
               */
 1717         page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
 1718         if (page != ph->ph_page &&
 1719             (pp->pr_roflags & PR_PHINPAGE) != 0) {
 1720                 if (label != NULL)
 1721                         printf("%s: ", label);
 1722                 printf("pool(%p:%s): page inconsistency: page %p;"
 1723                        " at page head addr %p (p %p)\n", pp,
 1724                         pp->pr_wchan, ph->ph_page,
 1725                         ph, page);
 1726                 return 1;
 1727         }
 1728 
              /* For PR_NOTOUCH pools the item list is not examined. */
 1729         if ((pp->pr_roflags & PR_NOTOUCH) != 0)
 1730                 return 0;
 1731 
              /*
               * Walk the page's item list; `n' is the ordinal of the current
               * item and is only used in the diagnostics.
               */
 1732         for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
 1733              pi != NULL;
 1734              pi = TAILQ_NEXT(pi,pi_list), n++) {
 1735 
 1736 #ifdef DIAGNOSTIC
 1737                 if (pi->pi_magic != PI_MAGIC) {
 1738                         if (label != NULL)
 1739                                 printf("%s: ", label);
 1740                         printf("pool(%s): free list modified: magic=%x;"
 1741                                " page %p; item ordinal %d;"
 1742                                " addr %p (p %p)\n",
 1743                                 pp->pr_wchan, pi->pi_magic, ph->ph_page,
 1744                                 n, pi, page);
 1745                         panic("pool");
 1746                 }
 1747 #endif
                      /* The item's address, masked, must equal ph_page. */
 1748                 page =
 1749                     (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
 1750                 if (page == ph->ph_page)
 1751                         continue;
 1752 
 1753                 if (label != NULL)
 1754                         printf("%s: ", label);
 1755                 printf("pool(%p:%s): page inconsistency: page %p;"
 1756                        " item ordinal %d; addr %p (p %p)\n", pp,
 1757                         pp->pr_wchan, ph->ph_page,
 1758                         n, pi, page);
 1759                 return 1;
 1760         }
 1761         return 0;
 1762 }
 1763 
 1764 
 1765 int
 1766 pool_chk(struct pool *pp, const char *label)
 1767 {
              /*
               * Run pool_chk_page() on every page of the pool's empty, full
               * and partial page lists, in that order, with pr_slock held.
               * The scan stops at the first page for which pool_chk_page()
               * returns non-zero.  Returns that value, or 0 if every page
               * passed.  `label' is handed through to pool_chk_page().
               */
 1768         struct pool_item_header *ph;
 1769         int r = 0;
 1770 
 1771         simple_lock(&pp->pr_slock);
 1772         LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist) {
 1773                 r = pool_chk_page(pp, label, ph);
 1774                 if (r) {
 1775                         goto out;
 1776                 }
 1777         }
 1778         LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
 1779                 r = pool_chk_page(pp, label, ph);
 1780                 if (r) {
 1781                         goto out;
 1782                 }
 1783         }
 1784         LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
 1785                 r = pool_chk_page(pp, label, ph);
 1786                 if (r) {
 1787                         goto out;
 1788                 }
 1789         }
 1790 
 1791 out:
              /* Single exit point so the pool lock is always released. */
 1792         simple_unlock(&pp->pr_slock);
 1793         return (r);
 1794 }
 1795 
 1796 /*
 1797  * pool_cache_init:
 1798  *
 1799  *      Initialize a pool cache.
 1800  *
      *      `pc' is the cache to set up and `pp' the pool it draws objects
      *      from.  `ctor' and `dtor' are optional (may be NULL) constructor
      *      and destructor callbacks; `arg' is stored and passed as their
      *      first argument.  The cache starts with no groups and zeroed
      *      statistics and is appended to the pool's cache list.
      *
 1801  *      NOTE: If the pool must be protected from interrupts, we expect
 1802  *      to be called at the appropriate interrupt priority level.
 1803  */
 1804 void
 1805 pool_cache_init(struct pool_cache *pc, struct pool *pp,
 1806     int (*ctor)(void *, void *, int),
 1807     void (*dtor)(void *, void *),
 1808     void *arg)
 1809 {
 1810 
 1811         TAILQ_INIT(&pc->pc_grouplist);
 1812         simple_lock_init(&pc->pc_slock);
 1813 
 1814         pc->pc_allocfrom = NULL;
 1815         pc->pc_freeto = NULL;
 1816         pc->pc_pool = pp;
 1817 
 1818         pc->pc_ctor = ctor;
 1819         pc->pc_dtor = dtor;
 1820         pc->pc_arg  = arg;
 1821 
 1822         pc->pc_hits   = 0;
 1823         pc->pc_misses = 0;
 1824 
 1825         pc->pc_ngroups = 0;
 1826 
 1827         pc->pc_nitems = 0;
 1828 
              /* The pool's cache list is modified under the pool lock. */
 1829         simple_lock(&pp->pr_slock);
 1830         TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist);
 1831         simple_unlock(&pp->pr_slock);
 1832 }
 1833 
 1834 /*
 1835  * pool_cache_destroy:
 1836  *
 1837  *      Destroy a pool cache.
      *
      *      All cached objects are released via pool_cache_invalidate()
      *      and the cache is unlinked from its pool's cache list.  The
      *      storage of `pc' itself is not freed here.
 1838  */
 1839 void
 1840 pool_cache_destroy(struct pool_cache *pc)
 1841 {
 1842         struct pool *pp = pc->pc_pool;
 1843 
 1844         /* First, invalidate the entire cache. */
 1845         pool_cache_invalidate(pc);
 1846 
 1847         /* ...and remove it from the pool's cache list. */
 1848         simple_lock(&pp->pr_slock);
 1849         TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist);
 1850         simple_unlock(&pp->pr_slock);
 1851 }
 1852 
 1853 static __inline void *
 1854 pcg_get(struct pool_cache_group *pcg, paddr_t *pap)
 1855 {
              /*
               * Remove and return the most recently stored object of group
               * `pcg'.  The group must hold at least one object (asserted).
               * If `pap' is non-NULL the physical address recorded with the
               * object is stored through it.  No locking is done here.
               */
 1856         void *object;
 1857         u_int idx;
 1858 
 1859         KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
 1860         KASSERT(pcg->pcg_avail != 0);
              /* pcg_avail is the count of used slots; the top one is taken. */
 1861         idx = --pcg->pcg_avail;
 1862 
 1863         KASSERT(pcg->pcg_objects[idx].pcgo_va != NULL);
 1864         object = pcg->pcg_objects[idx].pcgo_va;
 1865         if (pap != NULL)
 1866                 *pap = pcg->pcg_objects[idx].pcgo_pa;
              /* Clear the slot; pcg_put() asserts it is NULL before reuse. */
 1867         pcg->pcg_objects[idx].pcgo_va = NULL;
 1868 
 1869         return (object);
 1870 }
 1871 
 1872 static __inline void
 1873 pcg_put(struct pool_cache_group *pcg, void *object, paddr_t pa)
 1874 {
              /*
               * Store `object' together with its physical address `pa' in
               * the next free slot of group `pcg'.  The group must not be
               * full and the slot must be empty (both asserted).  No locking
               * is done here.
               */
 1875         u_int idx;
 1876 
 1877         KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
 1878         idx = pcg->pcg_avail++;
 1879 
 1880         KASSERT(pcg->pcg_objects[idx].pcgo_va == NULL);
 1881         pcg->pcg_objects[idx].pcgo_va = object;
 1882         pcg->pcg_objects[idx].pcgo_pa = pa;
 1883 }
 1884 
 1885 /*
 1886  * pool_cache_get{,_paddr}:
 1887  *
 1888  *      Get an object from a pool cache (optionally returning
 1889  *      the physical address of the object).
      *
      *      `flags' is passed on to pool_get() and to the constructor.
      *      If `pap' is non-NULL the object's physical address is stored
      *      through it.  Returns NULL when pool_get() fails or when the
      *      constructor returns non-zero; in the latter case the object
      *      has already been given back to the pool.
 1890  */
 1891 void *
 1892 pool_cache_get_paddr(struct pool_cache *pc, int flags, paddr_t *pap)
 1893 {
 1894         struct pool_cache_group *pcg;
 1895         void *object;
 1896 
 1897 #ifdef LOCKDEBUG
 1898         if (flags & PR_WAITOK)
 1899                 simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
 1900 #endif
 1901 
 1902         simple_lock(&pc->pc_slock);
 1903 
              /*
               * pc_allocfrom remembers a group to allocate from.  When it
               * is NULL, look for any group that still has objects.
               */
 1904         if ((pcg = pc->pc_allocfrom) == NULL) {
 1905                 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
 1906                         if (pcg->pcg_avail != 0) {
 1907                                 pc->pc_allocfrom = pcg;
 1908                                 goto have_group;
 1909                         }
 1910                 }
 1911 
 1912                 /*
 1913                  * No groups with any available objects.  Allocate
 1914                  * a new object, construct it, and return it to
 1915                  * the caller.  We will allocate a group, if necessary,
 1916                  * when the object is freed back to the cache.
 1917                  */
 1918                 pc->pc_misses++;
                      /* The cache lock is dropped before calling pool_get(). */
 1919                 simple_unlock(&pc->pc_slock);
 1920                 object = pool_get(pc->pc_pool, flags);
 1921                 if (object != NULL && pc->pc_ctor != NULL) {
 1922                         if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
 1923                                 pool_put(pc->pc_pool, object);
 1924                                 return (NULL);
 1925                         }
 1926                 }
                      /*
                       * For a fresh object the physical address comes from
                       * POOL_VTOPHYS when that macro is defined, otherwise
                       * POOL_PADDR_INVALID is reported.
                       */
 1927                 if (object != NULL && pap != NULL) {
 1928 #ifdef POOL_VTOPHYS
 1929                         *pap = POOL_VTOPHYS(object);
 1930 #else
 1931                         *pap = POOL_PADDR_INVALID;
 1932 #endif
 1933                 }
 1934                 return (object);
 1935         }
 1936 
 1937  have_group:
              /* Cache hit: take an object from the group, lock still held. */
 1938         pc->pc_hits++;
 1939         pc->pc_nitems--;
 1940         object = pcg_get(pcg, pap);
 1941 
              /* Group drained: forget it so the next call searches again. */
 1942         if (pcg->pcg_avail == 0)
 1943                 pc->pc_allocfrom = NULL;
 1944 
 1945         simple_unlock(&pc->pc_slock);
 1946 
 1947         return (object);
 1948 }
 1949 
 1950 /*
 1951  * pool_cache_put{,_paddr}:
 1952  *
 1953  *      Put an object back to the pool cache (optionally caching the
 1954  *      physical address of the object).
      *
      *      `pa' is stored alongside `object' in the cache group.  If no
      *      group has room and a new group cannot be allocated without
      *      waiting, the object is destructed and returned to the pool
      *      instead of being cached.
 1955  */
 1956 void
 1957 pool_cache_put_paddr(struct pool_cache *pc, void *object, paddr_t pa)
 1958 {
 1959         struct pool_cache_group *pcg;
 1960         int s;
 1961 
 1962         simple_lock(&pc->pc_slock);
 1963 
              /*
               * pc_freeto remembers a group to free into.  When it is NULL,
               * look for any group that is not yet full.
               */
 1964         if ((pcg = pc->pc_freeto) == NULL) {
 1965                 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
 1966                         if (pcg->pcg_avail != PCG_NOBJECTS) {
 1967                                 pc->pc_freeto = pcg;
 1968                                 goto have_group;
 1969                         }
 1970                 }
 1971 
 1972                 /*
 1973                  * No empty groups to free the object to.  Attempt to
 1974                  * allocate one.
 1975                  */
                      /* The cache lock is not held across the pool_get() call. */
 1976                 simple_unlock(&pc->pc_slock);
 1977                 s = splvm();
 1978                 pcg = pool_get(&pcgpool, PR_NOWAIT);
 1979                 splx(s);
 1980                 if (pcg != NULL) {
 1981                         memset(pcg, 0, sizeof(*pcg));
 1982                         simple_lock(&pc->pc_slock);
 1983                         pc->pc_ngroups++;
 1984                         TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list);
                              /*
                               * pc_freeto is re-tested because the lock was
                               * released above; the object goes into the
                               * new group either way.
                               */
 1985                         if (pc->pc_freeto == NULL)
 1986                                 pc->pc_freeto = pcg;
 1987                         goto have_group;
 1988                 }
 1989 
 1990                 /*
 1991                  * Unable to allocate a cache group; destruct the object
 1992                  * and free it back to the pool.
 1993                  */
 1994                 pool_cache_destruct_object(pc, object);
 1995                 return;
 1996         }
 1997 
 1998  have_group:
              /* Reached with pc_slock held and `pcg' having a free slot. */
 1999         pc->pc_nitems++;
 2000         pcg_put(pcg, object, pa);
 2001 
              /* Group filled up: forget it so the next call searches again. */
 2002         if (pcg->pcg_avail == PCG_NOBJECTS)
 2003                 pc->pc_freeto = NULL;
 2004 
 2005         simple_unlock(&pc->pc_slock);
 2006 }
 2007 
 2008 /*
 2009  * pool_cache_destruct_object:
 2010  *
 2011  *      Force destruction of an object and its release back into
 2012  *      the pool.
      *
      *      The destructor is called only if one was registered; the
      *      object is then handed to pool_put() on the cache's pool.
      *      The cache lock is not taken here.
 2013  */
 2014 void
 2015 pool_cache_destruct_object(struct pool_cache *pc, void *object)
 2016 {
 2017 
 2018         if (pc->pc_dtor != NULL)
 2019                 (*pc->pc_dtor)(pc->pc_arg, object);
 2020         pool_put(pc->pc_pool, object);
 2021 }
 2022 
 2023 /*
 2024  * pool_cache_invalidate:
 2025  *
 2026  *      Invalidate a pool cache (destruct and release all of the
 2027  *      cached objects).
      *
      *      Every group is emptied, but the groups themselves stay on
      *      the cache's group list.  The cache lock is taken first and
      *      the pool lock second; both are released before the local
      *      page list `pq' is passed to pr_pagelist_free().
 2028  */
 2029 void
 2030 pool_cache_invalidate(struct pool_cache *pc)
 2031 {
 2032         struct pool_pagelist pq;
 2033         struct pool_cache_group *pcg, *npcg;
 2034         void *object;
 2035 
 2036         LIST_INIT(&pq);
 2037 
 2038         simple_lock(&pc->pc_slock);
 2039         simple_lock(&pc->pc_pool->pr_slock);
 2040 
 2041         for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
 2042              pcg = npcg) {
 2043                 npcg = TAILQ_NEXT(pcg, pcg_list);
 2044                 while (pcg->pcg_avail != 0) {
 2045                         pc->pc_nitems--;
 2046                         object = pcg_get(pcg, NULL);
                              /* Keep pc_allocfrom from naming an empty group. */
 2047                         if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
 2048                                 pc->pc_allocfrom = NULL;
                              /* Note: the destructor runs with both locks held. */
 2049                         if (pc->pc_dtor != NULL)
 2050                                 (*pc->pc_dtor)(pc->pc_arg, object);
 2051                         pool_do_put(pc->pc_pool, object, &pq);
 2052                 }
 2053         }
 2054 
 2055         simple_unlock(&pc->pc_pool->pr_slock);
 2056         simple_unlock(&pc->pc_slock);
 2057 
 2058         if (! LIST_EMPTY(&pq))
 2059                 pr_pagelist_free(pc->pc_pool, &pq);
 2060 }
 2061 
 2062 /*
 2063  * pool_cache_reclaim:
 2064  *
 2065  *      Reclaim a pool cache for pool_reclaim().
      *
      *      Every cached object is destructed and handed to pool_do_put()
      *      together with the caller's page list `pq'; every group is
      *      unlinked and returned to pcgpool.  Nothing is done if the
      *      cache lock cannot be taken immediately.
 2066  */
 2067 static void
 2068 pool_cache_reclaim(struct pool_cache *pc, struct pool_pagelist *pq)
 2069 {
 2070         struct pool_cache_group *pcg, *npcg;
 2071         void *object;
 2072         int s;
 2073 
 2074         /*
 2075          * We're locking in the wrong order (normally pool_cache -> pool,
 2076          * but the pool is already locked when we get here), so we have
 2077          * to use trylock.  If we can't lock the pool_cache, it's not really
 2078          * a big deal here.
 2079          */
 2080         if (simple_lock_try(&pc->pc_slock) == 0)
 2081                 return;
 2082 
              /* `npcg' is fetched up front because `pcg' is released below. */
 2083         for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
 2084              pcg = npcg) {
 2085                 npcg = TAILQ_NEXT(pcg, pcg_list);
 2086                 while (pcg->pcg_avail != 0) {
 2087                         pc->pc_nitems--;
 2088                         object = pcg_get(pcg, NULL);
 2089                         if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
 2090                                 pc->pc_allocfrom = NULL;
 2091                         if (pc->pc_dtor != NULL)
 2092                                 (*pc->pc_dtor)(pc->pc_arg, object);
 2093                         pool_do_put(pc->pc_pool, object, pq);
 2094                 }
                      /* The now-empty group is unlinked and given back to pcgpool. */
 2095                 pc->pc_ngroups--;
 2096                 TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
 2097                 if (pc->pc_freeto == pcg)
 2098                         pc->pc_freeto = NULL;
 2099                 s = splvm();
 2100                 pool_put(&pcgpool, pcg);
 2101                 splx(s);
 2102         }
 2103 
 2104         simple_unlock(&pc->pc_slock);
 2105 }
 2106 
 2107 /*
 2108  * Pool backend allocators.
 2109  *
 2110  * Each pool has a backend allocator that handles allocation, deallocation,
 2111  * and any additional draining that might be needed.
 2112  *
 2113  * We provide two standard allocators:
 2114  *
 2115  *      pool_allocator_kmem - the default when no allocator is specified
 2116  *
 2117  *      pool_allocator_nointr - used for pools that will not be accessed
 2118  *      in interrupt context.
      *
      * When POOL_SUBPAGE is defined, the page-based allocators are exported
      * under the names pool_allocator_kmem_fullpage and
      * pool_allocator_nointr_fullpage, and the two standard names are
      * defined further down in terms of the sub-page functions.
 2119  */
 2120 void    *pool_page_alloc(struct pool *, int);
 2121 void    pool_page_free(struct pool *, void *);
 2122 
 2123 #ifdef POOL_SUBPAGE
 2124 struct pool_allocator pool_allocator_kmem_fullpage = {
 2125         pool_page_alloc, pool_page_free, 0,
 2126 };
 2127 #else
 2128 struct pool_allocator pool_allocator_kmem = {
 2129         pool_page_alloc, pool_page_free, 0,
 2130 };
 2131 #endif
 2132 
 2133 void    *pool_page_alloc_nointr(struct pool *, int);
 2134 void    pool_page_free_nointr(struct pool *, void *);
 2135 
 2136 #ifdef POOL_SUBPAGE
 2137 struct pool_allocator pool_allocator_nointr_fullpage = {
 2138         pool_page_alloc_nointr, pool_page_free_nointr, 0,
 2139 };
 2140 #else
 2141 struct pool_allocator pool_allocator_nointr = {
 2142         pool_page_alloc_nointr, pool_page_free_nointr, 0,
 2143 };
 2144 #endif
 2145 
 2146 #ifdef POOL_SUBPAGE
 2147 void    *pool_subpage_alloc(struct pool *, int);
 2148 void    pool_subpage_free(struct pool *, void *);
 2149 
 2150 struct pool_allocator pool_allocator_kmem = {
 2151         pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
 2152 };
 2153 
 2154 void    *pool_subpage_alloc_nointr(struct pool *, int);
 2155 void    pool_subpage_free_nointr(struct pool *, void *);
 2156 
      /*
       * NOTE(review): this initializer names the plain sub-page functions
       * rather than the _nointr ones declared just above.  The _nointr
       * variants defined later in this file simply call the plain ones.
       */
 2157 struct pool_allocator pool_allocator_nointr = {
 2158         pool_subpage_alloc, pool_subpage_free, POOL_SUBPAGE,
 2159 };
 2160 #endif /* POOL_SUBPAGE */
 2161 
 2162 /*
 2163  * We have at least three different resources for the same allocation and
 2164  * each resource can be depleted.  First, we have the ready elements in the
 2165  * pool.  Then we have the resource (typically a vm_map) for this allocator.
 2166  * Finally, we have physical memory.  Waiting for any of these can be
 2167  * unnecessary when any other is freed, but the kernel doesn't support
 2168  * sleeping on multiple wait channels, so we have to employ another strategy.
 2169  *
 2170  * The caller sleeps on the pool (so that it can be awakened when an item
 2171  * is returned to the pool), but we set PA_WANT on the allocator.  When a
 2172  * page is returned to the allocator and PA_WANT is set, pool_allocator_free
 2173  * will wake up all sleeping pools belonging to this allocator.
 2174  *
 2175  * XXX Thundering herd.
      *
      * pool_allocator_alloc() returns the page obtained from the allocator's
      * pa_alloc hook, or NULL if none could be obtained.  It must be called
      * without org's pr_slock held (asserted below).
 2176  */
 2177 void *
 2178 pool_allocator_alloc(struct pool *org, int flags)
 2179 {
 2180         struct pool_allocator *pa = org->pr_alloc;
 2181         struct pool *pp, *start;
 2182         int s, freed;
 2183         void *res;
 2184 
 2185         LOCK_ASSERT(!simple_lock_held(&org->pr_slock));
 2186 
 2187         do {
 2188                 if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
 2189                         return (res);
 2190                 if ((flags & PR_WAITOK) == 0) {
 2191                         /*
 2192                          * We only run the drain hook here if PR_NOWAIT.
 2193                          * In other cases, the hook will be run in
 2194                          * pool_reclaim().
 2195                          */
 2196                         if (org->pr_drain_hook != NULL) {
 2197                                 (*org->pr_drain_hook)(org->pr_drain_hook_arg,
 2198                                     flags);
 2199                                 if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
 2200                                         return (res);
 2201                         }
 2202                         break;
 2203                 }
 2204 
 2205                 /*
 2206                  * Drain all pools, except "org", that use this
 2207                  * allocator.  We do this to reclaim VA space.
 2208                  * pa_alloc is responsible for waiting for
 2209                  * physical memory.
 2210                  *
 2211                  * XXX We risk looping forever if someone
 2212                  * calls pool_destroy on "start".  But there is no
 2213                  * other way to have potentially sleeping pool_reclaim,
 2214                  * non-sleeping locks on pool_allocator, and some
 2215                  * stirring of drained pools in the allocator.
 2216                  *
 2217                  * XXX Maybe we should use pool_head_slock for locking
 2218                  * the allocators?
 2219                  */
 2220                 freed = 0;
 2221 
 2222                 s = splvm();
 2223                 simple_lock(&pa->pa_slock);
 2224                 pp = start = TAILQ_FIRST(&pa->pa_list);
 2225                 do {
                              /*
                               * Move the head pool to the tail, so the list
                               * rotates by one entry per iteration.
                               */
 2226                         TAILQ_REMOVE(&pa->pa_list, pp, pr_alloc_list);
 2227                         TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list);
                              /* `continue' proceeds to the loop condition below. */
 2228                         if (pp == org)
 2229                                 continue;
                              /* pa_slock is dropped around pool_reclaim(). */
 2230                         simple_unlock(&pa->pa_slock);
 2231                         freed = pool_reclaim(pp);
 2232                         simple_lock(&pa->pa_slock);
 2233                 } while ((pp = TAILQ_FIRST(&pa->pa_list)) != start &&
 2234                          freed == 0);
 2235 
 2236                 if (freed == 0) {
 2237                         /*
 2238                          * We set PA_WANT here, the caller will most likely
 2239                          * sleep waiting for pages (if not, this won't hurt
 2240                          * that much), and there is no way to set this in
 2241                          * the caller without violating locking order.
 2242                          */
 2243                         pa->pa_flags |= PA_WANT;
 2244                 }
 2245                 simple_unlock(&pa->pa_slock);
 2246                 splx(s);
              /* Retry the allocation only if a reclaim reported success. */
 2247         } while (freed);
 2248         return (NULL);
 2249 }
 2250 
 2251 void
 2252 pool_allocator_free(struct pool *pp, void *v)
 2253 {
              /*
               * Return page `v' to the backend allocator of pool `pp' through
               * the allocator's pa_free hook.  If PA_WANT is set on the
               * allocator afterwards, every pool on the allocator's list that
               * has PR_WANTED set gets that flag cleared and a wakeup(), and
               * PA_WANT is cleared.  Must be called without pp's pr_slock
               * held (asserted below).
               */
 2254         struct pool_allocator *pa = pp->pr_alloc;
 2255         int s;
 2256 
 2257         LOCK_ASSERT(!simple_lock_held(&pp->pr_slock));
 2258 
 2259         (*pa->pa_free)(pp, v);
 2260 
 2261         s = splvm();
 2262         simple_lock(&pa->pa_slock);
              /* Nothing more to do when PA_WANT is not set. */
 2263         if ((pa->pa_flags & PA_WANT) == 0) {
 2264                 simple_unlock(&pa->pa_slock);
 2265                 splx(s);
 2266                 return;
 2267         }
 2268 
              /* Note: the parameter `pp' is reused as the list iterator here. */
 2269         TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
 2270                 simple_lock(&pp->pr_slock);
 2271                 if ((pp->pr_flags & PR_WANTED) != 0) {
 2272                         pp->pr_flags &= ~PR_WANTED;
 2273                         wakeup(pp);
 2274                 }
 2275                 simple_unlock(&pp->pr_slock);
 2276         }
 2277         pa->pa_flags &= ~PA_WANT;
 2278         simple_unlock(&pa->pa_slock);
 2279         splx(s);
 2280 }
 2281 
 2282 void *
 2283 pool_page_alloc(struct pool *pp, int flags)
 2284 {
              /*
               * Page allocation hook used by the allocator initializers above:
               * obtains a pool page from kmem_map via
               * uvm_km_alloc_poolpage_cache().  Waiting is permitted only if
               * PR_WAITOK is set in `flags'.  `pp' is not used.
               */
 2285         boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
 2286 
 2287         return ((void *) uvm_km_alloc_poolpage_cache(kmem_map, NULL, waitok));
 2288 }
 2289 
 2290 void
 2291 pool_page_free(struct pool *pp, void *v)
 2292 {
              /*
               * Counterpart of pool_page_alloc(): hands page `v' back to
               * kmem_map via uvm_km_free_poolpage_cache().  `pp' is not used.
               */
 2293 
 2294         uvm_km_free_poolpage_cache(kmem_map, (vaddr_t) v);
 2295 }
 2296 
 2297 static void *
 2298 pool_page_alloc_meta(struct pool *pp, int flags)
 2299 {
              /*
               * Like pool_page_alloc(), but calls uvm_km_alloc_poolpage1()
               * instead of the _cache variant.  Waiting is permitted only if
               * PR_WAITOK is set in `flags'.  `pp' is not used.
               */
 2300         boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
 2301 
 2302         return ((void *) uvm_km_alloc_poolpage1(kmem_map, NULL, waitok));
 2303 }
 2304 
 2305 static void
 2306 pool_page_free_meta(struct pool *pp, void *v)
 2307 {
              /*
               * Counterpart of pool_page_alloc_meta(): hands page `v' back to
               * kmem_map via uvm_km_free_poolpage1().  `pp' is not used.
               */
 2308 
 2309         uvm_km_free_poolpage1(kmem_map, (vaddr_t) v);
 2310 }
 2311 
 2312 #ifdef POOL_SUBPAGE
 2313 /* Sub-page allocator, for machines with large hardware pages. */
      /*
       * pool_subpage_alloc() and pool_subpage_free() get and put their
       * storage from/to the pool `psppool', with the call bracketed by
       * splvm()/splx().  The `pp' argument is not used by either.
       */
 2314 void *
 2315 pool_subpage_alloc(struct pool *pp, int flags)
 2316 {
 2317         void *v;
 2318         int s;
 2319         s = splvm();
 2320         v = pool_get(&psppool, flags);
 2321         splx(s);
 2322         return v;
 2323 }
 2324 
 2325 void
 2326 pool_subpage_free(struct pool *pp, void *v)
 2327 {
 2328         int s;
 2329         s = splvm();
 2330         pool_put(&psppool, v);
 2331         splx(s);
 2332 }
 2333 
 2334 /* We don't provide a real nointr allocator.  Maybe later. */
      /* Both _nointr functions simply forward to the plain sub-page ones. */
 2335 void *
 2336 pool_subpage_alloc_nointr(struct pool *pp, int flags)
 2337 {
 2338 
 2339         return (pool_subpage_alloc(pp, flags));
 2340 }
 2341 
 2342 void
 2343 pool_subpage_free_nointr(struct pool *pp, void *v)
 2344 {
 2345 
 2346         pool_subpage_free(pp, v);
 2347 }
 2348 #endif /* POOL_SUBPAGE */
 2349 void *
 2350 pool_page_alloc_nointr(struct pool *pp, int flags)
 2351 {
              /*
               * Page allocation hook of the nointr allocator initializers
               * above: obtains a pool page from kernel_map, passing
               * uvm.kernel_object, via uvm_km_alloc_poolpage_cache().
               * Waiting is permitted only if PR_WAITOK is set in `flags'.
               * `pp' is not used.
               */
 2352         boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
 2353 
 2354         return ((void *) uvm_km_alloc_poolpage_cache(kernel_map,
 2355             uvm.kernel_object, waitok));
 2356 }
 2357 
 2358 void
 2359 pool_page_free_nointr(struct pool *pp, void *v)
 2360 {
              /*
               * Counterpart of pool_page_alloc_nointr(): hands page `v' back
               * to kernel_map via uvm_km_free_poolpage_cache().  `pp' is not
               * used.
               */
 2361 
 2362         uvm_km_free_poolpage_cache(kernel_map, (vaddr_t) v);
 2363 }

Cache object: a98bf98cc66b435cab6d84f0f515aba6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.