The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_pool.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $OpenBSD: subr_pool.c,v 1.236 2022/08/14 01:58:28 jsg Exp $     */
    2 /*      $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $       */
    3 
    4 /*-
    5  * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
    6  * All rights reserved.
    7  *
    8  * This code is derived from software contributed to The NetBSD Foundation
    9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
   10  * Simulation Facility, NASA Ames Research Center.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   31  * POSSIBILITY OF SUCH DAMAGE.
   32  */
   33 
   34 #include <sys/param.h>
   35 #include <sys/systm.h>
   36 #include <sys/errno.h>
   37 #include <sys/malloc.h>
   38 #include <sys/pool.h>
   39 #include <sys/proc.h>
   40 #include <sys/sysctl.h>
   41 #include <sys/task.h>
   42 #include <sys/time.h>
   43 #include <sys/timeout.h>
   44 #include <sys/percpu.h>
   45 #include <sys/tracepoint.h>
   46 
   47 #include <uvm/uvm_extern.h>
   48 
   49 /*
   50  * Pool resource management utility.
   51  *
   52  * Memory is allocated in pages which are split into pieces according to
   53  * the pool item size. Each page is kept on one of three lists in the
   54  * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
   55  * for empty, full and partially-full pages respectively. The individual
   56  * pool items are on a linked list headed by `ph_items' in each page
   57  * header. The memory for building the page list is either taken from
   58  * the allocated pages themselves (for small pool items) or taken from
   59  * an internal pool of page headers (`phpool').
   60  */
   61 
/*
 * List of all pools.  pool_init() inserts new pools at the head and
 * pool_destroy() unlinks them again.
 */
SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 * (pool_init() panics when the counter wraps back to zero.)
 *
 * pool_count is incremented by pool_init() and decremented by
 * pool_destroy().
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/*
 * Private pool for page header structures.  It is set up lazily by the
 * first pool_init() call, which detects the uninitialized state through
 * phpool.pr_size == 0.
 */
struct pool phpool;
   77 
/*
 * Table of lock operations used by a pool.  pool_init() points
 * pp->pr_lock_ops at pool_lock_ops_rw when PR_RWLOCK is set and at
 * pool_lock_ops_mtx otherwise; the pl_*() wrappers below dispatch
 * through that pointer so the rest of the file is lock-type agnostic.
 */
struct pool_lock_ops {
        void    (*pl_init)(struct pool *, union pool_lock *,
                    const struct lock_type *);
        void    (*pl_enter)(union pool_lock *);
        int     (*pl_enter_try)(union pool_lock *);
        void    (*pl_leave)(union pool_lock *);
        void    (*pl_assert_locked)(union pool_lock *);
        void    (*pl_assert_unlocked)(union pool_lock *);
        int     (*pl_sleep)(void *, union pool_lock *, int, const char *);
};

/* The two implementations; defined elsewhere in this file. */
static const struct pool_lock_ops pool_lock_ops_mtx;
static const struct pool_lock_ops pool_lock_ops_rw;
   91 
/*
 * pl_init(pp, pl): initialize pool lock `pl' through the lock operations
 * of pool `pp'.
 *
 * With WITNESS, each expansion site gets its own static lock_type whose
 * name is the stringified `pl' argument; without WITNESS the lock type
 * argument is simply NULL.
 */
#ifdef WITNESS
#define pl_init(pp, pl) do {                                            \
        static const struct lock_type __lock_type = { .lt_name = #pl }; \
        (pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);               \
} while (0)
#else /* WITNESS */
#define pl_init(pp, pl)         (pp)->pr_lock_ops->pl_init(pp, pl, NULL)
#endif /* WITNESS */
  100 
  101 static inline void
  102 pl_enter(struct pool *pp, union pool_lock *pl)
  103 {
  104         pp->pr_lock_ops->pl_enter(pl);
  105 }
  106 static inline int
  107 pl_enter_try(struct pool *pp, union pool_lock *pl)
  108 {
  109         return pp->pr_lock_ops->pl_enter_try(pl);
  110 }
  111 static inline void
  112 pl_leave(struct pool *pp, union pool_lock *pl)
  113 {
  114         pp->pr_lock_ops->pl_leave(pl);
  115 }
  116 static inline void
  117 pl_assert_locked(struct pool *pp, union pool_lock *pl)
  118 {
  119         pp->pr_lock_ops->pl_assert_locked(pl);
  120 }
  121 static inline void
  122 pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
  123 {
  124         pp->pr_lock_ops->pl_assert_unlocked(pl);
  125 }
  126 static inline int
  127 pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
  128     const char *wmesg)
  129 {
  130         return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg);
  131 }
  132 
/*
 * Bookkeeping overlaid on the start of every free item while it sits on
 * its page's free list (ph_items).  pool_init() rounds item sizes up to
 * at least sizeof(struct pool_item) so this always fits.
 */
struct pool_item {
        u_long                          pi_magic;
        XSIMPLEQ_ENTRY(pool_item)       pi_list;
};
/*
 * Expected pi_magic value for item `pi' on page `ph': the item address
 * XORed with the page's ph_magic.  pool_do_put() stores it and
 * pool_do_get() panics when the stored value no longer matches.
 */
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
  138 
/*
 * Per-page bookkeeping.  Depending on the pool layout the header lives
 * either inside the page at pr_phoffset (see POOL_INPGHDR()) or off-page,
 * in which case it is looked up through the pool's pr_phtree.
 */
struct pool_page_header {
        /* Page headers */
        TAILQ_ENTRY(pool_page_header)
                                ph_entry;       /* pool page list */
        XSIMPLEQ_HEAD(, pool_item)
                                ph_items;       /* free items on the page */
        RBT_ENTRY(pool_page_header)
                                ph_node;        /* off-page page headers */
        unsigned int            ph_nmissing;    /* # of chunks in use */
        caddr_t                 ph_page;        /* this page's address */
        caddr_t                 ph_colored;     /* page's colored address */
        unsigned long           ph_magic;       /* input to POOL_IMAGIC() */
        uint64_t                ph_timestamp;   /* getnsecuptime() when the
                                                   page last became empty */
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
/*
 * True when POOL_MAGICBIT is set in the page's ph_magic.  Under DIAGNOSTIC,
 * pool_do_put() poisons freed items on such pages and pool_do_get()
 * verifies the poison when pool_debug is enabled.
 */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
  155 
  156 #ifdef MULTIPROCESSOR
/*
 * Link structure used by the pool cache layer (MULTIPROCESSOR kernels
 * only).  Items are chained into a list through ci_next, and whole lists
 * are chained together through ci_nextl.
 */
struct pool_cache_item {
        struct pool_cache_item  *ci_next;       /* next item in list */
        unsigned long            ci_nitems;     /* number of items in list */
        TAILQ_ENTRY(pool_cache_item)
                                 ci_nextl;      /* entry in list of lists */
};
  163 
/*
 * we store whether the cached item is poisoned in the high bit of nitems
 *
 * NOTE(review): with the values below the poison flag is bit 27
 * (0x8000000) and the item count occupies bits 0-26 (0x7ffffff); this is
 * not the topmost bit of an unsigned long.  Confirm whether that is
 * intentional before changing either constant.
 */
#define POOL_CACHE_ITEM_NITEMS_MASK     0x7ffffffUL
#define POOL_CACHE_ITEM_NITEMS_POISON   0x8000000UL

/* Item count stored in ci_nitems with the poison flag masked off. */
#define POOL_CACHE_ITEM_NITEMS(_ci)                                     \
    ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)

/* Non-zero when the poison flag is set in ci_nitems. */
#define POOL_CACHE_ITEM_POISONED(_ci)                                   \
    ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
  173 
/*
 * State of one pool cache (MULTIPROCESSOR kernels only): the active and
 * previous item lists plus statistics counters.
 * NOTE(review): presumably one instance exists per CPU (see
 * pool_cache_cpus_info()) -- confirm against the cache implementation.
 */
struct pool_cache {
        struct pool_cache_item  *pc_actv;       /* active list of items */
        unsigned long            pc_nactv;      /* actv head nitems cache */
        struct pool_cache_item  *pc_prev;       /* previous list of items */

        uint64_t                 pc_gen;        /* generation number */
        uint64_t                 pc_nget;       /* # of successful requests */
        uint64_t                 pc_nfail;      /* # of unsuccessful reqs */
        uint64_t                 pc_nput;       /* # of releases */
        uint64_t                 pc_nlget;      /* # of list requests */
        uint64_t                 pc_nlfail;     /* # of fails getting a list */
        uint64_t                 pc_nlput;      /* # of list releases */

        int                      pc_nout;
};
  189 
  190 void    *pool_cache_get(struct pool *);
  191 void     pool_cache_put(struct pool *, void *);
  192 void     pool_cache_destroy(struct pool *);
  193 void     pool_cache_gc(struct pool *);
  194 #endif
  195 void     pool_cache_pool_info(struct pool *, struct kinfo_pool *);
  196 int      pool_cache_info(struct pool *, void *, size_t *);
  197 int      pool_cache_cpus_info(struct pool *, void *, size_t *);
  198 
/*
 * Debug level for the pool code: defaults to 1 when built with POOL_DEBUG
 * and to 0 otherwise.  A non-zero value enables the extra DIAGNOSTIC
 * consistency checks in pool_do_get() and pool_do_put(); the value 2
 * additionally makes pool_get() yield() on every PR_WAITOK allocation.
 */
#ifdef POOL_DEBUG
int     pool_debug = 1;
#else
int     pool_debug = 0;
#endif
  204 
/*
 * True when the pool keeps its page headers inside the pages themselves,
 * i.e. at a non-zero offset pr_phoffset from the page start.
 */
#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
  206 
  207 struct pool_page_header *
  208          pool_p_alloc(struct pool *, int, int *);
  209 void     pool_p_insert(struct pool *, struct pool_page_header *);
  210 void     pool_p_remove(struct pool *, struct pool_page_header *);
  211 void     pool_p_free(struct pool *, struct pool_page_header *);
  212 
  213 void     pool_update_curpage(struct pool *);
  214 void    *pool_do_get(struct pool *, int, int *);
  215 void     pool_do_put(struct pool *, void *);
  216 int      pool_chk_page(struct pool *, struct pool_page_header *, int);
  217 int      pool_chk(struct pool *);
  218 void     pool_get_done(struct pool *, void *, void *);
  219 void     pool_runqueue(struct pool *, int);
  220 
  221 void    *pool_allocator_alloc(struct pool *, int, int *);
  222 void     pool_allocator_free(struct pool *, void *);
  223 
  224 /*
  225  * The default pool allocator.
  226  */
  227 void    *pool_page_alloc(struct pool *, int, int *);
  228 void    pool_page_free(struct pool *, void *);
  229 
/*
 * safe for interrupts; this is the default allocator
 *
 * pool_init() selects it when the caller passes no allocator and the
 * computed pool page size does not exceed PAGE_SIZE.
 */
struct pool_allocator pool_allocator_single = {
        pool_page_alloc,
        pool_page_free,
        POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
};
  238 
  239 void    *pool_multi_alloc(struct pool *, int, int *);
  240 void    pool_multi_free(struct pool *, void *);
  241 
/*
 * Allocator for pools whose page size exceeds PAGE_SIZE.  pool_init()
 * selects it when the caller passes no allocator and PR_WAITOK is not
 * set in the pool flags.
 */
struct pool_allocator pool_allocator_multi = {
        pool_multi_alloc,
        pool_multi_free,
        POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};
  247 
  248 void    *pool_multi_alloc_ni(struct pool *, int, int *);
  249 void    pool_multi_free_ni(struct pool *, void *);
  250 
/*
 * Allocator for pools whose page size exceeds PAGE_SIZE and that are
 * created with PR_WAITOK; pool_init() selects it when the caller passes
 * no allocator.
 * NOTE(review): "_ni" presumably stands for "not interrupt safe" --
 * confirm against pool_multi_alloc_ni().
 */
struct pool_allocator pool_allocator_multi_ni = {
        pool_multi_alloc_ni,
        pool_multi_free_ni,
        POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};
  256 
  257 #ifdef DDB
  258 void     pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
  259              __attribute__((__format__(__kprintf__,1,2))));
  260 void     pool_print1(struct pool *, const char *, int (*)(const char *, ...)
  261              __attribute__((__format__(__kprintf__,1,2))));
  262 #endif
  263 
/*
 * stale page garbage collectors
 *
 * pool_gc_tick is a timeout whose handler is pool_gc_sched();
 * pool_gc_task is a task whose handler is pool_gc_pages().
 */
void    pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void    pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);

/*
 * POOL_WAIT_FREE: how long (in nanoseconds) a page must have been empty
 * before pool_put() will free it.
 * POOL_WAIT_GC: NOTE(review): presumably the corresponding age threshold
 * used by the garbage collector -- confirm in pool_gc_pages().
 */
#define POOL_WAIT_FREE  SEC_TO_NSEC(1)
#define POOL_WAIT_GC    SEC_TO_NSEC(8)
  272 
  273 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
  274 
  275 static inline int
  276 phtree_compare(const struct pool_page_header *a,
  277     const struct pool_page_header *b)
  278 {
  279         vaddr_t va = (vaddr_t)a->ph_page;
  280         vaddr_t vb = (vaddr_t)b->ph_page;
  281 
  282         /* the compares in this order are important for the NFIND to work */
  283         if (vb < va)
  284                 return (-1);
  285         if (vb > va)
  286                 return (1);
  287 
  288         return (0);
  289 }
  290 
  291 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);
  292 
  293 /*
  294  * Return the pool page header based on page address.
  295  */
  296 static inline struct pool_page_header *
  297 pr_find_pagehead(struct pool *pp, void *v)
  298 {
  299         struct pool_page_header *ph, key;
  300 
  301         if (POOL_INPGHDR(pp)) {
  302                 caddr_t page;
  303 
  304                 page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
  305 
  306                 return ((struct pool_page_header *)(page + pp->pr_phoffset));
  307         }
  308 
  309         key.ph_page = v;
  310         ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
  311         if (ph == NULL)
  312                 panic("%s: %s: page header missing", __func__, pp->pr_wchan);
  313 
  314         KASSERT(ph->ph_page <= (caddr_t)v);
  315         if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
  316                 panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
  317 
  318         return (ph);
  319 }
  320 
/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 *
 * pp     - pool structure to initialize; it is zeroed first.
 * size   - item size; raised to at least sizeof(struct pool_item) and
 *          rounded up to a multiple of `align'.
 * align  - item alignment; 0 selects ALIGN(1).
 * ipl    - stored in pr_ipl.
 * flags  - PR_* flags stored in pr_flags; PR_RWLOCK selects the rwlock
 *          lock operations and requires PR_WAITOK.
 * wchan  - name stored in pr_wchan and used in panic messages.
 * palloc - page allocator, or NULL to pick one of the built-in
 *          allocators based on the computed page size and PR_WAITOK.
 *
 * The pool is appended to the global pool list and given a fresh serial
 * number.  The first call also initializes the private `phpool'.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
        int off = 0, space;
        unsigned int pgsize = PAGE_SIZE, items;
        size_t pa_pagesz;
#ifdef DIAGNOSTIC
        struct pool *iter;
#endif

        if (align == 0)
                align = ALIGN(1);

        /* a free item must be able to hold its free-list bookkeeping */
        if (size < sizeof(struct pool_item))
                size = sizeof(struct pool_item);

        size = roundup(size, align);

        /* grow the page size until at least 8 items fit on a page */
        while (size * 8 > pgsize)
                pgsize <<= 1;

        if (palloc == NULL) {
                /* pick a built-in allocator matching the page size */
                if (pgsize > PAGE_SIZE) {
                        palloc = ISSET(flags, PR_WAITOK) ?
                            &pool_allocator_multi_ni : &pool_allocator_multi;
                } else
                        palloc = &pool_allocator_single;

                pa_pagesz = palloc->pa_pagesz;
        } else {
                size_t pgsizes;

                pa_pagesz = palloc->pa_pagesz;
                if (pa_pagesz == 0)
                        pa_pagesz = POOL_ALLOC_DEFAULT;

                /* supported page sizes without the alignment flag bit */
                pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;

                /* make sure the allocator can fit at least one item */
                if (size > pgsizes) {
                        panic("%s: pool %s item size 0x%zx > "
                            "allocator %p sizes 0x%zx", __func__, wchan,
                            size, palloc, pgsizes);
                }

                /* shrink pgsize until it fits into the range */
                while (!ISSET(pgsizes, pgsize))
                        pgsize >>= 1;
        }
        KASSERT(ISSET(pa_pagesz, pgsize));

        items = pgsize / size;

        /*
         * Decide whether to put the page header off page to avoid
         * wasting too large a part of the page. Off-page page headers
         * go into an RB tree, so we can match a returned item with
         * its header based on the page address.
         *
         * `off' stays 0 (header off page) unless the allocator hands out
         * aligned pages and either the slack after the items already has
         * room for the header, or the items are small enough (at most
         * twice the header size) that giving up item space is acceptable.
         */
        if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
                if (pgsize - (size * items) >
                    sizeof(struct pool_page_header)) {
                        off = pgsize - sizeof(struct pool_page_header);
                } else if (sizeof(struct pool_page_header) * 2 >= size) {
                        off = pgsize - sizeof(struct pool_page_header);
                        items = off / size;
                }
        }

        KASSERT(items > 0);

        /*
         * Initialize the pool structure.
         */
        memset(pp, 0, sizeof(*pp));
        if (ISSET(flags, PR_RWLOCK)) {
                KASSERT(flags & PR_WAITOK);
                pp->pr_lock_ops = &pool_lock_ops_rw;
        } else
                pp->pr_lock_ops = &pool_lock_ops_mtx;
        TAILQ_INIT(&pp->pr_emptypages);
        TAILQ_INIT(&pp->pr_fullpages);
        TAILQ_INIT(&pp->pr_partpages);
        pp->pr_curpage = NULL;
        pp->pr_npages = 0;
        pp->pr_minitems = 0;
        pp->pr_minpages = 0;
        pp->pr_maxpages = 8;
        pp->pr_size = size;
        pp->pr_pgsize = pgsize;
        pp->pr_pgmask = ~0UL ^ (pgsize - 1);
        pp->pr_phoffset = off;
        pp->pr_itemsperpage = items;
        pp->pr_wchan = wchan;
        pp->pr_alloc = palloc;
        pp->pr_nitems = 0;
        pp->pr_nout = 0;
        pp->pr_hardlimit = UINT_MAX;
        pp->pr_hardlimit_warning = NULL;
        pp->pr_hardlimit_ratecap.tv_sec = 0;
        pp->pr_hardlimit_ratecap.tv_usec = 0;
        pp->pr_hardlimit_warning_last.tv_sec = 0;
        pp->pr_hardlimit_warning_last.tv_usec = 0;
        RBT_INIT(phtree, &pp->pr_phtree);

        /*
         * Use the space between the chunks and the page header
         * for cache coloring.
         */
        space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
        space -= pp->pr_itemsperpage * pp->pr_size;
        pp->pr_align = align;
        pp->pr_maxcolors = (space / align) + 1;

        pp->pr_nget = 0;
        pp->pr_nfail = 0;
        pp->pr_nput = 0;
        pp->pr_npagealloc = 0;
        pp->pr_npagefree = 0;
        pp->pr_hiwat = 0;
        pp->pr_nidle = 0;

        pp->pr_ipl = ipl;
        pp->pr_flags = flags;

        pl_init(pp, &pp->pr_lock);
        pl_init(pp, &pp->pr_requests_lock);
        TAILQ_INIT(&pp->pr_requests);

        /*
         * Bootstrap the page header pool on first use.  This recurses
         * into pool_init() once; the nested call finds pr_size already
         * set on return and the check below is not re-entered for phpool
         * because its own header placement must be in-page.
         */
        if (phpool.pr_size == 0) {
                pool_init(&phpool, sizeof(struct pool_page_header), 0,
                    IPL_HIGH, 0, "phpool", NULL);

                /* make sure phpool won't "recurse" */
                KASSERT(POOL_INPGHDR(&phpool));
        }

        /* pglistalloc/constraint parameters */
        pp->pr_crange = &kp_dirty;

        /* Insert this into the list of all pools. */
        rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
        /* initializing a pool that is already listed is a caller bug */
        SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
                if (iter == pp)
                        panic("%s: pool %s already on list", __func__, wchan);
        }
#endif

        /* serial 0 is never handed out; reaching it means we wrapped */
        pp->pr_serial = ++pool_serial;
        if (pool_serial == 0)
                panic("%s: too much uptime", __func__);

        SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
        pool_count++;
        rw_exit_write(&pool_lock);
}
  485 
  486 /*
  487  * Decommission a pool resource.
  488  */
  489 void
  490 pool_destroy(struct pool *pp)
  491 {
  492         struct pool_page_header *ph;
  493         struct pool *prev, *iter;
  494 
  495 #ifdef MULTIPROCESSOR
  496         if (pp->pr_cache != NULL)
  497                 pool_cache_destroy(pp);
  498 #endif
  499 
  500 #ifdef DIAGNOSTIC
  501         if (pp->pr_nout != 0)
  502                 panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
  503 #endif
  504 
  505         /* Remove from global pool list */
  506         rw_enter_write(&pool_lock);
  507         pool_count--;
  508         if (pp == SIMPLEQ_FIRST(&pool_head))
  509                 SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
  510         else {
  511                 prev = SIMPLEQ_FIRST(&pool_head);
  512                 SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
  513                         if (iter == pp) {
  514                                 SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
  515                                     pr_poollist);
  516                                 break;
  517                         }
  518                         prev = iter;
  519                 }
  520         }
  521         rw_exit_write(&pool_lock);
  522 
  523         /* Remove all pages */
  524         while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
  525                 pl_enter(pp, &pp->pr_lock);
  526                 pool_p_remove(pp, ph);
  527                 pl_leave(pp, &pp->pr_lock);
  528                 pool_p_free(pp, ph);
  529         }
  530         KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
  531         KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
  532 }
  533 
  534 void
  535 pool_request_init(struct pool_request *pr,
  536     void (*handler)(struct pool *, void *, void *), void *cookie)
  537 {
  538         pr->pr_handler = handler;
  539         pr->pr_cookie = cookie;
  540         pr->pr_item = NULL;
  541 }
  542 
/*
 * Queue request `pr' on pool `pp' and immediately try to service the
 * request queue without sleeping (PR_NOWAIT).  If the request cannot be
 * satisfied now it stays queued; pool_put() retries via pool_wakeup().
 */
void
pool_request(struct pool *pp, struct pool_request *pr)
{
        /* pool_runqueue() must be entered with the requests lock held */
        pl_enter(pp, &pp->pr_requests_lock);
        TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
        pool_runqueue(pp, PR_NOWAIT);
        pl_leave(pp, &pp->pr_requests_lock);
}
  551 
/*
 * Rendezvous state for a pool_get() caller sleeping on a queued request:
 * pool_get_done() stores the item in `v' under `lock' and wakes the
 * sleeper, which waits on the address of this structure.
 */
struct pool_get_memory {
        union pool_lock lock;
        void * volatile v;
};
  556 
/*
 * Grab an item from the pool.
 *
 * `flags' must contain PR_WAITOK or PR_NOWAIT (pools using PR_RWLOCK
 * require PR_WAITOK).  PR_ZERO zeroes the returned item; PR_LIMITFAIL
 * makes the call fail instead of waiting when the hard limit is reached.
 *
 * Returns the item, or NULL on failure, in which case pr_nfail is
 * incremented.  When no item is immediately available and the flags do
 * not request failure, a pool request is queued and the caller sleeps
 * until pool_get_done() delivers an item.
 */
void *
pool_get(struct pool *pp, int flags)
{
        void *v = NULL;
        int slowdown = 0;

        KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
        if (pp->pr_flags & PR_RWLOCK)
                KASSERT(flags & PR_WAITOK);

#ifdef MULTIPROCESSOR
        /* fast path: try the pool cache before taking the pool lock */
        if (pp->pr_cache != NULL) {
                v = pool_cache_get(pp);
                if (v != NULL)
                        goto good;
        }
#endif

        pl_enter(pp, &pp->pr_lock);
        if (pp->pr_nout >= pp->pr_hardlimit) {
                /* at the hard limit: fail now or fall through to wait */
                if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
                        goto fail;
        } else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
                if (ISSET(flags, PR_NOWAIT))
                        goto fail;
        }
        pl_leave(pp, &pp->pr_lock);

        if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
                yield();

        if (v == NULL) {
                /* slow path: queue a request and sleep until it is served */
                struct pool_get_memory mem = { .v = NULL };
                struct pool_request pr;

#ifdef DIAGNOSTIC
                if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
                        panic("%s: cannot sleep for memory during boot",
                            __func__);
#endif
                pl_init(pp, &mem.lock);
                pool_request_init(&pr, pool_get_done, &mem);
                pool_request(pp, &pr);

                /* mem.v is written by pool_get_done() under mem.lock */
                pl_enter(pp, &mem.lock);
                while (mem.v == NULL)
                        pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan);
                pl_leave(pp, &mem.lock);

                v = mem.v;
        }

#ifdef MULTIPROCESSOR
good:
#endif
        if (ISSET(flags, PR_ZERO))
                memset(v, 0, pp->pr_size);

        TRACEPOINT(uvm, pool_get, pp, v, flags);

        return (v);

fail:
        /* reached with pr_lock still held */
        pp->pr_nfail++;
        pl_leave(pp, &pp->pr_lock);
        return (NULL);
}
  627 
/*
 * Request handler used by pool_get(): hand item `v' to the sleeping
 * caller.  `xmem' is the caller's struct pool_get_memory; the item is
 * published under its lock and then one sleeper waiting on that structure
 * is woken.
 */
void
pool_get_done(struct pool *pp, void *xmem, void *v)
{
        struct pool_get_memory *mem = xmem;

        pl_enter(pp, &mem->lock);
        mem->v = v;
        pl_leave(pp, &mem->lock);

        wakeup_one(mem);
}
  639 
/*
 * Service the pool's queue of pending requests.
 *
 * Must be called with pr_requests_lock held and pr_lock not held; the
 * requests lock is dropped while items are allocated and handlers run,
 * and is held again on return.  `flags' is passed on to pool_do_get().
 *
 * pr_requesting serializes runners: a caller that finds it non-zero just
 * bumps it and returns, which makes the active runner go around its loop
 * once more.  Requests that could not be satisfied are put back on
 * pr_requests before returning.
 */
void
pool_runqueue(struct pool *pp, int flags)
{
        struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
        struct pool_request *pr;

        pl_assert_unlocked(pp, &pp->pr_lock);
        pl_assert_locked(pp, &pp->pr_requests_lock);

        /* somebody else is already running the queue; tell them to rerun */
        if (pp->pr_requesting++)
                return;

        do {
                /* collapse any rerun requests noted so far into one pass */
                pp->pr_requesting = 1;

                /* move all pending requests onto the private list */
                TAILQ_CONCAT(&prl, &pp->pr_requests, pr_entry);
                if (TAILQ_EMPTY(&prl))
                        continue;

                pl_leave(pp, &pp->pr_requests_lock);

                /* allocate items for as many requests as possible */
                pl_enter(pp, &pp->pr_lock);
                pr = TAILQ_FIRST(&prl);
                while (pr != NULL) {
                        int slowdown = 0;

                        if (pp->pr_nout >= pp->pr_hardlimit)
                                break;

                        pr->pr_item = pool_do_get(pp, flags, &slowdown);
                        if (pr->pr_item == NULL) /* || slowdown ? */
                                break;

                        pr = TAILQ_NEXT(pr, pr_entry);
                }
                pl_leave(pp, &pp->pr_lock);

                /* deliver satisfied requests, in order, with no locks held */
                while ((pr = TAILQ_FIRST(&prl)) != NULL &&
                    pr->pr_item != NULL) {
                        TAILQ_REMOVE(&prl, pr, pr_entry);
                        (*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
                }

                pl_enter(pp, &pp->pr_requests_lock);
        } while (--pp->pr_requesting);

        /* requeue whatever is still unsatisfied */
        TAILQ_CONCAT(&pp->pr_requests, &prl, pr_entry);
}
  688 
/*
 * Take one item off the pool's current page.
 *
 * Called with pr_lock held.  If there is no current page the lock is
 * dropped while a new page is allocated with pool_p_alloc() (using
 * `flags' and `slowdown') and reacquired afterwards.  Returns the item,
 * or NULL if no page could be allocated.  Panics if the item's free-list
 * magic (or, with DIAGNOSTIC and pool_debug, its poison pattern) shows the
 * free item was modified.
 */
void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
        struct pool_item *pi;
        struct pool_page_header *ph;

        pl_assert_locked(pp, &pp->pr_lock);

        splassert(pp->pr_ipl);

        /*
         * Account for this item now to avoid races if we need to give up
         * pr_lock to allocate a page.
         */
        pp->pr_nout++;

        if (pp->pr_curpage == NULL) {
                pl_leave(pp, &pp->pr_lock);
                ph = pool_p_alloc(pp, flags, slowdown);
                pl_enter(pp, &pp->pr_lock);

                if (ph == NULL) {
                        /* undo the optimistic accounting above */
                        pp->pr_nout--;
                        return (NULL);
                }

                pool_p_insert(pp, ph);
        }

        ph = pp->pr_curpage;
        pi = XSIMPLEQ_FIRST(&ph->ph_items);
        if (__predict_false(pi == NULL))
                panic("%s: %s: page empty", __func__, pp->pr_wchan);

        /* the magic was stored by pool_do_put(); a mismatch means corruption */
        if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
                panic("%s: %s free list modified: "
                    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
                    __func__, pp->pr_wchan, ph->ph_page, pi,
                    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
        }

        XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);

#ifdef DIAGNOSTIC
        /* verify the poison written past the item header on free */
        if (pool_debug && POOL_PHPOISON(ph)) {
                size_t pidx;
                uint32_t pval;
                if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
                    &pidx, &pval)) {
                        int *ip = (int *)(pi + 1);
                        panic("%s: %s free list modified: "
                            "page %p; item addr %p; offset 0x%zx=0x%x",
                            __func__, pp->pr_wchan, ph->ph_page, pi,
                            (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
                }
        }
#endif /* DIAGNOSTIC */

        if (ph->ph_nmissing++ == 0) {
                /*
                 * This page was previously empty.  Move it to the list of
                 * partially-full pages.  This page is already curpage.
                 */
                TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
                TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);

                pp->pr_nidle--;
        }

        if (ph->ph_nmissing == pp->pr_itemsperpage) {
                /*
                 * This page is now full.  Move it to the full list
                 * and select a new current page.
                 */
                TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
                TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
                pool_update_curpage(pp);
        }

        pp->pr_nget++;

        return (pi);
}
  772 
/*
 * Return resource to the pool.
 *
 * `v' must be an item previously obtained from `pp' (DIAGNOSTIC kernels
 * panic on NULL).  On MULTIPROCESSOR kernels the item goes to the pool
 * cache when one exists and no requests are pending.  Otherwise it is
 * put back on its page, at most one idle page is released if the pool
 * holds more idle pages than pr_maxpages, and pending requests are
 * serviced.
 */
void
pool_put(struct pool *pp, void *v)
{
        struct pool_page_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
        if (v == NULL)
                panic("%s: NULL item", __func__);
#endif

        TRACEPOINT(uvm, pool_put, pp, v);

#ifdef MULTIPROCESSOR
        /* bypass the cache while requests wait, so they get served below */
        if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
                pool_cache_put(pp, v);
                return;
        }
#endif

        pl_enter(pp, &pp->pr_lock);

        pool_do_put(pp, v);

        pp->pr_nout--;
        pp->pr_nput++;

        /*
         * is it time to free a page?
         * Only the first empty page is considered, and only once it has
         * been empty for longer than POOL_WAIT_FREE.
         */
        if (pp->pr_nidle > pp->pr_maxpages &&
            (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
            getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) {
                freeph = ph;
                pool_p_remove(pp, freeph);
        }

        pl_leave(pp, &pp->pr_lock);

        /* the page itself is released after dropping pr_lock */
        if (freeph != NULL)
                pool_p_free(pp, freeph);

        pool_wakeup(pp);
}
  817 
  818 void
  819 pool_wakeup(struct pool *pp)
  820 {
  821         if (!TAILQ_EMPTY(&pp->pr_requests)) {
  822                 pl_enter(pp, &pp->pr_requests_lock);
  823                 pool_runqueue(pp, PR_NOWAIT);
  824                 pl_leave(pp, &pp->pr_requests_lock);
  825         }
  826 }
  827 
  828 void
  829 pool_do_put(struct pool *pp, void *v)
  830 {
  831         struct pool_item *pi = v;
  832         struct pool_page_header *ph;
  833 
  834         splassert(pp->pr_ipl);
  835 
  836         ph = pr_find_pagehead(pp, v);
  837 
  838 #ifdef DIAGNOSTIC
  839         if (pool_debug) {
  840                 struct pool_item *qi;
  841                 XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
  842                         if (pi == qi) {
  843                                 panic("%s: %s: double pool_put: %p", __func__,
  844                                     pp->pr_wchan, pi);
  845                         }
  846                 }
  847         }
  848 #endif /* DIAGNOSTIC */
  849 
  850         pi->pi_magic = POOL_IMAGIC(ph, pi);
  851         XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
  852 #ifdef DIAGNOSTIC
  853         if (POOL_PHPOISON(ph))
  854                 poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
  855 #endif /* DIAGNOSTIC */
  856 
  857         if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
  858                 /*
  859                  * The page was previously completely full, move it to the
  860                  * partially-full list.
  861                  */
  862                 TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
  863                 TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
  864         }
  865 
  866         if (ph->ph_nmissing == 0) {
  867                 /*
  868                  * The page is now empty, so move it to the empty page list.
  869                  */
  870                 pp->pr_nidle++;
  871 
  872                 ph->ph_timestamp = getnsecuptime();
  873                 TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
  874                 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
  875                 pool_update_curpage(pp);
  876         }
  877 }
  878 
  879 /*
  880  * Add N items to the pool.
  881  */
  882 int
  883 pool_prime(struct pool *pp, int n)
  884 {
  885         struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
  886         struct pool_page_header *ph;
  887         int newpages;
  888 
  889         newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
  890 
  891         while (newpages-- > 0) {
  892                 int slowdown = 0;
  893 
  894                 ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
  895                 if (ph == NULL) /* or slowdown? */
  896                         break;
  897 
  898                 TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
  899         }
  900 
  901         pl_enter(pp, &pp->pr_lock);
  902         while ((ph = TAILQ_FIRST(&pl)) != NULL) {
  903                 TAILQ_REMOVE(&pl, ph, ph_entry);
  904                 pool_p_insert(pp, ph);
  905         }
  906         pl_leave(pp, &pp->pr_lock);
  907 
  908         return (0);
  909 }
  910 
  911 struct pool_page_header *
  912 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
  913 {
  914         struct pool_page_header *ph;
  915         struct pool_item *pi;
  916         caddr_t addr;
  917         unsigned int order;
  918         int o;
  919         int n;
  920 
  921         pl_assert_unlocked(pp, &pp->pr_lock);
  922         KASSERT(pp->pr_size >= sizeof(*pi));
  923 
  924         addr = pool_allocator_alloc(pp, flags, slowdown);
  925         if (addr == NULL)
  926                 return (NULL);
  927 
  928         if (POOL_INPGHDR(pp))
  929                 ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
  930         else {
  931                 ph = pool_get(&phpool, flags);
  932                 if (ph == NULL) {
  933                         pool_allocator_free(pp, addr);
  934                         return (NULL);
  935                 }
  936         }
  937 
  938         XSIMPLEQ_INIT(&ph->ph_items);
  939         ph->ph_page = addr;
  940         addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
  941         ph->ph_colored = addr;
  942         ph->ph_nmissing = 0;
  943         arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
  944 #ifdef DIAGNOSTIC
  945         /* use a bit in ph_magic to record if we poison page items */
  946         if (pool_debug)
  947                 SET(ph->ph_magic, POOL_MAGICBIT);
  948         else
  949                 CLR(ph->ph_magic, POOL_MAGICBIT);
  950 #endif /* DIAGNOSTIC */
  951 
  952         n = pp->pr_itemsperpage;
  953         o = 32;
  954         while (n--) {
  955                 pi = (struct pool_item *)addr;
  956                 pi->pi_magic = POOL_IMAGIC(ph, pi);
  957 
  958                 if (o == 32) {
  959                         order = arc4random();
  960                         o = 0;
  961                 }
  962                 if (ISSET(order, 1U << o++))
  963                         XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
  964                 else
  965                         XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
  966 
  967 #ifdef DIAGNOSTIC
  968                 if (POOL_PHPOISON(ph))
  969                         poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
  970 #endif /* DIAGNOSTIC */
  971 
  972                 addr += pp->pr_size;
  973         }
  974 
  975         return (ph);
  976 }
  977 
  978 void
  979 pool_p_free(struct pool *pp, struct pool_page_header *ph)
  980 {
  981         struct pool_item *pi;
  982 
  983         pl_assert_unlocked(pp, &pp->pr_lock);
  984         KASSERT(ph->ph_nmissing == 0);
  985 
  986         XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
  987                 if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
  988                         panic("%s: %s free list modified: "
  989                             "page %p; item addr %p; offset 0x%x=0x%lx",
  990                             __func__, pp->pr_wchan, ph->ph_page, pi,
  991                             0, pi->pi_magic);
  992                 }
  993 
  994 #ifdef DIAGNOSTIC
  995                 if (POOL_PHPOISON(ph)) {
  996                         size_t pidx;
  997                         uint32_t pval;
  998                         if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
  999                             &pidx, &pval)) {
 1000                                 int *ip = (int *)(pi + 1);
 1001                                 panic("%s: %s free list modified: "
 1002                                     "page %p; item addr %p; offset 0x%zx=0x%x",
 1003                                     __func__, pp->pr_wchan, ph->ph_page, pi,
 1004                                     pidx * sizeof(int), ip[pidx]);
 1005                         }
 1006                 }
 1007 #endif
 1008         }
 1009 
 1010         pool_allocator_free(pp, ph->ph_page);
 1011 
 1012         if (!POOL_INPGHDR(pp))
 1013                 pool_put(&phpool, ph);
 1014 }
 1015 
 1016 void
 1017 pool_p_insert(struct pool *pp, struct pool_page_header *ph)
 1018 {
 1019         pl_assert_locked(pp, &pp->pr_lock);
 1020 
 1021         /* If the pool was depleted, point at the new page */
 1022         if (pp->pr_curpage == NULL)
 1023                 pp->pr_curpage = ph;
 1024 
 1025         TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
 1026         if (!POOL_INPGHDR(pp))
 1027                 RBT_INSERT(phtree, &pp->pr_phtree, ph);
 1028 
 1029         pp->pr_nitems += pp->pr_itemsperpage;
 1030         pp->pr_nidle++;
 1031 
 1032         pp->pr_npagealloc++;
 1033         if (++pp->pr_npages > pp->pr_hiwat)
 1034                 pp->pr_hiwat = pp->pr_npages;
 1035 }
 1036 
 1037 void
 1038 pool_p_remove(struct pool *pp, struct pool_page_header *ph)
 1039 {
 1040         pl_assert_locked(pp, &pp->pr_lock);
 1041 
 1042         pp->pr_npagefree++;
 1043         pp->pr_npages--;
 1044         pp->pr_nidle--;
 1045         pp->pr_nitems -= pp->pr_itemsperpage;
 1046 
 1047         if (!POOL_INPGHDR(pp))
 1048                 RBT_REMOVE(phtree, &pp->pr_phtree, ph);
 1049         TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
 1050 
 1051         pool_update_curpage(pp);
 1052 }
 1053 
 1054 void
 1055 pool_update_curpage(struct pool *pp)
 1056 {
 1057         pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
 1058         if (pp->pr_curpage == NULL) {
 1059                 pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
 1060         }
 1061 }
 1062 
 1063 void
 1064 pool_setlowat(struct pool *pp, int n)
 1065 {
 1066         int prime = 0;
 1067 
 1068         pl_enter(pp, &pp->pr_lock);
 1069         pp->pr_minitems = n;
 1070         pp->pr_minpages = (n == 0)
 1071                 ? 0
 1072                 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
 1073 
 1074         if (pp->pr_nitems < n)
 1075                 prime = n - pp->pr_nitems;
 1076         pl_leave(pp, &pp->pr_lock);
 1077 
 1078         if (prime > 0)
 1079                 pool_prime(pp, prime);
 1080 }
 1081 
 1082 void
 1083 pool_sethiwat(struct pool *pp, int n)
 1084 {
 1085         pp->pr_maxpages = (n == 0)
 1086                 ? 0
 1087                 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
 1088 }
 1089 
 1090 int
 1091 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
 1092 {
 1093         int error = 0;
 1094 
 1095         if (n < pp->pr_nout) {
 1096                 error = EINVAL;
 1097                 goto done;
 1098         }
 1099 
 1100         pp->pr_hardlimit = n;
 1101         pp->pr_hardlimit_warning = warnmsg;
 1102         pp->pr_hardlimit_ratecap.tv_sec = ratecap;
 1103         pp->pr_hardlimit_warning_last.tv_sec = 0;
 1104         pp->pr_hardlimit_warning_last.tv_usec = 0;
 1105 
 1106 done:
 1107         return (error);
 1108 }
 1109 
 1110 void
 1111 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
 1112 {
 1113         pp->pr_crange = mode;
 1114 }
 1115 
 1116 /*
 1117  * Release all complete pages that have not been used recently.
 1118  *
 1119  * Returns non-zero if any pages have been reclaimed.
 1120  */
 1121 int
 1122 pool_reclaim(struct pool *pp)
 1123 {
 1124         struct pool_page_header *ph, *phnext;
 1125         struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
 1126 
 1127         pl_enter(pp, &pp->pr_lock);
 1128         for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
 1129                 phnext = TAILQ_NEXT(ph, ph_entry);
 1130 
 1131                 /* Check our minimum page claim */
 1132                 if (pp->pr_npages <= pp->pr_minpages)
 1133                         break;
 1134 
 1135                 /*
 1136                  * If freeing this page would put us below
 1137                  * the low water mark, stop now.
 1138                  */
 1139                 if ((pp->pr_nitems - pp->pr_itemsperpage) <
 1140                     pp->pr_minitems)
 1141                         break;
 1142 
 1143                 pool_p_remove(pp, ph);
 1144                 TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
 1145         }
 1146         pl_leave(pp, &pp->pr_lock);
 1147 
 1148         if (TAILQ_EMPTY(&pl))
 1149                 return (0);
 1150 
 1151         while ((ph = TAILQ_FIRST(&pl)) != NULL) {
 1152                 TAILQ_REMOVE(&pl, ph, ph_entry);
 1153                 pool_p_free(pp, ph);
 1154         }
 1155 
 1156         return (1);
 1157 }
 1158 
 1159 /*
 1160  * Release all complete pages that have not been used recently
 1161  * from all pools.
 1162  */
 1163 void
 1164 pool_reclaim_all(void)
 1165 {
 1166         struct pool     *pp;
 1167 
 1168         rw_enter_read(&pool_lock);
 1169         SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
 1170                 pool_reclaim(pp);
 1171         rw_exit_read(&pool_lock);
 1172 }
 1173 
 1174 #ifdef DDB
 1175 #include <machine/db_machdep.h>
 1176 #include <ddb/db_output.h>
 1177 
 1178 /*
 1179  * Diagnostic helpers.
 1180  */
 1181 void
 1182 pool_printit(struct pool *pp, const char *modif,
 1183     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
 1184 {
 1185         pool_print1(pp, modif, pr);
 1186 }
 1187 
 1188 void
 1189 pool_print_pagelist(struct pool_pagelist *pl,
 1190     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
 1191 {
 1192         struct pool_page_header *ph;
 1193         struct pool_item *pi;
 1194 
 1195         TAILQ_FOREACH(ph, pl, ph_entry) {
 1196                 (*pr)("\t\tpage %p, color %p, nmissing %d\n",
 1197                     ph->ph_page, ph->ph_colored, ph->ph_nmissing);
 1198                 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
 1199                         if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
 1200                                 (*pr)("\t\t\titem %p, magic 0x%lx\n",
 1201                                     pi, pi->pi_magic);
 1202                         }
 1203                 }
 1204         }
 1205 }
 1206 
 1207 void
 1208 pool_print1(struct pool *pp, const char *modif,
 1209     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
 1210 {
 1211         struct pool_page_header *ph;
 1212         int print_pagelist = 0;
 1213         char c;
 1214 
 1215         while ((c = *modif++) != '\0') {
 1216                 if (c == 'p')
 1217                         print_pagelist = 1;
 1218                 modif++;
 1219         }
 1220 
 1221         (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
 1222             pp->pr_maxcolors);
 1223         (*pr)("\talloc %p\n", pp->pr_alloc);
 1224         (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
 1225             pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
 1226         (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
 1227             pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
 1228 
 1229         (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
 1230             pp->pr_nget, pp->pr_nfail, pp->pr_nput);
 1231         (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
 1232             pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
 1233 
 1234         if (print_pagelist == 0)
 1235                 return;
 1236 
 1237         if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
 1238                 (*pr)("\n\tempty page list:\n");
 1239         pool_print_pagelist(&pp->pr_emptypages, pr);
 1240         if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
 1241                 (*pr)("\n\tfull page list:\n");
 1242         pool_print_pagelist(&pp->pr_fullpages, pr);
 1243         if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
 1244                 (*pr)("\n\tpartial-page list:\n");
 1245         pool_print_pagelist(&pp->pr_partpages, pr);
 1246 
 1247         if (pp->pr_curpage == NULL)
 1248                 (*pr)("\tno current page\n");
 1249         else
 1250                 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
 1251 }
 1252 
 1253 void
 1254 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
 1255 {
 1256         struct pool *pp;
 1257         char maxp[16];
 1258         int ovflw;
 1259         char mode;
 1260 
 1261         mode = modif[0];
 1262         if (mode != '\0' && mode != 'a') {
 1263                 db_printf("usage: show all pools [/a]\n");
 1264                 return;
 1265         }
 1266 
 1267         if (mode == '\0')
 1268                 db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
 1269                     "Name",
 1270                     "Size",
 1271                     "Requests",
 1272                     "Fail",
 1273                     "Releases",
 1274                     "Pgreq",
 1275                     "Pgrel",
 1276                     "Npage",
 1277                     "Hiwat",
 1278                     "Minpg",
 1279                     "Maxpg",
 1280                     "Idle");
 1281         else
 1282                 db_printf("%-12s %18s %18s\n",
 1283                     "Name", "Address", "Allocator");
 1284 
 1285         SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
 1286                 if (mode == 'a') {
 1287                         db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
 1288                             pp->pr_alloc);
 1289                         continue;
 1290                 }
 1291 
 1292                 if (!pp->pr_nget)
 1293                         continue;
 1294 
 1295                 if (pp->pr_maxpages == UINT_MAX)
 1296                         snprintf(maxp, sizeof maxp, "inf");
 1297                 else
 1298                         snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
 1299 
 1300 #define PRWORD(ovflw, fmt, width, fixed, val) do {      \
 1301         (ovflw) += db_printf((fmt),                     \
 1302             (width) - (fixed) - (ovflw) > 0 ?           \
 1303             (width) - (fixed) - (ovflw) : 0,            \
 1304             (val)) - (width);                           \
 1305         if ((ovflw) < 0)                                \
 1306                 (ovflw) = 0;                            \
 1307 } while (/* CONSTCOND */0)
 1308 
 1309                 ovflw = 0;
 1310                 PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
 1311                 PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
 1312                 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
 1313                 PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
 1314                 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
 1315                 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
 1316                 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
 1317                 PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
 1318                 PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
 1319                 PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
 1320                 PRWORD(ovflw, " %*s", 6, 1, maxp);
 1321                 PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
 1322 
 1323                 pool_chk(pp);
 1324         }
 1325 }
 1326 #endif /* DDB */
 1327 
 1328 #if defined(POOL_DEBUG) || defined(DDB)
 1329 int
 1330 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
 1331 {
 1332         struct pool_item *pi;
 1333         caddr_t page;
 1334         int n;
 1335         const char *label = pp->pr_wchan;
 1336 
 1337         page = (caddr_t)((u_long)ph & pp->pr_pgmask);
 1338         if (page != ph->ph_page && POOL_INPGHDR(pp)) {
 1339                 printf("%s: ", label);
 1340                 printf("pool(%p:%s): page inconsistency: page %p; "
 1341                     "at page head addr %p (p %p)\n",
 1342                     pp, pp->pr_wchan, ph->ph_page, ph, page);
 1343                 return 1;
 1344         }
 1345 
 1346         for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
 1347              pi != NULL;
 1348              pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
 1349                 if ((caddr_t)pi < ph->ph_page ||
 1350                     (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
 1351                         printf("%s: ", label);
 1352                         printf("pool(%p:%s): page inconsistency: page %p;"
 1353                             " item ordinal %d; addr %p\n", pp,
 1354                             pp->pr_wchan, ph->ph_page, n, pi);
 1355                         return (1);
 1356                 }
 1357 
 1358                 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
 1359                         printf("%s: ", label);
 1360                         printf("pool(%p:%s): free list modified: "
 1361                             "page %p; item ordinal %d; addr %p "
 1362                             "(p %p); offset 0x%x=0x%lx\n",
 1363                             pp, pp->pr_wchan, ph->ph_page, n, pi, page,
 1364                             0, pi->pi_magic);
 1365                 }
 1366 
 1367 #ifdef DIAGNOSTIC
 1368                 if (POOL_PHPOISON(ph)) {
 1369                         size_t pidx;
 1370                         uint32_t pval;
 1371                         if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
 1372                             &pidx, &pval)) {
 1373                                 int *ip = (int *)(pi + 1);
 1374                                 printf("pool(%s): free list modified: "
 1375                                     "page %p; item ordinal %d; addr %p "
 1376                                     "(p %p); offset 0x%zx=0x%x\n",
 1377                                     pp->pr_wchan, ph->ph_page, n, pi,
 1378                                     page, pidx * sizeof(int), ip[pidx]);
 1379                         }
 1380                 }
 1381 #endif /* DIAGNOSTIC */
 1382         }
 1383         if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
 1384                 printf("pool(%p:%s): page inconsistency: page %p;"
 1385                     " %d on list, %d missing, %d items per page\n", pp,
 1386                     pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
 1387                     pp->pr_itemsperpage);
 1388                 return 1;
 1389         }
 1390         if (expected >= 0 && n != expected) {
 1391                 printf("pool(%p:%s): page inconsistency: page %p;"
 1392                     " %d on list, %d missing, %d expected\n", pp,
 1393                     pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
 1394                     expected);
 1395                 return 1;
 1396         }
 1397         return 0;
 1398 }
 1399 
 1400 int
 1401 pool_chk(struct pool *pp)
 1402 {
 1403         struct pool_page_header *ph;
 1404         int r = 0;
 1405 
 1406         TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
 1407                 r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
 1408         TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
 1409                 r += pool_chk_page(pp, ph, 0);
 1410         TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
 1411                 r += pool_chk_page(pp, ph, -1);
 1412 
 1413         return (r);
 1414 }
 1415 #endif /* defined(POOL_DEBUG) || defined(DDB) */
 1416 
 1417 #ifdef DDB
 1418 void
 1419 pool_walk(struct pool *pp, int full,
 1420     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
 1421     void (*func)(void *, int, int (*)(const char *, ...)
 1422             __attribute__((__format__(__kprintf__,1,2)))))
 1423 {
 1424         struct pool_page_header *ph;
 1425         struct pool_item *pi;
 1426         caddr_t cp;
 1427         int n;
 1428 
 1429         TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
 1430                 cp = ph->ph_colored;
 1431                 n = ph->ph_nmissing;
 1432 
 1433                 while (n--) {
 1434                         func(cp, full, pr);
 1435                         cp += pp->pr_size;
 1436                 }
 1437         }
 1438 
 1439         TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
 1440                 cp = ph->ph_colored;
 1441                 n = ph->ph_nmissing;
 1442 
 1443                 do {
 1444                         XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
 1445                                 if (cp == (caddr_t)pi)
 1446                                         break;
 1447                         }
 1448                         if (cp != (caddr_t)pi) {
 1449                                 func(cp, full, pr);
 1450                                 n--;
 1451                         }
 1452 
 1453                         cp += pp->pr_size;
 1454                 } while (n > 0);
 1455         }
 1456 }
 1457 #endif
 1458 
 1459 /*
 1460  * We have three different sysctls.
 1461  * kern.pool.npools - the number of pools.
 1462  * kern.pool.pool.<pool#> - the pool struct for the pool#.
 1463  * kern.pool.name.<pool#> - the name for pool#.
 1464  */
 1465 int
 1466 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
 1467 {
 1468         struct kinfo_pool pi;
 1469         struct pool *pp;
 1470         int rv = ENOENT;
 1471 
 1472         switch (name[0]) {
 1473         case KERN_POOL_NPOOLS:
 1474                 if (namelen != 1)
 1475                         return (ENOTDIR);
 1476                 return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
 1477 
 1478         case KERN_POOL_NAME:
 1479         case KERN_POOL_POOL:
 1480         case KERN_POOL_CACHE:
 1481         case KERN_POOL_CACHE_CPUS:
 1482                 break;
 1483         default:
 1484                 return (EOPNOTSUPP);
 1485         }
 1486 
 1487         if (namelen != 2)
 1488                 return (ENOTDIR);
 1489 
 1490         rw_enter_read(&pool_lock);
 1491 
 1492         SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
 1493                 if (name[1] == pp->pr_serial)
 1494                         break;
 1495         }
 1496 
 1497         if (pp == NULL)
 1498                 goto done;
 1499 
 1500         switch (name[0]) {
 1501         case KERN_POOL_NAME:
 1502                 rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
 1503                 break;
 1504         case KERN_POOL_POOL:
 1505                 memset(&pi, 0, sizeof(pi));
 1506 
 1507                 pl_enter(pp, &pp->pr_lock);
 1508                 pi.pr_size = pp->pr_size;
 1509                 pi.pr_pgsize = pp->pr_pgsize;
 1510                 pi.pr_itemsperpage = pp->pr_itemsperpage;
 1511                 pi.pr_npages = pp->pr_npages;
 1512                 pi.pr_minpages = pp->pr_minpages;
 1513                 pi.pr_maxpages = pp->pr_maxpages;
 1514                 pi.pr_hardlimit = pp->pr_hardlimit;
 1515                 pi.pr_nout = pp->pr_nout;
 1516                 pi.pr_nitems = pp->pr_nitems;
 1517                 pi.pr_nget = pp->pr_nget;
 1518                 pi.pr_nput = pp->pr_nput;
 1519                 pi.pr_nfail = pp->pr_nfail;
 1520                 pi.pr_npagealloc = pp->pr_npagealloc;
 1521                 pi.pr_npagefree = pp->pr_npagefree;
 1522                 pi.pr_hiwat = pp->pr_hiwat;
 1523                 pi.pr_nidle = pp->pr_nidle;
 1524                 pl_leave(pp, &pp->pr_lock);
 1525 
 1526                 pool_cache_pool_info(pp, &pi);
 1527 
 1528                 rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
 1529                 break;
 1530 
 1531         case KERN_POOL_CACHE:
 1532                 rv = pool_cache_info(pp, oldp, oldlenp);
 1533                 break;
 1534 
 1535         case KERN_POOL_CACHE_CPUS:
 1536                 rv = pool_cache_cpus_info(pp, oldp, oldlenp);
 1537                 break;
 1538         }
 1539 
 1540 done:
 1541         rw_exit_read(&pool_lock);
 1542 
 1543         return (rv);
 1544 }
 1545 
 1546 void
 1547 pool_gc_sched(void *null)
 1548 {
 1549         task_add(systqmp, &pool_gc_task);
 1550 }
 1551 
 1552 void
 1553 pool_gc_pages(void *null)
 1554 {
 1555         struct pool *pp;
 1556         struct pool_page_header *ph, *freeph;
 1557         int s;
 1558 
 1559         rw_enter_read(&pool_lock);
 1560         s = splvm(); /* XXX go to splvm until all pools _setipl properly */
 1561         SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
 1562 #ifdef MULTIPROCESSOR
 1563                 if (pp->pr_cache != NULL)
 1564                         pool_cache_gc(pp);
 1565 #endif
 1566 
 1567                 if (pp->pr_nidle <= pp->pr_minpages || /* guess */
 1568                     !pl_enter_try(pp, &pp->pr_lock)) /* try */
 1569                         continue;
 1570 
 1571                 /* is it time to free a page? */
 1572                 if (pp->pr_nidle > pp->pr_minpages &&
 1573                     (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
 1574                     getnsecuptime() - ph->ph_timestamp > POOL_WAIT_GC) {
 1575                         freeph = ph;
 1576                         pool_p_remove(pp, freeph);
 1577                 } else
 1578                         freeph = NULL;
 1579 
 1580                 pl_leave(pp, &pp->pr_lock);
 1581 
 1582                 if (freeph != NULL)
 1583                         pool_p_free(pp, freeph);
 1584         }
 1585         splx(s);
 1586         rw_exit_read(&pool_lock);
 1587 
 1588         timeout_add_sec(&pool_gc_tick, 1);
 1589 }
 1590 
 1591 /*
 1592  * Pool backend allocators.
 1593  */
 1594 
 1595 void *
 1596 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
 1597 {
 1598         void *v;
 1599 
 1600         v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
 1601 
 1602 #ifdef DIAGNOSTIC
 1603         if (v != NULL && POOL_INPGHDR(pp)) {
 1604                 vaddr_t addr = (vaddr_t)v;
 1605                 if ((addr & pp->pr_pgmask) != addr) {
 1606                         panic("%s: %s page address %p isn't aligned to %u",
 1607                             __func__, pp->pr_wchan, v, pp->pr_pgsize);
 1608                 }
 1609         }
 1610 #endif
 1611 
 1612         return (v);
 1613 }
 1614 
 1615 void
 1616 pool_allocator_free(struct pool *pp, void *v)
 1617 {
 1618         struct pool_allocator *pa = pp->pr_alloc;
 1619 
 1620         (*pa->pa_free)(pp, v);
 1621 }
 1622 
 1623 void *
 1624 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
 1625 {
 1626         struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
 1627 
 1628         kd.kd_waitok = ISSET(flags, PR_WAITOK);
 1629         kd.kd_slowdown = slowdown;
 1630 
 1631         return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
 1632 }
 1633 
 1634 void
 1635 pool_page_free(struct pool *pp, void *v)
 1636 {
 1637         km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
 1638 }
 1639 
 1640 void *
 1641 pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
 1642 {
 1643         struct kmem_va_mode kv = kv_intrsafe;
 1644         struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
 1645         void *v;
 1646         int s;
 1647 
 1648         if (POOL_INPGHDR(pp))
 1649                 kv.kv_align = pp->pr_pgsize;
 1650 
 1651         kd.kd_waitok = ISSET(flags, PR_WAITOK);
 1652         kd.kd_slowdown = slowdown;
 1653 
 1654         s = splvm();
 1655         v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
 1656         splx(s);
 1657 
 1658         return (v);
 1659 }
 1660 
 1661 void
 1662 pool_multi_free(struct pool *pp, void *v)
 1663 {
 1664         struct kmem_va_mode kv = kv_intrsafe;
 1665         int s;
 1666 
 1667         if (POOL_INPGHDR(pp))
 1668                 kv.kv_align = pp->pr_pgsize;
 1669 
 1670         s = splvm();
 1671         km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
 1672         splx(s);
 1673 }
 1674 
 1675 void *
 1676 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
 1677 {
 1678         struct kmem_va_mode kv = kv_any;
 1679         struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
 1680         void *v;
 1681 
 1682         if (POOL_INPGHDR(pp))
 1683                 kv.kv_align = pp->pr_pgsize;
 1684 
 1685         kd.kd_waitok = ISSET(flags, PR_WAITOK);
 1686         kd.kd_slowdown = slowdown;
 1687 
 1688         KERNEL_LOCK();
 1689         v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
 1690         KERNEL_UNLOCK();
 1691 
 1692         return (v);
 1693 }
 1694 
 1695 void
 1696 pool_multi_free_ni(struct pool *pp, void *v)
 1697 {
              /*
               * Release the pool page v (pp->pr_pgsize bytes) with km_free()
               * while holding the kernel lock.  The VA mode is rebuilt the same
               * way pool_multi_alloc_ni() builds it.
               */
 1698         struct kmem_va_mode kv = kv_any;
 1699 
 1700         if (POOL_INPGHDR(pp))
 1701                 kv.kv_align = pp->pr_pgsize;
 1702 
 1703         KERNEL_LOCK();
 1704         km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
 1705         KERNEL_UNLOCK();
 1706 }
 1707 
 1708 #ifdef MULTIPROCESSOR
 1709 
 1710 struct pool pool_caches; /* pool of per-CPU struct pool_cache entries; set up lazily by pool_cache_init() */
 1711 
 1712 void
 1713 pool_cache_init(struct pool *pp)
 1714 {
              /*
               * Enable per-CPU caching for pool pp: obtain per-CPU cache memory
               * from pool_caches, reset the pool-wide cache state and every
               * CPU's cache, then publish the result in pp->pr_cache.
               */
 1715         struct cpumem *cm;
 1716         struct pool_cache *pc;
 1717         struct cpumem_iter i;
 1718 
              /* pr_size == 0 is used as "pool_caches has not been initialised yet" */
 1719         if (pool_caches.pr_size == 0) {
 1720                 pool_init(&pool_caches, sizeof(struct pool_cache),
 1721                     CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
 1722                     "plcache", NULL);
 1723         }
 1724 
 1725         /* must be able to use the pool items as cache list items */
 1726         KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));
 1727 
 1728         cm = cpumem_get(&pool_caches);
 1729 
 1730         pl_init(pp, &pp->pr_cache_lock);
              /* random per-pool words used by pool_cache_item_magic() to stamp free items */
 1731         arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
 1732         TAILQ_INIT(&pp->pr_cache_lists);
 1733         pp->pr_cache_nitems = 0;
 1734         pp->pr_cache_timestamp = getnsecuptime();
              /* initial per-CPU list length limit; pool_cache_gc() tunes it later */
 1735         pp->pr_cache_items = 8;
 1736         pp->pr_cache_contention = 0;
 1737         pp->pr_cache_ngc = 0;
 1738 
              /* start every CPU with empty lists and zeroed statistics */
 1739         CPUMEM_FOREACH(pc, &i, cm) {
 1740                 pc->pc_actv = NULL;
 1741                 pc->pc_nactv = 0;
 1742                 pc->pc_prev = NULL;
 1743 
 1744                 pc->pc_nget = 0;
 1745                 pc->pc_nfail = 0;
 1746                 pc->pc_nput = 0;
 1747                 pc->pc_nlget = 0;
 1748                 pc->pc_nlfail = 0;
 1749                 pc->pc_nlput = 0;
 1750                 pc->pc_nout = 0;
 1751         }
 1752 
              /*
               * Barrier before publishing: the stores above are meant to be
               * visible before pr_cache becomes non-NULL (readers test
               * pr_cache to decide whether the cache exists).
               */
 1753         membar_producer();
 1754 
 1755         pp->pr_cache = cm;
 1756 }
 1757 
 1758 static inline void
 1759 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
 1760 {
              /*
               * Stamp a cached item with two check words derived from the pool's
               * random magic: word 0 is magic[0] ^ the item's own address, word 1
               * is magic[1] ^ its ci_next pointer.  The words are written over
               * the storage of ci_nextl, so this is only valid while the item is
               * not linked on a TAILQ through ci_nextl.
               * NOTE(review): assumes ci_nextl spans at least two unsigned longs;
               * the struct definition is not visible here.
               */
 1761         unsigned long *entry = (unsigned long *)&ci->ci_nextl;
 1762 
 1763         entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
 1764         entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
 1765 }
 1766 
 1767 static inline void
 1768 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
 1769 {
              /*
               * Verify the two check words written by pool_cache_item_magic().
               * Returns normally when both match; otherwise panics, reporting
               * the item address, the byte offset of the word that failed, and
               * the found and expected values.
               */
 1770         unsigned long *entry;
 1771         unsigned long val;
 1772 
              /* word 0: magic[0] ^ address of the item itself */
 1773         entry = (unsigned long *)&ci->ci_nextl;
 1774         val = pp->pr_cache_magic[0] ^ (u_long)ci;
 1775         if (*entry != val)
 1776                 goto fail;
 1777 
              /* word 1: magic[1] ^ the item's ci_next pointer */
 1778         entry++;
 1779         val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
 1780         if (*entry != val)
 1781                 goto fail;
 1782 
 1783         return;
 1784 
 1785 fail:
              /* entry and val still describe the word that mismatched */
 1786         panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
 1787             __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
 1788             *entry, val);
 1789 }
 1790 
 1791 static inline void
 1792 pool_list_enter(struct pool *pp)
 1793 {
              /*
               * Acquire pp->pr_cache_lock.  A non-blocking attempt is made
               * first; only when that fails do we fall back to a blocking
               * acquire and count the event in pr_cache_contention (the counter
               * is bumped with the lock held).  pool_cache_gc() reads that
               * counter to tune the per-CPU list length.
               */
 1794         if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
 1795                 pl_enter(pp, &pp->pr_cache_lock);
 1796                 pp->pr_cache_contention++;
 1797         }
 1798 }
 1799 
 1800 static inline void
 1801 pool_list_leave(struct pool *pp)
 1802 {
              /* release pp->pr_cache_lock taken by pool_list_enter() */
 1803         pl_leave(pp, &pp->pr_cache_lock);
 1804 }
 1805 
 1806 static inline struct pool_cache_item *
 1807 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
 1808 {
              /*
               * Take one whole item list from the pool-wide pr_cache_lists on
               * behalf of the per-CPU cache pc.  Returns the head item of the
               * list, or NULL when no list is available.  The per-CPU counters
               * pc_nlget / pc_nlfail record the outcome.
               */
 1809         struct pool_cache_item *pl;
 1810 
 1811         pool_list_enter(pp);
 1812         pl = TAILQ_FIRST(&pp->pr_cache_lists);
 1813         if (pl != NULL) {
 1814                 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
 1815                 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
 1816 
                      /*
                       * ci_nextl was in use as TAILQ linkage while the list
                       * was queued, so the check words stored in the same
                       * memory have to be written again.
                       */
 1817                 pool_cache_item_magic(pp, pl);
 1818 
 1819                 pc->pc_nlget++;
 1820         } else
 1821                 pc->pc_nlfail++;
 1822 
 1823         /* fold this cpus nout into the global while we have the lock */
 1824         pp->pr_cache_nout += pc->pc_nout;
 1825         pc->pc_nout = 0;
 1826         pool_list_leave(pp);
 1827 
 1828         return (pl);
 1829 }
 1830 
 1831 static inline void
 1832 pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
 1833     struct pool_cache_item *ci)
 1834 {
              /*
               * Hand the item list headed by ci from the per-CPU cache pc over
               * to the pool-wide pr_cache_lists (appended at the tail) and
               * account for its items in pr_cache_nitems.
               */
 1835         pool_list_enter(pp);
              /*
               * The queue is going from empty to non-empty: restart the
               * timestamp that pool_cache_gc() compares against POOL_WAIT_GC.
               */
 1836         if (TAILQ_EMPTY(&pp->pr_cache_lists))
 1837                 pp->pr_cache_timestamp = getnsecuptime();
 1838 
 1839         pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
 1840         TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
 1841 
 1842         pc->pc_nlput++;
 1843 
 1844         /* fold this cpus nout into the global while we have the lock */
 1845         pp->pr_cache_nout += pc->pc_nout;
 1846         pc->pc_nout = 0;
 1847         pool_list_leave(pp);
 1848 }
 1849 
 1850 static inline struct pool_cache *
 1851 pool_cache_enter(struct pool *pp, int *s)
 1852 {
              /*
               * Begin a per-CPU cache operation: get the cache returned by
               * cpumem_enter(), raise the spl to the pool's pr_ipl (the previous
               * level is stored in *s for pool_cache_leave()), and bump pc_gen.
               * pool_cache_leave() bumps pc_gen again, so the statistics
               * readers, which spin while (pc_gen & 1), treat an odd value as
               * "update in progress".
               * NOTE(review): relies on pc_gen starting out even; its
               * initialisation is not visible here.
               */
 1853         struct pool_cache *pc;
 1854 
 1855         pc = cpumem_enter(pp->pr_cache);
 1856         *s = splraise(pp->pr_ipl);
 1857         pc->pc_gen++;
 1858 
 1859         return (pc);
 1860 }
 1861 
 1862 static inline void
 1863 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
 1864 {
              /*
               * End a per-CPU cache operation, undoing pool_cache_enter() in
               * reverse order: bump pc_gen a second time, restore the spl saved
               * in s, then release the per-CPU cache with cpumem_leave().
               */
 1865         pc->pc_gen++;
 1866         splx(s);
 1867         cpumem_leave(pp->pr_cache, pc);
 1868 }
 1869 
 1870 void *
 1871 pool_cache_get(struct pool *pp)
 1872 {
              /*
               * Take one item from the current CPU's cache for pool pp.
               * Sources are tried in order: the active list, the previous list,
               * then a whole list fetched from the pool-wide queue.  Returns the
               * item, or NULL when all three are empty (counted in pc_nfail).
               * The item's check words are verified before it is handed out.
               */
 1873         struct pool_cache *pc;
 1874         struct pool_cache_item *ci;
 1875         int s;
 1876 
 1877         pc = pool_cache_enter(pp, &s);
 1878 
 1879         if (pc->pc_actv != NULL) {
 1880                 ci = pc->pc_actv;
 1881         } else if (pc->pc_prev != NULL) {
 1882                 ci = pc->pc_prev;
 1883                 pc->pc_prev = NULL;
 1884         } else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
                      /* ci is NULL here and is what gets returned at done: */
 1885                 pc->pc_nfail++;
 1886                 goto done;
 1887         }
 1888 
 1889         pool_cache_item_magic_check(pp, ci);
 1890 #ifdef DIAGNOSTIC
              /*
               * If the item was poisoned when it was put, check that the bytes
               * after the item header still hold the poison pattern.
               */
 1891         if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
 1892                 size_t pidx;
 1893                 uint32_t pval;
 1894 
 1895                 if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
 1896                     &pidx, &pval)) {
                              /* pidx is applied as an index in units of int */
 1897                         int *ip = (int *)(ci + 1);
 1898                         ip += pidx;
 1899 
 1900                         panic("%s: %s cpu free list modified: "
 1901                             "item addr %p+%zu 0x%x!=0x%x",
 1902                             __func__, pp->pr_wchan, ci,
 1903                             (caddr_t)ip - (caddr_t)ci, *ip, pval);
 1904                 }
 1905         }
 1906 #endif
 1907 
              /*
               * Pop the head: whatever list ci came from, its remainder becomes
               * the active list.  The head carries the length of its list (see
               * ci_nitems in pool_cache_put()), so the remainder is one shorter.
               */
 1908         pc->pc_actv = ci->ci_next;
 1909         pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
 1910         pc->pc_nget++;
 1911         pc->pc_nout++;
 1912 
 1913 done:
 1914         pool_cache_leave(pp, pc, s);
 1915 
 1916         return (ci);
 1917 }
 1918 
 1919 void
 1920 pool_cache_put(struct pool *pp, void *v)
 1921 {
              /*
               * Return item v to the current CPU's cache for pool pp.  The item
               * memory itself is reused as the struct pool_cache_item list node
               * and is pushed on the head of the active list.  When the active
               * list has reached pr_cache_items entries, the lists are rotated
               * first: the previous list (if any) goes to the pool-wide queue,
               * the active list becomes the previous list, and a new active
               * list is started with this item.
               */
 1921 
 1962 
 1963 struct pool_cache_item *
 1964 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
 1965 {
              /*
               * Give every item on the list headed by pl back to the pool proper
               * with pool_do_put(), holding pp->pr_lock for the whole walk.
               * Returns the TAILQ successor of pl on ci_nextl, or NULL when pl
               * is NULL.  The successor is only meaningful when pl was still
               * linked on pr_cache_lists; the callers that pass unlinked lists
               * (per-CPU lists in pool_cache_destroy(), the list removed in
               * pool_cache_gc()) ignore the return value.
               */
 1966         struct pool_cache_item *rpl, *next;
 1967 
 1968         if (pl == NULL)
 1969                 return (NULL);
 1970 
              /* read the successor first: pl's memory is released below */
 1971         rpl = TAILQ_NEXT(pl, ci_nextl);
 1972 
 1973         pl_enter(pp, &pp->pr_lock);
 1974         do {
                      /* fetch ci_next before pool_do_put() takes the item away */
 1975                 next = pl->ci_next;
 1976                 pool_do_put(pp, pl);
 1977                 pl = next;
 1978         } while (pl != NULL);
 1979         pl_leave(pp, &pp->pr_lock);
 1980 
 1981         return (rpl);
 1982 }
 1983 
 1984 void
 1985 pool_cache_destroy(struct pool *pp)
 1986 {
              /*
               * Tear down the per-CPU cache of pool pp: detach it from the pool,
               * give every cached item back to the pool proper, and return the
               * per-CPU cache memory to pool_caches.
               */
 1987         struct pool_cache *pc;
 1988         struct pool_cache_item *pl;
 1989         struct cpumem_iter i;
 1990         struct cpumem *cm;
 1991 
 1992         rw_enter_write(&pool_lock); /* serialise with the gc */
 1993         cm = pp->pr_cache;
 1994         pp->pr_cache = NULL; /* make pool_put avoid the cache */
 1995         rw_exit_write(&pool_lock);
 1996 
              /* drain both lists of every CPU; NULL lists are ignored by the callee */
 1997         CPUMEM_FOREACH(pc, &i, cm) {
 1998                 pool_cache_list_put(pp, pc->pc_actv);
 1999                 pool_cache_list_put(pp, pc->pc_prev);
 2000         }
 2001 
 2002         cpumem_put(&pool_caches, cm);
 2003 
              /*
               * Drain the pool-wide queue by following the successor that
               * pool_cache_list_put() returns.  The TAILQ head is not updated
               * and pr_cache_lock is not taken here.
               * NOTE(review): presumably fine because nothing uses the cache
               * once pr_cache is NULL -- confirm against the callers.
               */
 2004         pl = TAILQ_FIRST(&pp->pr_cache_lists);
 2005         while (pl != NULL)
 2006                 pl = pool_cache_list_put(pp, pl);
 2007 }
 2008 
 2009 void
 2010 pool_cache_gc(struct pool *pp)
 2011 {
              /*
               * Periodic maintenance of pool pp's cache, in two independent
               * steps: (1) if the pool-wide list queue has been idle for longer
               * than POOL_WAIT_GC, release one list back to the pool proper;
               * (2) retune pr_cache_items from the lock contention seen since
               * the previous call.
               */
 2012         unsigned int contention, delta;
 2013 
              /*
               * The age and emptiness tests run without the lock and the lock is
               * only tried, never waited for; the queue is therefore examined
               * again (pl != NULL) once the lock is held.
               */
 2014         if (getnsecuptime() - pp->pr_cache_timestamp > POOL_WAIT_GC &&
 2015             !TAILQ_EMPTY(&pp->pr_cache_lists) &&
 2016             pl_enter_try(pp, &pp->pr_cache_lock)) {
 2017                 struct pool_cache_item *pl = NULL;
 2018 
 2019                 pl = TAILQ_FIRST(&pp->pr_cache_lists);
 2020                 if (pl != NULL) {
 2021                         TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
 2022                         pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
                              /* restart the idle timer so at most one list goes per interval */
 2023                         pp->pr_cache_timestamp = getnsecuptime();
 2024 
 2025                         pp->pr_cache_ngc++;
 2026                 }
 2027 
 2028                 pl_leave(pp, &pp->pr_cache_lock);
 2029 
                      /* done after dropping the cache lock; a NULL pl is a no-op */
 2030                 pool_cache_list_put(pp, pl);
 2031         }
 2032 
 2033         /*
 2034          * if there's a lot of contention on the pr_cache_lock then consider
 2035          * growing the length of the list to reduce the need to access the
 2036          * global pool.
 2037          */
 2038 
              /*
               * delta is the number of contended acquisitions counted by
               * pool_list_enter() since the previous call.  More than 8 grows
               * the per-CPU list length by 8, but only while the pool-wide queue
               * holds at least 16 items per CPU found; none at all shrinks the
               * length by 1, never below 8.
               */
 2039         contention = pp->pr_cache_contention;
 2040         delta = contention - pp->pr_cache_contention_prev;
 2041         if (delta > 8 /* magic */) {
 2042                 if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
 2043                         pp->pr_cache_items += 8;
 2044         } else if (delta == 0) {
 2045                 if (pp->pr_cache_items > 8)
 2046                         pp->pr_cache_items--;
 2047         }
 2048         pp->pr_cache_contention_prev = contention;
 2049 }
 2050 
 2051 void
 2052 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
 2053 {
              /*
               * Add the per-CPU cache statistics of pool pp to the totals
               * already in pi: pr_nget, pr_nput and pr_nout are incremented,
               * not overwritten.  Does nothing when the pool has no cache.
               */
 2054         struct pool_cache *pc;
 2055         struct cpumem_iter i;
 2056 
 2057         if (pp->pr_cache == NULL)
 2058                 return;
 2059 
 2060         /* loop through the caches twice to collect stats */
 2061 
 2062         /* once without the lock so we can yield while reading nget/nput */
 2063         CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
 2064                 uint64_t gen, nget, nput;
 2065 
                      /*
                       * Generation-counter read: wait while pc_gen is odd (an
                       * update bracketed by pool_cache_enter()/leave() is in
                       * progress) and retry if pc_gen changed while the two
                       * counters were being copied.
                       */
 2066                 do {
 2067                         while ((gen = pc->pc_gen) & 1)
 2068                                 yield();
 2069 
 2070                         nget = pc->pc_nget;
 2071                         nput = pc->pc_nput;
 2072                 } while (gen != pc->pc_gen);
 2073 
 2074                 pi->pr_nget += nget;
 2075                 pi->pr_nput += nput;
 2076         }
 2077 
 2078         /* and once with the cache lock so we can get consistent nout values */
 2079         pl_enter(pp, &pp->pr_cache_lock);
 2080         CPUMEM_FOREACH(pc, &i, pp->pr_cache)
 2081                 pi->pr_nout += pc->pc_nout;
 2082 
              /* plus what the CPUs have already folded into the pool-wide count */
 2083         pi->pr_nout += pp->pr_cache_nout;
 2084         pl_leave(pp, &pp->pr_cache_lock);
 2085 }
 2086 
 2087 int
 2088 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
 2089 {
              /*
               * Export the pool-wide cache state of pp as a struct
               * kinfo_pool_cache through sysctl_rdstruct().  Returns
               * EOPNOTSUPP when the pool has no cache, otherwise whatever
               * sysctl_rdstruct() returns.
               */
 2090         struct kinfo_pool_cache kpc;
 2091 
 2092         if (pp->pr_cache == NULL)
 2093                 return (EOPNOTSUPP);
 2094 
 2095         memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */
 2096 
              /* copy all four fields under the cache lock so they belong together */
 2097         pl_enter(pp, &pp->pr_cache_lock);
 2098         kpc.pr_ngc = pp->pr_cache_ngc;
 2099         kpc.pr_len = pp->pr_cache_items;
 2100         kpc.pr_nitems = pp->pr_cache_nitems;
 2101         kpc.pr_contention = pp->pr_cache_contention;
 2102         pl_leave(pp, &pp->pr_cache_lock);
 2103 
 2104         return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
 2105 }
 2106 
 2107 int
 2108 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
 2109 {
              /*
               * Export one struct kinfo_pool_cache_cpu per per-CPU cache of pool
               * pp through sysctl_rdstruct().
               *
               * Returns EOPNOTSUPP when the pool has no cache, EINVAL when
               * *oldlenp is not a multiple of the entry size, EIO when the
               * temporary array cannot be allocated or more caches are found
               * than ncpusfound, otherwise whatever sysctl_rdstruct() returns.
               */
 2110         struct pool_cache *pc;
 2111         struct kinfo_pool_cache_cpu *kpcc, *info;
 2112         unsigned int cpu = 0;
 2113         struct cpumem_iter i;
 2114         int error = 0;
 2115         size_t len;
 2116 
 2117         if (pp->pr_cache == NULL)
 2118                 return (EOPNOTSUPP);
 2119         if (*oldlenp % sizeof(*kpcc))
 2120                 return (EINVAL);
 2121 
              /* M_CANFAIL: a NULL return is possible and handled; M_ZERO clears the array */
 2122         kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
 2123             M_WAITOK|M_CANFAIL|M_ZERO);
 2124         if (kpcc == NULL)
 2125                 return (EIO);
 2126 
              /* byte size of the array, needed again by free() below */
 2127         len = ncpusfound * sizeof(*kpcc);
 2128 
 2129         CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
 2130                 uint64_t gen;
 2131 
                      /* never write past the ncpusfound entries allocated above */
 2132                 if (cpu >= ncpusfound) {
 2133                         error = EIO;
 2134                         goto err;
 2135                 }
 2136 
                      /* pr_cpu is the position in iteration order */
 2137                 info = &kpcc[cpu];
 2138                 info->pr_cpu = cpu;
 2139 
                      /*
                       * Generation-counter read: wait while pc_gen is odd and
                       * redo the copy if pc_gen changed while it was running.
                       */
 2140                 do {
 2141                         while ((gen = pc->pc_gen) & 1)
 2142                                 yield();
 2143 
 2144                         info->pr_nget = pc->pc_nget;
 2145                         info->pr_nfail = pc->pc_nfail;
 2146                         info->pr_nput = pc->pc_nput;
 2147                         info->pr_nlget = pc->pc_nlget;
 2148                         info->pr_nlfail = pc->pc_nlfail;
 2149                         info->pr_nlput = pc->pc_nlput;
 2150                 } while (gen != pc->pc_gen);
 2151 
 2152                 cpu++;
 2153         }
 2154 
 2155         error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
 2156 err:
              /* reached on success and on failure: the temporary array is always released */
 2157         free(kpcc, M_TEMP, len);
 2158 
 2159         return (error);
 2160 }
 2161 #else /* MULTIPROCESSOR */
 2162 void
 2163 pool_cache_init(struct pool *pp)
 2164 {
 2165         /* nop: this is the variant built when MULTIPROCESSOR is not defined */
 2166 }
 2167 
 2168 void
 2169 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
 2170 {
 2171         /* nop: without MULTIPROCESSOR nothing is added to pi */
 2172 }
 2173 
 2174 int
 2175 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
 2176 {
              /* variant built when MULTIPROCESSOR is not defined: always unsupported */
 2177         return (EOPNOTSUPP);
 2178 }
 2179 
 2180 int
 2181 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
 2182 {
              /* variant built when MULTIPROCESSOR is not defined: always unsupported */
 2183         return (EOPNOTSUPP);
 2184 }
 2185 #endif /* MULTIPROCESSOR */
 2186 
 2187 
 2188 void
 2189 pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
 2190     const struct lock_type *type)
 2191 {
              /*
               * Mutex flavour of the pool lock "init" operation: initialise the
               * mutex member of the lock union, passing the pool's pr_ipl and
               * pr_wchan plus the caller's lock type; the flags argument is 0.
               */
 2192         _mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
 2193 }
 2194 
 2195 void
 2196 pool_lock_mtx_enter(union pool_lock *lock)
 2197 {
              /* mutex flavour of "enter": acquire the mutex member of the union */
 2198         mtx_enter(&lock->prl_mtx);
 2199 }
 2200 
 2201 int
 2202 pool_lock_mtx_enter_try(union pool_lock *lock)
 2203 {
              /*
               * Mutex flavour of "enter_try": returns mtx_enter_try()'s result
               * unchanged.  Callers in this file treat 0 as "not acquired".
               */
 2204         return (mtx_enter_try(&lock->prl_mtx));
 2205 }
 2206 
 2207 void
 2208 pool_lock_mtx_leave(union pool_lock *lock)
 2209 {
              /* mutex flavour of "leave": release the mutex member of the union */
 2210         mtx_leave(&lock->prl_mtx);
 2211 }
 2212 
 2213 void
 2214 pool_lock_mtx_assert_locked(union pool_lock *lock)
 2215 {
              /* mutex flavour of "assert_locked": delegate to MUTEX_ASSERT_LOCKED() */
 2216         MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
 2217 }
 2218 
 2219 void
 2220 pool_lock_mtx_assert_unlocked(union pool_lock *lock)
 2221 {
              /* mutex flavour of "assert_unlocked": delegate to MUTEX_ASSERT_UNLOCKED() */
 2222         MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
 2223 }
 2224 
 2225 int
 2226 pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
 2227     const char *wmesg)
 2228 {
              /*
               * Mutex flavour of "sleep": sleep on ident with msleep_nsec(),
               * handing it the mutex and passing INFSLP as the timeout argument.
               * Returns msleep_nsec()'s result unchanged.
               */
 2229         return msleep_nsec(ident, &lock->prl_mtx, priority, wmesg, INFSLP);
 2230 }
 2231 
 2232 static const struct pool_lock_ops pool_lock_ops_mtx = {
              /*
               * Mutex-backed pool lock operations.  The initializer is
               * positional, so the entries have to stay in struct pool_lock_ops
               * member order (init, enter, enter_try, leave, assert_locked,
               * assert_unlocked, sleep -- going by the function names; confirm
               * against the struct declaration).
               */
 2233         pool_lock_mtx_init,
 2234         pool_lock_mtx_enter,
 2235         pool_lock_mtx_enter_try,
 2236         pool_lock_mtx_leave,
 2237         pool_lock_mtx_assert_locked,
 2238         pool_lock_mtx_assert_unlocked,
 2239         pool_lock_mtx_sleep,
 2240 };
 2241 
 2242 void
 2243 pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
 2244     const struct lock_type *type)
 2245 {
              /*
               * rwlock flavour of the pool lock "init" operation: initialise the
               * rwlock member of the lock union, passing the pool's pr_wchan and
               * the caller's lock type; the flags argument is 0.  Unlike the
               * mutex flavour, pr_ipl is not used.
               */
 2246         _rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
 2247 }
 2248 
 2249 void
 2250 pool_lock_rw_enter(union pool_lock *lock)
 2251 {
              /* rwlock flavour of "enter": the pool always takes the lock for writing */
 2252         rw_enter_write(&lock->prl_rwlock);
 2253 }
 2254 
 2255 int
 2256 pool_lock_rw_enter_try(union pool_lock *lock)
 2257 {
              /*
               * rwlock flavour of "enter_try": request the write lock with
               * RW_NOSLEEP.  The comparison turns an rw_enter() result of 0 into
               * 1 and anything else into 0, giving the "non-zero means
               * acquired" convention that callers of the try operation in this
               * file expect.
               */
 2258         return (rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP) == 0);
 2259 }
 2260 
 2261 void
 2262 pool_lock_rw_leave(union pool_lock *lock)
 2263 {
              /* rwlock flavour of "leave": release the write lock */
 2264         rw_exit_write(&lock->prl_rwlock);
 2265 }
 2266 
 2267 void
 2268 pool_lock_rw_assert_locked(union pool_lock *lock)
 2269 {
              /* rwlock flavour of "assert_locked": delegate to rw_assert_wrlock() */
 2270         rw_assert_wrlock(&lock->prl_rwlock);
 2271 }
 2272 
 2273 void
 2274 pool_lock_rw_assert_unlocked(union pool_lock *lock)
 2275 {
              /*
               * rwlock flavour of "assert_unlocked": asserts only that
               * rw_status() does not report RW_WRITE; any other status passes.
               */
 2276         KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
 2277 }
 2278 
 2279 int
 2280 pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
 2281     const char *wmesg)
 2282 {
              /*
               * rwlock flavour of "sleep": sleep on ident with rwsleep_nsec(),
               * handing it the rwlock and passing INFSLP as the timeout
               * argument.  Returns rwsleep_nsec()'s result unchanged.
               */
 2283         return rwsleep_nsec(ident, &lock->prl_rwlock, priority, wmesg, INFSLP);
 2284 }
 2285 
 2286 static const struct pool_lock_ops pool_lock_ops_rw = {
              /*
               * rwlock-backed pool lock operations.  The initializer is
               * positional, so the entries have to stay in struct pool_lock_ops
               * member order and parallel to pool_lock_ops_mtx.
               */
 2287         pool_lock_rw_init,
 2288         pool_lock_rw_enter,
 2289         pool_lock_rw_enter_try,
 2290         pool_lock_rw_leave,
 2291         pool_lock_rw_assert_locked,
 2292         pool_lock_rw_assert_unlocked,
 2293         pool_lock_rw_sleep,
 2294 };

Cache object: 53a9d9510067a4c13b1e786fbd27224e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.