FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_percpu.c

    1 /*      $NetBSD: subr_percpu.c,v 1.25 2020/05/11 21:37:31 riastradh Exp $       */
    2 
    3 /*-
    4  * Copyright (c)2007,2008 YAMAMOTO Takashi,
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * per-cpu storage.
   31  */
   32 
   33 #include <sys/cdefs.h>
   34 __KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.25 2020/05/11 21:37:31 riastradh Exp $");
   35 
   36 #include <sys/param.h>
   37 #include <sys/cpu.h>
   38 #include <sys/kernel.h>
   39 #include <sys/kmem.h>
   40 #include <sys/mutex.h>
   41 #include <sys/percpu.h>
   42 #include <sys/rwlock.h>
   43 #include <sys/vmem.h>
   44 #include <sys/xcall.h>
   45 
   46 #define PERCPU_QUANTUM_SIZE     (ALIGNBYTES + 1)
   47 #define PERCPU_QCACHE_MAX       0
   48 #define PERCPU_IMPORT_SIZE      2048
   49 
   50 struct percpu {
   51         unsigned                pc_offset;
   52         size_t                  pc_size;
   53         percpu_callback_t       pc_ctor;
   54         percpu_callback_t       pc_dtor;
   55         void                    *pc_cookie;
   56         LIST_ENTRY(percpu)      pc_list;
   57 };
   58 
   59 static krwlock_t        percpu_swap_lock        __cacheline_aligned;
   60 static vmem_t *         percpu_offset_arena     __read_mostly;
   61 static struct {
   62         kmutex_t        lock;
   63         unsigned int    nextoff;
   64         LIST_HEAD(, percpu) ctor_list;
   65         struct lwp      *busy;
   66         kcondvar_t      cv;
   67 } percpu_allocation __cacheline_aligned;
   68 
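Each percpu_t names a byte range in a single offset space shared by all CPUs: pc_offset is where the range starts, and every CPU keeps its own backing block (the percpu_cpu_t reached through cpu_percpu()) that is grown on demand to at least percpu_allocation.nextoff bytes. Resolving a percpu_t on a given CPU is therefore just base plus offset; a minimal sketch of the address computation performed by percpu_getptr_remote() later in this file:

        /* the instance of 'pc' that belongs to CPU 'ci' */
        void *p = (char *)cpu_percpu(ci)->pcc_data + pc->pc_offset;
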
   69 static percpu_cpu_t *
   70 cpu_percpu(struct cpu_info *ci)
   71 {
   72 
   73         return &ci->ci_data.cpu_percpu;
   74 }
   75 
   76 static unsigned int
   77 percpu_offset(percpu_t *pc)
   78 {
   79         const unsigned int off = pc->pc_offset;
   80 
   81         KASSERT(off < percpu_allocation.nextoff);
   82         return off;
   83 }
   84 
   85 /*
   86  * percpu_cpu_swap: cross-call handler for percpu_cpu_enlarge
   87  */
   88 __noubsan
   89 static void
   90 percpu_cpu_swap(void *p1, void *p2)
   91 {
   92         struct cpu_info * const ci = p1;
   93         percpu_cpu_t * const newpcc = p2;
   94         percpu_cpu_t * const pcc = cpu_percpu(ci);
   95 
   96         KASSERT(ci == curcpu() || !mp_online);
   97 
   98         /*
   99          * swap *pcc and *newpcc unless someone has beaten us to it.
  100          */
  101         rw_enter(&percpu_swap_lock, RW_WRITER);
  102         if (newpcc->pcc_size > pcc->pcc_size) {
  103                 percpu_cpu_t tmp;
  104                 int s;
  105 
  106                 tmp = *pcc;
  107 
  108                 /*
  109                  * block interrupts so that we don't lose their modifications.
  110                  */
  111 
  112                 s = splhigh();
  113 
  114                 /*
  115                  * copy data to new storage.
  116                  */
  117 
  118                 memcpy(newpcc->pcc_data, pcc->pcc_data, pcc->pcc_size);
  119 
  120                 /*
  121                  * this assignment needs to be atomic for percpu_getptr_remote.
  122                  */
  123 
  124                 pcc->pcc_data = newpcc->pcc_data;
  125 
  126                 splx(s);
  127 
  128                 pcc->pcc_size = newpcc->pcc_size;
  129                 *newpcc = tmp;
  130         }
  131         rw_exit(&percpu_swap_lock);
  132 }
  133 
  134 /*
  135  * percpu_cpu_enlarge: ensure that the percpu_cpu_t of each CPU has enough space
  136  */
  137 
  138 static void
  139 percpu_cpu_enlarge(size_t size)
  140 {
  141         CPU_INFO_ITERATOR cii;
  142         struct cpu_info *ci;
  143 
  144         for (CPU_INFO_FOREACH(cii, ci)) {
  145                 percpu_cpu_t pcc;
  146 
  147                 pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */
  148                 pcc.pcc_size = size;
  149                 if (!mp_online) {
  150                         percpu_cpu_swap(ci, &pcc);
  151                 } else {
  152                         uint64_t where;
  153 
  154                         where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci);
  155                         xc_wait(where);
  156                 }
  157                 KASSERT(pcc.pcc_size <= size);
  158                 if (pcc.pcc_data != NULL) {
  159                         kmem_free(pcc.pcc_data, pcc.pcc_size);
  160                 }
  161         }
  162 }
  163 
  164 /*
  165  * percpu_backend_alloc: vmem import callback for percpu_offset_arena
  166  */
  167 
  168 static int
  169 percpu_backend_alloc(vmem_t *dummy, vmem_size_t size, vmem_size_t *resultsize,
  170     vm_flag_t vmflags, vmem_addr_t *addrp)
  171 {
  172         unsigned int offset;
  173         unsigned int nextoff;
  174 
  175         ASSERT_SLEEPABLE();
  176         KASSERT(dummy == NULL);
  177 
  178         if ((vmflags & VM_NOSLEEP) != 0)
  179                 return ENOMEM;
  180 
  181         size = roundup(size, PERCPU_IMPORT_SIZE);
  182         mutex_enter(&percpu_allocation.lock);
  183         offset = percpu_allocation.nextoff;
  184         percpu_allocation.nextoff = nextoff = percpu_allocation.nextoff + size;
  185         mutex_exit(&percpu_allocation.lock);
  186 
  187         percpu_cpu_enlarge(nextoff);
  188 
  189         *resultsize = size;
  190         *addrp = (vmem_addr_t)offset;
  191         return 0;
  192 }
  193 
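A worked example of the import path, assuming an architecture where ALIGNBYTES + 1 is 8 (so percpu_allocation.nextoff starts at 8): the first percpu_alloc(sizeof(uint64_t)) finds the arena empty, vmem calls this import hook with the 8-byte request, and the request is rounded up to PERCPU_IMPORT_SIZE before every CPU's block is enlarged. The numbers below are illustrative, not taken from a real trace:

        size    = roundup(8, PERCPU_IMPORT_SIZE);       /* -> 2048                */
        offset  = percpu_allocation.nextoff;            /* -> 8                   */
        nextoff = 8 + 2048;                             /* -> 2056                */
        percpu_cpu_enlarge(2056);                       /* grow every CPU's block */

Later small allocations are carved out of the imported 2048-byte span by vmem without growing the per-CPU blocks again.
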
  194 static void
  195 percpu_zero_cb(void *vp, void *vp2, struct cpu_info *ci)
  196 {
  197         size_t sz = (uintptr_t)vp2;
  198 
  199         memset(vp, 0, sz);
  200 }
  201 
  202 /*
  203  * percpu_zero: initialize percpu storage with zero.
  204  */
  205 
  206 static void
  207 percpu_zero(percpu_t *pc, size_t sz)
  208 {
  209 
  210         percpu_foreach(pc, percpu_zero_cb, (void *)(uintptr_t)sz);
  211 }
  212 
  213 /*
  214  * percpu_init: subsystem initialization
  215  */
  216 
  217 void
  218 percpu_init(void)
  219 {
  220 
  221         ASSERT_SLEEPABLE();
  222         rw_init(&percpu_swap_lock);
  223         mutex_init(&percpu_allocation.lock, MUTEX_DEFAULT, IPL_NONE);
  224         percpu_allocation.nextoff = PERCPU_QUANTUM_SIZE;
  225         LIST_INIT(&percpu_allocation.ctor_list);
  226         percpu_allocation.busy = NULL;
  227         cv_init(&percpu_allocation.cv, "percpu");
  228 
  229         percpu_offset_arena = vmem_xcreate("percpu", 0, 0, PERCPU_QUANTUM_SIZE,
  230             percpu_backend_alloc, NULL, NULL, PERCPU_QCACHE_MAX, VM_SLEEP,
  231             IPL_NONE);
  232 }
  233 
  234 /*
  235  * percpu_init_cpu: cpu initialization
  236  *
  237  * => should be called before the CPU appears on the list for CPU_INFO_FOREACH.
  238  * => may be called for static CPUs afterward (typically just the primary CPU)
  239  */
  240 
  241 void
  242 percpu_init_cpu(struct cpu_info *ci)
  243 {
  244         percpu_cpu_t * const pcc = cpu_percpu(ci);
  245         struct percpu *pc;
  246         size_t size = percpu_allocation.nextoff; /* XXX racy */
  247 
  248         ASSERT_SLEEPABLE();
  249 
  250         /*
  251          * For the primary CPU, prior percpu_create may have already
  252          * triggered allocation, so there's nothing more for us to do
  253          * here.
  254          */
  255         if (pcc->pcc_size)
  256                 return;
  257         KASSERT(pcc->pcc_data == NULL);
  258 
  259         /*
  260          * Otherwise, allocate storage and, while the constructor list
  261          * is locked, run constructors for all percpus on this CPU.
  262          */
  263         pcc->pcc_size = size;
  264         if (size) {
  265                 pcc->pcc_data = kmem_zalloc(pcc->pcc_size, KM_SLEEP);
  266                 mutex_enter(&percpu_allocation.lock);
  267                 while (percpu_allocation.busy)
  268                         cv_wait(&percpu_allocation.cv,
  269                             &percpu_allocation.lock);
  270                 percpu_allocation.busy = curlwp;
  271                 LIST_FOREACH(pc, &percpu_allocation.ctor_list, pc_list) {
  272                         KASSERT(pc->pc_ctor);
  273                         mutex_exit(&percpu_allocation.lock);
  274                         (*pc->pc_ctor)((char *)pcc->pcc_data + pc->pc_offset,
  275                             pc->pc_cookie, ci);
  276                         mutex_enter(&percpu_allocation.lock);
  277                 }
  278                 KASSERT(percpu_allocation.busy == curlwp);
  279                 percpu_allocation.busy = NULL;
  280                 cv_broadcast(&percpu_allocation.cv);
  281                 mutex_exit(&percpu_allocation.lock);
  282         }
  283 }
  284 
  285 /*
  286  * percpu_alloc: allocate percpu storage
  287  *
  288  * => called in thread context.
  289  * => considered an expensive and rare operation.
  290  * => allocated storage is initialized with zeros.
  291  */
  292 
  293 percpu_t *
  294 percpu_alloc(size_t size)
  295 {
  296 
  297         return percpu_create(size, NULL, NULL, NULL);
  298 }
  299 
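A minimal usage sketch, assuming a driver that wants one zero-initialized uint64_t counter per CPU; the variable name is hypothetical, and the size passed to the eventual percpu_free() must match the one passed here:

        percpu_t *counters;

        counters = percpu_alloc(sizeof(uint64_t));      /* zeroed on every CPU */
        /* ... use it via percpu_getref()/percpu_putref(), see below ... */
        percpu_free(counters, sizeof(uint64_t));        /* size must match     */
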
  300 /*
  301  * percpu_create: allocate percpu storage and associate ctor/dtor with it
  302  *
  303  * => called in thread context.
  304  * => considered an expensive and rare operation.
  305  * => allocated storage is initialized by the ctor, or with zeros if ctor is NULL
  306  * => percpu_free will call the dtor first, if dtor is non-NULL
  307  * => ctor or dtor may sleep, even on allocation
  308  */
  309 
  310 percpu_t *
  311 percpu_create(size_t size, percpu_callback_t ctor, percpu_callback_t dtor,
  312     void *cookie)
  313 {
  314         vmem_addr_t offset;
  315         percpu_t *pc;
  316 
  317         ASSERT_SLEEPABLE();
  318         (void)vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT,
  319             &offset);
  320 
  321         pc = kmem_alloc(sizeof(*pc), KM_SLEEP);
  322         pc->pc_offset = offset;
  323         pc->pc_size = size;
  324         pc->pc_ctor = ctor;
  325         pc->pc_dtor = dtor;
  326         pc->pc_cookie = cookie;
  327 
  328         if (ctor) {
  329                 CPU_INFO_ITERATOR cii;
  330                 struct cpu_info *ci;
  331                 void *buf;
  332 
  333                 /*
  334                  * Wait until nobody is using the list of percpus with
  335                  * constructors.
  336                  */
  337                 mutex_enter(&percpu_allocation.lock);
  338                 while (percpu_allocation.busy)
  339                         cv_wait(&percpu_allocation.cv,
  340                             &percpu_allocation.lock);
  341                 percpu_allocation.busy = curlwp;
  342                 mutex_exit(&percpu_allocation.lock);
  343 
  344                 /*
  345                  * Run the constructor for all CPUs.  We use a
  346                  * temporary buffer so that we need not hold the
  347                  * percpu_swap_lock while running the constructor.
  348                  */
  349                 buf = kmem_alloc(size, KM_SLEEP);
  350                 for (CPU_INFO_FOREACH(cii, ci)) {
  351                         memset(buf, 0, size);
  352                         (*ctor)(buf, cookie, ci);
  353                         percpu_traverse_enter();
  354                         memcpy(percpu_getptr_remote(pc, ci), buf, size);
  355                         percpu_traverse_exit();
  356                 }
  357                 explicit_memset(buf, 0, size);
  358                 kmem_free(buf, size);
  359 
  360                 /*
  361                  * Insert the percpu into the list of percpus with
  362                  * constructors.  We are now done using the list, so it
  363                  * is safe for concurrent percpu_create or concurrent
  364                  * percpu_init_cpu to run.
  365                  */
  366                 mutex_enter(&percpu_allocation.lock);
  367                 KASSERT(percpu_allocation.busy == curlwp);
  368                 percpu_allocation.busy = NULL;
  369                 cv_broadcast(&percpu_allocation.cv);
  370                 LIST_INSERT_HEAD(&percpu_allocation.ctor_list, pc, pc_list);
  371                 mutex_exit(&percpu_allocation.lock);
  372         } else {
  373                 percpu_zero(pc, size);
  374         }
  375 
  376         return pc;
  377 }
  378 
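A hedged sketch of percpu_create() with a constructor and destructor, here giving each CPU a small private structure whose scratch buffer has to be allocated rather than just zero-filled; the struct, callback names, buffer size, and NULL cookie are hypothetical. The matching percpu_free() runs the destructor on every CPU's data before the offset is released:

        struct foo_pcpu {
                uint64_t        fp_count;
                char            *fp_scratch;    /* per-CPU scratch buffer */
        };

        static void
        foo_ctor(void *p, void *cookie, struct cpu_info *ci)
        {
                struct foo_pcpu *fp = p;        /* arrives zero-filled */

                fp->fp_scratch = kmem_alloc(PAGE_SIZE, KM_SLEEP);
        }

        static void
        foo_dtor(void *p, void *cookie, struct cpu_info *ci)
        {
                struct foo_pcpu *fp = p;

                kmem_free(fp->fp_scratch, PAGE_SIZE);
        }

        percpu_t *pcp = percpu_create(sizeof(struct foo_pcpu),
            foo_ctor, foo_dtor, NULL);
        /* ... */
        percpu_free(pcp, sizeof(struct foo_pcpu));      /* runs foo_dtor per CPU */

Allocating with KM_SLEEP inside the constructor is fine here because, as noted above, the ctor and dtor are allowed to sleep.
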
  379 /*
  380  * percpu_free: free percpu storage
  381  *
  382  * => called in thread context.
  383  * => considered an expensive and rare operation.
  384  */
  385 
  386 void
  387 percpu_free(percpu_t *pc, size_t size)
  388 {
  389 
  390         ASSERT_SLEEPABLE();
  391         KASSERT(size == pc->pc_size);
  392 
  393         /*
  394          * If there's a constructor, take the percpu off the list of
  395          * percpus with constructors, but first wait until nobody is
  396          * using the list.
  397          */
  398         if (pc->pc_ctor) {
  399                 mutex_enter(&percpu_allocation.lock);
  400                 while (percpu_allocation.busy)
  401                         cv_wait(&percpu_allocation.cv,
  402                             &percpu_allocation.lock);
  403                 LIST_REMOVE(pc, pc_list);
  404                 mutex_exit(&percpu_allocation.lock);
  405         }
  406 
  407         /* If there's a destructor, run it now for all CPUs.  */
  408         if (pc->pc_dtor) {
  409                 CPU_INFO_ITERATOR cii;
  410                 struct cpu_info *ci;
  411                 void *buf;
  412 
  413                 buf = kmem_alloc(size, KM_SLEEP);
  414                 for (CPU_INFO_FOREACH(cii, ci)) {
  415                         percpu_traverse_enter();
  416                         memcpy(buf, percpu_getptr_remote(pc, ci), size);
  417                         explicit_memset(percpu_getptr_remote(pc, ci), 0, size);
  418                         percpu_traverse_exit();
  419                         (*pc->pc_dtor)(buf, pc->pc_cookie, ci);
  420                 }
  421                 explicit_memset(buf, 0, size);
  422                 kmem_free(buf, size);
  423         }
  424 
  425         vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size);
  426         kmem_free(pc, sizeof(*pc));
  427 }
  428 
  429 /*
  430  * percpu_getref:
  431  *
  432  * => safe to be used in either thread or interrupt context
  433  * => disables preemption; must be bracketed with a percpu_putref()
  434  */
  435 
  436 void *
  437 percpu_getref(percpu_t *pc)
  438 {
  439 
  440         kpreempt_disable();
  441         return percpu_getptr_remote(pc, curcpu());
  442 }
  443 
  444 /*
  445  * percpu_putref:
  446  *
  447  * => drops the preemption-disabled count after caller is done with per-cpu
  448  *    data
  449  */
  450 
  451 void
  452 percpu_putref(percpu_t *pc)
  453 {
  454 
  455         kpreempt_enable();
  456 }
  457 
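A minimal sketch of the getref/putref pairing, using the hypothetical per-CPU uint64_t counter from the percpu_alloc() example above; preemption stays disabled between the two calls, so the window should be kept short:

        uint64_t *cp;

        cp = percpu_getref(counters);   /* preemption now disabled   */
        (*cp)++;                        /* touch this CPU's instance */
        percpu_putref(counters);        /* preemption enabled again  */
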
  458 /*
  459  * percpu_traverse_enter, percpu_traverse_exit, percpu_getptr_remote:
  460  * helpers to access a remote CPU's percpu data.
  461  *
  462  * => called in thread context.
  463  * => percpu_traverse_enter can block low-priority xcalls.
  464  * => typical usage would be:
  465  *
  466  *      sum = 0;
  467  *      percpu_traverse_enter();
  468  *      for (CPU_INFO_FOREACH(cii, ci)) {
  469  *              unsigned int *p = percpu_getptr_remote(pc, ci);
  470  *              sum += *p;
  471  *      }
  472  *      percpu_traverse_exit();
  473  */
  474 
  475 void
  476 percpu_traverse_enter(void)
  477 {
  478 
  479         ASSERT_SLEEPABLE();
  480         rw_enter(&percpu_swap_lock, RW_READER);
  481 }
  482 
  483 void
  484 percpu_traverse_exit(void)
  485 {
  486 
  487         rw_exit(&percpu_swap_lock);
  488 }
  489 
  490 void *
  491 percpu_getptr_remote(percpu_t *pc, struct cpu_info *ci)
  492 {
  493 
  494         return &((char *)cpu_percpu(ci)->pcc_data)[percpu_offset(pc)];
  495 }
  496 
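A self-contained version of the typical usage shown in the comment above, again assuming the hypothetical uint64_t counter; the whole walk runs with percpu_swap_lock held as a reader, so it should stay brief because it can hold up percpu_cpu_enlarge() and, per the comment, low-priority xcalls:

        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;
        uint64_t sum = 0;

        percpu_traverse_enter();
        for (CPU_INFO_FOREACH(cii, ci)) {
                uint64_t *cp = percpu_getptr_remote(counters, ci);

                sum += *cp;
        }
        percpu_traverse_exit();
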
  497 /*
  498  * percpu_foreach: call the specified callback function for each CPU.
  499  *
  500  * => must be called from thread context.
  501  * => callback executes on the **current** CPU (or, really, an arbitrary CPU,
  502  *    in case of preemption)
  503  * => caller should not rely on the CPU iteration order.
  504  * => the callback function should be minimal because it is executed while
  505  *    holding a global lock, which can block low-priority xcalls;
  506  *    e.g. it is illegal for a callback function to sleep for memory allocation.
  507  */
  508 void
  509 percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg)
  510 {
  511         CPU_INFO_ITERATOR cii;
  512         struct cpu_info *ci;
  513 
  514         percpu_traverse_enter();
  515         for (CPU_INFO_FOREACH(cii, ci)) {
  516                 (*cb)(percpu_getptr_remote(pc, ci), arg, ci);
  517         }
  518         percpu_traverse_exit();
  519 }
  520 
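The same aggregation expressed through percpu_foreach(); the callback name and the summing scheme are hypothetical, and as the comment above says the callback must not sleep:

        static void
        counter_sum_cb(void *p, void *arg, struct cpu_info *ci)
        {
                const uint64_t *cp = p;
                uint64_t *sump = arg;

                *sump += *cp;
        }

        uint64_t sum = 0;

        percpu_foreach(counters, counter_sum_cb, &sum);
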
  521 struct percpu_xcall_ctx {
  522         percpu_callback_t  ctx_cb;
  523         void              *ctx_arg;
  524 };
  525 
  526 static void
  527 percpu_xcfunc(void * const v1, void * const v2)
  528 {
  529         percpu_t * const pc = v1;
  530         struct percpu_xcall_ctx * const ctx = v2;
  531 
  532         (*ctx->ctx_cb)(percpu_getref(pc), ctx->ctx_arg, curcpu());
  533         percpu_putref(pc);
  534 }
  535 
  536 /*
  537  * percpu_foreach_xcall: call the specified callback function for each
  538  * CPU.  This version uses an xcall to run the callback on each CPU.
  539  *
  540  * => must be called from thread context.
  541  * => callback executes on the **remote** CPU in soft-interrupt context
  542  *    (at the specified soft-interrupt priority).
  543  * => caller should not rely on the CPU iteration order.
  544  * => the callback function should be minimal because it may be
  545  *    executed in soft-interrupt context; e.g. it is illegal for
  546  *    a callback function to sleep for memory allocation.
  547  */
  548 void
  549 percpu_foreach_xcall(percpu_t *pc, u_int xcflags, percpu_callback_t cb,
  550                      void *arg)
  551 {
  552         struct percpu_xcall_ctx ctx = {
  553                 .ctx_cb = cb,
  554                 .ctx_arg = arg,
  555         };
  556         CPU_INFO_ITERATOR cii;
  557         struct cpu_info *ci;
  558 
  559         for (CPU_INFO_FOREACH(cii, ci)) {
  560                 xc_wait(xc_unicast(xcflags, percpu_xcfunc, pc, &ctx, ci));
  561         }
  562 }
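
A sketch of the xcall variant using the same hypothetical callback as above; XC_HIGHPRI is assumed here to be an acceptable flag from <sys/xcall.h>. Because each xc_unicast() is waited for before the next one is issued, the callback invocations are serialized and may safely accumulate into a single variable:

        uint64_t sum = 0;

        percpu_foreach_xcall(counters, XC_HIGHPRI, counter_sum_cb, &sum);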

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.