FreeBSD/Linux Kernel Cross Reference
sys/lib/proportions.c


/*
 * Floating proportions
 *
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i is the time period over which the differential is taken. So d/dt_{-i}
 * is the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, i.e.:
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Furthermore, if we measure time (t) in the same events as x, so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period) {
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *   }
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2;
 *
 * can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonically increasing property can be applied to mitigate the
 *   wraparound issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */
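
/*
 * As an illustration only (a minimal user-space sketch, not part of this
 * kernel API): the bookkeeping above can be prototyped with a single
 * global event count doubling as the clock t, and a local counter that
 * lazily halves once per missed half-period.
 *
 *	#define SHIFT	4			// period = 2^SHIFT events
 *	#define PERIOD	(1UL << SHIFT)
 *	#define HALF	(PERIOD / 2)
 *
 *	static unsigned long t;			// global event count == time
 *
 *	struct local {
 *		unsigned long events;		// x_{j}
 *		unsigned long period;		// t rounded down to HALF at
 *						// the last access, i.e. c_{j}
 *	};
 *
 *	static void norm(struct local *l)	// x_{j} /= 2^(c - c_{j})
 *	{
 *		unsigned long global = t & ~(HALF - 1);
 *		unsigned long missed = (global - l->period) / HALF;
 *
 *		l->events = missed < 8 * sizeof(long) ? l->events >> missed : 0;
 *		l->period = global;
 *	}
 *
 *	static void inc(struct local *l)	// ++x_{j}, ++t
 *	{
 *		norm(l);
 *		l->events++;
 *		t++;
 *	}
 *
 *	static double fraction(struct local *l)	// p_{j}
 *	{
 *		norm(l);
 *		return (double)l->events / (HALF + (t & (HALF - 1)));
 *	}
 */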

#include <linux/proportions.h>
#include <linux/rcupdate.h>

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
        int err;

        if (shift > PROP_MAX_SHIFT)
                shift = PROP_MAX_SHIFT;

        pd->index = 0;
        pd->pg[0].shift = shift;
        mutex_init(&pd->mutex);
        err = percpu_counter_init(&pd->pg[0].events, 0);
        if (err)
                goto out;

        err = percpu_counter_init(&pd->pg[1].events, 0);
        if (err)
                percpu_counter_destroy(&pd->pg[0].events);

out:
        return err;
}
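
/*
 * A hypothetical usage sketch (names invented for illustration): one
 * descriptor tracks the global event stream, one prop_local per
 * contributor tracks its share.
 *
 *	static struct prop_descriptor vm_completions;
 *
 *	int err = prop_descriptor_init(&vm_completions, 10);	// period = 2^10 events
 *	if (err)
 *		return err;
 */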

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
        int index;
        int offset;
        u64 events;
        unsigned long flags;

        if (shift > PROP_MAX_SHIFT)
                shift = PROP_MAX_SHIFT;

        mutex_lock(&pd->mutex);

        index = pd->index ^ 1;
        offset = pd->pg[pd->index].shift - shift;
        if (!offset)
                goto out;

        pd->pg[index].shift = shift;

        local_irq_save(flags);
        events = percpu_counter_sum(&pd->pg[pd->index].events);
        if (offset < 0)
                events <<= -offset;
        else
                events >>= offset;
        percpu_counter_set(&pd->pg[index].events, events);

        /*
         * ensure the new pg is fully written before the switch
         */
        smp_wmb();
        pd->index = index;
        local_irq_restore(flags);

        synchronize_rcu();

out:
        mutex_unlock(&pd->mutex);
}

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
__acquires(RCU)
{
        int index;

        rcu_read_lock();
        index = pd->index;
        /*
         * match the smp_wmb() in prop_change_shift()
         */
        smp_rmb();
        return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
__releases(RCU)
{
        rcu_read_unlock();
}
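
/*
 * For clarity, the publish/subscribe ordering the barrier pair enforces
 * between prop_change_shift() and prop_get_global():
 *
 *	writer				reader
 *	------				------
 *	percpu_counter_set(pg[1])	index = pd->index
 *	smp_wmb()			smp_rmb()
 *	pd->index = 1			use pd->pg[index]
 *
 * A reader that observes the new index is thereby guaranteed to also
 * observe the fully written pg[1]; synchronize_rcu() then lets the
 * writer wait out any readers still using the old copy.
 */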

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
        int offset = *pl_shift - new_shift;

        if (!offset)
                return;

        if (offset < 0)
                *pl_period <<= -offset;
        else
                *pl_period >>= offset;

        *pl_shift = new_shift;
}
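
/*
 * Worked example (illustrative numbers): if the local state was recorded
 * under shift 6 (period/2 = 32) with *pl_period = 96, and the global shift
 * has since dropped to 4 (period/2 = 8), then offset = 2 and the period
 * stamp is rescaled to 96 >> 2 = 24 -- the same point in time, expressed
 * in the new, smaller period units.
 */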

/*
 * PERCPU
 */

#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
        raw_spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
        return percpu_counter_init(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
        percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
        unsigned long period = 1UL << (pg->shift - 1);
        unsigned long period_mask = ~(period - 1);
        unsigned long global_period;
        unsigned long flags;

        global_period = percpu_counter_read(&pg->events);
        global_period &= period_mask;

        /*
         * Fast path - check if the local and global period count still match
         * outside of the lock.
         */
        if (pl->period == global_period)
                return;

        raw_spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

        /*
         * For each missed period, we halve the local counter.
         * Basically:
         *   pl->events >> (global_period - pl->period);
         */
        period = (global_period - pl->period) >> (pg->shift - 1);
        if (period < BITS_PER_LONG) {
                s64 val = percpu_counter_read(&pl->events);

                if (val < (nr_cpu_ids * PROP_BATCH))
                        val = percpu_counter_sum(&pl->events);

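                /*
                 * The decay is applied as one relative delta: adding
                 * -val + (val >> period) leaves approximately
                 * val >> period behind, i.e. the counter is halved
                 * 'period' times in a single batched update.
                 */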
                __percpu_counter_add(&pl->events, -val + (val >> period),
                                        PROP_BATCH);
        } else
                percpu_counter_set(&pl->events, 0);

        pl->period = global_period;
        raw_spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_percpu(pg, pl);
        __percpu_counter_add(&pl->events, 1, PROP_BATCH);
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
}

/*
 * identical to __prop_inc_percpu, except that it limits this pl's fraction to
 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
 */
void __prop_inc_percpu_max(struct prop_descriptor *pd,
                           struct prop_local_percpu *pl, long frac)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_percpu(pg, pl);

        if (unlikely(frac != PROP_FRAC_BASE)) {
                unsigned long period_2 = 1UL << (pg->shift - 1);
                unsigned long counter_mask = period_2 - 1;
                unsigned long global_count;
                long numerator, denominator;

                numerator = percpu_counter_read_positive(&pl->events);
                global_count = percpu_counter_read(&pg->events);
                denominator = period_2 + (global_count & counter_mask);

                if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
                        goto out_put;
        }

        percpu_counter_add(&pl->events, 1);
        percpu_counter_add(&pg->events, 1);

out_put:
        prop_put_global(pd, pg);
}
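
/*
 * Worked example (illustrative; the actual PROP_FRAC_SHIFT value comes
 * from linux/proportions.h): if PROP_FRAC_SHIFT were 10, so that
 * PROP_FRAC_BASE = 1024, then frac = 256 caps this contributor at 25%.
 * Once numerator/denominator would exceed 256/1024, further events are
 * dropped from both the local and the global count, and the proportion
 * decays back under the limit as other contributors advance t.
 */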

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
                struct prop_local_percpu *pl,
                long *numerator, long *denominator)
{
        struct prop_global *pg = prop_get_global(pd);
        unsigned long period_2 = 1UL << (pg->shift - 1);
        unsigned long counter_mask = period_2 - 1;
        unsigned long global_count;

        prop_norm_percpu(pg, pl);
        *numerator = percpu_counter_read_positive(&pl->events);

        global_count = percpu_counter_read(&pg->events);
        *denominator = period_2 + (global_count & counter_mask);

        prop_put_global(pd, pg);
}
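
/*
 * A hypothetical caller (names invented for illustration): scale some
 * budget by this contributor's current share of the event stream.
 *
 *	long num, den;
 *
 *	prop_fraction_percpu(&vm_completions, &writer->completions,
 *			     &num, &den);
 *	// den >= period/2 > 0, so the division is always safe
 *	share = total_budget * num / den;
 */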

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
        raw_spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
        pl->events = 0;
        return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
        unsigned long period = 1UL << (pg->shift - 1);
        unsigned long period_mask = ~(period - 1);
        unsigned long global_period;
        unsigned long flags;

        global_period = percpu_counter_read(&pg->events);
        global_period &= period_mask;

        /*
         * Fast path - check if the local and global period count still match
         * outside of the lock.
         */
        if (pl->period == global_period)
                return;

        raw_spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
        /*
         * For each missed period, we halve the local counter.
         */
        period = (global_period - pl->period) >> (pg->shift - 1);
        if (likely(period < BITS_PER_LONG))
                pl->events >>= period;
        else
                pl->events = 0;
        pl->period = global_period;
        raw_spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_single(pg, pl);
        pl->events++;
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
                struct prop_local_single *pl,
                long *numerator, long *denominator)
{
        struct prop_global *pg = prop_get_global(pd);
        unsigned long period_2 = 1UL << (pg->shift - 1);
        unsigned long counter_mask = period_2 - 1;
        unsigned long global_count;

        prop_norm_single(pg, pl);
        *numerator = pl->events;

        global_count = percpu_counter_read(&pg->events);
        *denominator = period_2 + (global_count & counter_mask);

        prop_put_global(pd, pg);
}
