The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/zfs/multilist.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * CDDL HEADER START
    3  *
    4  * This file and its contents are supplied under the terms of the
    5  * Common Development and Distribution License ("CDDL"), version 1.0.
    6  * You may only use this file in accordance with the terms of version
    7  * 1.0 of the CDDL.
    8  *
    9  * A full copy of the text of the CDDL should have accompanied this
   10  * source.  A copy of the CDDL is also available via the Internet at
   11  * http://www.illumos.org/license/CDDL.
   12  *
   13  * CDDL HEADER END
   14  */
   15 /*
   16  * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
   17  */
   18 
   19 #include <sys/zfs_context.h>
   20 #include <sys/multilist.h>
   21 #include <sys/trace_zfs.h>
   22 
   23 /*
   24  * This overrides the number of sublists in each multilist_t, which defaults
   25  * to the number of CPUs in the system (see multilist_create()).
   26  */
   27 uint_t zfs_multilist_num_sublists = 0;
   28 
   29 /*
   30  * Given the object contained on the list, return a pointer to the
   31  * object's multilist_node_t structure it contains.
   32  */
   33 #ifdef ZFS_DEBUG
   34 static multilist_node_t *
   35 multilist_d2l(multilist_t *ml, void *obj)
   36 {
   37         return ((multilist_node_t *)((char *)obj + ml->ml_offset));
   38 }
   39 #else
   40 #define multilist_d2l(ml, obj) ((void) sizeof (ml), (void) sizeof (obj), NULL)
   41 #endif
   42 
   43 /*
   44  * Initialize a new mutlilist using the parameters specified.
   45  *
   46  *  - 'size' denotes the size of the structure containing the
   47  *     multilist_node_t.
   48  *  - 'offset' denotes the byte offset of the mutlilist_node_t within
   49  *     the structure that contains it.
   50  *  - 'num' specifies the number of internal sublists to create.
   51  *  - 'index_func' is used to determine which sublist to insert into
   52  *     when the multilist_insert() function is called; as well as which
   53  *     sublist to remove from when multilist_remove() is called. The
   54  *     requirements this function must meet, are the following:
   55  *
   56  *      - It must always return the same value when called on the same
   57  *        object (to ensure the object is removed from the list it was
   58  *        inserted into).
   59  *
   60  *      - It must return a value in the range [0, number of sublists).
   61  *        The multilist_get_num_sublists() function may be used to
   62  *        determine the number of sublists in the multilist.
   63  *
   64  *     Also, in order to reduce internal contention between the sublists
   65  *     during insertion and removal, this function should choose evenly
   66  *     between all available sublists when inserting. This isn't a hard
   67  *     requirement, but a general rule of thumb in order to garner the
   68  *     best multi-threaded performance out of the data structure.
   69  */
   70 static void
   71 multilist_create_impl(multilist_t *ml, size_t size, size_t offset,
   72     uint_t num, multilist_sublist_index_func_t *index_func)
   73 {
   74         ASSERT3U(size, >, 0);
   75         ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
   76         ASSERT3U(num, >, 0);
   77         ASSERT3P(index_func, !=, NULL);
   78 
   79         ml->ml_offset = offset;
   80         ml->ml_num_sublists = num;
   81         ml->ml_index_func = index_func;
   82 
   83         ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
   84             ml->ml_num_sublists, KM_SLEEP);
   85 
   86         ASSERT3P(ml->ml_sublists, !=, NULL);
   87 
   88         for (int i = 0; i < ml->ml_num_sublists; i++) {
   89                 multilist_sublist_t *mls = &ml->ml_sublists[i];
   90                 mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
   91                 list_create(&mls->mls_list, size, offset);
   92         }
   93 }
   94 
   95 /*
   96  * Allocate a new multilist, using the default number of sublists (the number
   97  * of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note
   98  * that the multilists do not expand if more CPUs are hot-added. In that case,
   99  * we will have less fanout than boot_ncpus, but we don't want to always
  100  * reserve the RAM necessary to create the extra slots for additional CPUs up
  101  * front, and dynamically adding them is a complex task.
  102  */
  103 void
  104 multilist_create(multilist_t *ml, size_t size, size_t offset,
  105     multilist_sublist_index_func_t *index_func)
  106 {
  107         uint_t num_sublists;
  108 
  109         if (zfs_multilist_num_sublists > 0) {
  110                 num_sublists = zfs_multilist_num_sublists;
  111         } else {
  112                 num_sublists = MAX(boot_ncpus, 4);
  113         }
  114 
  115         multilist_create_impl(ml, size, offset, num_sublists, index_func);
  116 }
  117 
  118 /*
  119  * Destroy the given multilist object, and free up any memory it holds.
  120  */
  121 void
  122 multilist_destroy(multilist_t *ml)
  123 {
  124         ASSERT(multilist_is_empty(ml));
  125 
  126         for (int i = 0; i < ml->ml_num_sublists; i++) {
  127                 multilist_sublist_t *mls = &ml->ml_sublists[i];
  128 
  129                 ASSERT(list_is_empty(&mls->mls_list));
  130 
  131                 list_destroy(&mls->mls_list);
  132                 mutex_destroy(&mls->mls_lock);
  133         }
  134 
  135         ASSERT3P(ml->ml_sublists, !=, NULL);
  136         kmem_free(ml->ml_sublists,
  137             sizeof (multilist_sublist_t) * ml->ml_num_sublists);
  138 
  139         ml->ml_num_sublists = 0;
  140         ml->ml_offset = 0;
  141         ml->ml_sublists = NULL;
  142 }
  143 
  144 /*
  145  * Insert the given object into the multilist.
  146  *
  147  * This function will insert the object specified into the sublist
  148  * determined using the function given at multilist creation time.
  149  *
  150  * The sublist locks are automatically acquired if not already held, to
  151  * ensure consistency when inserting and removing from multiple threads.
  152  */
  153 void
  154 multilist_insert(multilist_t *ml, void *obj)
  155 {
  156         unsigned int sublist_idx = ml->ml_index_func(ml, obj);
  157         multilist_sublist_t *mls;
  158         boolean_t need_lock;
  159 
  160         DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
  161             unsigned int, sublist_idx, void *, obj);
  162 
  163         ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
  164 
  165         mls = &ml->ml_sublists[sublist_idx];
  166 
  167         /*
  168          * Note: Callers may already hold the sublist lock by calling
  169          * multilist_sublist_lock().  Here we rely on MUTEX_HELD()
  170          * returning TRUE if and only if the current thread holds the
  171          * lock.  While it's a little ugly to make the lock recursive in
  172          * this way, it works and allows the calling code to be much
  173          * simpler -- otherwise it would have to pass around a flag
  174          * indicating that it already has the lock.
  175          */
  176         need_lock = !MUTEX_HELD(&mls->mls_lock);
  177 
  178         if (need_lock)
  179                 mutex_enter(&mls->mls_lock);
  180 
  181         ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
  182 
  183         multilist_sublist_insert_head(mls, obj);
  184 
  185         if (need_lock)
  186                 mutex_exit(&mls->mls_lock);
  187 }
  188 
  189 /*
  190  * Remove the given object from the multilist.
  191  *
  192  * This function will remove the object specified from the sublist
  193  * determined using the function given at multilist creation time.
  194  *
  195  * The necessary sublist locks are automatically acquired, to ensure
  196  * consistency when inserting and removing from multiple threads.
  197  */
  198 void
  199 multilist_remove(multilist_t *ml, void *obj)
  200 {
  201         unsigned int sublist_idx = ml->ml_index_func(ml, obj);
  202         multilist_sublist_t *mls;
  203         boolean_t need_lock;
  204 
  205         DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
  206             unsigned int, sublist_idx, void *, obj);
  207 
  208         ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
  209 
  210         mls = &ml->ml_sublists[sublist_idx];
  211         /* See comment in multilist_insert(). */
  212         need_lock = !MUTEX_HELD(&mls->mls_lock);
  213 
  214         if (need_lock)
  215                 mutex_enter(&mls->mls_lock);
  216 
  217         ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
  218 
  219         multilist_sublist_remove(mls, obj);
  220 
  221         if (need_lock)
  222                 mutex_exit(&mls->mls_lock);
  223 }
  224 
  225 /*
  226  * Check to see if this multilist object is empty.
  227  *
  228  * This will return TRUE if it finds all of the sublists of this
  229  * multilist to be empty, and FALSE otherwise. Each sublist lock will be
  230  * automatically acquired as necessary.
  231  *
  232  * If concurrent insertions and removals are occurring, the semantics
  233  * of this function become a little fuzzy. Instead of locking all
  234  * sublists for the entire call time of the function, each sublist is
  235  * only locked as it is individually checked for emptiness. Thus, it's
  236  * possible for this function to return TRUE with non-empty sublists at
  237  * the time the function returns. This would be due to another thread
  238  * inserting into a given sublist, after that specific sublist was check
  239  * and deemed empty, but before all sublists have been checked.
  240  */
  241 int
  242 multilist_is_empty(multilist_t *ml)
  243 {
  244         for (int i = 0; i < ml->ml_num_sublists; i++) {
  245                 multilist_sublist_t *mls = &ml->ml_sublists[i];
  246                 /* See comment in multilist_insert(). */
  247                 boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
  248 
  249                 if (need_lock)
  250                         mutex_enter(&mls->mls_lock);
  251 
  252                 if (!list_is_empty(&mls->mls_list)) {
  253                         if (need_lock)
  254                                 mutex_exit(&mls->mls_lock);
  255 
  256                         return (FALSE);
  257                 }
  258 
  259                 if (need_lock)
  260                         mutex_exit(&mls->mls_lock);
  261         }
  262 
  263         return (TRUE);
  264 }
  265 
  266 /* Return the number of sublists composing this multilist */
  267 unsigned int
  268 multilist_get_num_sublists(multilist_t *ml)
  269 {
  270         return (ml->ml_num_sublists);
  271 }
  272 
  273 /* Return a randomly selected, valid sublist index for this multilist */
  274 unsigned int
  275 multilist_get_random_index(multilist_t *ml)
  276 {
  277         return (random_in_range(ml->ml_num_sublists));
  278 }
  279 
  280 /* Lock and return the sublist specified at the given index */
  281 multilist_sublist_t *
  282 multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
  283 {
  284         multilist_sublist_t *mls;
  285 
  286         ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
  287         mls = &ml->ml_sublists[sublist_idx];
  288         mutex_enter(&mls->mls_lock);
  289 
  290         return (mls);
  291 }
  292 
  293 /* Lock and return the sublist that would be used to store the specified obj */
  294 multilist_sublist_t *
  295 multilist_sublist_lock_obj(multilist_t *ml, void *obj)
  296 {
  297         return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj)));
  298 }
  299 
  300 void
  301 multilist_sublist_unlock(multilist_sublist_t *mls)
  302 {
  303         mutex_exit(&mls->mls_lock);
  304 }
  305 
  306 /*
  307  * We're allowing any object to be inserted into this specific sublist,
  308  * but this can lead to trouble if multilist_remove() is called to
  309  * remove this object. Specifically, if calling ml_index_func on this
  310  * object returns an index for sublist different than what is passed as
  311  * a parameter here, any call to multilist_remove() with this newly
  312  * inserted object is undefined! (the call to multilist_remove() will
  313  * remove the object from a list that it isn't contained in)
  314  */
  315 void
  316 multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
  317 {
  318         ASSERT(MUTEX_HELD(&mls->mls_lock));
  319         list_insert_head(&mls->mls_list, obj);
  320 }
  321 
  322 /* please see comment above multilist_sublist_insert_head */
  323 void
  324 multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
  325 {
  326         ASSERT(MUTEX_HELD(&mls->mls_lock));
  327         list_insert_tail(&mls->mls_list, obj);
  328 }
  329 
  330 /*
  331  * Move the object one element forward in the list.
  332  *
  333  * This function will move the given object forward in the list (towards
  334  * the head) by one object. So, in essence, it will swap its position in
  335  * the list with its "prev" pointer. If the given object is already at the
  336  * head of the list, it cannot be moved forward any more than it already
  337  * is, so no action is taken.
  338  *
  339  * NOTE: This function **must not** remove any object from the list other
  340  *       than the object given as the parameter. This is relied upon in
  341  *       arc_evict_state_impl().
  342  */
  343 void
  344 multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
  345 {
  346         void *prev = list_prev(&mls->mls_list, obj);
  347 
  348         ASSERT(MUTEX_HELD(&mls->mls_lock));
  349         ASSERT(!list_is_empty(&mls->mls_list));
  350 
  351         /* 'obj' must be at the head of the list, nothing to do */
  352         if (prev == NULL)
  353                 return;
  354 
  355         list_remove(&mls->mls_list, obj);
  356         list_insert_before(&mls->mls_list, prev, obj);
  357 }
  358 
  359 void
  360 multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
  361 {
  362         ASSERT(MUTEX_HELD(&mls->mls_lock));
  363         list_remove(&mls->mls_list, obj);
  364 }
  365 
  366 int
  367 multilist_sublist_is_empty(multilist_sublist_t *mls)
  368 {
  369         ASSERT(MUTEX_HELD(&mls->mls_lock));
  370         return (list_is_empty(&mls->mls_list));
  371 }
  372 
  373 int
  374 multilist_sublist_is_empty_idx(multilist_t *ml, unsigned int sublist_idx)
  375 {
  376         multilist_sublist_t *mls;
  377         int empty;
  378 
  379         ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
  380         mls = &ml->ml_sublists[sublist_idx];
  381         ASSERT(!MUTEX_HELD(&mls->mls_lock));
  382         mutex_enter(&mls->mls_lock);
  383         empty = list_is_empty(&mls->mls_list);
  384         mutex_exit(&mls->mls_lock);
  385         return (empty);
  386 }
  387 
  388 void *
  389 multilist_sublist_head(multilist_sublist_t *mls)
  390 {
  391         ASSERT(MUTEX_HELD(&mls->mls_lock));
  392         return (list_head(&mls->mls_list));
  393 }
  394 
  395 void *
  396 multilist_sublist_tail(multilist_sublist_t *mls)
  397 {
  398         ASSERT(MUTEX_HELD(&mls->mls_lock));
  399         return (list_tail(&mls->mls_list));
  400 }
  401 
  402 void *
  403 multilist_sublist_next(multilist_sublist_t *mls, void *obj)
  404 {
  405         ASSERT(MUTEX_HELD(&mls->mls_lock));
  406         return (list_next(&mls->mls_list, obj));
  407 }
  408 
  409 void *
  410 multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
  411 {
  412         ASSERT(MUTEX_HELD(&mls->mls_lock));
  413         return (list_prev(&mls->mls_list, obj));
  414 }
  415 
  416 void
  417 multilist_link_init(multilist_node_t *link)
  418 {
  419         list_link_init(link);
  420 }
  421 
  422 int
  423 multilist_link_active(multilist_node_t *link)
  424 {
  425         return (list_link_active(link));
  426 }
  427 
  428 ZFS_MODULE_PARAM(zfs, zfs_, multilist_num_sublists, UINT, ZMOD_RW,
  429         "Number of sublists used in each multilist");

Cache object: 3ef656f6aa2bc85654b18608f70655cd


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.