FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_radix.c


/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Path-compressed radix trie implementation.
 *
 * This code is not generalized into a general-purpose library because
 * too many embedded parameters should really be decided by the library
 * consumers.  At the same time, consumers of this code must achieve the
 * highest possible performance.
 *
 * The implementation takes into account the following rationale:
 * - The size of the nodes should be as small as possible but still big
 *   enough to avoid a large maximum depth for the trie.  This is a
 *   balance between the need not to wire too much physical memory for
 *   the nodes and the need to avoid too much cache pollution during
 *   trie operations.
 * - There is no huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This implies that
 *   optimizations supposedly helping one operation but hurting the
 *   other must be carefully evaluated.
 * - On average, not many nodes are expected to be fully populated,
 *   hence level compression would likely just complicate things.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/smr.h>
#include <sys/smr_types.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * These widths should allow the pointers to a node's children to fit within
 * a single cache line.  The extra levels from a narrow width should not be
 * a problem thanks to path compression.
 */
#ifdef __LP64__
#define VM_RADIX_WIDTH  4
#else
#define VM_RADIX_WIDTH  3
#endif

#define VM_RADIX_COUNT  (1 << VM_RADIX_WIDTH)
#define VM_RADIX_MASK   (VM_RADIX_COUNT - 1)
#define VM_RADIX_LIMIT                                                  \
        (howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1)

/* Flag bits stored in node pointers. */
#define VM_RADIX_ISLEAF 0x1
#define VM_RADIX_FLAGS  0x1
#define VM_RADIX_PAD    VM_RADIX_FLAGS

/* Returns one unit associated with the specified level. */
#define VM_RADIX_UNITLEVEL(lev)                                         \
        ((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))

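/*
 * Worked example (illustrative): with VM_RADIX_WIDTH == 4 on LP64, a
 * 64-bit vm_pindex_t decomposes into 16 4-bit digits, so
 * VM_RADIX_COUNT == 16, VM_RADIX_MASK == 0xf and VM_RADIX_LIMIT == 15.
 * For index 0x1234, the digit (slot) at level 0 is 0x4 and at level 1
 * is 0x3, while VM_RADIX_UNITLEVEL(1) == 0x10 is the key distance
 * between neighboring slots at level 1.
 */
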
enum vm_radix_access { SMR, LOCKED, UNSERIALIZED };

struct vm_radix_node;
typedef SMR_POINTER(struct vm_radix_node *) smrnode_t;

struct vm_radix_node {
        vm_pindex_t     rn_owner;                       /* Owner of record. */
        uint16_t        rn_count;                       /* Valid children. */
        uint8_t         rn_clev;                        /* Current level. */
        int8_t          rn_last;                        /* Slot to zero, +1. */
        smrnode_t       rn_child[VM_RADIX_COUNT];       /* Child nodes. */
};

static uma_zone_t vm_radix_node_zone;
static smr_t vm_radix_smr;

static void vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
    enum vm_radix_access access);

/*
 * Allocate a radix node.
 */
static struct vm_radix_node *
vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
{
        struct vm_radix_node *rnode;

        rnode = uma_zalloc_smr(vm_radix_node_zone, M_NOWAIT);
        if (rnode == NULL)
                return (NULL);

        /*
         * We want to clear the last child pointer only after the final SMR
         * read section has exited, so that lookup cannot return false
         * negatives.  It is done here, on reallocation, because the pointer
         * would be cache-cold in the dtor callback.
         */
        if (rnode->rn_last != 0) {
                vm_radix_node_store(&rnode->rn_child[rnode->rn_last - 1],
                    NULL, UNSERIALIZED);
                rnode->rn_last = 0;
        }
        rnode->rn_owner = owner;
        rnode->rn_count = count;
        rnode->rn_clev = clevel;
        return (rnode);
}

/*
 * Free a radix node.
 */
static __inline void
vm_radix_node_put(struct vm_radix_node *rnode, int8_t last)
{
#ifdef INVARIANTS
        int slot;

        KASSERT(rnode->rn_count == 0,
            ("vm_radix_node_put: rnode %p has %d children", rnode,
            rnode->rn_count));
        for (slot = 0; slot < VM_RADIX_COUNT; slot++) {
                if (slot == last)
                        continue;
                KASSERT(smr_unserialized_load(&rnode->rn_child[slot], true) ==
                    NULL, ("vm_radix_node_put: rnode %p has a child", rnode));
        }
#endif
        /* Stored off by one so a freshly zeroed node needs no clearing. */
        rnode->rn_last = last + 1;
        uma_zfree_smr(vm_radix_node_zone, rnode);
}
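
/*
 * Example (illustrative): a node whose sole remaining child sits in
 * slot 3 is freed with vm_radix_node_put(rnode, 3), recording
 * rn_last == 4.  When vm_radix_node_get() later reuses the node, it
 * clears rn_child[3] before handing the node out, which is safe only
 * then because uma_zalloc_smr() guarantees that all SMR readers of the
 * old incarnation have finished.
 */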

/*
 * Returns the position in the child array for the given level of the index.
 */
static __inline int
vm_radix_slot(vm_pindex_t index, uint16_t level)
{

        return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
}

/* Trims the key, zeroing the digits below the specified level. */
static __inline vm_pindex_t
vm_radix_trimkey(vm_pindex_t index, uint16_t level)
{
        vm_pindex_t ret;

        ret = index;
        if (level > 0) {
                ret >>= level * VM_RADIX_WIDTH;
                ret <<= level * VM_RADIX_WIDTH;
        }
        return (ret);
}
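
/*
 * Worked example (illustrative): with VM_RADIX_WIDTH == 4,
 * vm_radix_slot(0x1234, 1) == 0x3 and vm_radix_trimkey(0x1234, 1) ==
 * 0x1230: the level-0 digit is discarded while the higher digits are
 * preserved.
 */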

/*
 * Fetch a node pointer from a slot in another node.
 */
static __inline struct vm_radix_node *
vm_radix_node_load(smrnode_t *p, enum vm_radix_access access)
{

        switch (access) {
        case UNSERIALIZED:
                return (smr_unserialized_load(p, true));
        case LOCKED:
                return (smr_serialized_load(p, true));
        case SMR:
                return (smr_entered_load(p, vm_radix_smr));
        }
        __assert_unreachable();
}

static __inline void
vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
    enum vm_radix_access access)
{

        switch (access) {
        case UNSERIALIZED:
                smr_unserialized_store(p, v, true);
                break;
        case LOCKED:
                smr_serialized_store(p, v, true);
                break;
        case SMR:
                panic("vm_radix_node_store: Not supported in smr section.");
        }
}

/*
 * Get the root node for a radix tree.
 */
static __inline struct vm_radix_node *
vm_radix_root_load(struct vm_radix *rtree, enum vm_radix_access access)
{

        return (vm_radix_node_load((smrnode_t *)&rtree->rt_root, access));
}

/*
 * Set the root node for a radix tree.
 */
static __inline void
vm_radix_root_store(struct vm_radix *rtree, struct vm_radix_node *rnode,
    enum vm_radix_access access)
{

        vm_radix_node_store((smrnode_t *)&rtree->rt_root, rnode, access);
}

/*
 * Returns TRUE if the specified radix node is a leaf and FALSE otherwise.
 */
static __inline boolean_t
vm_radix_isleaf(struct vm_radix_node *rnode)
{

        return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0);
}

/*
 * Returns the associated page extracted from rnode.
 */
static __inline vm_page_t
vm_radix_topage(struct vm_radix_node *rnode)
{

        return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS));
}
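
/*
 * Leaf encoding (illustrative): a leaf is a vm_page_t stored directly
 * in a child slot with VM_RADIX_ISLEAF or'ed into its low bit, e.g.
 *
 *      (struct vm_radix_node *)((uintptr_t)page | VM_RADIX_ISLEAF)
 *
 * The alignment of struct vm_page guarantees that the low pointer bit
 * is otherwise zero, so vm_radix_topage() recovers the page by masking
 * VM_RADIX_FLAGS back off.
 */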

/*
 * Adds the page as a child of the provided node.
 */
static __inline void
vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
    vm_page_t page, enum vm_radix_access access)
{
        int slot;

        slot = vm_radix_slot(index, clev);
        vm_radix_node_store(&rnode->rn_child[slot],
            (struct vm_radix_node *)((uintptr_t)page | VM_RADIX_ISLEAF), access);
}

/*
 * Returns the level at which two keys differ.
 * It must not be called with two equal keys.
 */
static __inline uint16_t
vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
{
        uint16_t clev;

        KASSERT(index1 != index2, ("%s: passing the same key value %jx",
            __func__, (uintmax_t)index1));

        index1 ^= index2;
        for (clev = VM_RADIX_LIMIT;; clev--)
                if (vm_radix_slot(index1, clev) != 0)
                        return (clev);
}
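
/*
 * Worked example (illustrative): with VM_RADIX_WIDTH == 4,
 * vm_radix_keydiff(0x1234, 0x1434) == 2, since 0x1234 ^ 0x1434 ==
 * 0x0600 and the most significant nonzero digit is the level-2 one.
 */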

/*
 * Returns TRUE if it can be determined that key does not belong to the
 * specified rnode.  Otherwise, returns FALSE.
 */
static __inline boolean_t
vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
{

        if (rnode->rn_clev < VM_RADIX_LIMIT) {
                idx = vm_radix_trimkey(idx, rnode->rn_clev + 1);
                return (idx != rnode->rn_owner);
        }
        return (FALSE);
}
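
/*
 * Example (illustrative): a node with rn_clev == 2 and rn_owner ==
 * 0x1000 covers keys 0x1000-0x1fff.  vm_radix_keybarr() returns FALSE
 * for idx == 0x1fff (trimmed to 0x1000, matching the owner) and TRUE
 * for idx == 0x2345 (trimmed to 0x2000), so the search can stop early.
 */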

/*
 * Internal helper for vm_radix_reclaim_allnodes().
 * This function is recursive.
 */
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
        struct vm_radix_node *child;
        int slot;

        KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
            ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
        for (slot = 0; rnode->rn_count != 0; slot++) {
                child = vm_radix_node_load(&rnode->rn_child[slot], UNSERIALIZED);
                if (child == NULL)
                        continue;
                if (!vm_radix_isleaf(child))
                        vm_radix_reclaim_allnodes_int(child);
                vm_radix_node_store(&rnode->rn_child[slot], NULL, UNSERIALIZED);
                rnode->rn_count--;
        }
        vm_radix_node_put(rnode, -1);
}

#ifndef UMA_MD_SMALL_ALLOC
void vm_radix_reserve_kva(void);
/*
 * Reserve the KVA necessary to satisfy the node allocation.
 * This is mandatory on architectures not supporting a direct mapping, as
 * they would otherwise need to carve into the kernel maps for every node
 * allocation, resulting in deadlocks for consumers already working with
 * kernel maps.
 */
void
vm_radix_reserve_kva(void)
{

        /*
         * Calculate the number of reserved nodes, discounting the pages that
         * are needed to store them.
         */
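        /*
         * Derivation (illustrative): reserving x nodes consumes about
         * x * sizeof(struct vm_radix_node) / PAGE_SIZE pages of backing
         * store.  Solving
         *      x = v_page_count - x * sizeof(node) / PAGE_SIZE
         * for x gives
         *      x = v_page_count * PAGE_SIZE / (PAGE_SIZE + sizeof(node)),
         * the expression used below.
         */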
        if (!uma_zone_reserve_kva(vm_radix_node_zone,
            ((vm_paddr_t)vm_cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE +
            sizeof(struct vm_radix_node))))
                panic("%s: unable to reserve KVA", __func__);
}
#endif

/*
 * Initialize the UMA slab zone.
 */
void
vm_radix_zinit(void)
{

        vm_radix_node_zone = uma_zcreate("RADIX NODE",
            sizeof(struct vm_radix_node), NULL, NULL, NULL, NULL,
            VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_SMR | UMA_ZONE_ZINIT);
        vm_radix_smr = uma_zone_get_smr(vm_radix_node_zone);
}

/*
 * Inserts the key-value pair into the trie.
 * Panics if the key already exists.
 */
int
vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
{
        vm_pindex_t index, newind;
        struct vm_radix_node *rnode, *tmp;
        smrnode_t *parentp;
        vm_page_t m;
        int slot;
        uint16_t clev;

        index = page->pindex;

        /*
         * The owner of record for the root is not really important because
         * it will never be used.
         */
        rnode = vm_radix_root_load(rtree, LOCKED);
        if (rnode == NULL) {
                rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF;
                return (0);
        }
        parentp = (smrnode_t *)&rtree->rt_root;
        for (;;) {
                if (vm_radix_isleaf(rnode)) {
                        m = vm_radix_topage(rnode);
                        if (m->pindex == index)
                                panic("%s: key %jx is already present",
                                    __func__, (uintmax_t)index);
                        clev = vm_radix_keydiff(m->pindex, index);
                        tmp = vm_radix_node_get(vm_radix_trimkey(index,
                            clev + 1), 2, clev);
                        if (tmp == NULL)
                                return (ENOMEM);
                        /* These writes are not yet visible due to ordering. */
                        vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
                        vm_radix_addpage(tmp, m->pindex, clev, m, UNSERIALIZED);
                        /* Serializing write to make the leaves visible. */
                        vm_radix_node_store(parentp, tmp, LOCKED);
                        return (0);
                } else if (vm_radix_keybarr(rnode, index))
                        break;
                slot = vm_radix_slot(index, rnode->rn_clev);
                parentp = &rnode->rn_child[slot];
                tmp = vm_radix_node_load(parentp, LOCKED);
                if (tmp == NULL) {
                        rnode->rn_count++;
                        vm_radix_addpage(rnode, index, rnode->rn_clev, page,
                            LOCKED);
                        return (0);
                }
                rnode = tmp;
        }

        /*
         * A new node is needed because the right insertion level was reached.
         * Set up the new intermediate node and add the two children: the
         * new page and the old edge.
         */
        newind = rnode->rn_owner;
        clev = vm_radix_keydiff(newind, index);
        tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev);
        if (tmp == NULL)
                return (ENOMEM);
        slot = vm_radix_slot(newind, clev);
        /* These writes are not yet visible due to ordering. */
        vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
        vm_radix_node_store(&tmp->rn_child[slot], rnode, UNSERIALIZED);
        /* Serializing write to make the above visible. */
        vm_radix_node_store(parentp, tmp, LOCKED);

        return (0);
}
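
/*
 * Illustrative caller sketch (hypothetical, not from the original
 * file): a consumer is expected to hold the object lock across the
 * insert and may retry after vm_radix_wait() when node allocation
 * fails.  Real callers typically drop their locks before sleeping.
 */
#if 0
static int
example_radix_insert(struct vm_radix *rtree, vm_page_t m)
{
        int error;

        while ((error = vm_radix_insert(rtree, m)) == ENOMEM)
                vm_radix_wait();        /* Sleep until a node is freed. */
        return (error);
}
#endif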

/*
 * Returns TRUE if the specified radix tree contains a single leaf and FALSE
 * otherwise.
 */
boolean_t
vm_radix_is_singleton(struct vm_radix *rtree)
{
        struct vm_radix_node *rnode;

        rnode = vm_radix_root_load(rtree, LOCKED);
        if (rnode == NULL)
                return (FALSE);
        return (vm_radix_isleaf(rnode));
}

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
static __always_inline vm_page_t
_vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index,
    enum vm_radix_access access)
{
        struct vm_radix_node *rnode;
        vm_page_t m;
        int slot;

        rnode = vm_radix_root_load(rtree, access);
        while (rnode != NULL) {
                if (vm_radix_isleaf(rnode)) {
                        m = vm_radix_topage(rnode);
                        if (m->pindex == index)
                                return (m);
                        break;
                }
                if (vm_radix_keybarr(rnode, index))
                        break;
                slot = vm_radix_slot(index, rnode->rn_clev);
                rnode = vm_radix_node_load(&rnode->rn_child[slot], access);
        }
        return (NULL);
}

/*
 * Returns the value stored at the index, assuming access is serialized
 * by an external lock.
 *
 * If the index is not present, NULL is returned.
 */
vm_page_t
vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
{

        return (_vm_radix_lookup(rtree, index, LOCKED));
}

/*
 * Returns the value stored at the index without requiring an external lock.
 *
 * If the index is not present, NULL is returned.
 */
vm_page_t
vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
{
        vm_page_t m;

        smr_enter(vm_radix_smr);
        m = _vm_radix_lookup(rtree, index, SMR);
        smr_exit(vm_radix_smr);

        return (m);
}
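
/*
 * Note (an editorial assumption about caller obligations): an SMR
 * lookup only guarantees that the returned page was at this index at
 * some point during the call.  The page may be concurrently removed or
 * freed, so callers must revalidate it, for instance by busying the
 * page and re-checking its object and pindex, before relying on it.
 */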

/*
 * Look up the nearest entry at a position greater than or equal to index.
 */
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *stack[VM_RADIX_LIMIT];
        vm_pindex_t inc;
        vm_page_t m;
        struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
        int loops = 0;
#endif
        int slot, tos;

        rnode = vm_radix_root_load(rtree, LOCKED);
        if (rnode == NULL)
                return (NULL);
        else if (vm_radix_isleaf(rnode)) {
                m = vm_radix_topage(rnode);
                if (m->pindex >= index)
                        return (m);
                else
                        return (NULL);
        }
        tos = 0;
        for (;;) {
                /*
                 * If the keys differ before the current bisection node,
                 * then the search key might need to roll back to the
                 * earliest available bisection node, or to the smallest key
                 * in the current node (if the owner is greater than the
                 * search key).
                 */
                if (vm_radix_keybarr(rnode, index)) {
                        if (index > rnode->rn_owner) {
ascend:
                                KASSERT(++loops < 1000,
                                    ("vm_radix_lookup_ge: too many loops"));

                                /*
                                 * Pop nodes from the stack until either the
                                 * stack is empty or a node that could have a
                                 * matching descendant is found.
                                 */
                                do {
                                        if (tos == 0)
                                                return (NULL);
                                        rnode = stack[--tos];
                                } while (vm_radix_slot(index,
                                    rnode->rn_clev) == (VM_RADIX_COUNT - 1));

                                /*
                                 * The following computation cannot overflow
                                 * because index's slot at the current level
                                 * is less than VM_RADIX_COUNT - 1.
                                 */
                                index = vm_radix_trimkey(index,
                                    rnode->rn_clev);
                                index += VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        } else
                                index = rnode->rn_owner;
                        KASSERT(!vm_radix_keybarr(rnode, index),
                            ("vm_radix_lookup_ge: keybarr failed"));
                }
                slot = vm_radix_slot(index, rnode->rn_clev);
                child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
                if (vm_radix_isleaf(child)) {
                        m = vm_radix_topage(child);
                        if (m->pindex >= index)
                                return (m);
                } else if (child != NULL)
                        goto descend;

                /*
                 * Look for an available edge or page within the current
                 * bisection node.
                 */
                if (slot < (VM_RADIX_COUNT - 1)) {
                        inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        index = vm_radix_trimkey(index, rnode->rn_clev);
                        do {
                                index += inc;
                                slot++;
                                child = vm_radix_node_load(&rnode->rn_child[slot],
                                    LOCKED);
                                if (vm_radix_isleaf(child)) {
                                        m = vm_radix_topage(child);
                                        if (m->pindex >= index)
                                                return (m);
                                } else if (child != NULL)
                                        goto descend;
                        } while (slot < (VM_RADIX_COUNT - 1));
                }
                KASSERT(child == NULL || vm_radix_isleaf(child),
                    ("vm_radix_lookup_ge: child is radix node"));

                /*
                 * If a page or edge greater than the search slot is not found
                 * in the current node, ascend to the next higher-level node.
                 */
                goto ascend;
descend:
                KASSERT(rnode->rn_clev > 0,
                    ("vm_radix_lookup_ge: pushing leaf's parent"));
                KASSERT(tos < VM_RADIX_LIMIT,
                    ("vm_radix_lookup_ge: stack overflow"));
                stack[tos++] = rnode;
                rnode = child;
        }
}
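
/*
 * Worked example (illustrative): with pages at indexes 0x11 and 0x30
 * under a single level-1 node, vm_radix_lookup_ge(rtree, 0x20) maps
 * 0x20 to slot 2, finds it empty, steps forward to slot 3, and returns
 * the page at 0x30.
 */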

/*
 * Look up the nearest entry at a position less than or equal to index.
 */
vm_page_t
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *stack[VM_RADIX_LIMIT];
        vm_pindex_t inc;
        vm_page_t m;
        struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
        int loops = 0;
#endif
        int slot, tos;

        rnode = vm_radix_root_load(rtree, LOCKED);
        if (rnode == NULL)
                return (NULL);
        else if (vm_radix_isleaf(rnode)) {
                m = vm_radix_topage(rnode);
                if (m->pindex <= index)
                        return (m);
                else
                        return (NULL);
        }
        tos = 0;
        for (;;) {
                /*
                 * If the keys differ before the current bisection node,
                 * then the search key might need to roll back to the
                 * earliest available bisection node, or to the largest key
                 * in the current node (if the owner is smaller than the
                 * search key).
                 */
                if (vm_radix_keybarr(rnode, index)) {
                        if (index > rnode->rn_owner) {
                                index = rnode->rn_owner + VM_RADIX_COUNT *
                                    VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        } else {
ascend:
                                KASSERT(++loops < 1000,
                                    ("vm_radix_lookup_le: too many loops"));

                                /*
                                 * Pop nodes from the stack until either the
                                 * stack is empty or a node that could have a
                                 * matching descendant is found.
                                 */
                                do {
                                        if (tos == 0)
                                                return (NULL);
                                        rnode = stack[--tos];
                                } while (vm_radix_slot(index,
                                    rnode->rn_clev) == 0);

                                /*
                                 * The following computation cannot overflow
                                 * because index's slot at the current level
                                 * is greater than 0.
                                 */
                                index = vm_radix_trimkey(index,
                                    rnode->rn_clev);
                        }
                        index--;
                        KASSERT(!vm_radix_keybarr(rnode, index),
                            ("vm_radix_lookup_le: keybarr failed"));
                }
                slot = vm_radix_slot(index, rnode->rn_clev);
                child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
                if (vm_radix_isleaf(child)) {
                        m = vm_radix_topage(child);
                        if (m->pindex <= index)
                                return (m);
                } else if (child != NULL)
                        goto descend;

                /*
                 * Look for an available edge or page within the current
                 * bisection node.
                 */
                if (slot > 0) {
                        inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        index |= inc - 1;
                        do {
                                index -= inc;
                                slot--;
                                child = vm_radix_node_load(&rnode->rn_child[slot],
                                    LOCKED);
                                if (vm_radix_isleaf(child)) {
                                        m = vm_radix_topage(child);
                                        if (m->pindex <= index)
                                                return (m);
                                } else if (child != NULL)
                                        goto descend;
                        } while (slot > 0);
                }
                KASSERT(child == NULL || vm_radix_isleaf(child),
                    ("vm_radix_lookup_le: child is radix node"));

                /*
                 * If a page or edge smaller than the search slot is not found
                 * in the current node, ascend to the next higher-level node.
                 */
                goto ascend;
descend:
                KASSERT(rnode->rn_clev > 0,
                    ("vm_radix_lookup_le: pushing leaf's parent"));
                KASSERT(tos < VM_RADIX_LIMIT,
                    ("vm_radix_lookup_le: stack overflow"));
                stack[tos++] = rnode;
                rnode = child;
        }
}
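
/*
 * Worked example (illustrative): with the same two pages at 0x11 and
 * 0x30, vm_radix_lookup_le(rtree, 0x20) maps 0x20 to slot 2, finds it
 * empty, steps back to slot 1, and returns the page at 0x11.
 */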

/*
 * Remove the specified index from the trie, and return the value stored at
 * that index.  If the index is not present, return NULL.
 */
vm_page_t
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *rnode, *parent, *tmp;
        vm_page_t m;
        int i, slot;

        rnode = vm_radix_root_load(rtree, LOCKED);
        if (vm_radix_isleaf(rnode)) {
                m = vm_radix_topage(rnode);
                if (m->pindex != index)
                        return (NULL);
                vm_radix_root_store(rtree, NULL, LOCKED);
                return (m);
        }
        parent = NULL;
        for (;;) {
                if (rnode == NULL)
                        return (NULL);
                slot = vm_radix_slot(index, rnode->rn_clev);
                tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
                if (vm_radix_isleaf(tmp)) {
                        m = vm_radix_topage(tmp);
                        if (m->pindex != index)
                                return (NULL);
                        vm_radix_node_store(&rnode->rn_child[slot], NULL, LOCKED);
                        rnode->rn_count--;
                        if (rnode->rn_count > 1)
                                return (m);
                        for (i = 0; i < VM_RADIX_COUNT; i++)
                                if (vm_radix_node_load(&rnode->rn_child[i],
                                    LOCKED) != NULL)
                                        break;
                        KASSERT(i != VM_RADIX_COUNT,
                            ("%s: invalid node configuration", __func__));
                        tmp = vm_radix_node_load(&rnode->rn_child[i], LOCKED);
                        if (parent == NULL)
                                vm_radix_root_store(rtree, tmp, LOCKED);
                        else {
                                slot = vm_radix_slot(index, parent->rn_clev);
                                KASSERT(vm_radix_node_load(
                                    &parent->rn_child[slot], LOCKED) == rnode,
                                    ("%s: invalid child value", __func__));
                                vm_radix_node_store(&parent->rn_child[slot],
                                    tmp, LOCKED);
                        }
                        /*
                         * The child is still valid and we cannot zero the
                         * pointer until all SMR references are gone.
                         */
                        rnode->rn_count--;
                        vm_radix_node_put(rnode, i);
                        return (m);
                }
                parent = rnode;
                rnode = tmp;
        }
}
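
/*
 * Collapse example (illustrative): continuing the earlier two-page
 * example, removing the page at 0x30 drops the node's child count to
 * one, so the remaining leaf at 0x11 is promoted into the parent (here
 * the root) and the now-empty node is freed with vm_radix_node_put(),
 * which defers zeroing the promoted child's old slot until SMR readers
 * have drained.
 */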

/*
 * Remove and free all the nodes from the radix tree.
 * This function is recursive, but the recursion depth is bounded by the
 * fixed maximum depth of the trie.
 */
void
vm_radix_reclaim_allnodes(struct vm_radix *rtree)
{
        struct vm_radix_node *root;

        root = vm_radix_root_load(rtree, LOCKED);
        if (root == NULL)
                return;
        vm_radix_root_store(rtree, NULL, UNSERIALIZED);
        if (!vm_radix_isleaf(root))
                vm_radix_reclaim_allnodes_int(root);
}

/*
 * Replace an existing page in the trie with another one.
 * Panics if there is no old page in the trie at the new page's index.
 */
vm_page_t
vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage)
{
        struct vm_radix_node *rnode, *tmp;
        vm_page_t m;
        vm_pindex_t index;
        int slot;

        index = newpage->pindex;
        rnode = vm_radix_root_load(rtree, LOCKED);
        if (rnode == NULL)
                panic("%s: replacing page on an empty trie", __func__);
        if (vm_radix_isleaf(rnode)) {
                m = vm_radix_topage(rnode);
                if (m->pindex != index)
                        panic("%s: original replacing root key not found",
                            __func__);
                rtree->rt_root = (uintptr_t)newpage | VM_RADIX_ISLEAF;
                return (m);
        }
        for (;;) {
                slot = vm_radix_slot(index, rnode->rn_clev);
                tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
                if (vm_radix_isleaf(tmp)) {
                        m = vm_radix_topage(tmp);
                        if (m->pindex == index) {
                                vm_radix_node_store(&rnode->rn_child[slot],
                                    (struct vm_radix_node *)((uintptr_t)newpage |
                                    VM_RADIX_ISLEAF), LOCKED);
                                return (m);
                        } else
                                break;
                } else if (tmp == NULL || vm_radix_keybarr(tmp, index))
                        break;
                rnode = tmp;
        }
        panic("%s: original replacing page not found", __func__);
}

void
vm_radix_wait(void)
{
        uma_zwait(vm_radix_node_zone);
}

#ifdef DDB
/*
 * Show details about the given radix node.
 */
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
        struct vm_radix_node *rnode, *tmp;
        int i;

        if (!have_addr)
                return;
        rnode = (struct vm_radix_node *)addr;
        db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
            (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
            rnode->rn_clev);
        for (i = 0; i < VM_RADIX_COUNT; i++) {
                tmp = vm_radix_node_load(&rnode->rn_child[i], UNSERIALIZED);
                if (tmp != NULL)
                        db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
                            i, (void *)tmp,
                            vm_radix_isleaf(tmp) ? vm_radix_topage(tmp) : NULL,
                            rnode->rn_clev);
        }
}
#endif /* DDB */
