FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_radix.c

/*
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Path-compressed radix trie implementation.
 * The following code is not generalized into a general-purpose library
 * because there are too many embedded parameters that should really be
 * decided by the library consumers.  At the same time, consumers of this
 * code must achieve the highest possible performance.
 *
 * The implementation takes the following rationale into account:
 * - The size of the nodes should be as small as possible but still large
 *   enough to avoid a large maximum depth for the trie.  This is a balance
 *   between the need not to wire too much physical memory for the nodes
 *   and the need to avoid too much cache pollution during trie operations.
 * - There is no strong bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This implies that
 *   optimizations that supposedly help one operation but hurt the other
 *   must be carefully evaluated.
 * - On average, not many nodes are expected to be fully populated, hence
 *   level compression would only complicate things.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vmmeter.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * These widths should allow the pointers to a node's children to fit within
 * a single cache line.  The extra levels from a narrow width should not be
 * a problem thanks to path compression.
 */
#ifdef __LP64__
#define VM_RADIX_WIDTH  4
#else
#define VM_RADIX_WIDTH  3
#endif

#define VM_RADIX_COUNT  (1 << VM_RADIX_WIDTH)
#define VM_RADIX_MASK   (VM_RADIX_COUNT - 1)
#define VM_RADIX_LIMIT                                                  \
        (howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1)

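/*
 * Worked example (illustrative, not part of the original source): on an
 * LP64 platform, vm_pindex_t is 64 bits wide and VM_RADIX_WIDTH is 4, so
 * each node decodes 4 bits of the key.  This gives:
 *
 *      VM_RADIX_COUNT = 1 << 4 = 16 children per node
 *      VM_RADIX_MASK  = 16 - 1 = 0xf
 *      VM_RADIX_LIMIT = howmany(64, 4) - 1 = 16 - 1 = 15
 *
 * so a key is consumed 4 bits at a time over levels 0..15, bounding the
 * trie depth at 16 even with no path compression.
 */
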
/* Flag bits stored in node pointers. */
#define VM_RADIX_ISLEAF 0x1
#define VM_RADIX_FLAGS  0x1
#define VM_RADIX_PAD    VM_RADIX_FLAGS

/* Returns one unit associated with the specified level. */
#define VM_RADIX_UNITLEVEL(lev)                                         \
        ((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))

struct vm_radix_node {
        vm_pindex_t      rn_owner;                      /* Owner of record. */
        uint16_t         rn_count;                      /* Valid children. */
        uint16_t         rn_clev;                       /* Current level. */
        void            *rn_child[VM_RADIX_COUNT];      /* Child nodes. */
};

static uma_zone_t vm_radix_node_zone;

/*
 * Allocate a radix node.
 */
static __inline struct vm_radix_node *
vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
{
        struct vm_radix_node *rnode;

        rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO);
        if (rnode == NULL)
                return (NULL);
        rnode->rn_owner = owner;
        rnode->rn_count = count;
        rnode->rn_clev = clevel;
        return (rnode);
}

/*
 * Free radix node.
 */
static __inline void
vm_radix_node_put(struct vm_radix_node *rnode)
{

        uma_zfree(vm_radix_node_zone, rnode);
}

/*
 * Return the position in the array for a given level.
 */
static __inline int
vm_radix_slot(vm_pindex_t index, uint16_t level)
{

        return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
}

/* Trims the key after the specified level. */
static __inline vm_pindex_t
vm_radix_trimkey(vm_pindex_t index, uint16_t level)
{
        vm_pindex_t ret;

        ret = index;
        if (level > 0) {
                ret >>= level * VM_RADIX_WIDTH;
                ret <<= level * VM_RADIX_WIDTH;
        }
        return (ret);
}
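
/*
 * Worked example (illustrative, not part of the original source), assuming
 * the LP64 parameters above (VM_RADIX_WIDTH = 4, VM_RADIX_MASK = 0xf):
 *
 *      vm_radix_slot(0x1234, 0) == (0x1234 >> 0) & 0xf == 0x4
 *      vm_radix_slot(0x1234, 1) == (0x1234 >> 4) & 0xf == 0x3
 *      vm_radix_trimkey(0x1234, 1) == 0x1230  (level-0 digit cleared)
 *      vm_radix_trimkey(0x1234, 3) == 0x1000  (levels 0-2 cleared)
 *
 * i.e. each level selects one 4-bit digit of the key, and trimming zeroes
 * all digits below the given level.
 */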

/*
 * Get the root node for a radix tree.
 */
static __inline struct vm_radix_node *
vm_radix_getroot(struct vm_radix *rtree)
{

        return ((struct vm_radix_node *)rtree->rt_root);
}

/*
 * Set the root node for a radix tree.
 */
static __inline void
vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
{

        rtree->rt_root = (uintptr_t)rnode;
}

/*
 * Returns TRUE if the specified radix node is a leaf and FALSE otherwise.
 */
static __inline boolean_t
vm_radix_isleaf(struct vm_radix_node *rnode)
{

        return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0);
}

/*
 * Returns the associated page extracted from rnode.
 */
static __inline vm_page_t
vm_radix_topage(struct vm_radix_node *rnode)
{

        return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS));
}
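
/*
 * Illustrative note (not part of the original source): leaves and internal
 * nodes share the same child slots, distinguished by tagging the low bit
 * of the pointer, which is zero for suitably aligned structures.  For some
 * page m, a hypothetical round trip would look like:
 *
 *      void *leaf = (void *)((uintptr_t)m | VM_RADIX_ISLEAF);
 *      vm_radix_isleaf(leaf)  -> TRUE, since bit 0 is set
 *      vm_radix_topage(leaf)  -> m, masking VM_RADIX_FLAGS back off
 *
 * This is also why vm_radix_zinit() below passes VM_RADIX_PAD as the zone
 * alignment mask: allocated nodes must never have the flag bit set.
 */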

/*
 * Adds the page as a child of the provided node.
 */
static __inline void
vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
    vm_page_t page)
{
        int slot;

        slot = vm_radix_slot(index, clev);
        rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
}

/*
 * Returns the level at which two keys differ.
 * The two keys must not be equal.
 */
static __inline uint16_t
vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
{
        uint16_t clev;

        KASSERT(index1 != index2, ("%s: passing the same key value %jx",
            __func__, (uintmax_t)index1));

        index1 ^= index2;
        for (clev = VM_RADIX_LIMIT;; clev--)
                if (vm_radix_slot(index1, clev) != 0)
                        return (clev);
}
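
/*
 * Worked example (illustrative, not part of the original source): for keys
 * 0x1234 and 0x1634, the XOR is 0x0400, whose only nonzero 4-bit digit
 * sits at level 2 (bits 8-11).  Scanning down from VM_RADIX_LIMIT,
 * vm_radix_slot(0x0400, 2) == 0x4 != 0, so vm_radix_keydiff() returns 2:
 * the highest level at which the two keys select different child slots.
 */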

/*
 * Returns TRUE if it can be determined that key does not belong to the
 * specified rnode.  Otherwise, returns FALSE.
 */
static __inline boolean_t
vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
{

        if (rnode->rn_clev < VM_RADIX_LIMIT) {
                idx = vm_radix_trimkey(idx, rnode->rn_clev + 1);
                return (idx != rnode->rn_owner);
        }
        return (FALSE);
}
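
/*
 * Illustrative example (not part of the original source): a node with
 * rn_owner == 0x5000 and rn_clev == 2 covers exactly the keys whose digits
 * above level 2 match the owner, i.e. the range [0x5000, 0x5fff] on LP64.
 * For idx == 0x6123, trimming above level 3 yields 0x6000 != 0x5000, so
 * vm_radix_keybarr() returns TRUE and the search can stop descending
 * early, which is what makes path compression work.
 */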

/*
 * Internal helper for vm_radix_reclaim_allnodes().
 * This function is recursive.
 */
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
        int slot;

        KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
            ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
        for (slot = 0; rnode->rn_count != 0; slot++) {
                if (rnode->rn_child[slot] == NULL)
                        continue;
                if (!vm_radix_isleaf(rnode->rn_child[slot]))
                        vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
                rnode->rn_child[slot] = NULL;
                rnode->rn_count--;
        }
        vm_radix_node_put(rnode);
}

#ifdef INVARIANTS
/*
 * Radix node zone destructor.
 */
static void
vm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused)
{
        struct vm_radix_node *rnode;
        int slot;

        rnode = mem;
        KASSERT(rnode->rn_count == 0,
            ("vm_radix_node_put: rnode %p has %d children", rnode,
            rnode->rn_count));
        for (slot = 0; slot < VM_RADIX_COUNT; slot++)
                KASSERT(rnode->rn_child[slot] == NULL,
                    ("vm_radix_node_put: rnode %p has a child", rnode));
}
#endif

#ifndef UMA_MD_SMALL_ALLOC
/*
 * Reserve the KVA necessary to satisfy the node allocation.
 * This is mandatory on architectures that do not support a direct map,
 * because they would otherwise have to carve into the kernel maps for
 * every node allocation, resulting in deadlocks for consumers already
 * working with the kernel maps.
 */
static void
vm_radix_reserve_kva(void *arg __unused)
{

        /*
         * Calculate the number of reserved nodes, discounting the pages that
         * are needed to store them.
         */
        if (!uma_zone_reserve_kva(vm_radix_node_zone,
            ((vm_paddr_t)vm_cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE +
            sizeof(struct vm_radix_node))))
                panic("%s: unable to reserve KVA", __func__);
}
SYSINIT(vm_radix_reserve_kva, SI_SUB_KMEM, SI_ORDER_THIRD,
    vm_radix_reserve_kva, NULL);
#endif
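
/*
 * Back-of-the-envelope check (illustrative, not part of the original
 * source): with 4 KB pages and a node size of roughly 144 bytes on LP64
 * (8-byte rn_owner, two 2-byte fields plus padding, and 16 8-byte child
 * pointers), the reservation above works out to about
 *
 *      v_page_count * 4096 / (4096 + 144) ~= 0.97 * v_page_count
 *
 * nodes, i.e. nearly one node per physical page, discounted by the pages
 * that the nodes themselves occupy.
 */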

/*
 * Initialize the UMA slab zone.
 */
void
vm_radix_zinit(void)
{

        vm_radix_node_zone = uma_zcreate("RADIX NODE",
            sizeof(struct vm_radix_node), NULL,
#ifdef INVARIANTS
            vm_radix_node_zone_dtor,
#else
            NULL,
#endif
            NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM);
}

/*
 * Inserts the key-value pair into the trie.
 * Panics if the key already exists.
 */
int
vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
{
        vm_pindex_t index, newind;
        void **parentp;
        struct vm_radix_node *rnode, *tmp;
        vm_page_t m;
        int slot;
        uint16_t clev;

        index = page->pindex;

        /*
         * The owner of record for root is not really important because it
         * will never be used.
         */
        rnode = vm_radix_getroot(rtree);
        if (rnode == NULL) {
                rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF;
                return (0);
        }
        parentp = (void **)&rtree->rt_root;
        for (;;) {
                if (vm_radix_isleaf(rnode)) {
                        m = vm_radix_topage(rnode);
                        if (m->pindex == index)
                                panic("%s: key %jx is already present",
                                    __func__, (uintmax_t)index);
                        clev = vm_radix_keydiff(m->pindex, index);
                        tmp = vm_radix_node_get(vm_radix_trimkey(index,
                            clev + 1), 2, clev);
                        if (tmp == NULL)
                                return (ENOMEM);
                        *parentp = tmp;
                        vm_radix_addpage(tmp, index, clev, page);
                        vm_radix_addpage(tmp, m->pindex, clev, m);
                        return (0);
                } else if (vm_radix_keybarr(rnode, index))
                        break;
                slot = vm_radix_slot(index, rnode->rn_clev);
                if (rnode->rn_child[slot] == NULL) {
                        rnode->rn_count++;
                        vm_radix_addpage(rnode, index, rnode->rn_clev, page);
                        return (0);
                }
                parentp = &rnode->rn_child[slot];
                rnode = rnode->rn_child[slot];
        }

        /*
         * A new node is needed because the right insertion level has been
         * reached.  Set up the new intermediate node and add the two
         * children: the new object and the older edge.
         */
        newind = rnode->rn_owner;
        clev = vm_radix_keydiff(newind, index);
        tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev);
        if (tmp == NULL)
                return (ENOMEM);
        *parentp = tmp;
        vm_radix_addpage(tmp, index, clev, page);
        slot = vm_radix_slot(newind, clev);
        tmp->rn_child[slot] = rnode;
        return (0);
}
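
/*
 * Minimal usage sketch (illustrative only, not part of the original
 * source).  The real consumers live in vm_page.c and call these routines
 * with the owning object's lock held; the tree and page below are assumed
 * to come from such a context.
 */
#if 0
static int
example_insert_lookup_remove(struct vm_radix *rtree, vm_page_t m)
{
        int error;

        /* Insert may fail with ENOMEM since nodes use M_NOWAIT. */
        error = vm_radix_insert(rtree, m);
        if (error != 0)
                return (error);
        /* A lookup at the same pindex now returns the page. */
        KASSERT(vm_radix_lookup(rtree, m->pindex) == m,
            ("example: inserted page not found"));
        /* Remove returns the old page, or NULL if it was absent. */
        (void)vm_radix_remove(rtree, m->pindex);
        return (0);
}
#endif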

/*
 * Returns TRUE if the specified radix tree contains a single leaf and FALSE
 * otherwise.
 */
boolean_t
vm_radix_is_singleton(struct vm_radix *rtree)
{
        struct vm_radix_node *rnode;

        rnode = vm_radix_getroot(rtree);
        if (rnode == NULL)
                return (FALSE);
        return (vm_radix_isleaf(rnode));
}

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
vm_page_t
vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *rnode;
        vm_page_t m;
        int slot;

        rnode = vm_radix_getroot(rtree);
        while (rnode != NULL) {
                if (vm_radix_isleaf(rnode)) {
                        m = vm_radix_topage(rnode);
                        if (m->pindex == index)
                                return (m);
                        else
                                break;
                } else if (vm_radix_keybarr(rnode, index))
                        break;
                slot = vm_radix_slot(index, rnode->rn_clev);
                rnode = rnode->rn_child[slot];
        }
        return (NULL);
}

/*
 * Look up the nearest entry at a position greater than or equal to index.
 */
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *stack[VM_RADIX_LIMIT];
        vm_pindex_t inc;
        vm_page_t m;
        struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
        int loops = 0;
#endif
        int slot, tos;

        rnode = vm_radix_getroot(rtree);
        if (rnode == NULL)
                return (NULL);
        else if (vm_radix_isleaf(rnode)) {
                m = vm_radix_topage(rnode);
                if (m->pindex >= index)
                        return (m);
                else
                        return (NULL);
        }
        tos = 0;
        for (;;) {
                /*
                 * If the keys differ before the current bisection node,
                 * then the search key might roll back to the earliest
                 * available bisection node or to the smallest key
                 * in the current node (if the owner is greater than the
                 * search key).
                 */
                if (vm_radix_keybarr(rnode, index)) {
                        if (index > rnode->rn_owner) {
ascend:
                                KASSERT(++loops < 1000,
                                    ("vm_radix_lookup_ge: too many loops"));

                                /*
                                 * Pop nodes from the stack until either the
                                 * stack is empty or a node that could have a
                                 * matching descendant is found.
                                 */
                                do {
                                        if (tos == 0)
                                                return (NULL);
                                        rnode = stack[--tos];
                                } while (vm_radix_slot(index,
                                    rnode->rn_clev) == (VM_RADIX_COUNT - 1));

                                /*
                                 * The following computation cannot overflow
                                 * because index's slot at the current level
                                 * is less than VM_RADIX_COUNT - 1.
                                 */
                                index = vm_radix_trimkey(index,
                                    rnode->rn_clev);
                                index += VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        } else
                                index = rnode->rn_owner;
                        KASSERT(!vm_radix_keybarr(rnode, index),
                            ("vm_radix_lookup_ge: keybarr failed"));
                }
                slot = vm_radix_slot(index, rnode->rn_clev);
                child = rnode->rn_child[slot];
                if (vm_radix_isleaf(child)) {
                        m = vm_radix_topage(child);
                        if (m->pindex >= index)
                                return (m);
                } else if (child != NULL)
                        goto descend;

                /*
                 * Look for an available edge or page within the current
                 * bisection node.
                 */
                if (slot < (VM_RADIX_COUNT - 1)) {
                        inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        index = vm_radix_trimkey(index, rnode->rn_clev);
                        do {
                                index += inc;
                                slot++;
                                child = rnode->rn_child[slot];
                                if (vm_radix_isleaf(child)) {
                                        m = vm_radix_topage(child);
                                        if (m->pindex >= index)
                                                return (m);
                                } else if (child != NULL)
                                        goto descend;
                        } while (slot < (VM_RADIX_COUNT - 1));
                }
                KASSERT(child == NULL || vm_radix_isleaf(child),
                    ("vm_radix_lookup_ge: child is radix node"));

                /*
                 * If a page or edge bigger than the search slot is not found
                 * in the current node, ascend to the next higher-level node.
                 */
                goto ascend;
descend:
                KASSERT(rnode->rn_clev > 0,
                    ("vm_radix_lookup_ge: pushing leaf's parent"));
                KASSERT(tos < VM_RADIX_LIMIT,
                    ("vm_radix_lookup_ge: stack overflow"));
                stack[tos++] = rnode;
                rnode = child;
        }
}
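
/*
 * Illustrative trace (not part of the original source): with only the
 * pages 0x5f40 and 0x5f80 inserted, the root is a single node with
 * rn_clev == 1 and rn_owner == 0x5f00.  A vm_radix_lookup_ge() for
 * 0x5f21 selects slot 0x2, finds it empty, and walks forward: slot 0x3
 * (index bumped to 0x5f30) is empty too, and slot 0x4 (index 0x5f40)
 * holds the leaf 0x5f40 >= 0x5f40, which is returned.  Only when all
 * higher slots of a node are exhausted does the search pop the stack
 * and ascend to the parent.
 */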

/*
 * Look up the nearest entry at a position less than or equal to index.
 */
vm_page_t
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *stack[VM_RADIX_LIMIT];
        vm_pindex_t inc;
        vm_page_t m;
        struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
        int loops = 0;
#endif
        int slot, tos;

        rnode = vm_radix_getroot(rtree);
        if (rnode == NULL)
                return (NULL);
        else if (vm_radix_isleaf(rnode)) {
                m = vm_radix_topage(rnode);
                if (m->pindex <= index)
                        return (m);
                else
                        return (NULL);
        }
        tos = 0;
        for (;;) {
                /*
                 * If the keys differ before the current bisection node,
                 * then the search key might roll back to the earliest
                 * available bisection node or to the largest key
                 * in the current node (if the owner is smaller than the
                 * search key).
                 */
                if (vm_radix_keybarr(rnode, index)) {
                        if (index > rnode->rn_owner) {
                                index = rnode->rn_owner + VM_RADIX_COUNT *
                                    VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        } else {
ascend:
                                KASSERT(++loops < 1000,
                                    ("vm_radix_lookup_le: too many loops"));

                                /*
                                 * Pop nodes from the stack until either the
                                 * stack is empty or a node that could have a
                                 * matching descendant is found.
                                 */
                                do {
                                        if (tos == 0)
                                                return (NULL);
                                        rnode = stack[--tos];
                                } while (vm_radix_slot(index,
                                    rnode->rn_clev) == 0);

                                /*
                                 * The following computation cannot overflow
                                 * because index's slot at the current level
                                 * is greater than 0.
                                 */
                                index = vm_radix_trimkey(index,
                                    rnode->rn_clev);
                        }
                        index--;
                        KASSERT(!vm_radix_keybarr(rnode, index),
                            ("vm_radix_lookup_le: keybarr failed"));
                }
                slot = vm_radix_slot(index, rnode->rn_clev);
                child = rnode->rn_child[slot];
                if (vm_radix_isleaf(child)) {
                        m = vm_radix_topage(child);
                        if (m->pindex <= index)
                                return (m);
                } else if (child != NULL)
                        goto descend;

                /*
                 * Look for an available edge or page within the current
                 * bisection node.
                 */
                if (slot > 0) {
                        inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        index |= inc - 1;
                        do {
                                index -= inc;
                                slot--;
                                child = rnode->rn_child[slot];
                                if (vm_radix_isleaf(child)) {
                                        m = vm_radix_topage(child);
                                        if (m->pindex <= index)
                                                return (m);
                                } else if (child != NULL)
                                        goto descend;
                        } while (slot > 0);
                }
                KASSERT(child == NULL || vm_radix_isleaf(child),
                    ("vm_radix_lookup_le: child is radix node"));

                /*
                 * If a page or edge smaller than the search slot is not found
                 * in the current node, ascend to the next higher-level node.
                 */
                goto ascend;
descend:
                KASSERT(rnode->rn_clev > 0,
                    ("vm_radix_lookup_le: pushing leaf's parent"));
                KASSERT(tos < VM_RADIX_LIMIT,
                    ("vm_radix_lookup_le: stack overflow"));
                stack[tos++] = rnode;
                rnode = child;
        }
}

/*
 * Remove the specified index from the trie, and return the value stored at
 * that index.  If the index is not present, return NULL.
 */
vm_page_t
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *rnode, *parent;
        vm_page_t m;
        int i, slot;

        rnode = vm_radix_getroot(rtree);
        if (vm_radix_isleaf(rnode)) {
                m = vm_radix_topage(rnode);
                if (m->pindex != index)
                        return (NULL);
                vm_radix_setroot(rtree, NULL);
                return (m);
        }
        parent = NULL;
        for (;;) {
                if (rnode == NULL)
                        return (NULL);
                slot = vm_radix_slot(index, rnode->rn_clev);
                if (vm_radix_isleaf(rnode->rn_child[slot])) {
                        m = vm_radix_topage(rnode->rn_child[slot]);
                        if (m->pindex != index)
                                return (NULL);
                        rnode->rn_child[slot] = NULL;
                        rnode->rn_count--;
                        if (rnode->rn_count > 1)
                                return (m);
                        for (i = 0; i < VM_RADIX_COUNT; i++)
                                if (rnode->rn_child[i] != NULL)
                                        break;
                        KASSERT(i != VM_RADIX_COUNT,
                            ("%s: invalid node configuration", __func__));
                        if (parent == NULL)
                                vm_radix_setroot(rtree, rnode->rn_child[i]);
                        else {
                                slot = vm_radix_slot(index, parent->rn_clev);
                                KASSERT(parent->rn_child[slot] == rnode,
                                    ("%s: invalid child value", __func__));
                                parent->rn_child[slot] = rnode->rn_child[i];
                        }
                        rnode->rn_count--;
                        rnode->rn_child[i] = NULL;
                        vm_radix_node_put(rnode);
                        return (m);
                }
                parent = rnode;
                rnode = rnode->rn_child[slot];
        }
}

/*
 * Remove and free all the nodes from the radix tree.
 * This function is recursive, but the recursion is tightly bounded because
 * the maximum depth of the tree is fixed.
 */
void
vm_radix_reclaim_allnodes(struct vm_radix *rtree)
{
        struct vm_radix_node *root;

        root = vm_radix_getroot(rtree);
        if (root == NULL)
                return;
        vm_radix_setroot(rtree, NULL);
        if (!vm_radix_isleaf(root))
                vm_radix_reclaim_allnodes_int(root);
}

/*
 * Replace an existing page in the trie with another one.
 * Panics if there is not an old page in the trie at the new page's index.
 */
vm_page_t
vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage)
{
        struct vm_radix_node *rnode;
        vm_page_t m;
        vm_pindex_t index;
        int slot;

        index = newpage->pindex;
        rnode = vm_radix_getroot(rtree);
        if (rnode == NULL)
                panic("%s: replacing page on an empty trie", __func__);
        if (vm_radix_isleaf(rnode)) {
                m = vm_radix_topage(rnode);
                if (m->pindex != index)
                        panic("%s: original replacing root key not found",
                            __func__);
                rtree->rt_root = (uintptr_t)newpage | VM_RADIX_ISLEAF;
                return (m);
        }
        for (;;) {
                slot = vm_radix_slot(index, rnode->rn_clev);
                if (vm_radix_isleaf(rnode->rn_child[slot])) {
                        m = vm_radix_topage(rnode->rn_child[slot]);
                        if (m->pindex == index) {
                                rnode->rn_child[slot] =
                                    (void *)((uintptr_t)newpage |
                                    VM_RADIX_ISLEAF);
                                return (m);
                        } else
                                break;
                } else if (rnode->rn_child[slot] == NULL ||
                    vm_radix_keybarr(rnode->rn_child[slot], index))
                        break;
                rnode = rnode->rn_child[slot];
        }
        panic("%s: original replacing page not found", __func__);
}

void
vm_radix_wait(void)
{
        uma_zwait(vm_radix_node_zone);
}

#ifdef DDB
/*
 * Show details about the given radix node.
 */
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
        struct vm_radix_node *rnode;
        int i;

        if (!have_addr)
                return;
        rnode = (struct vm_radix_node *)addr;
        db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
            (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
            rnode->rn_clev);
        for (i = 0; i < VM_RADIX_COUNT; i++)
                if (rnode->rn_child[i] != NULL)
                        db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
                            i, (void *)rnode->rn_child[i],
                            vm_radix_isleaf(rnode->rn_child[i]) ?
                            vm_radix_topage(rnode->rn_child[i]) : NULL,
                            rnode->rn_clev);
}
#endif /* DDB */
