FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_radix.c

/*
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Path-compressed radix trie implementation.
 * The following code is not generalized into a general-purpose library
 * because there are too many embedded parameters that should really be
 * decided by the library consumers.  At the same time, consumers of this
 * code must achieve the highest possible performance.
 *
 * The implementation takes into account the following rationale:
 * - The size of the nodes should be as small as possible but still big
 *   enough to avoid a large maximum depth for the trie.  This is a balance
 *   between the need not to wire too much physical memory for the nodes
 *   and the need to avoid too much cache pollution during trie operations.
 * - There is no huge bias toward the number of lookup operations over
 *   the number of insert and remove operations.  This basically implies
 *   that optimizations that supposedly help one operation but hurt the
 *   other must be carefully evaluated.
 * - On average, not many nodes are expected to be fully populated, hence
 *   level compression may just complicate things.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vmmeter.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * These widths should allow the pointers to a node's children to fit within
 * a single cache line.  The extra levels from a narrow width should not be
 * a problem thanks to path compression.
 */
#ifdef __LP64__
#define VM_RADIX_WIDTH  4
#else
#define VM_RADIX_WIDTH  3
#endif

#define VM_RADIX_COUNT  (1 << VM_RADIX_WIDTH)
#define VM_RADIX_MASK   (VM_RADIX_COUNT - 1)
#define VM_RADIX_LIMIT                                                  \
        (howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1)
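
/*
 * Worked example of the parameters above (illustrative): assuming the usual
 * 64-bit vm_pindex_t, on LP64 (VM_RADIX_WIDTH == 4) each node has
 * VM_RADIX_COUNT == 16 children, VM_RADIX_MASK == 0xf, and
 * VM_RADIX_LIMIT == howmany(64, 4) - 1 == 15, i.e. levels run from 0 (the
 * least significant nibble of a key) up to 15 (the most significant).
 */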

/* Flag bits stored in node pointers. */
#define VM_RADIX_ISLEAF 0x1
#define VM_RADIX_FLAGS  0x1
#define VM_RADIX_PAD    VM_RADIX_FLAGS
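
/*
 * A child pointer either points to another vm_radix_node or, with
 * VM_RADIX_ISLEAF set in its low bit, encodes a vm_page_t directly.  This
 * works because both node and page structures are allocated with at least
 * 2-byte alignment (the node zone is created with VM_RADIX_PAD as its
 * alignment mask), leaving bit 0 free for the tag; vm_radix_isleaf() and
 * vm_radix_topage() below decode it.
 */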

/* Returns one unit associated with the specified level. */
#define VM_RADIX_UNITLEVEL(lev)                                         \
        ((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))

struct vm_radix_node {
        vm_pindex_t      rn_owner;                      /* Owner of record. */
        uint16_t         rn_count;                      /* Valid children. */
        uint16_t         rn_clev;                       /* Current level. */
        void            *rn_child[VM_RADIX_COUNT];      /* Child nodes. */
};
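
/*
 * Size sketch (illustrative): on LP64 a node is 8 (rn_owner) + 2 + 2
 * (rn_count, rn_clev) + 4 (padding) + 16 * 8 (rn_child) = 144 bytes,
 * so a 4 KB page holds 28 nodes.
 */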

static uma_zone_t vm_radix_node_zone;

/*
 * Allocate a radix node.
 */
static __inline struct vm_radix_node *
vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
{
        struct vm_radix_node *rnode;

        rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO);
        if (rnode == NULL)
                return (NULL);
        rnode->rn_owner = owner;
        rnode->rn_count = count;
        rnode->rn_clev = clevel;
        return (rnode);
}

/*
 * Free radix node.
 */
static __inline void
vm_radix_node_put(struct vm_radix_node *rnode)
{

        uma_zfree(vm_radix_node_zone, rnode);
}

/*
 * Return the position in the array for a given level.
 */
static __inline int
vm_radix_slot(vm_pindex_t index, uint16_t level)
{

        return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
}
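
/*
 * Example (illustrative, LP64): vm_radix_slot(0x1234, 1) ==
 * ((0x1234 >> 4) & 0xf) == (0x123 & 0xf) == 3, i.e. the second-lowest
 * nibble of the key selects the child slot at level 1.
 */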

/* Trims the key after the specified level. */
static __inline vm_pindex_t
vm_radix_trimkey(vm_pindex_t index, uint16_t level)
{
        vm_pindex_t ret;

        ret = index;
        if (level > 0) {
                ret >>= level * VM_RADIX_WIDTH;
                ret <<= level * VM_RADIX_WIDTH;
        }
        return (ret);
}
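
/*
 * Example (illustrative, LP64): vm_radix_trimkey(0x1234, 2) clears the two
 * low-order nibbles and yields 0x1200, the smallest key that shares the
 * same path down to level 2.
 */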

/*
 * Get the root node for a radix tree.
 */
static __inline struct vm_radix_node *
vm_radix_getroot(struct vm_radix *rtree)
{

        return ((struct vm_radix_node *)rtree->rt_root);
}

/*
 * Set the root node for a radix tree.
 */
static __inline void
vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
{

        rtree->rt_root = (uintptr_t)rnode;
}

/*
 * Returns TRUE if the specified radix node is a leaf and FALSE otherwise.
 */
static __inline boolean_t
vm_radix_isleaf(struct vm_radix_node *rnode)
{

        return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0);
}

/*
 * Returns the associated page extracted from rnode.
 */
static __inline vm_page_t
vm_radix_topage(struct vm_radix_node *rnode)
{

        return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS));
}

/*
 * Adds the page as a child of the provided node.
 */
static __inline void
vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
    vm_page_t page)
{
        int slot;

        slot = vm_radix_slot(index, clev);
        rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
}

/*
 * Returns the slot where two keys differ.
 * It cannot accept two equal keys.
 */
static __inline uint16_t
vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
{
        uint16_t clev;

        KASSERT(index1 != index2, ("%s: passing the same key value %jx",
            __func__, (uintmax_t)index1));

        index1 ^= index2;
        for (clev = VM_RADIX_LIMIT;; clev--)
                if (vm_radix_slot(index1, clev) != 0)
                        return (clev);
}
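
/*
 * Example (illustrative, LP64): vm_radix_keydiff(0x1234, 0x5234) xors the
 * keys into 0x4000 and scans down from VM_RADIX_LIMIT for the first nonzero
 * slot, returning 3: the highest level at which the two keys diverge.
 */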

/*
 * Returns TRUE if it can be determined that key does not belong to the
 * specified rnode.  Otherwise, returns FALSE.
 */
static __inline boolean_t
vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
{

        if (rnode->rn_clev < VM_RADIX_LIMIT) {
                idx = vm_radix_trimkey(idx, rnode->rn_clev + 1);
                return (idx != rnode->rn_owner);
        }
        return (FALSE);
}
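
/*
 * Example (illustrative): for a node with rn_owner == 0x1200 and
 * rn_clev == 1, key 0x1234 trims to 0x1200 and matches the owner (FALSE),
 * while key 0x5234 trims to 0x5200 != 0x1200, so the node cannot be on
 * its path (TRUE).
 */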

/*
 * Internal helper for vm_radix_reclaim_allnodes().
 * This function is recursive.
 */
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
        int slot;

        KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
            ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
        for (slot = 0; rnode->rn_count != 0; slot++) {
                if (rnode->rn_child[slot] == NULL)
                        continue;
                if (!vm_radix_isleaf(rnode->rn_child[slot]))
                        vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
                rnode->rn_child[slot] = NULL;
                rnode->rn_count--;
        }
        vm_radix_node_put(rnode);
}

#ifdef INVARIANTS
/*
 * Radix node zone destructor.
 */
static void
vm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused)
{
        struct vm_radix_node *rnode;
        int slot;

        rnode = mem;
        KASSERT(rnode->rn_count == 0,
            ("vm_radix_node_put: rnode %p has %d children", rnode,
            rnode->rn_count));
        for (slot = 0; slot < VM_RADIX_COUNT; slot++)
                KASSERT(rnode->rn_child[slot] == NULL,
                    ("vm_radix_node_put: rnode %p has a child", rnode));
}
#endif

#ifndef UMA_MD_SMALL_ALLOC
/*
 * Reserve the KVA necessary to satisfy the node allocation.
 * This is mandatory on architectures that do not support a direct map,
 * because they would otherwise have to carve into the kernel maps for
 * every node allocation, resulting in deadlocks for consumers already
 * working with the kernel maps.
 */
static void
vm_radix_reserve_kva(void *arg __unused)
{

        /*
         * Calculate the number of reserved nodes, discounting the pages that
         * are needed to store them.
         */
        if (!uma_zone_reserve_kva(vm_radix_node_zone,
            ((vm_paddr_t)cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE +
            sizeof(struct vm_radix_node))))
                panic("%s: unable to reserve KVA", __func__);
}
SYSINIT(vm_radix_reserve_kva, SI_SUB_KMEM, SI_ORDER_SECOND,
    vm_radix_reserve_kva, NULL);
#endif

/*
 * Initialize the UMA slab zone.
 * Until vm_radix_reserve_kva() is called, the zone will be served by the
 * UMA boot-time pre-allocated pool of pages.
 */
void
vm_radix_init(void)
{

        vm_radix_node_zone = uma_zcreate("RADIX NODE",
            sizeof(struct vm_radix_node), NULL,
#ifdef INVARIANTS
            vm_radix_node_zone_dtor,
#else
            NULL,
#endif
            NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM);
}
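
/*
 * Note on the zone alignment: VM_RADIX_PAD is passed as the uma_zcreate()
 * alignment mask, so every node is allocated with bit 0 of its address
 * clear.  That is what makes the VM_RADIX_ISLEAF tagging of child pointers
 * above safe.
 */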

/*
 * Inserts the key-value pair into the trie.
 * Panics if the key already exists.
 */
int
vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
{
        vm_pindex_t index, newind;
        void **parentp;
        struct vm_radix_node *rnode, *tmp;
        vm_page_t m;
        int slot;
        uint16_t clev;

        index = page->pindex;

restart:

        /*
         * The owner of record for the root is not really important because
         * it will never be used.
         */
        rnode = vm_radix_getroot(rtree);
        if (rnode == NULL) {
                rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF;
                return (0);
        }
        parentp = (void **)&rtree->rt_root;
        for (;;) {
                if (vm_radix_isleaf(rnode)) {
                        m = vm_radix_topage(rnode);
                        if (m->pindex == index)
                                panic("%s: key %jx is already present",
                                    __func__, (uintmax_t)index);
                        clev = vm_radix_keydiff(m->pindex, index);

                        /*
                         * During node allocation the trie that is being
                         * walked can be modified by recursing radix trie
                         * operations.  If this is the case, the recursing
                         * functions signal the situation and the insert
                         * operation must start again from scratch.  The
                         * freed radix node will then very likely end up in
                         * the UMA caches, making a recurrence of the same
                         * situation unlikely.
                         */
                        rtree->rt_flags |= RT_INSERT_INPROG;
                        tmp = vm_radix_node_get(vm_radix_trimkey(index,
                            clev + 1), 2, clev);
                        rtree->rt_flags &= ~RT_INSERT_INPROG;
                        if (tmp == NULL) {
                                rtree->rt_flags &= ~RT_TRIE_MODIFIED;
                                return (ENOMEM);
                        }
                        if ((rtree->rt_flags & RT_TRIE_MODIFIED) != 0) {
                                rtree->rt_flags &= ~RT_TRIE_MODIFIED;
                                tmp->rn_count = 0;
                                vm_radix_node_put(tmp);
                                goto restart;
                        }
                        *parentp = tmp;
                        vm_radix_addpage(tmp, index, clev, page);
                        vm_radix_addpage(tmp, m->pindex, clev, m);
                        return (0);
                } else if (vm_radix_keybarr(rnode, index))
                        break;
                slot = vm_radix_slot(index, rnode->rn_clev);
                if (rnode->rn_child[slot] == NULL) {
                        rnode->rn_count++;
                        vm_radix_addpage(rnode, index, rnode->rn_clev, page);
                        return (0);
                }
                parentp = &rnode->rn_child[slot];
                rnode = rnode->rn_child[slot];
        }

        /*
         * A new node is needed because the right insertion level has been
         * reached.  Set up the new intermediate node and add the two
         * children: the new page and the older edge.
         */
        newind = rnode->rn_owner;
        clev = vm_radix_keydiff(newind, index);

        /* See the comments above. */
        rtree->rt_flags |= RT_INSERT_INPROG;
        tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev);
        rtree->rt_flags &= ~RT_INSERT_INPROG;
        if (tmp == NULL) {
                rtree->rt_flags &= ~RT_TRIE_MODIFIED;
                return (ENOMEM);
        }
        if ((rtree->rt_flags & RT_TRIE_MODIFIED) != 0) {
                rtree->rt_flags &= ~RT_TRIE_MODIFIED;
                tmp->rn_count = 0;
                vm_radix_node_put(tmp);
                goto restart;
        }
        *parentp = tmp;
        vm_radix_addpage(tmp, index, clev, page);
        slot = vm_radix_slot(newind, clev);
        tmp->rn_child[slot] = rnode;
        return (0);
}
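
/*
 * Minimal usage sketch (illustrative only): in the kernel the trie is
 * embedded in each vm_object and callers operate on it with the object
 * lock held.  The names "rt" and "page" below are hypothetical.
 *
 *      struct vm_radix rt = { 0 };
 *      vm_page_t m;
 *
 *      if (vm_radix_insert(&rt, page) != 0)
 *              return (ENOMEM);        (a radix node allocation failed)
 *      m = vm_radix_lookup(&rt, page->pindex);
 *      KASSERT(m == page, ("lookup after insert failed"));
 *      vm_radix_remove(&rt, page->pindex);
 */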

/*
 * Returns TRUE if the specified radix tree contains a single leaf and FALSE
 * otherwise.
 */
boolean_t
vm_radix_is_singleton(struct vm_radix *rtree)
{
        struct vm_radix_node *rnode;

        rnode = vm_radix_getroot(rtree);
        if (rnode == NULL)
                return (FALSE);
        return (vm_radix_isleaf(rnode));
}

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
vm_page_t
vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *rnode;
        vm_page_t m;
        int slot;

        rnode = vm_radix_getroot(rtree);
        while (rnode != NULL) {
                if (vm_radix_isleaf(rnode)) {
                        m = vm_radix_topage(rnode);
                        if (m->pindex == index)
                                return (m);
                        else
                                break;
                } else if (vm_radix_keybarr(rnode, index))
                        break;
                slot = vm_radix_slot(index, rnode->rn_clev);
                rnode = rnode->rn_child[slot];
        }
        return (NULL);
}

/*
 * Look up the nearest entry at a position greater than or equal to index.
 */
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *stack[VM_RADIX_LIMIT];
        vm_pindex_t inc;
        vm_page_t m;
        struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
        int loops = 0;
#endif
        int slot, tos;

        rnode = vm_radix_getroot(rtree);
        if (rnode == NULL)
                return (NULL);
        else if (vm_radix_isleaf(rnode)) {
                m = vm_radix_topage(rnode);
                if (m->pindex >= index)
                        return (m);
                else
                        return (NULL);
        }
        tos = 0;
        for (;;) {
                /*
                 * If the keys differ before the current bisection node,
                 * then the search key might roll back to the earliest
                 * available bisection node or to the smallest key
                 * in the current node (if the owner is bigger than the
                 * search key).
                 */
                if (vm_radix_keybarr(rnode, index)) {
                        if (index > rnode->rn_owner) {
ascend:
                                KASSERT(++loops < 1000,
                                    ("vm_radix_lookup_ge: too many loops"));

                                /*
                                 * Pop nodes from the stack until either the
                                 * stack is empty or a node that could have a
                                 * matching descendant is found.
                                 */
                                do {
                                        if (tos == 0)
                                                return (NULL);
                                        rnode = stack[--tos];
                                } while (vm_radix_slot(index,
                                    rnode->rn_clev) == (VM_RADIX_COUNT - 1));

                                /*
                                 * The following computation cannot overflow
                                 * because index's slot at the current level
                                 * is less than VM_RADIX_COUNT - 1.
                                 */
                                index = vm_radix_trimkey(index,
                                    rnode->rn_clev);
                                index += VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        } else
                                index = rnode->rn_owner;
                        KASSERT(!vm_radix_keybarr(rnode, index),
                            ("vm_radix_lookup_ge: keybarr failed"));
                }
                slot = vm_radix_slot(index, rnode->rn_clev);
                child = rnode->rn_child[slot];
                if (vm_radix_isleaf(child)) {
                        m = vm_radix_topage(child);
                        if (m->pindex >= index)
                                return (m);
                } else if (child != NULL)
                        goto descend;

                /*
                 * Look for an available edge or page within the current
                 * bisection node.
                 */
                if (slot < (VM_RADIX_COUNT - 1)) {
                        inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        index = vm_radix_trimkey(index, rnode->rn_clev);
                        do {
                                index += inc;
                                slot++;
                                child = rnode->rn_child[slot];
                                if (vm_radix_isleaf(child)) {
                                        m = vm_radix_topage(child);
                                        if (m->pindex >= index)
                                                return (m);
                                } else if (child != NULL)
                                        goto descend;
                        } while (slot < (VM_RADIX_COUNT - 1));
                }
                KASSERT(child == NULL || vm_radix_isleaf(child),
                    ("vm_radix_lookup_ge: child is radix node"));

                /*
                 * If a page or edge bigger than the search slot is not found
                 * in the current node, ascend to the next higher-level node.
                 */
                goto ascend;
descend:
                KASSERT(rnode->rn_clev > 0,
                    ("vm_radix_lookup_ge: pushing leaf's parent"));
                KASSERT(tos < VM_RADIX_LIMIT,
                    ("vm_radix_lookup_ge: stack overflow"));
                stack[tos++] = rnode;
                rnode = child;
        }
}
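
/*
 * Example walk (illustrative): with leaves at indices 0x10 and 0x20 only,
 * vm_radix_lookup_ge(rtree, 0x12) first probes the slot for 0x12, finds the
 * leaf 0x10 which is too small, then scans the later slots of the same
 * node, advancing the search key by one unit per slot, and returns the
 * page at 0x20.
 */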

/*
 * Look up the nearest entry at a position less than or equal to index.
 */
vm_page_t
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *stack[VM_RADIX_LIMIT];
        vm_pindex_t inc;
        vm_page_t m;
        struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
        int loops = 0;
#endif
        int slot, tos;

        rnode = vm_radix_getroot(rtree);
        if (rnode == NULL)
                return (NULL);
        else if (vm_radix_isleaf(rnode)) {
                m = vm_radix_topage(rnode);
                if (m->pindex <= index)
                        return (m);
                else
                        return (NULL);
        }
        tos = 0;
        for (;;) {
                /*
                 * If the keys differ before the current bisection node,
                 * then the search key might roll back to the earliest
                 * available bisection node or to the largest key
                 * in the current node (if the owner is smaller than the
                 * search key).
                 */
                if (vm_radix_keybarr(rnode, index)) {
                        if (index > rnode->rn_owner) {
                                index = rnode->rn_owner + VM_RADIX_COUNT *
                                    VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        } else {
ascend:
                                KASSERT(++loops < 1000,
                                    ("vm_radix_lookup_le: too many loops"));

                                /*
                                 * Pop nodes from the stack until either the
                                 * stack is empty or a node that could have a
                                 * matching descendant is found.
                                 */
                                do {
                                        if (tos == 0)
                                                return (NULL);
                                        rnode = stack[--tos];
                                } while (vm_radix_slot(index,
                                    rnode->rn_clev) == 0);

                                /*
                                 * The following computation cannot overflow
                                 * because index's slot at the current level
                                 * is greater than 0.
                                 */
                                index = vm_radix_trimkey(index,
                                    rnode->rn_clev);
                        }
                        index--;
                        KASSERT(!vm_radix_keybarr(rnode, index),
                            ("vm_radix_lookup_le: keybarr failed"));
                }
                slot = vm_radix_slot(index, rnode->rn_clev);
                child = rnode->rn_child[slot];
                if (vm_radix_isleaf(child)) {
                        m = vm_radix_topage(child);
                        if (m->pindex <= index)
                                return (m);
                } else if (child != NULL)
                        goto descend;

                /*
                 * Look for an available edge or page within the current
                 * bisection node.
                 */
                if (slot > 0) {
                        inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        index |= inc - 1;
                        do {
                                index -= inc;
                                slot--;
                                child = rnode->rn_child[slot];
                                if (vm_radix_isleaf(child)) {
                                        m = vm_radix_topage(child);
                                        if (m->pindex <= index)
                                                return (m);
                                } else if (child != NULL)
                                        goto descend;
                        } while (slot > 0);
                }
                KASSERT(child == NULL || vm_radix_isleaf(child),
                    ("vm_radix_lookup_le: child is radix node"));

                /*
                 * If a page or edge smaller than the search slot is not found
                 * in the current node, ascend to the next higher-level node.
                 */
                goto ascend;
descend:
                KASSERT(rnode->rn_clev > 0,
                    ("vm_radix_lookup_le: pushing leaf's parent"));
                KASSERT(tos < VM_RADIX_LIMIT,
                    ("vm_radix_lookup_le: stack overflow"));
                stack[tos++] = rnode;
                rnode = child;
        }
}

/*
 * Remove the specified index from the tree.
 * Panics if the key is not present.
 */
void
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *rnode, *parent;
        vm_page_t m;
        int i, slot;

        /*
         * Detect if a page is going to be removed from a trie that is
         * already undergoing another trie operation.  Right now this is
         * only possible for vm_radix_remove() recursing into
         * vm_radix_insert().  If this is the case, the caller must be
         * notified about the situation and will take care of updating the
         * RT_TRIE_MODIFIED flag accordingly.  The RT_TRIE_MODIFIED bit is
         * set here because the remove operation will always succeed.
         */
        if ((rtree->rt_flags & RT_INSERT_INPROG) != 0)
                rtree->rt_flags |= RT_TRIE_MODIFIED;

        rnode = vm_radix_getroot(rtree);
        if (vm_radix_isleaf(rnode)) {
                m = vm_radix_topage(rnode);
                if (m->pindex != index)
                        panic("%s: invalid key found", __func__);
                vm_radix_setroot(rtree, NULL);
                return;
        }
        parent = NULL;
        for (;;) {
                if (rnode == NULL)
                        panic("vm_radix_remove: impossible to locate the key");
                slot = vm_radix_slot(index, rnode->rn_clev);
                if (vm_radix_isleaf(rnode->rn_child[slot])) {
                        m = vm_radix_topage(rnode->rn_child[slot]);
                        if (m->pindex != index)
                                panic("%s: invalid key found", __func__);
                        rnode->rn_child[slot] = NULL;
                        rnode->rn_count--;
                        if (rnode->rn_count > 1)
                                break;
                        for (i = 0; i < VM_RADIX_COUNT; i++)
                                if (rnode->rn_child[i] != NULL)
                                        break;
                        KASSERT(i != VM_RADIX_COUNT,
                            ("%s: invalid node configuration", __func__));
                        if (parent == NULL)
                                vm_radix_setroot(rtree, rnode->rn_child[i]);
                        else {
                                slot = vm_radix_slot(index, parent->rn_clev);
                                KASSERT(parent->rn_child[slot] == rnode,
                                    ("%s: invalid child value", __func__));
                                parent->rn_child[slot] = rnode->rn_child[i];
                        }
                        rnode->rn_count--;
                        rnode->rn_child[i] = NULL;
                        vm_radix_node_put(rnode);
                        break;
                }
                parent = rnode;
                rnode = rnode->rn_child[slot];
        }
}
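
/*
 * Collapse example (illustrative): if a node holds only the leaves for
 * 0x10 and 0x11 and the leaf at 0x11 is removed, rn_count drops to 1, so
 * the remaining child at 0x10 is promoted into the parent's slot (or
 * becomes the root) and the now-redundant node is freed, undoing the path
 * compression split performed at insert time.
 */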

/*
 * Remove and free all the nodes from the radix tree.
 * This function is recursive, but the recursion is tightly controlled
 * because the maximum depth of the tree is fixed.
 */
void
vm_radix_reclaim_allnodes(struct vm_radix *rtree)
{
        struct vm_radix_node *root;

        KASSERT((rtree->rt_flags & RT_INSERT_INPROG) == 0,
            ("vm_radix_reclaim_allnodes: unexpected trie recursion"));

        root = vm_radix_getroot(rtree);
        if (root == NULL)
                return;
        vm_radix_setroot(rtree, NULL);
        if (!vm_radix_isleaf(root))
                vm_radix_reclaim_allnodes_int(root);
}

/*
 * Replace an existing page in the trie with another one.
 * Panics if there is not an old page in the trie at the new page's index.
 */
vm_page_t
vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage)
{
        struct vm_radix_node *rnode;
        vm_page_t m;
        vm_pindex_t index;
        int slot;

        index = newpage->pindex;
        rnode = vm_radix_getroot(rtree);
        if (rnode == NULL)
                panic("%s: replacing page on an empty trie", __func__);
        if (vm_radix_isleaf(rnode)) {
                m = vm_radix_topage(rnode);
                if (m->pindex != index)
                        panic("%s: original replacing root key not found",
                            __func__);
                rtree->rt_root = (uintptr_t)newpage | VM_RADIX_ISLEAF;
                return (m);
        }
        for (;;) {
                slot = vm_radix_slot(index, rnode->rn_clev);
                if (vm_radix_isleaf(rnode->rn_child[slot])) {
                        m = vm_radix_topage(rnode->rn_child[slot]);
                        if (m->pindex == index) {
                                rnode->rn_child[slot] =
                                    (void *)((uintptr_t)newpage |
                                    VM_RADIX_ISLEAF);
                                return (m);
                        } else
                                break;
                } else if (rnode->rn_child[slot] == NULL ||
                    vm_radix_keybarr(rnode->rn_child[slot], index))
                        break;
                rnode = rnode->rn_child[slot];
        }
        panic("%s: original replacing page not found", __func__);
}

#ifdef DDB
/*
 * Show details about the given radix node.
 */
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
        struct vm_radix_node *rnode;
        int i;

        if (!have_addr)
                return;
        rnode = (struct vm_radix_node *)addr;
        db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
            (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
            rnode->rn_clev);
        for (i = 0; i < VM_RADIX_COUNT; i++)
                if (rnode->rn_child[i] != NULL)
                        db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
                            i, (void *)rnode->rn_child[i],
                            vm_radix_isleaf(rnode->rn_child[i]) ?
                            vm_radix_topage(rnode->rn_child[i]) : NULL,
                            rnode->rn_clev);
}
#endif /* DDB */
