vm_reserv.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 2002-2006 Rice University
    3  * Copyright (c) 2007-2011 Alan L. Cox <alc@cs.rice.edu>
    4  * All rights reserved.
    5  *
    6  * This software was developed for the FreeBSD Project by Alan L. Cox,
    7  * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   21  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
   22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
   25  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
   28  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 /*
   33  *      Superpage reservation management module
   34  *
   35  * Any external functions defined by this module are only to be used by the
   36  * virtual memory system.
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD: releng/11.1/sys/vm/vm_reserv.c 318716 2017-05-23 07:27:30Z markj $");
   41 
   42 #include "opt_vm.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/kernel.h>
   46 #include <sys/lock.h>
   47 #include <sys/malloc.h>
   48 #include <sys/mutex.h>
   49 #include <sys/queue.h>
   50 #include <sys/rwlock.h>
   51 #include <sys/sbuf.h>
   52 #include <sys/sysctl.h>
   53 #include <sys/systm.h>
   54 
   55 #include <vm/vm.h>
   56 #include <vm/vm_param.h>
   57 #include <vm/vm_object.h>
   58 #include <vm/vm_page.h>
   59 #include <vm/vm_phys.h>
   60 #include <vm/vm_radix.h>
   61 #include <vm/vm_reserv.h>
   62 
   63 /*
   64  * The reservation system supports the speculative allocation of large physical
   65  * pages ("superpages").  Speculative allocation enables the fully automatic
   66  * utilization of superpages by the virtual memory system.  In other words, no
   67  * programmatic directives are required to use superpages.
   68  */
   69 
   70 #if VM_NRESERVLEVEL > 0
   71 
   72 /*
   73  * The number of small pages that are contained in a level 0 reservation
       *
       * Being 1 << VM_LEVEL_0_ORDER, this is always a power of two, so
       * (VM_LEVEL_0_NPAGES - 1) can be used as a bit mask.
   74  */
   75 #define VM_LEVEL_0_NPAGES       (1 << VM_LEVEL_0_ORDER)
   76 
   77 /*
   78  * The number of bits by which a physical address is shifted to obtain the
   79  * reservation number
   80  */
   81 #define VM_LEVEL_0_SHIFT        (VM_LEVEL_0_ORDER + PAGE_SHIFT)
   82 
   83 /*
   84  * The size of a level 0 reservation in bytes
       *
       * NOTE(review): the constant 1 is an int, so this expression has type
       * int; confirm that VM_LEVEL_0_SHIFT stays below the width of int on
       * every platform that enables reservations.
   85  */
   86 #define VM_LEVEL_0_SIZE         (1 << VM_LEVEL_0_SHIFT)
   87 
   88 /*
   89  * Computes the index of the small page underlying the given (object, pindex)
   90  * within the reservation's array of small pages.
       *
       * The object's pg_color is added to pindex before masking with
       * (VM_LEVEL_0_NPAGES - 1), so the result is always in the range
       * [0, VM_LEVEL_0_NPAGES).
   91  */
   92 #define VM_RESERV_INDEX(object, pindex) \
   93     (((object)->pg_color + (pindex)) & (VM_LEVEL_0_NPAGES - 1))
   94 
   95 /*
   96  * The size of a population map entry
   97  */
   98 typedef u_long          popmap_t;
   99 
  100 /*
  101  * The number of bits in a population map entry
  102  */
  103 #define NBPOPMAP        (NBBY * sizeof(popmap_t))
  104 
  105 /*
  106  * The number of population map entries in a reservation, i.e., the number
       * of popmap_t words needed to hold one bit per small page (presumably
       * rounded up by howmany(); verify against its definition)
  107  */
  108 #define NPOPMAP         howmany(VM_LEVEL_0_NPAGES, NBPOPMAP)
  109 
  110 /*
  111  * Clear a bit in the population map.
  112  */
  113 static __inline void
  114 popmap_clear(popmap_t popmap[], int i)
  115 {
  116 
  117         popmap[i / NBPOPMAP] &= ~(1UL << (i % NBPOPMAP));
  118 }
  119 
  120 /*
  121  * Set a bit in the population map.
  122  */
  123 static __inline void
  124 popmap_set(popmap_t popmap[], int i)
  125 {
  126 
  127         popmap[i / NBPOPMAP] |= 1UL << (i % NBPOPMAP);
  128 }
  129 
  130 /*
  131  * Is a bit in the population map clear?
  132  */
  133 static __inline boolean_t
  134 popmap_is_clear(popmap_t popmap[], int i)
  135 {
  136 
  137         return ((popmap[i / NBPOPMAP] & (1UL << (i % NBPOPMAP))) == 0);
  138 }
  139 
  140 /*
  141  * Is a bit in the population map set?
  142  */
  143 static __inline boolean_t
  144 popmap_is_set(popmap_t popmap[], int i)
  145 {
  146 
  147         return ((popmap[i / NBPOPMAP] & (1UL << (i % NBPOPMAP))) != 0);
  148 }
  149 
  150 /*
  151  * The reservation structure
  152  *
  153  * A reservation structure is constructed whenever a large physical page is
  154  * speculatively allocated to an object.  The reservation provides the small
  155  * physical pages for the range [pindex, pindex + VM_LEVEL_0_NPAGES) of offsets
  156  * within that object.  The reservation's "popcnt" tracks the number of these
  157  * small physical pages that are in use at any given time.  When and if the
  158  * reservation is not fully utilized, it appears in the queue of partially
  159  * populated reservations.  The reservation always appears on the containing
  160  * object's list of reservations.
  161  *
  162  * A partially populated reservation can be broken and reclaimed at any time.
       *
       * The "popmap" bit vector holds one bit per small page; "popcnt" is kept
       * in step with it by vm_reserv_populate() and vm_reserv_depopulate().
  163  */
  164 struct vm_reserv {
  165         TAILQ_ENTRY(vm_reserv) partpopq;        /* link in vm_rvq_partpop */
  166         LIST_ENTRY(vm_reserv) objq;             /* link in object's rvq list */
  167         vm_object_t     object;                 /* containing object */
  168         vm_pindex_t     pindex;                 /* offset within object */
  169         vm_page_t       pages;                  /* first page of a superpage */
  170         int             popcnt;                 /* # of pages in use */
  171         char            inpartpopq;             /* TRUE while on vm_rvq_partpop */
  172         popmap_t        popmap[NPOPMAP];        /* bit vector of used pages */
  173 };
  174 
  175 /*
  176  * The reservation array
  177  *
  178  * This array is analogous in function to vm_page_array.  It differs in the
  179  * respect that it may contain a greater number of useful reservation
  180  * structures than there are (physical) superpages.  These "invalid"
  181  * reservation structures exist to trade-off space for time in the
  182  * implementation of vm_reserv_from_page().  Invalid reservation structures are
  183  * distinguishable from "valid" reservation structures by inspecting the
  184  * reservation's "pages" field.  Invalid reservation structures have a NULL
  185  * "pages" field.
  186  *
  187  * vm_reserv_from_page() maps a small (physical) page to an element of this
  188  * array by computing a physical reservation number from the page's physical
  189  * address.  The physical reservation number is used as the array index.
  190  *
  191  * An "active" reservation is a valid reservation structure that has a non-NULL
  192  * "object" field and a non-zero "popcnt" field.  In other words, every active
  193  * reservation belongs to a particular object.  Moreover, every active
  194  * reservation has an entry in the containing object's list of reservations.
  195  */
  196 static vm_reserv_t vm_reserv_array;
  197 
  198 /*
  199  * The partially populated reservation queue
  200  *
  201  * This queue enables the fast recovery of an unused free small page from a
  202  * partially populated reservation.  The reservation at the head of this queue
  203  * is the least recently changed, partially populated reservation.
  204  *
  205  * Access to this queue is synchronized by the free page queue lock.
       *
       * vm_reserv_populate() and vm_reserv_depopulate() remove a reservation from
       * this queue before changing its population and re-insert it at the tail
       * while it remains partially populated; each reservation's "inpartpopq"
       * flag records whether it is currently linked here.
  206  */
  207 static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop =
  208                             TAILQ_HEAD_INITIALIZER(vm_rvq_partpop);
  209 
      /* Parent sysctl node under "_vm" for the read-only statistics below. */
  210 static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");
  211 
      /* Statistic exported read-only (CTLFLAG_RD); see the description string. */
  212 static long vm_reserv_broken;
  213 SYSCTL_LONG(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD,
  214     &vm_reserv_broken, 0, "Cumulative number of broken reservations");
  215 
      /*
       * Incremented by vm_reserv_depopulate() each time a reservation's
       * population count drops to zero and its pages are freed.
       */
  216 static long vm_reserv_freed;
  217 SYSCTL_LONG(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD,
  218     &vm_reserv_freed, 0, "Cumulative number of freed reservations");
  219 
      /* Handler that computes the number of full reservations on demand. */
  220 static int sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS);
  221 
  222 SYSCTL_PROC(_vm_reserv, OID_AUTO, fullpop, CTLTYPE_INT | CTLFLAG_RD, NULL, 0,
  223     sysctl_vm_reserv_fullpop, "I", "Current number of full reservations");
  224 
      /* Handler that formats a text summary of vm_rvq_partpop. */
  225 static int sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS);
  226 
  227 SYSCTL_OID(_vm_reserv, OID_AUTO, partpopq, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
  228     sysctl_vm_reserv_partpopq, "A", "Partially populated reservation queues");
  229 
      /* Statistic exported read-only (CTLFLAG_RD); see the description string. */
  230 static long vm_reserv_reclaimed;
  231 SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
  232     &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");
  233 
      /* Forward declarations of the file-local helpers. */
  234 static void             vm_reserv_break(vm_reserv_t rv, vm_page_t m);
  235 static void             vm_reserv_depopulate(vm_reserv_t rv, int index);
  236 static vm_reserv_t      vm_reserv_from_page(vm_page_t m);
  237 static boolean_t        vm_reserv_has_pindex(vm_reserv_t rv,
  238                             vm_pindex_t pindex);
  239 static void             vm_reserv_populate(vm_reserv_t rv, int index);
  240 static void             vm_reserv_reclaim(vm_reserv_t rv);
  241 
  242 /*
  243  * Returns the current number of full reservations.
  244  *
  245  * Since the number of full reservations is computed without acquiring the
  246  * free page queue lock, the returned value may be inexact.
  247  */
  248 static int
  249 sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS)
  250 {
  251         vm_paddr_t paddr;
  252         struct vm_phys_seg *seg;
  253         vm_reserv_t rv;
  254         int fullpop, segind;
  255 
  256         fullpop = 0;
  257         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  258                 seg = &vm_phys_segs[segind];
  259                 paddr = roundup2(seg->start, VM_LEVEL_0_SIZE);
  260                 while (paddr + VM_LEVEL_0_SIZE <= seg->end) {
  261                         rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT];
  262                         fullpop += rv->popcnt == VM_LEVEL_0_NPAGES;
  263                         paddr += VM_LEVEL_0_SIZE;
  264                 }
  265         }
  266         return (sysctl_handle_int(oidp, &fullpop, 0, req));
  267 }
  268 
  269 /*
  270  * Describes the current state of the partially populated reservation queue.
  271  */
  272 static int
  273 sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
  274 {
  275         struct sbuf sbuf;
  276         vm_reserv_t rv;
  277         int counter, error, level, unused_pages;
  278 
  279         error = sysctl_wire_old_buffer(req, 0);
  280         if (error != 0)
  281                 return (error);
  282         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
  283         sbuf_printf(&sbuf, "\nLEVEL     SIZE  NUMBER\n\n");
  284         for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
  285                 counter = 0;
  286                 unused_pages = 0;
  287                 mtx_lock(&vm_page_queue_free_mtx);
  288                 TAILQ_FOREACH(rv, &vm_rvq_partpop/*[level]*/, partpopq) {
  289                         counter++;
  290                         unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
  291                 }
  292                 mtx_unlock(&vm_page_queue_free_mtx);
  293                 sbuf_printf(&sbuf, "%5d: %6dK, %6d\n", level,
  294                     unused_pages * ((int)PAGE_SIZE / 1024), counter);
  295         }
  296         error = sbuf_finish(&sbuf);
  297         sbuf_delete(&sbuf);
  298         return (error);
  299 }
  300 
  301 /*
  302  * Reduces the given reservation's population count.  If the population count
  303  * becomes zero, the reservation is destroyed.  Additionally, moves the
  304  * reservation to the tail of the partially populated reservation queue if the
  305  * population count is non-zero.
  306  *
  307  * The free page queue lock must be held.
  308  */
  309 static void
  310 vm_reserv_depopulate(vm_reserv_t rv, int index)
  311 {
  312 
  313         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  314         KASSERT(rv->object != NULL,
  315             ("vm_reserv_depopulate: reserv %p is free", rv));
  316         KASSERT(popmap_is_set(rv->popmap, index),
  317             ("vm_reserv_depopulate: reserv %p's popmap[%d] is clear", rv,
  318             index));
  319         KASSERT(rv->popcnt > 0,
  320             ("vm_reserv_depopulate: reserv %p's popcnt is corrupted", rv));
  321         if (rv->inpartpopq) {
  322                 TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
  323                 rv->inpartpopq = FALSE;
  324         } else {
  325                 KASSERT(rv->pages->psind == 1,
  326                     ("vm_reserv_depopulate: reserv %p is already demoted",
  327                     rv));
  328                 rv->pages->psind = 0;
  329         }
  330         popmap_clear(rv->popmap, index);
  331         rv->popcnt--;
  332         if (rv->popcnt == 0) {
  333                 LIST_REMOVE(rv, objq);
  334                 rv->object = NULL;
  335                 vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
  336                 vm_reserv_freed++;
  337         } else {
  338                 rv->inpartpopq = TRUE;
  339                 TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
  340         }
  341 }
  342 
  343 /*
  344  * Returns the reservation to which the given page might belong.
  345  */
  346 static __inline vm_reserv_t
  347 vm_reserv_from_page(vm_page_t m)
  348 {
  349 
  350         return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]);
  351 }
  352 
  353 /*
  354  * Returns TRUE if the given reservation contains the given page index and
  355  * FALSE otherwise.
  356  */
  357 static __inline boolean_t
  358 vm_reserv_has_pindex(vm_reserv_t rv, vm_pindex_t pindex)
  359 {
  360 
  361         return (((pindex - rv->pindex) & ~(VM_LEVEL_0_NPAGES - 1)) == 0);
  362 }
  363 
  364 /*
  365  * Increases the given reservation's population count.  Moves the reservation
  366  * to the tail of the partially populated reservation queue.
  367  *
  368  * The free page queue must be locked.
  369  */
  370 static void
  371 vm_reserv_populate(vm_reserv_t rv, int index)
  372 {
  373 
  374         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  375         KASSERT(rv->object != NULL,
  376             ("vm_reserv_populate: reserv %p is free", rv));
  377         KASSERT(popmap_is_clear(rv->popmap, index),
  378             ("vm_reserv_populate: reserv %p's popmap[%d] is set", rv,
  379             index));
  380         KASSERT(rv->popcnt < VM_LEVEL_0_NPAGES,
  381             ("vm_reserv_populate: reserv %p is already full", rv));
  382         KASSERT(rv->pages->psind == 0,
  383             ("vm_reserv_populate: reserv %p is already promoted", rv));
  384         if (rv->inpartpopq) {
  385                 TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
  386                 rv->inpartpopq = FALSE;
  387         }
  388         popmap_set(rv->popmap, index);
  389         rv->popcnt++;
  390         if (rv->popcnt < VM_LEVEL_0_NPAGES) {
  391                 rv->inpartpopq = TRUE;
  392                 TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
  393         } else
  394                 rv->pages->psind = 1;
  395 }
  396 
  397 /*
  398  * Allocates a contiguous set of physical pages of the given size "npages"
  399  * from existing or newly created reservations.  All of the physical pages
  400  * must be at or above the given physical address "low" and below the given
  401  * physical address "high".  The given value "alignment" determines the
  402  * alignment of the first physical page in the set.  If the given value
  403  * "boundary" is non-zero, then the set of physical pages cannot cross any
  404  * physical address boundary that is a multiple of that value.  Both
  405  * "alignment" and "boundary" must be a power of two.
  406  *
  407  * The page "mpred" must immediately precede the offset "pindex" within the
  408  * specified object.
  409  *
  410  * The object and free page queue must be locked.
       *
       * Returns the first page of the allocated set, or NULL if the request
       * cannot be satisfied from a reservation: the index, alignment, or boundary
       * constraints rule a reservation out, the neighboring pages leave no room
       * for a new reservation, the physical allocation fails, or one of the
       * requested pages in an existing reservation is already in use.
       *
       * Every returned page has been marked as populated in its reservation.
  411  */
  412 vm_page_t
  413 vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
  414     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
  415     vm_page_t mpred)
  416 {
  417         vm_paddr_t pa, size;
  418         vm_page_t m, m_ret, msucc;
  419         vm_pindex_t first, leftcap, rightcap;
  420         vm_reserv_t rv;
  421         u_long allocpages, maxpages, minpages;
  422         int i, index, n;
  423 
  424         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  425         VM_OBJECT_ASSERT_WLOCKED(object);
  426         KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0"));
  427 
  428         /*
  429          * Is a reservation fundamentally impossible?
               *
               * That is the case if the reservation containing "pindex" would
               * start before offset 0 of the object, or if the request extends
               * past the end of the object.
  430          */
  431         if (pindex < VM_RESERV_INDEX(object, pindex) ||
  432             pindex + npages > object->size)
  433                 return (NULL);
  434 
  435         /*
  436          * All reservations of a particular size have the same alignment.
  437          * Assuming that the first page is allocated from a reservation, the
  438          * least significant bits of its physical address can be determined
  439          * from its offset from the beginning of the reservation and the size
  440          * of the reservation.
  441          *
  442          * Could the specified index within a reservation of the smallest
  443          * possible size satisfy the alignment and boundary requirements?
  444          */
  445         pa = VM_RESERV_INDEX(object, pindex) << PAGE_SHIFT;
  446         if ((pa & (alignment - 1)) != 0)
  447                 return (NULL);
              /*
               * "size" is also used by the "found" path below; it is computed
               * here, before any jump to that label.
               *
               * NOTE(review): when "boundary" is 0, ~(boundary - 1) evaluates to
               * 0 (assuming vm_paddr_t is unsigned), so the boundary test never
               * rejects the request.
               */
  448         size = npages << PAGE_SHIFT;
  449         if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
  450                 return (NULL);
  451 
  452         /*
  453          * Look for an existing reservation.
               *
               * Only the reservations of the pages adjacent to "pindex" in the
               * object's page list ("mpred" and its successor) are examined.
  454          */
  455         if (mpred != NULL) {
  456                 KASSERT(mpred->object == object,
  457                     ("vm_reserv_alloc_contig: object doesn't contain mpred"));
  458                 KASSERT(mpred->pindex < pindex,
  459                     ("vm_reserv_alloc_contig: mpred doesn't precede pindex"));
  460                 rv = vm_reserv_from_page(mpred);
  461                 if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
  462                         goto found;
  463                 msucc = TAILQ_NEXT(mpred, listq);
  464         } else
  465                 msucc = TAILQ_FIRST(&object->memq);
  466         if (msucc != NULL) {
  467                 KASSERT(msucc->pindex > pindex,
  468                     ("vm_reserv_alloc_contig: msucc doesn't succeed pindex"));
  469                 rv = vm_reserv_from_page(msucc);
  470                 if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
  471                         goto found;
  472         }
  473 
  474         /*
  475          * Could at least one reservation fit between the first index to the
  476          * left that can be used ("leftcap") and the first index to the right
  477          * that cannot be used ("rightcap")?
               *
               * "first" is the object offset at which the new reservation
               * containing "pindex" would begin.
  478          */
  479         first = pindex - VM_RESERV_INDEX(object, pindex);
  480         if (mpred != NULL) {
                      /*
                       * If "mpred" does not belong to one of this object's
                       * reservations, the next usable index is the one right
                       * after it; otherwise it is the first index past mpred's
                       * reservation.
                       */
  481                 if ((rv = vm_reserv_from_page(mpred))->object != object)
  482                         leftcap = mpred->pindex + 1;
  483                 else
  484                         leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
  485                 if (leftcap > first)
  486                         return (NULL);
  487         }
              /*
               * "minpages" counts from the start of the first new reservation
               * through the last requested page; "maxpages" rounds that up to a
               * whole number of reservations.
               */
  488         minpages = VM_RESERV_INDEX(object, pindex) + npages;
  489         maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES);
  490         allocpages = maxpages;
  491         if (msucc != NULL) {
  492                 if ((rv = vm_reserv_from_page(msucc))->object != object)
  493                         rightcap = msucc->pindex;
  494                 else
  495                         rightcap = rv->pindex;
  496                 if (first + maxpages > rightcap) {
  497                         if (maxpages == VM_LEVEL_0_NPAGES)
  498                                 return (NULL);
  499 
  500                         /*
  501                          * At least one reservation will fit between "leftcap"
  502                          * and "rightcap".  However, a reservation for the
  503                          * last of the requested pages will not fit.  Reduce
  504                          * the size of the upcoming allocation accordingly.
  505                          */
  506                         allocpages = minpages;
  507                 }
  508         }
  509 
  510         /*
  511          * Would the last new reservation extend past the end of the object?
  512          */
  513         if (first + maxpages > object->size) {
  514                 /*
  515                  * Don't allocate the last new reservation if the object is a
  516                  * vnode or backed by another object that is a vnode. 
  517                  */
  518                 if (object->type == OBJT_VNODE ||
  519                     (object->backing_object != NULL &&
  520                     object->backing_object->type == OBJT_VNODE)) {
  521                         if (maxpages == VM_LEVEL_0_NPAGES)
  522                                 return (NULL);
  523                         allocpages = minpages;
  524                 }
  525                 /* Speculate that the object may grow. */
  526         }
  527 
  528         /*
  529          * Allocate the physical pages.  The alignment and boundary specified
  530          * for this allocation may be different from the alignment and
  531          * boundary specified for the requested pages.  For instance, the
  532          * specified index may not be the first page within the first new
  533          * reservation.
               *
               * The alignment is raised to at least VM_LEVEL_0_SIZE, and the
               * boundary is only passed through when it exceeds VM_LEVEL_0_SIZE.
  534          */
  535         m = vm_phys_alloc_contig(allocpages, low, high, ulmax(alignment,
  536             VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
  537         if (m == NULL)
  538                 return (NULL);
  539 
  540         /*
  541          * The allocated physical pages always begin at a reservation
  542          * boundary, but they do not always end at a reservation boundary.
  543          * Initialize every reservation that is completely covered by the
  544          * allocated physical pages.
  545          */
  546         m_ret = NULL;
  547         index = VM_RESERV_INDEX(object, pindex);
  548         do {
  549                 rv = vm_reserv_from_page(m);
  550                 KASSERT(rv->pages == m,
  551                     ("vm_reserv_alloc_contig: reserv %p's pages is corrupted",
  552                     rv));
  553                 KASSERT(rv->object == NULL,
  554                     ("vm_reserv_alloc_contig: reserv %p isn't free", rv));
  555                 LIST_INSERT_HEAD(&object->rvq, rv, objq);
  556                 rv->object = object;
  557                 rv->pindex = first;
  558                 KASSERT(rv->popcnt == 0,
  559                     ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted",
  560                     rv));
  561                 KASSERT(!rv->inpartpopq,
  562                     ("vm_reserv_alloc_contig: reserv %p's inpartpopq is TRUE",
  563                     rv));
  564                 for (i = 0; i < NPOPMAP; i++)
  565                         KASSERT(rv->popmap[i] == 0,
  566                     ("vm_reserv_alloc_contig: reserv %p's popmap is corrupted",
  567                             rv));
                      /*
                       * "n" is the number of still-outstanding requested pages
                       * that fall within this reservation, starting at "index".
                       */
  568                 n = ulmin(VM_LEVEL_0_NPAGES - index, npages);
  569                 for (i = 0; i < n; i++)
  570                         vm_reserv_populate(rv, index + i);
  571                 npages -= n;
                      /*
                       * Only the first reservation can start at a non-zero
                       * index: remember the first requested page once, then
                       * populate later reservations from their first page.
                       */
  572                 if (m_ret == NULL) {
  573                         m_ret = &rv->pages[index];
  574                         index = 0;
  575                 }
  576                 m += VM_LEVEL_0_NPAGES;
  577                 first += VM_LEVEL_0_NPAGES;
  578                 allocpages -= VM_LEVEL_0_NPAGES;
              /*
               * Stop once less than a whole reservation remains; a trailing
               * partial chunk of the allocation gets no reservation.
               */
  579         } while (allocpages >= VM_LEVEL_0_NPAGES);
  580         return (m_ret);
  581 
  582         /*
  583          * Found a matching reservation.
               *
               * The request is only satisfied if it fits entirely within this
               * one reservation, the resulting physical range meets the "low",
               * "high", "alignment", and "boundary" constraints, and none of the
               * requested pages is already marked as populated.
  584          */
  585 found:
  586         index = VM_RESERV_INDEX(object, pindex);
  587         /* Does the allocation fit within the reservation? */
  588         if (index + npages > VM_LEVEL_0_NPAGES)
  589                 return (NULL);
  590         m = &rv->pages[index];
  591         pa = VM_PAGE_TO_PHYS(m);
  592         if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 ||
  593             ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0)
  594                 return (NULL);
  595         /* Handle vm_page_rename(m, new_object, ...). */
  596         for (i = 0; i < npages; i++)
  597                 if (popmap_is_set(rv->popmap, index + i))
  598                         return (NULL);
  599         for (i = 0; i < npages; i++)
  600                 vm_reserv_populate(rv, index + i);
  601         return (m);
  602 }
  603 
  604 /*
  605  * Allocates a page from an existing or newly created reservation.
  606  *
  607  * The page "mpred" must immediately precede the offset "pindex" within the
  608  * specified object.
  609  *
  610  * The object and free page queue must be locked.
       *
       * Returns the page that backs "pindex" within the reservation, marked as
       * populated, or NULL if no reservation can be used: "pindex" is outside
       * the range a reservation could cover, the neighboring pages leave no room
       * for a new reservation, a new reservation would extend past the end of a
       * vnode-backed object, the physical allocation fails, or the page at that
       * index of an existing reservation is already in use.
  611  */
  612 vm_page_t
  613 vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, vm_page_t mpred)
  614 {
  615         vm_page_t m, msucc;
  616         vm_pindex_t first, leftcap, rightcap;
  617         vm_reserv_t rv;
  618         int i, index;
  619 
  620         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  621         VM_OBJECT_ASSERT_WLOCKED(object);
  622 
  623         /*
  624          * Is a reservation fundamentally impossible?
               *
               * That is the case if the reservation containing "pindex" would
               * start before offset 0 of the object, or if "pindex" lies at or
               * beyond the end of the object.
  625          */
  626         if (pindex < VM_RESERV_INDEX(object, pindex) ||
  627             pindex >= object->size)
  628                 return (NULL);
  629 
  630         /*
  631          * Look for an existing reservation.
               *
               * Only the reservations of the pages adjacent to "pindex" in the
               * object's page list ("mpred" and its successor) are examined.
  632          */
  633         if (mpred != NULL) {
  634                 KASSERT(mpred->object == object,
  635                     ("vm_reserv_alloc_page: object doesn't contain mpred"));
  636                 KASSERT(mpred->pindex < pindex,
  637                     ("vm_reserv_alloc_page: mpred doesn't precede pindex"));
  638                 rv = vm_reserv_from_page(mpred);
  639                 if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
  640                         goto found;
  641                 msucc = TAILQ_NEXT(mpred, listq);
  642         } else
  643                 msucc = TAILQ_FIRST(&object->memq);
  644         if (msucc != NULL) {
  645                 KASSERT(msucc->pindex > pindex,
  646                     ("vm_reserv_alloc_page: msucc doesn't succeed pindex"));
  647                 rv = vm_reserv_from_page(msucc);
  648                 if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
  649                         goto found;
  650         }
  651 
  652         /*
  653          * Could a reservation fit between the first index to the left that
  654          * can be used and the first index to the right that cannot be used?
               *
               * "first" is the object offset at which the new reservation
               * containing "pindex" would begin.
  655          */
  656         first = pindex - VM_RESERV_INDEX(object, pindex);
  657         if (mpred != NULL) {
                      /*
                       * If "mpred" does not belong to one of this object's
                       * reservations, the next usable index is the one right
                       * after it; otherwise it is the first index past mpred's
                       * reservation.
                       */
  658                 if ((rv = vm_reserv_from_page(mpred))->object != object)
  659                         leftcap = mpred->pindex + 1;
  660                 else
  661                         leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
  662                 if (leftcap > first)
  663                         return (NULL);
  664         }
  665         if (msucc != NULL) {
                      /*
                       * The new reservation must end at or before the successor
                       * page, or before the start of the successor's reservation
                       * if it has one in this object.
                       */
  666                 if ((rv = vm_reserv_from_page(msucc))->object != object)
  667                         rightcap = msucc->pindex;
  668                 else
  669                         rightcap = rv->pindex;
  670                 if (first + VM_LEVEL_0_NPAGES > rightcap)
  671                         return (NULL);
  672         }
  673 
  674         /*
  675          * Would a new reservation extend past the end of the object? 
  676          */
  677         if (first + VM_LEVEL_0_NPAGES > object->size) {
  678                 /*
  679                  * Don't allocate a new reservation if the object is a vnode or
  680                  * backed by another object that is a vnode. 
  681                  */
  682                 if (object->type == OBJT_VNODE ||
  683                     (object->backing_object != NULL &&
  684                     object->backing_object->type == OBJT_VNODE))
  685                         return (NULL);
  686                 /* Speculate that the object may grow. */
  687         }
  688 
  689         /*
  690          * Allocate and populate the new reservation.
               *
               * A block of order VM_LEVEL_0_ORDER is requested from the default
               * free pool; only the page for "pindex" is marked as populated.
  691          */
  692         m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
  693         if (m == NULL)
  694                 return (NULL);
  695         rv = vm_reserv_from_page(m);
  696         KASSERT(rv->pages == m,
  697             ("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv));
  698         KASSERT(rv->object == NULL,
  699             ("vm_reserv_alloc_page: reserv %p isn't free", rv));
  700         LIST_INSERT_HEAD(&object->rvq, rv, objq);
  701         rv->object = object;
  702         rv->pindex = first;
  703         KASSERT(rv->popcnt == 0,
  704             ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv));
  705         KASSERT(!rv->inpartpopq,
  706             ("vm_reserv_alloc_page: reserv %p's inpartpopq is TRUE", rv));
  707         for (i = 0; i < NPOPMAP; i++)
  708                 KASSERT(rv->popmap[i] == 0,
  709                     ("vm_reserv_alloc_page: reserv %p's popmap is corrupted",
  710                     rv));
  711         index = VM_RESERV_INDEX(object, pindex);
  712         vm_reserv_populate(rv, index);
  713         return (&rv->pages[index]);
  714 
  715         /*
  716          * Found a matching reservation.
               *
               * A set population map bit means that the page at this index is
               * already in use, so it cannot be handed out again.
  717          */
  718 found:
  719         index = VM_RESERV_INDEX(object, pindex);
  720         m = &rv->pages[index];
  721         /* Handle vm_page_rename(m, new_object, ...). */
  722         if (popmap_is_set(rv->popmap, index))
  723                 return (NULL);
  724         vm_reserv_populate(rv, index);
  725         return (m);
  726 }
  727 
  728 /*
  729  * Breaks the given reservation.  Except for the specified free page, all free
  730  * pages in the reservation are returned to the physical memory allocator.
  731  * The reservation's population count and map are reset to their initial
  732  * state.
  733  *
  734  * The given reservation must not be in the partially populated reservation
  735  * queue.  The free page queue lock must be held.
  736  */
  737 static void
  738 vm_reserv_break(vm_reserv_t rv, vm_page_t m)
  739 {
  740         int begin_zeroes, hi, i, lo;
  741 
  742         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  743         KASSERT(rv->object != NULL,
  744             ("vm_reserv_break: reserv %p is free", rv));
  745         KASSERT(!rv->inpartpopq,
  746             ("vm_reserv_break: reserv %p's inpartpopq is TRUE", rv));
  747         LIST_REMOVE(rv, objq);
  748         rv->object = NULL;
  749         if (m != NULL) {
  750                 /*
  751                  * Since the reservation is being broken, there is no harm in
  752                  * abusing the population map to stop "m" from being returned
  753                  * to the physical memory allocator.
  754                  */
  755                 i = m - rv->pages;
  756                 KASSERT(popmap_is_clear(rv->popmap, i),
  757                     ("vm_reserv_break: reserv %p's popmap is corrupted", rv));
  758                 popmap_set(rv->popmap, i);
  759                 rv->popcnt++;
  760         }
  761         i = hi = 0;
  762         do {
  763                 /* Find the next 0 bit.  Any previous 0 bits are < "hi". */
  764                 lo = ffsl(~(((1UL << hi) - 1) | rv->popmap[i]));
  765                 if (lo == 0) {
  766                         /* Redundantly clears bits < "hi". */
  767                         rv->popmap[i] = 0;
  768                         rv->popcnt -= NBPOPMAP - hi;
  769                         while (++i < NPOPMAP) {
  770                                 lo = ffsl(~rv->popmap[i]);
  771                                 if (lo == 0) {
  772                                         rv->popmap[i] = 0;
  773                                         rv->popcnt -= NBPOPMAP;
  774                                 } else
  775                                         break;
  776                         }
  777                         if (i == NPOPMAP)
  778                                 break;
  779                         hi = 0;
  780                 }
  781                 KASSERT(lo > 0, ("vm_reserv_break: lo is %d", lo));
  782                 /* Convert from ffsl() to ordinary bit numbering. */
  783                 lo--;
  784                 if (lo > 0) {
  785                         /* Redundantly clears bits < "hi". */
  786                         rv->popmap[i] &= ~((1UL << lo) - 1);
  787                         rv->popcnt -= lo - hi;
  788                 }
  789                 begin_zeroes = NBPOPMAP * i + lo;
  790                 /* Find the next 1 bit. */
  791                 do
  792                         hi = ffsl(rv->popmap[i]);
  793                 while (hi == 0 && ++i < NPOPMAP);
  794                 if (i != NPOPMAP)
  795                         /* Convert from ffsl() to ordinary bit numbering. */
  796                         hi--;
  797                 vm_phys_free_contig(&rv->pages[begin_zeroes], NBPOPMAP * i +
  798                     hi - begin_zeroes);
  799         } while (i < NPOPMAP);
  800         KASSERT(rv->popcnt == 0,
  801             ("vm_reserv_break: reserv %p's popcnt is corrupted", rv));
  802         vm_reserv_broken++;
  803 }
  804 
  805 /*
  806  * Breaks all reservations belonging to the given object.
  807  */
  808 void
  809 vm_reserv_break_all(vm_object_t object)
  810 {
  811         vm_reserv_t rv;
  812 
  813         mtx_lock(&vm_page_queue_free_mtx);
  814         while ((rv = LIST_FIRST(&object->rvq)) != NULL) {
  815                 KASSERT(rv->object == object,
  816                     ("vm_reserv_break_all: reserv %p is corrupted", rv));
  817                 if (rv->inpartpopq) {
  818                         TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
  819                         rv->inpartpopq = FALSE;
  820                 }
  821                 vm_reserv_break(rv, NULL);
  822         }
  823         mtx_unlock(&vm_page_queue_free_mtx);
  824 }
  825 
  826 /*
  827  * Frees the given page if it belongs to a reservation.  Returns TRUE if the
  828  * page is freed and FALSE otherwise.
  829  *
  830  * The free page queue lock must be held.
  831  */
  832 boolean_t
  833 vm_reserv_free_page(vm_page_t m)
  834 {
  835         vm_reserv_t rv;
  836 
  837         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  838         rv = vm_reserv_from_page(m);
  839         if (rv->object == NULL)
  840                 return (FALSE);
  841         vm_reserv_depopulate(rv, m - rv->pages);
  842         return (TRUE);
  843 }
  844 
  845 /*
  846  * Initializes the reservation management system.  Specifically, initializes
  847  * the reservation array.
  848  *
  849  * Requires that vm_page_array and first_page are initialized!
  850  */
  851 void
  852 vm_reserv_init(void)
  853 {
  854         vm_paddr_t paddr;
  855         struct vm_phys_seg *seg;
  856         int segind;
  857 
  858         /*
  859          * Initialize the reservation array.  Specifically, initialize the
  860          * "pages" field for every element that has an underlying superpage.
  861          */
  862         for (segind = 0; segind < vm_phys_nsegs; segind++) {
  863                 seg = &vm_phys_segs[segind];
  864                 paddr = roundup2(seg->start, VM_LEVEL_0_SIZE);
  865                 while (paddr + VM_LEVEL_0_SIZE <= seg->end) {
  866                         vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages =
  867                             PHYS_TO_VM_PAGE(paddr);
  868                         paddr += VM_LEVEL_0_SIZE;
  869                 }
  870         }
  871 }
  872 
  873 /*
  874  * Returns true if the given page belongs to a reservation and that page is
  875  * free.  Otherwise, returns false.
  876  */
  877 bool
  878 vm_reserv_is_page_free(vm_page_t m)
  879 {
  880         vm_reserv_t rv;
  881 
  882         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  883         rv = vm_reserv_from_page(m);
  884         if (rv->object == NULL)
  885                 return (false);
  886         return (popmap_is_clear(rv->popmap, m - rv->pages));
  887 }
  888 
  889 /*
  890  * If the given page belongs to a reservation, returns the level of that
  891  * reservation.  Otherwise, returns -1.
  892  */
  893 int
  894 vm_reserv_level(vm_page_t m)
  895 {
  896         vm_reserv_t rv;
  897 
  898         rv = vm_reserv_from_page(m);
  899         return (rv->object != NULL ? 0 : -1);
  900 }
  901 
  902 /*
  903  * Returns a reservation level if the given page belongs to a fully populated
  904  * reservation and -1 otherwise.
  905  */
  906 int
  907 vm_reserv_level_iffullpop(vm_page_t m)
  908 {
  909         vm_reserv_t rv;
  910 
  911         rv = vm_reserv_from_page(m);
  912         return (rv->popcnt == VM_LEVEL_0_NPAGES ? 0 : -1);
  913 }
  914 
  915 /*
  916  * Breaks the given partially populated reservation, releasing its free pages
  917  * to the physical memory allocator.
  918  *
  919  * The free page queue lock must be held.
  920  */
  921 static void
  922 vm_reserv_reclaim(vm_reserv_t rv)
  923 {
  924 
  925         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  926         KASSERT(rv->inpartpopq,
  927             ("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));
  928         TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
  929         rv->inpartpopq = FALSE;
  930         vm_reserv_break(rv, NULL);
  931         vm_reserv_reclaimed++;
  932 }
  933 
  934 /*
  935  * Breaks the reservation at the head of the partially populated reservation
  936  * queue, releasing its free pages to the physical memory allocator.  Returns
  937  * TRUE if a reservation is broken and FALSE otherwise.
  938  *
  939  * The free page queue lock must be held.
  940  */
  941 boolean_t
  942 vm_reserv_reclaim_inactive(void)
  943 {
  944         vm_reserv_t rv;
  945 
  946         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  947         if ((rv = TAILQ_FIRST(&vm_rvq_partpop)) != NULL) {
  948                 vm_reserv_reclaim(rv);
  949                 return (TRUE);
  950         }
  951         return (FALSE);
  952 }
  953 
  954 /*
  955  * Searches the partially populated reservation queue for the least recently
  956  * changed reservation with free pages that satisfy the given request for
  957  * contiguous physical memory.  If a satisfactory reservation is found, it is
  958  * broken.  Returns TRUE if a reservation is broken and FALSE otherwise.
  959  *
  960  * The free page queue lock must be held.
  961  */
  962 boolean_t
  963 vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
  964     u_long alignment, vm_paddr_t boundary)
  965 {
  966         vm_paddr_t pa, size;
  967         vm_reserv_t rv;
  968         int hi, i, lo, low_index, next_free;
  969 
  970         mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
  971         if (npages > VM_LEVEL_0_NPAGES - 1)
  972                 return (FALSE);
  973         size = npages << PAGE_SHIFT;
  974         TAILQ_FOREACH(rv, &vm_rvq_partpop, partpopq) {
  975                 pa = VM_PAGE_TO_PHYS(&rv->pages[VM_LEVEL_0_NPAGES - 1]);
  976                 if (pa + PAGE_SIZE - size < low) {
  977                         /* This entire reservation is too low; go to next. */
  978                         continue;
  979                 }
  980                 pa = VM_PAGE_TO_PHYS(&rv->pages[0]);
  981                 if (pa + size > high) {
  982                         /* This entire reservation is too high; go to next. */
  983                         continue;
  984                 }
  985                 if (pa < low) {
  986                         /* Start the search for free pages at "low". */
  987                         low_index = (low + PAGE_MASK - pa) >> PAGE_SHIFT;
  988                         i = low_index / NBPOPMAP;
  989                         hi = low_index % NBPOPMAP;
  990                 } else
  991                         i = hi = 0;
  992                 do {
  993                         /* Find the next free page. */
  994                         lo = ffsl(~(((1UL << hi) - 1) | rv->popmap[i]));
  995                         while (lo == 0 && ++i < NPOPMAP)
  996                                 lo = ffsl(~rv->popmap[i]);
  997                         if (i == NPOPMAP)
  998                                 break;
  999                         /* Convert from ffsl() to ordinary bit numbering. */
 1000                         lo--;
 1001                         next_free = NBPOPMAP * i + lo;
 1002                         pa = VM_PAGE_TO_PHYS(&rv->pages[next_free]);
 1003                         KASSERT(pa >= low,
 1004                             ("vm_reserv_reclaim_contig: pa is too low"));
 1005                         if (pa + size > high) {
 1006                                 /* The rest of this reservation is too high. */
 1007                                 break;
 1008                         } else if ((pa & (alignment - 1)) != 0 ||
 1009                             ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) {
 1010                                 /*
 1011                                  * The current page doesn't meet the alignment
 1012                                  * and/or boundary requirements.  Continue
 1013                                  * searching this reservation until the rest
 1014                                  * of its free pages are either excluded or
 1015                                  * exhausted.
 1016                                  */
 1017                                 hi = lo + 1;
 1018                                 if (hi >= NBPOPMAP) {
 1019                                         hi = 0;
 1020                                         i++;
 1021                                 }
 1022                                 continue;
 1023                         }
 1024                         /* Find the next used page. */
 1025                         hi = ffsl(rv->popmap[i] & ~((1UL << lo) - 1));
 1026                         while (hi == 0 && ++i < NPOPMAP) {
 1027                                 if ((NBPOPMAP * i - next_free) * PAGE_SIZE >=
 1028                                     size) {
 1029                                         vm_reserv_reclaim(rv);
 1030                                         return (TRUE);
 1031                                 }
 1032                                 hi = ffsl(rv->popmap[i]);
 1033                         }
 1034                         /* Convert from ffsl() to ordinary bit numbering. */
 1035                         if (i != NPOPMAP)
 1036                                 hi--;
 1037                         if ((NBPOPMAP * i + hi - next_free) * PAGE_SIZE >=
 1038                             size) {
 1039                                 vm_reserv_reclaim(rv);
 1040                                 return (TRUE);
 1041                         }
 1042                 } while (i < NPOPMAP);
 1043         }
 1044         return (FALSE);
 1045 }
 1046 
 1047 /*
 1048  * Transfers the reservation underlying the given page to a new object.
 1049  *
 1050  * The object must be locked.
 1051  */
 1052 void
 1053 vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object,
 1054     vm_pindex_t old_object_offset)
 1055 {
 1056         vm_reserv_t rv;
 1057 
 1058         VM_OBJECT_ASSERT_WLOCKED(new_object);
 1059         rv = vm_reserv_from_page(m);
 1060         if (rv->object == old_object) {
 1061                 mtx_lock(&vm_page_queue_free_mtx);
 1062                 if (rv->object == old_object) {
 1063                         LIST_REMOVE(rv, objq);
 1064                         LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
 1065                         rv->object = new_object;
 1066                         rv->pindex -= old_object_offset;
 1067                 }
 1068                 mtx_unlock(&vm_page_queue_free_mtx);
 1069         }
 1070 }
 1071 
 1072 /*
 1073  * Returns the size (in bytes) of a reservation of the specified level.
 1074  */
 1075 int
 1076 vm_reserv_size(int level)
 1077 {
 1078 
 1079         switch (level) {
 1080         case 0:
 1081                 return (VM_LEVEL_0_SIZE);
 1082         case -1:
 1083                 return (PAGE_SIZE);
 1084         default:
 1085                 return (0);
 1086         }
 1087 }
 1088 
 1089 /*
 1090  * Allocates the virtual and physical memory required by the reservation
 1091  * management system's data structures, in particular, the reservation array.
 1092  */
 1093 vm_paddr_t
 1094 vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
 1095 {
 1096         vm_paddr_t new_end;
 1097         size_t size;
 1098 
 1099         /*
 1100          * Calculate the size (in bytes) of the reservation array.  Round up
 1101          * from "high_water" because every small page is mapped to an element
 1102          * in the reservation array based on its physical address.  Thus, the
 1103          * number of elements in the reservation array can be greater than the
 1104          * number of superpages. 
 1105          */
 1106         size = howmany(high_water, VM_LEVEL_0_SIZE) * sizeof(struct vm_reserv);
 1107 
 1108         /*
 1109          * Allocate and map the physical memory for the reservation array.  The
 1110          * next available virtual address is returned by reference.
 1111          */
 1112         new_end = end - round_page(size);
 1113         vm_reserv_array = (void *)(uintptr_t)pmap_map(vaddr, new_end, end,
 1114             VM_PROT_READ | VM_PROT_WRITE);
 1115         bzero(vm_reserv_array, size);
 1116 
 1117         /*
 1118          * Return the next available physical address.
 1119          */
 1120         return (new_end);
 1121 }
 1122 
 1123 #endif  /* VM_NRESERVLEVEL > 0 */
Cache object: 0cd76dd534e2fc2e242bd77addb3f6c4
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/vm/vm_reserv.c

FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_reserv.c