FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_page.c
1 /*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91
33 */
34
35 /*-
36 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
37 * All rights reserved.
38 *
39 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
40 *
41 * Permission to use, copy, modify and distribute this software and
42 * its documentation is hereby granted, provided that both the copyright
43 * notice and this permission notice appear in all copies of the
44 * software, derivative works or modified versions, and any portions
45 * thereof, and that both notices appear in supporting documentation.
46 *
47 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
48 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
49 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
50 *
51 * Carnegie Mellon requests users of this software to return to
52 *
53 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
54 * School of Computer Science
55 * Carnegie Mellon University
56 * Pittsburgh PA 15213-3890
57 *
58 * any improvements or extensions that they make and grant Carnegie the
59 * rights to redistribute these changes.
60 */
61
62 /*
63 * GENERAL RULES ON VM_PAGE MANIPULATION
64 *
65 * - a pageq mutex is required when adding or removing a page from a
66 * page queue (vm_page_queue[]), regardless of other mutexes or the
67 * busy state of a page.
68 *
69 * - a hash chain mutex is required when associating or disassociating
70 * a page from the VM PAGE CACHE hash table (vm_page_buckets),
71 * regardless of other mutexes or the busy state of a page.
72 *
73 * - either a hash chain mutex OR a busied page is required in order
74 * to modify the page flags. A hash chain mutex must be obtained in
75 * order to busy a page. A page's flags cannot be modified by a
76 * hash chain mutex if the page is marked busy.
77 *
78 * - The object memq mutex is held when inserting or removing
79 * pages from an object (vm_page_insert() or vm_page_remove()). This
80 * is different from the object's main mutex.
81 *
82 * Generally speaking, you have to be aware of side effects when running
83 * vm_page ops. A vm_page_lookup() will return with the hash chain
84 * locked, whether it was able to look up the page or not. vm_page_free(),
85 * vm_page_cache(), vm_page_activate(), and a number of other routines
86 * will release the hash chain mutex for you. Intermediate manipulation
87 * routines such as vm_page_flag_set() expect the hash chain to be held
88 * on entry and the hash chain will remain held on return.
89 *
90 * pageq scanning can only occur with the pageq in question locked.
91 * We have a known bottleneck with the active queue, but the cache
92 * and free queues are actually arrays already.
93 */
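/*
 * A minimal usage sketch of the rules above (illustrative only, assuming
 * a vm_object pointer "object" and a page index "pindex" supplied by the
 * caller).  The object lock covers the lookup; the page queue mutex
 * covers the queue transition:
 *
 *	VM_OBJECT_LOCK(object);
 *	m = vm_page_lookup(object, pindex);
 *	if (m != NULL) {
 *		vm_page_lock_queues();
 *		vm_page_activate(m);
 *		vm_page_unlock_queues();
 *	}
 *	VM_OBJECT_UNLOCK(object);
 */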
94
95 /*
96 * Resident memory management module.
97 */
98
99 #include <sys/cdefs.h>
100 __FBSDID("$FreeBSD$");
101
102 #include <sys/param.h>
103 #include <sys/systm.h>
104 #include <sys/lock.h>
105 #include <sys/kernel.h>
106 #include <sys/malloc.h>
107 #include <sys/mutex.h>
108 #include <sys/proc.h>
109 #include <sys/sysctl.h>
110 #include <sys/vmmeter.h>
111 #include <sys/vnode.h>
112
113 #include <vm/vm.h>
114 #include <vm/vm_param.h>
115 #include <vm/vm_kern.h>
116 #include <vm/vm_object.h>
117 #include <vm/vm_page.h>
118 #include <vm/vm_pageout.h>
119 #include <vm/vm_pager.h>
120 #include <vm/vm_phys.h>
121 #include <vm/vm_extern.h>
122 #include <vm/uma.h>
123 #include <vm/uma_int.h>
124
125 #include <machine/md_var.h>
126
127 /*
128 * Associated with each page of user-allocatable memory is a
129 * page structure.
130 */
131
132 struct mtx vm_page_queue_mtx;
133 struct mtx vm_page_queue_free_mtx;
134
135 vm_page_t vm_page_array = 0;
136 int vm_page_array_size = 0;
137 long first_page = 0;
138 int vm_page_zero_count = 0;
139
140 static int boot_pages = UMA_BOOT_PAGES;
141 TUNABLE_INT("vm.boot_pages", &boot_pages);
142 SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RD, &boot_pages, 0,
143 "number of pages allocated for bootstrapping the VM system");
144
145 /*
146 * vm_set_page_size:
147 *
148 * Sets the page size, perhaps based upon the memory
149 * size. Must be called before any use of page-size
150 * dependent functions.
151 */
152 void
153 vm_set_page_size(void)
154 {
155 if (cnt.v_page_size == 0)
156 cnt.v_page_size = PAGE_SIZE;
157 if (((cnt.v_page_size - 1) & cnt.v_page_size) != 0)
158 panic("vm_set_page_size: page size not a power of two");
159 }
160
161 /*
162 * vm_page_blacklist_lookup:
163 *
164 * See if a physical address in this page has been listed
165 * in the blacklist tunable. Entries in the tunable are
166 * separated by spaces or commas. If an invalid integer is
167 * encountered then the rest of the string is skipped.
168 */
169 static int
170 vm_page_blacklist_lookup(char *list, vm_paddr_t pa)
171 {
172 vm_paddr_t bad;
173 char *cp, *pos;
174
175 for (pos = list; *pos != '\0'; pos = cp) {
176 bad = strtoq(pos, &cp, 0);
177 if (*cp != '\0') {
178 if (*cp == ' ' || *cp == ',') {
179 cp++;
180 if (cp == pos)
181 continue;
182 } else
183 break;
184 }
185 if (pa == trunc_page(bad))
186 return (1);
187 }
188 return (0);
189 }
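/*
 * For illustration, the list parsed above normally comes from the
 * "vm.blacklist" loader tunable (see vm_page_startup() below), e.g. in
 * /boot/loader.conf:
 *
 *	vm.blacklist="0x7f654000,0x7f655000 0x12345678"
 *
 * Entries may be separated by spaces or commas, and each entry is
 * truncated to a page boundary before comparison, so the last entry
 * above excludes the whole page containing 0x12345678.
 */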
190
191 /*
192 * vm_page_startup:
193 *
194 * Initializes the resident memory module.
195 *
196 * Allocates memory for the page cells, and
197 * for the object/offset-to-page hash table headers.
198 * Each page cell is initialized and placed on the free list.
199 */
200 vm_offset_t
201 vm_page_startup(vm_offset_t vaddr)
202 {
203 vm_offset_t mapped;
204 vm_size_t npages;
205 vm_paddr_t page_range;
206 vm_paddr_t new_end;
207 int i;
208 vm_paddr_t pa;
209 int nblocks;
210 vm_paddr_t last_pa;
211 char *list;
212
213 /* the biggest memory array is the second group of pages */
214 vm_paddr_t end;
215 vm_paddr_t biggestsize;
216 vm_paddr_t low_water, high_water;
217 int biggestone;
218
219 vm_paddr_t total;
220
221 total = 0;
222 biggestsize = 0;
223 biggestone = 0;
224 nblocks = 0;
225 vaddr = round_page(vaddr);
226
227 for (i = 0; phys_avail[i + 1]; i += 2) {
228 phys_avail[i] = round_page(phys_avail[i]);
229 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
230 }
231
232 low_water = phys_avail[0];
233 high_water = phys_avail[1];
234
235 for (i = 0; phys_avail[i + 1]; i += 2) {
236 vm_paddr_t size = phys_avail[i + 1] - phys_avail[i];
237
238 if (size > biggestsize) {
239 biggestone = i;
240 biggestsize = size;
241 }
242 if (phys_avail[i] < low_water)
243 low_water = phys_avail[i];
244 if (phys_avail[i + 1] > high_water)
245 high_water = phys_avail[i + 1];
246 ++nblocks;
247 total += size;
248 }
249
250 end = phys_avail[biggestone+1];
251
252 /*
253 * Initialize the locks.
254 */
255 mtx_init(&vm_page_queue_mtx, "vm page queue mutex", NULL, MTX_DEF |
256 MTX_RECURSE);
257 mtx_init(&vm_page_queue_free_mtx, "vm page queue free mutex", NULL,
258 MTX_DEF);
259
260 /*
261 * Initialize the queue headers for the free queue, the active queue
262 * and the inactive queue.
263 */
264 vm_pageq_init();
265
266 /*
267 * Allocate memory for use when bootstrapping the kernel memory
268 * allocator.
269 */
270 new_end = end - (boot_pages * UMA_SLAB_SIZE);
271 new_end = trunc_page(new_end);
272 mapped = pmap_map(&vaddr, new_end, end,
273 VM_PROT_READ | VM_PROT_WRITE);
274 bzero((void *)mapped, end - new_end);
275 uma_startup((void *)mapped, boot_pages);
276
277 #if defined(__amd64__) || defined(__i386__)
278 /*
279 * Allocate a bitmap to indicate that a random physical page
280 * needs to be included in a minidump.
281 *
282 * The amd64 port needs this to indicate which direct map pages
283 * need to be dumped, via calls to dump_add_page()/dump_drop_page().
284 *
285 * However, i386 still needs this workspace internally within the
286 * minidump code. In theory, they are not needed on i386, but are
287 * included should the sf_buf code decide to use them.
288 */
289 page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;
290 vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY);
291 new_end -= vm_page_dump_size;
292 vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
293 new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE);
294 bzero((void *)vm_page_dump, vm_page_dump_size);
295 #endif
296 /*
297 * Compute the number of pages of memory that will be available for
298 * use (taking into account the overhead of a page structure per
299 * page).
300 */
301 first_page = low_water / PAGE_SIZE;
302 #ifdef VM_PHYSSEG_SPARSE
303 page_range = 0;
304 for (i = 0; phys_avail[i + 1] != 0; i += 2)
305 page_range += atop(phys_avail[i + 1] - phys_avail[i]);
306 #elif defined(VM_PHYSSEG_DENSE)
307 page_range = high_water / PAGE_SIZE - first_page;
308 #else
309 #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
310 #endif
311 npages = (total - (page_range * sizeof(struct vm_page)) -
312 (end - new_end)) / PAGE_SIZE;
313 end = new_end;
314
315 /*
316 * Reserve an unmapped guard page to trap access to vm_page_array[-1].
317 */
318 vaddr += PAGE_SIZE;
319
320 /*
321 * Initialize the mem entry structures now, and put them in the free
322 * queue.
323 */
324 new_end = trunc_page(end - page_range * sizeof(struct vm_page));
325 mapped = pmap_map(&vaddr, new_end, end,
326 VM_PROT_READ | VM_PROT_WRITE);
327 vm_page_array = (vm_page_t) mapped;
328 #ifdef __amd64__
329 /*
330 * pmap_map on amd64 comes out of the direct-map, not kvm like i386,
331 * so the pages must be tracked for a crashdump to include this data.
332 * This includes the vm_page_array and the early UMA bootstrap pages.
333 */
334 for (pa = new_end; pa < phys_avail[biggestone + 1]; pa += PAGE_SIZE)
335 dump_add_page(pa);
336 #endif
337 phys_avail[biggestone + 1] = new_end;
338
339 /*
340 * Clear all of the page structures
341 */
342 bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
343 for (i = 0; i < page_range; i++)
344 vm_page_array[i].order = VM_NFREEORDER;
345 vm_page_array_size = page_range;
346
347 /*
348 * This assertion tests the hypothesis that npages and total are
349 * redundant. XXX
350 */
351 page_range = 0;
352 for (i = 0; phys_avail[i + 1] != 0; i += 2)
353 page_range += atop(phys_avail[i + 1] - phys_avail[i]);
354 KASSERT(page_range == npages,
355 ("vm_page_startup: inconsistent page counts"));
356
357 /*
358 * Initialize the physical memory allocator.
359 */
360 vm_phys_init();
361
362 /*
363 * Add every available physical page that is not blacklisted to
364 * the free lists.
365 */
366 cnt.v_page_count = 0;
367 cnt.v_free_count = 0;
368 list = getenv("vm.blacklist");
369 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
370 pa = phys_avail[i];
371 last_pa = phys_avail[i + 1];
372 while (pa < last_pa) {
373 if (list != NULL &&
374 vm_page_blacklist_lookup(list, pa))
375 printf("Skipping page with pa 0x%jx\n",
376 (uintmax_t)pa);
377 else
378 vm_phys_add_page(pa);
379 pa += PAGE_SIZE;
380 }
381 }
382 freeenv(list);
383 return (vaddr);
384 }
385
386 void
387 vm_page_flag_set(vm_page_t m, unsigned short bits)
388 {
389
390 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
391 m->flags |= bits;
392 }
393
394 void
395 vm_page_flag_clear(vm_page_t m, unsigned short bits)
396 {
397
398 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
399 m->flags &= ~bits;
400 }
401
402 void
403 vm_page_busy(vm_page_t m)
404 {
405
406 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
407 KASSERT((m->oflags & VPO_BUSY) == 0,
408 ("vm_page_busy: page already busy!!!"));
409 m->oflags |= VPO_BUSY;
410 }
411
412 /*
413 * vm_page_flash:
414 *
415 * wakeup anyone waiting for the page.
416 */
417 void
418 vm_page_flash(vm_page_t m)
419 {
420
421 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
422 if (m->oflags & VPO_WANTED) {
423 m->oflags &= ~VPO_WANTED;
424 wakeup(m);
425 }
426 }
427
428 /*
429 * vm_page_wakeup:
430 *
431 * clear the VPO_BUSY flag and wakeup anyone waiting for the
432 * page.
433 *
434 */
435 void
436 vm_page_wakeup(vm_page_t m)
437 {
438
439 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
440 KASSERT(m->oflags & VPO_BUSY, ("vm_page_wakeup: page not busy!!!"));
441 m->oflags &= ~VPO_BUSY;
442 vm_page_flash(m);
443 }
444
445 void
446 vm_page_io_start(vm_page_t m)
447 {
448
449 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
450 m->busy++;
451 }
452
453 void
454 vm_page_io_finish(vm_page_t m)
455 {
456
457 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
458 m->busy--;
459 if (m->busy == 0)
460 vm_page_flash(m);
461 }
462
463 /*
464 * Keep the page from being freed by the page daemon.  This has
465 * much the same effect as wiring, except with much lower overhead,
466 * and should be used only for *very* temporary holding
467 * ("wiring").
468 */
469 void
470 vm_page_hold(vm_page_t mem)
471 {
472
473 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
474 mem->hold_count++;
475 }
476
477 void
478 vm_page_unhold(vm_page_t mem)
479 {
480
481 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
482 --mem->hold_count;
483 KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!"));
484 if (mem->hold_count == 0 && VM_PAGE_INQUEUE2(mem, PQ_HOLD))
485 vm_page_free_toq(mem);
486 }
487
488 /*
489 * vm_page_free:
490 *
491 * Free a page.
492 */
493 void
494 vm_page_free(vm_page_t m)
495 {
496
497 m->flags &= ~PG_ZERO;
498 vm_page_free_toq(m);
499 }
500
501 /*
502 * vm_page_free_zero:
503 *
504 * Free a page to the zeroed-pages queue
505 */
506 void
507 vm_page_free_zero(vm_page_t m)
508 {
509
510 m->flags |= PG_ZERO;
511 vm_page_free_toq(m);
512 }
513
514 /*
515 * vm_page_sleep:
516 *
517 * Sleep and release the page queues lock.
518 *
519 * The object containing the given page must be locked.
520 */
521 void
522 vm_page_sleep(vm_page_t m, const char *msg)
523 {
524
525 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
526 if (!mtx_owned(&vm_page_queue_mtx))
527 vm_page_lock_queues();
528 vm_page_flag_set(m, PG_REFERENCED);
529 vm_page_unlock_queues();
530
531 /*
532 * It's possible that while we sleep, the page will get
533 * unbusied and freed. If we are holding the object
534 * lock, we will assume we hold a reference to the object
535 * such that even if m->object changes, we can re-lock
536 * it.
537 */
538 m->oflags |= VPO_WANTED;
539 msleep(m, VM_OBJECT_MTX(m->object), PVM, msg, 0);
540 }
541
542 /*
543 * vm_page_dirty:
544 *
545 * make page all dirty
546 */
547 void
548 vm_page_dirty(vm_page_t m)
549 {
550 KASSERT((m->flags & PG_CACHED) == 0,
551 ("vm_page_dirty: page in cache!"));
552 KASSERT(!VM_PAGE_IS_FREE(m),
553 ("vm_page_dirty: page is free!"));
554 m->dirty = VM_PAGE_BITS_ALL;
555 }
556
557 /*
558 * vm_page_splay:
559 *
560 * Implements Sleator and Tarjan's top-down splay algorithm. Returns
561 * the vm_page containing the given pindex. If, however, that
562 * pindex is not found in the vm_object, returns a vm_page that is
563 * adjacent to the pindex, coming before or after it.
564 */
565 vm_page_t
566 vm_page_splay(vm_pindex_t pindex, vm_page_t root)
567 {
568 struct vm_page dummy;
569 vm_page_t lefttreemax, righttreemin, y;
570
571 if (root == NULL)
572 return (root);
573 lefttreemax = righttreemin = &dummy;
574 for (;; root = y) {
575 if (pindex < root->pindex) {
576 if ((y = root->left) == NULL)
577 break;
578 if (pindex < y->pindex) {
579 /* Rotate right. */
580 root->left = y->right;
581 y->right = root;
582 root = y;
583 if ((y = root->left) == NULL)
584 break;
585 }
586 /* Link into the new root's right tree. */
587 righttreemin->left = root;
588 righttreemin = root;
589 } else if (pindex > root->pindex) {
590 if ((y = root->right) == NULL)
591 break;
592 if (pindex > y->pindex) {
593 /* Rotate left. */
594 root->right = y->left;
595 y->left = root;
596 root = y;
597 if ((y = root->right) == NULL)
598 break;
599 }
600 /* Link into the new root's left tree. */
601 lefttreemax->right = root;
602 lefttreemax = root;
603 } else
604 break;
605 }
606 /* Assemble the new root. */
607 lefttreemax->right = root->left;
608 righttreemin->left = root->right;
609 root->left = dummy.right;
610 root->right = dummy.left;
611 return (root);
612 }
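/*
 * For example (illustrative only), given a three-page tree whose root
 * has pindex 8, a left child with pindex 3, and a right child with
 * pindex 12, vm_page_splay(3, root) rotates the pindex 3 page to the
 * root and returns it.  vm_page_splay(5, root) also returns the pindex
 * 3 page, since no page with pindex 5 exists; callers such as
 * vm_page_lookup() therefore re-check the returned page's pindex.
 */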
613
614 /*
615 * vm_page_insert: [ internal use only ]
616 *
617 * Inserts the given mem entry into the object and object list.
618 *
619 * The pagetables are not updated but will presumably fault the page
620 * in if necessary, or if a kernel page the caller will at some point
621 * enter the page into the kernel's pmap. We are not allowed to block
622 * here so we *can't* do this anyway.
623 *
624 * The object and page must be locked.
625 * This routine may not block.
626 */
627 void
628 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
629 {
630 vm_page_t root;
631
632 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
633 if (m->object != NULL)
634 panic("vm_page_insert: page already inserted");
635
636 /*
637 * Record the object/offset pair in this page
638 */
639 m->object = object;
640 m->pindex = pindex;
641
642 /*
643 * Now link into the object's ordered list of backed pages.
644 */
645 root = object->root;
646 if (root == NULL) {
647 m->left = NULL;
648 m->right = NULL;
649 TAILQ_INSERT_TAIL(&object->memq, m, listq);
650 } else {
651 root = vm_page_splay(pindex, root);
652 if (pindex < root->pindex) {
653 m->left = root->left;
654 m->right = root;
655 root->left = NULL;
656 TAILQ_INSERT_BEFORE(root, m, listq);
657 } else if (pindex == root->pindex)
658 panic("vm_page_insert: offset already allocated");
659 else {
660 m->right = root->right;
661 m->left = root;
662 root->right = NULL;
663 TAILQ_INSERT_AFTER(&object->memq, root, m, listq);
664 }
665 }
666 object->root = m;
667 object->generation++;
668
669 /*
670 * show that the object has one more resident page.
671 */
672 object->resident_page_count++;
673 /*
674 * Hold the vnode until the last page is released.
675 */
676 if (object->resident_page_count == 1 && object->type == OBJT_VNODE)
677 vhold((struct vnode *)object->handle);
678
679 /*
680 * Since we are inserting a new and possibly dirty page,
681 * update the object's OBJ_MIGHTBEDIRTY flag.
682 */
683 if (m->flags & PG_WRITEABLE)
684 vm_object_set_writeable_dirty(object);
685 }
686
687 /*
688 * vm_page_remove:
689 * NOTE: used by device pager as well -wfj
690 *
691 * Removes the given mem entry from the object/offset-page
692 * table and the object page list, but do not invalidate/terminate
693 * the backing store.
694 *
695 * The object and page must be locked.
696 * The underlying pmap entry (if any) is NOT removed here.
697 * This routine may not block.
698 */
699 void
700 vm_page_remove(vm_page_t m)
701 {
702 vm_object_t object;
703 vm_page_t root;
704
705 if ((object = m->object) == NULL)
706 return;
707 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
708 if (m->oflags & VPO_BUSY) {
709 m->oflags &= ~VPO_BUSY;
710 vm_page_flash(m);
711 }
712 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
713
714 /*
715 * Now remove from the object's list of backed pages.
716 */
717 if (m != object->root)
718 vm_page_splay(m->pindex, object->root);
719 if (m->left == NULL)
720 root = m->right;
721 else {
722 root = vm_page_splay(m->pindex, m->left);
723 root->right = m->right;
724 }
725 object->root = root;
726 TAILQ_REMOVE(&object->memq, m, listq);
727
728 /*
729 * And show that the object has one fewer resident page.
730 */
731 object->resident_page_count--;
732 object->generation++;
733 /*
734 * The vnode may now be recycled.
735 */
736 if (object->resident_page_count == 0 && object->type == OBJT_VNODE)
737 vdrop((struct vnode *)object->handle);
738
739 m->object = NULL;
740 }
741
742 /*
743 * vm_page_lookup:
744 *
745 * Returns the page associated with the object/offset
746 * pair specified; if none is found, NULL is returned.
747 *
748 * The object must be locked.
749 * This routine may not block.
750 * This is a critical path routine
751 */
752 vm_page_t
753 vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
754 {
755 vm_page_t m;
756
757 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
758 if ((m = object->root) != NULL && m->pindex != pindex) {
759 m = vm_page_splay(pindex, m);
760 if ((object->root = m)->pindex != pindex)
761 m = NULL;
762 }
763 return (m);
764 }
765
766 /*
767 * vm_page_rename:
768 *
769 * Move the given memory entry from its
770 * current object to the specified target object/offset.
771 *
772 * The object must be locked.
773 * This routine may not block.
774 *
775 * Note: swap associated with the page must be invalidated by the move. We
776 * have to do this for several reasons: (1) we aren't freeing the
777 * page, (2) we are dirtying the page, (3) the VM system is probably
778 * moving the page from object A to B, and will then later move
779 * the backing store from A to B and we can't have a conflict.
780 *
781 * Note: we *always* dirty the page. It is necessary both for the
782 * fact that we moved it, and because we may be invalidating
783 * swap. If the page is on the cache, we have to deactivate it
784 * or vm_page_dirty() will panic. Dirty pages are not allowed
785 * on the cache.
786 */
787 void
788 vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
789 {
790
791 vm_page_remove(m);
792 vm_page_insert(m, new_object, new_pindex);
793 vm_page_dirty(m);
794 }
795
796 /*
797 * Convert all of the given object's cached pages that have a
798 * pindex within the given range into free pages. If the value
799 * zero is given for "end", then the range's upper bound is
800 * infinity. If the given object is backed by a vnode and it
801 * transitions from having one or more cached pages to none, the
802 * vnode's hold count is reduced.
803 */
804 void
805 vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
806 {
807 vm_page_t m, m_next;
808 boolean_t empty;
809
810 mtx_lock(&vm_page_queue_free_mtx);
811 if (__predict_false(object->cache == NULL)) {
812 mtx_unlock(&vm_page_queue_free_mtx);
813 return;
814 }
815 m = object->cache = vm_page_splay(start, object->cache);
816 if (m->pindex < start) {
817 if (m->right == NULL)
818 m = NULL;
819 else {
820 m_next = vm_page_splay(start, m->right);
821 m_next->left = m;
822 m->right = NULL;
823 m = object->cache = m_next;
824 }
825 }
826
827 /*
828 * At this point, "m" is either (1) a reference to the page
829 * with the least pindex that is greater than or equal to
830 * "start" or (2) NULL.
831 */
832 for (; m != NULL && (m->pindex < end || end == 0); m = m_next) {
833 /*
834 * Find "m"'s successor and remove "m" from the
835 * object's cache.
836 */
837 if (m->right == NULL) {
838 object->cache = m->left;
839 m_next = NULL;
840 } else {
841 m_next = vm_page_splay(start, m->right);
842 m_next->left = m->left;
843 object->cache = m_next;
844 }
845 /* Convert "m" to a free page. */
846 m->object = NULL;
847 m->valid = 0;
848 /* Clear PG_CACHED and set PG_FREE. */
849 m->flags ^= PG_CACHED | PG_FREE;
850 KASSERT((m->flags & (PG_CACHED | PG_FREE)) == PG_FREE,
851 ("vm_page_cache_free: page %p has inconsistent flags", m));
852 cnt.v_cache_count--;
853 cnt.v_free_count++;
854 }
855 empty = object->cache == NULL;
856 mtx_unlock(&vm_page_queue_free_mtx);
857 if (object->type == OBJT_VNODE && empty)
858 vdrop(object->handle);
859 }
860
861 /*
862 * Returns the cached page that is associated with the given
863 * object and offset. If, however, none exists, returns NULL.
864 *
865 * The free page queue must be locked.
866 */
867 static inline vm_page_t
868 vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
869 {
870 vm_page_t m;
871
872 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
873 if ((m = object->cache) != NULL && m->pindex != pindex) {
874 m = vm_page_splay(pindex, m);
875 if ((object->cache = m)->pindex != pindex)
876 m = NULL;
877 }
878 return (m);
879 }
880
881 /*
882 * Remove the given cached page from its containing object's
883 * collection of cached pages.
884 *
885 * The free page queue must be locked.
886 */
887 void
888 vm_page_cache_remove(vm_page_t m)
889 {
890 vm_object_t object;
891 vm_page_t root;
892
893 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
894 KASSERT((m->flags & PG_CACHED) != 0,
895 ("vm_page_cache_remove: page %p is not cached", m));
896 object = m->object;
897 if (m != object->cache) {
898 root = vm_page_splay(m->pindex, object->cache);
899 KASSERT(root == m,
900 ("vm_page_cache_remove: page %p is not cached in object %p",
901 m, object));
902 }
903 if (m->left == NULL)
904 root = m->right;
905 else if (m->right == NULL)
906 root = m->left;
907 else {
908 root = vm_page_splay(m->pindex, m->left);
909 root->right = m->right;
910 }
911 object->cache = root;
912 m->object = NULL;
913 cnt.v_cache_count--;
914 }
915
916 /*
917 * Transfer all of the cached pages with offset greater than or
918 * equal to 'offidxstart' from the original object's cache to the
919 * new object's cache. However, any cached pages with offset
920 * greater than or equal to the new object's size are kept in the
921 * original object. Initially, the new object's cache must be
922 * empty. Offset 'offidxstart' in the original object must
923 * correspond to offset zero in the new object.
924 *
925 * The new object must be locked.
926 */
927 void
928 vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
929 vm_object_t new_object)
930 {
931 vm_page_t m, m_next;
932
933 /*
934 * Insertion into an object's collection of cached pages
935 * requires the object to be locked. In contrast, removal does
936 * not.
937 */
938 VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
939 KASSERT(new_object->cache == NULL,
940 ("vm_page_cache_transfer: object %p has cached pages",
941 new_object));
942 mtx_lock(&vm_page_queue_free_mtx);
943 if ((m = orig_object->cache) != NULL) {
944 /*
945 * Transfer all of the pages with offset greater than or
946 * equal to 'offidxstart' from the original object's
947 * cache to the new object's cache.
948 */
949 m = vm_page_splay(offidxstart, m);
950 if (m->pindex < offidxstart) {
951 orig_object->cache = m;
952 new_object->cache = m->right;
953 m->right = NULL;
954 } else {
955 orig_object->cache = m->left;
956 new_object->cache = m;
957 m->left = NULL;
958 }
959 while ((m = new_object->cache) != NULL) {
960 if ((m->pindex - offidxstart) >= new_object->size) {
961 /*
962 * Return all of the cached pages with
963 * offset greater than or equal to the
964 * new object's size to the original
965 * object's cache.
966 */
967 new_object->cache = m->left;
968 m->left = orig_object->cache;
969 orig_object->cache = m;
970 break;
971 }
972 m_next = vm_page_splay(m->pindex, m->right);
973 /* Update the page's object and offset. */
974 m->object = new_object;
975 m->pindex -= offidxstart;
976 if (m_next == NULL)
977 break;
978 m->right = NULL;
979 m_next->left = m;
980 new_object->cache = m_next;
981 }
982 KASSERT(new_object->cache == NULL ||
983 new_object->type == OBJT_SWAP,
984 ("vm_page_cache_transfer: object %p's type is incompatible"
985 " with cached pages", new_object));
986 }
987 mtx_unlock(&vm_page_queue_free_mtx);
988 }
989
990 /*
991 * vm_page_alloc:
992 *
993 * Allocate and return a memory cell associated
994 * with this VM object/offset pair.
995 *
996 * page_req classes:
997 * VM_ALLOC_NORMAL normal process request
998 * VM_ALLOC_SYSTEM system *really* needs a page
999 * VM_ALLOC_INTERRUPT interrupt time request
1000 * VM_ALLOC_ZERO zero page
1001 *
1002 * This routine may not block.
1003 */
1004 vm_page_t
1005 vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
1006 {
1007 struct vnode *vp = NULL;
1008 vm_object_t m_object;
1009 vm_page_t m;
1010 int flags, page_req;
1011
1012 page_req = req & VM_ALLOC_CLASS_MASK;
1013 KASSERT(curthread->td_intr_nesting_level == 0 ||
1014 page_req == VM_ALLOC_INTERRUPT,
1015 ("vm_page_alloc(NORMAL|SYSTEM) in interrupt context"));
1016
1017 if ((req & VM_ALLOC_NOOBJ) == 0) {
1018 KASSERT(object != NULL,
1019 ("vm_page_alloc: NULL object."));
1020 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1021 }
1022
1023 /*
1024 * The pager is allowed to eat deeper into the free page list.
1025 */
1026 if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
1027 page_req = VM_ALLOC_SYSTEM;
1028 }
1029
1030 mtx_lock(&vm_page_queue_free_mtx);
1031 if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
1032 (page_req == VM_ALLOC_SYSTEM &&
1033 cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
1034 (page_req == VM_ALLOC_INTERRUPT &&
1035 cnt.v_free_count + cnt.v_cache_count > 0)) {
1036 /*
1037 * Allocate from the free queue if the number of free pages
1038 * exceeds the minimum for the request class.
1039 */
1040 if (object != NULL &&
1041 (m = vm_page_cache_lookup(object, pindex)) != NULL) {
1042 if ((req & VM_ALLOC_IFNOTCACHED) != 0) {
1043 mtx_unlock(&vm_page_queue_free_mtx);
1044 return (NULL);
1045 }
1046 vm_phys_unfree_page(m);
1047 vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, 0);
1048 } else if ((req & VM_ALLOC_IFCACHED) != 0) {
1049 mtx_unlock(&vm_page_queue_free_mtx);
1050 return (NULL);
1051 } else
1052 m = vm_phys_alloc_pages(object != NULL ?
1053 VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
1054 } else {
1055 /*
1056 * Not allocatable, give up.
1057 */
1058 mtx_unlock(&vm_page_queue_free_mtx);
1059 atomic_add_int(&vm_pageout_deficit, 1);
1060 pagedaemon_wakeup();
1061 return (NULL);
1062 }
1063
1064 /*
1065 * At this point we had better have found a good page.
1066 */
1067
1068 KASSERT(
1069 m != NULL,
1070 ("vm_page_alloc(): missing page on free queue")
1071 );
1072 if ((m->flags & PG_CACHED) != 0) {
1073 KASSERT(m->valid != 0,
1074 ("vm_page_alloc: cached page %p is invalid", m));
1075 if (m->object == object && m->pindex == pindex)
1076 cnt.v_reactivated++;
1077 else
1078 m->valid = 0;
1079 m_object = m->object;
1080 vm_page_cache_remove(m);
1081 if (m_object->type == OBJT_VNODE && m_object->cache == NULL)
1082 vp = m_object->handle;
1083 } else {
1084 KASSERT(VM_PAGE_IS_FREE(m),
1085 ("vm_page_alloc: page %p is not free", m));
1086 KASSERT(m->valid == 0,
1087 ("vm_page_alloc: free page %p is valid", m));
1088 cnt.v_free_count--;
1089 }
1090
1091 /*
1092 * Initialize structure. Only the PG_ZERO flag is inherited.
1093 */
1094 flags = 0;
1095 if (m->flags & PG_ZERO) {
1096 vm_page_zero_count--;
1097 if (req & VM_ALLOC_ZERO)
1098 flags = PG_ZERO;
1099 }
1100 if (object == NULL || object->type == OBJT_PHYS)
1101 flags |= PG_UNMANAGED;
1102 m->flags = flags;
1103 if (req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ))
1104 m->oflags = 0;
1105 else
1106 m->oflags = VPO_BUSY;
1107 if (req & VM_ALLOC_WIRED) {
1108 atomic_add_int(&cnt.v_wire_count, 1);
1109 m->wire_count = 1;
1110 } else
1111 m->wire_count = 0;
1112 m->hold_count = 0;
1113 m->act_count = 0;
1114 m->busy = 0;
1115 KASSERT(m->dirty == 0, ("vm_page_alloc: free/cache page %p was dirty", m));
1116 mtx_unlock(&vm_page_queue_free_mtx);
1117
1118 if ((req & VM_ALLOC_NOOBJ) == 0)
1119 vm_page_insert(m, object, pindex);
1120 else
1121 m->pindex = pindex;
1122
1123 /*
1124 * The following call to vdrop() must come after the above call
1125 * to vm_page_insert() in case both affect the same object and
1126 * vnode. Otherwise, the affected vnode's hold count could
1127 * temporarily become zero.
1128 */
1129 if (vp != NULL)
1130 vdrop(vp);
1131
1132 /*
1133 * Don't wakeup too often - wakeup the pageout daemon when
1134 * we would be nearly out of memory.
1135 */
1136 if (vm_paging_needed())
1137 pagedaemon_wakeup();
1138
1139 return (m);
1140 }
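/*
 * A minimal caller-side sketch (illustrative only, assuming a locked
 * vm_object "object" and a caller-chosen "pindex"): retry allocation
 * with VM_WAIT when the free page count is too low, and zero the page
 * explicitly unless a pre-zeroed page was handed back:
 *
 *	while ((m = vm_page_alloc(object, pindex,
 *	    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 *		VM_OBJECT_UNLOCK(object);
 *		VM_WAIT;
 *		VM_OBJECT_LOCK(object);
 *	}
 *	if ((m->flags & PG_ZERO) == 0)
 *		pmap_zero_page(m);
 *
 * VM_ALLOC_ZERO only expresses a preference; PG_ZERO must still be
 * checked, as done above.
 */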
1141
1142 /*
1143 * vm_wait: (also see VM_WAIT macro)
1144 *
1145 * Block until free pages are available for allocation
1146 * - Called in various places before memory allocations.
1147 */
1148 void
1149 vm_wait(void)
1150 {
1151
1152 mtx_lock(&vm_page_queue_free_mtx);
1153 if (curproc == pageproc) {
1154 vm_pageout_pages_needed = 1;
1155 msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx,
1156 PDROP | PSWP, "VMWait", 0);
1157 } else {
1158 if (!vm_pages_needed) {
1159 vm_pages_needed = 1;
1160 wakeup(&vm_pages_needed);
1161 }
1162 msleep(&cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PVM,
1163 "vmwait", 0);
1164 }
1165 }
1166
1167 /*
1168 * vm_waitpfault: (also see VM_WAITPFAULT macro)
1169 *
1170 * Block until free pages are available for allocation
1171 * - Called only in vm_fault so that processes page faulting
1172 * can be easily tracked.
1173 * - Sleeps at a lower priority than vm_wait() so that vm_wait()ing
1174 * processes will be able to grab memory first. Do not change
1175 * this balance without careful testing first.
1176 */
1177 void
1178 vm_waitpfault(void)
1179 {
1180
1181 mtx_lock(&vm_page_queue_free_mtx);
1182 if (!vm_pages_needed) {
1183 vm_pages_needed = 1;
1184 wakeup(&vm_pages_needed);
1185 }
1186 msleep(&cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PUSER,
1187 "pfault", 0);
1188 }
1189
1190 /*
1191 * vm_page_activate:
1192 *
1193 * Put the specified page on the active list (if appropriate).
1194 * Ensure that act_count is at least ACT_INIT but do not otherwise
1195 * mess with it.
1196 *
1197 * The page queues must be locked.
1198 * This routine may not block.
1199 */
1200 void
1201 vm_page_activate(vm_page_t m)
1202 {
1203
1204 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1205 if (VM_PAGE_GETKNOWNQUEUE2(m) != PQ_ACTIVE) {
1206 vm_pageq_remove(m);
1207 if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
1208 if (m->act_count < ACT_INIT)
1209 m->act_count = ACT_INIT;
1210 vm_pageq_enqueue(PQ_ACTIVE, m);
1211 }
1212 } else {
1213 if (m->act_count < ACT_INIT)
1214 m->act_count = ACT_INIT;
1215 }
1216 }
1217
1218 /*
1219 * vm_page_free_wakeup:
1220 *
1221 * Helper routine for vm_page_free_toq() and vm_page_cache(). This
1222 * routine is called when a page has been added to the cache or free
1223 * queues.
1224 *
1225 * The page queues must be locked.
1226 * This routine may not block.
1227 */
1228 static inline void
1229 vm_page_free_wakeup(void)
1230 {
1231
1232 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
1233 /*
1234 * if pageout daemon needs pages, then tell it that there are
1235 * some free.
1236 */
1237 if (vm_pageout_pages_needed &&
1238 cnt.v_cache_count + cnt.v_free_count >= cnt.v_pageout_free_min) {
1239 wakeup(&vm_pageout_pages_needed);
1240 vm_pageout_pages_needed = 0;
1241 }
1242 /*
1243 * Wake up processes that are waiting on memory if we hit a
1244 * high water mark.  Also wake up the scheduler process if we
1245 * have lots of memory; that process will swap in processes.
1246 */
1247 if (vm_pages_needed && !vm_page_count_min()) {
1248 vm_pages_needed = 0;
1249 wakeup(&cnt.v_free_count);
1250 }
1251 }
1252
1253 /*
1254 * vm_page_free_toq:
1255 *
1256 * Returns the given page to the free list,
1257 * disassociating it with any VM object.
1258 *
1259 * Object and page must be locked prior to entry.
1260 * This routine may not block.
1261 */
1262
1263 void
1264 vm_page_free_toq(vm_page_t m)
1265 {
1266
1267 if (VM_PAGE_GETQUEUE(m) != PQ_NONE)
1268 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1269 KASSERT(!pmap_page_is_mapped(m),
1270 ("vm_page_free_toq: freeing mapped page %p", m));
1271 PCPU_INC(cnt.v_tfree);
1272
1273 if (m->busy || VM_PAGE_IS_FREE(m)) {
1274 printf(
1275 "vm_page_free: pindex(%lu), busy(%d), VPO_BUSY(%d), hold(%d)\n",
1276 (u_long)m->pindex, m->busy, (m->oflags & VPO_BUSY) ? 1 : 0,
1277 m->hold_count);
1278 if (VM_PAGE_IS_FREE(m))
1279 panic("vm_page_free: freeing free page");
1280 else
1281 panic("vm_page_free: freeing busy page");
1282 }
1283
1284 /*
1285 * unqueue, then remove page. Note that we cannot destroy
1286 * the page here because we do not want to call the pager's
1287 * callback routine until after we've put the page on the
1288 * appropriate free queue.
1289 */
1290 vm_pageq_remove(m);
1291 vm_page_remove(m);
1292
1293 /*
1294 * If the page is fictitious, remove the object association and
1295 * return; otherwise, delay object association removal.
1296 */
1297 if ((m->flags & PG_FICTITIOUS) != 0) {
1298 return;
1299 }
1300
1301 m->valid = 0;
1302 vm_page_undirty(m);
1303
1304 if (m->wire_count != 0) {
1305 if (m->wire_count > 1) {
1306 panic("vm_page_free: invalid wire count (%d), pindex: 0x%lx",
1307 m->wire_count, (long)m->pindex);
1308 }
1309 panic("vm_page_free: freeing wired page");
1310 }
1311 if (m->hold_count != 0) {
1312 m->flags &= ~PG_ZERO;
1313 vm_pageq_enqueue(PQ_HOLD, m);
1314 } else {
1315 m->flags |= PG_FREE;
1316 mtx_lock(&vm_page_queue_free_mtx);
1317 cnt.v_free_count++;
1318 if ((m->flags & PG_ZERO) != 0) {
1319 vm_phys_free_pages(m, 0);
1320 ++vm_page_zero_count;
1321 } else {
1322 vm_phys_free_pages(m, 0);
1323 vm_page_zero_idle_wakeup();
1324 }
1325 vm_page_free_wakeup();
1326 mtx_unlock(&vm_page_queue_free_mtx);
1327 }
1328 }
1329
1330 /*
1331 * vm_page_wire:
1332 *
1333 * Mark this page as wired down by yet
1334 * another map, removing it from paging queues
1335 * as necessary.
1336 *
1337 * The page queues must be locked.
1338 * This routine may not block.
1339 */
1340 void
1341 vm_page_wire(vm_page_t m)
1342 {
1343
1344 /*
1345 * Only bump the wire statistics if the page is not already wired,
1346 * and only unqueue the page if it is on some queue (if it is unmanaged
1347 * it is already off the queues).
1348 */
1349 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1350 if (m->flags & PG_FICTITIOUS)
1351 return;
1352 if (m->wire_count == 0) {
1353 if ((m->flags & PG_UNMANAGED) == 0)
1354 vm_pageq_remove(m);
1355 atomic_add_int(&cnt.v_wire_count, 1);
1356 }
1357 m->wire_count++;
1358 KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m));
1359 }
1360
1361 /*
1362 * vm_page_unwire:
1363 *
1364 * Release one wiring of this page, potentially
1365 * enabling it to be paged again.
1366 *
1367 * Many pages placed on the inactive queue should actually go
1368 * into the cache, but it is difficult to figure out which. What
1369 * we do instead, if the inactive target is well met, is to put
1370 * clean pages at the head of the inactive queue instead of the tail.
1371 * This will cause them to be moved to the cache more quickly and
1372 * if not actively re-referenced, freed more quickly. If we just
1373 * stick these pages at the end of the inactive queue, heavy filesystem
1374 * meta-data accesses can cause an unnecessary paging load on memory bound
1375 * processes. This optimization causes one-time-use metadata to be
1376 * reused more quickly.
1377 *
1378 * BUT, if we are in a low-memory situation we have no choice but to
1379 * put clean pages on the cache queue.
1380 *
1381 * A number of routines use vm_page_unwire() to guarantee that the page
1382 * will go into either the inactive or active queues, and will NEVER
1383 * be placed in the cache - for example, just after dirtying a page.
1384 * dirty pages in the cache are not allowed.
1385 *
1386 * The page queues must be locked.
1387 * This routine may not block.
1388 */
1389 void
1390 vm_page_unwire(vm_page_t m, int activate)
1391 {
1392
1393 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1394 if (m->flags & PG_FICTITIOUS)
1395 return;
1396 if (m->wire_count > 0) {
1397 m->wire_count--;
1398 if (m->wire_count == 0) {
1399 atomic_subtract_int(&cnt.v_wire_count, 1);
1400 if (m->flags & PG_UNMANAGED) {
1401 ;
1402 } else if (activate)
1403 vm_pageq_enqueue(PQ_ACTIVE, m);
1404 else {
1405 vm_page_flag_clear(m, PG_WINATCFLS);
1406 vm_pageq_enqueue(PQ_INACTIVE, m);
1407 }
1408 }
1409 } else {
1410 panic("vm_page_unwire: invalid wire count: %d", m->wire_count);
1411 }
1412 }
1413
1414
1415 /*
1416 * Move the specified page to the inactive queue. If the page has
1417 * any associated swap, the swap is deallocated.
1418 *
1419 * Normally athead is 0 resulting in LRU operation. athead is set
1420 * to 1 if we want this page to be 'as if it were placed in the cache',
1421 * except without unmapping it from the process address space.
1422 *
1423 * This routine may not block.
1424 */
1425 static inline void
1426 _vm_page_deactivate(vm_page_t m, int athead)
1427 {
1428
1429 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1430
1431 /*
1432 * Ignore if already inactive.
1433 */
1434 if (VM_PAGE_INQUEUE2(m, PQ_INACTIVE))
1435 return;
1436 if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
1437 vm_page_flag_clear(m, PG_WINATCFLS);
1438 vm_pageq_remove(m);
1439 if (athead)
1440 TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
1441 else
1442 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
1443 VM_PAGE_SETQUEUE2(m, PQ_INACTIVE);
1444 cnt.v_inactive_count++;
1445 }
1446 }
1447
1448 void
1449 vm_page_deactivate(vm_page_t m)
1450 {
1451 _vm_page_deactivate(m, 0);
1452 }
1453
1454 /*
1455 * vm_page_try_to_cache:
1456 *
1457 * Returns 0 on failure, 1 on success
1458 */
1459 int
1460 vm_page_try_to_cache(vm_page_t m)
1461 {
1462
1463 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1464 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1465 if (m->dirty || m->hold_count || m->busy || m->wire_count ||
1466 (m->oflags & VPO_BUSY) || (m->flags & PG_UNMANAGED)) {
1467 return (0);
1468 }
1469 pmap_remove_all(m);
1470 if (m->dirty)
1471 return (0);
1472 vm_page_cache(m);
1473 return (1);
1474 }
1475
1476 /*
1477 * vm_page_try_to_free()
1478 *
1479 * Attempt to free the page. If we cannot free it, we do nothing.
1480 * 1 is returned on success, 0 on failure.
1481 */
1482 int
1483 vm_page_try_to_free(vm_page_t m)
1484 {
1485
1486 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1487 if (m->object != NULL)
1488 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1489 if (m->dirty || m->hold_count || m->busy || m->wire_count ||
1490 (m->oflags & VPO_BUSY) || (m->flags & PG_UNMANAGED)) {
1491 return (0);
1492 }
1493 pmap_remove_all(m);
1494 if (m->dirty)
1495 return (0);
1496 vm_page_free(m);
1497 return (1);
1498 }
1499
1500 /*
1501 * vm_page_cache
1502 *
1503 * Put the specified page onto the page cache queue (if appropriate).
1504 *
1505 * This routine may not block.
1506 */
1507 void
1508 vm_page_cache(vm_page_t m)
1509 {
1510 vm_object_t object;
1511 vm_page_t root;
1512
1513 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1514 object = m->object;
1515 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1516 if ((m->flags & PG_UNMANAGED) || (m->oflags & VPO_BUSY) || m->busy ||
1517 m->hold_count || m->wire_count) {
1518 panic("vm_page_cache: attempting to cache busy page");
1519 }
1520 pmap_remove_all(m);
1521 if (m->dirty != 0)
1522 panic("vm_page_cache: page %p is dirty", m);
1523 if (m->valid == 0 || object->type == OBJT_DEFAULT ||
1524 (object->type == OBJT_SWAP &&
1525 !vm_pager_has_page(object, m->pindex, NULL, NULL))) {
1526 /*
1527 * Hypothesis: A cache-eligible page belonging to a
1528 * default object or swap object but without a backing
1529 * store must be zero filled.
1530 */
1531 vm_page_free(m);
1532 return;
1533 }
1534 KASSERT((m->flags & PG_CACHED) == 0,
1535 ("vm_page_cache: page %p is already cached", m));
1536 cnt.v_tcached++;
1537
1538 /*
1539 * Remove the page from the paging queues.
1540 */
1541 vm_pageq_remove(m);
1542
1543 /*
1544 * Remove the page from the object's collection of resident
1545 * pages.
1546 */
1547 if (m != object->root)
1548 vm_page_splay(m->pindex, object->root);
1549 if (m->left == NULL)
1550 root = m->right;
1551 else {
1552 root = vm_page_splay(m->pindex, m->left);
1553 root->right = m->right;
1554 }
1555 object->root = root;
1556 TAILQ_REMOVE(&object->memq, m, listq);
1557 object->resident_page_count--;
1558 object->generation++;
1559
1560 /*
1561 * Insert the page into the object's collection of cached pages
1562 * and the physical memory allocator's cache/free page queues.
1563 */
1564 vm_page_flag_set(m, PG_CACHED);
1565 vm_page_flag_clear(m, PG_ZERO);
1566 mtx_lock(&vm_page_queue_free_mtx);
1567 vm_phys_set_pool(VM_FREEPOOL_CACHE, m, 0);
1568 cnt.v_cache_count++;
1569 root = object->cache;
1570 if (root == NULL) {
1571 m->left = NULL;
1572 m->right = NULL;
1573 } else {
1574 root = vm_page_splay(m->pindex, root);
1575 if (m->pindex < root->pindex) {
1576 m->left = root->left;
1577 m->right = root;
1578 root->left = NULL;
1579 } else if (__predict_false(m->pindex == root->pindex))
1580 panic("vm_page_cache: offset already cached");
1581 else {
1582 m->right = root->right;
1583 m->left = root;
1584 root->right = NULL;
1585 }
1586 }
1587 object->cache = m;
1588 vm_phys_free_pages(m, 0);
1589 vm_page_free_wakeup();
1590 mtx_unlock(&vm_page_queue_free_mtx);
1591
1592 /*
1593 * Increment the vnode's hold count if this is the object's only
1594 * cached page. Decrement the vnode's hold count if this was
1595 * the object's only resident page.
1596 */
1597 if (object->type == OBJT_VNODE) {
1598 if (root == NULL && object->resident_page_count != 0)
1599 vhold(object->handle);
1600 else if (root != NULL && object->resident_page_count == 0)
1601 vdrop(object->handle);
1602 }
1603 }
1604
1605 /*
1606 * vm_page_dontneed
1607 *
1608 * Cache, deactivate, or do nothing as appropriate. This routine
1609 * is typically used by madvise() MADV_DONTNEED.
1610 *
1611 * Generally speaking we want to move the page into the cache so
1612 * it gets reused quickly. However, this can result in a silly syndrome
1613 * due to the page recycling too quickly. Small objects will not be
1614 * fully cached. On the other hand, if we move the page to the inactive
1615 * queue we wind up with a problem whereby very large objects
1616 * unnecessarily blow away our inactive and cache queues.
1617 *
1618 * The solution is to move the pages based on a fixed weighting. We
1619 * either leave them alone, deactivate them, or move them to the cache,
1620 * where moving them to the cache has the highest weighting.
1621 * By forcing some pages into other queues we eventually force the
1622 * system to balance the queues, potentially recovering other unrelated
1623 * space from active. The idea is to not force this to happen too
1624 * often.
1625 */
1626 void
1627 vm_page_dontneed(vm_page_t m)
1628 {
1629 static int dnweight;
1630 int dnw;
1631 int head;
1632
1633 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1634 dnw = ++dnweight;
1635
1636 /*
1637 * occasionally leave the page alone
1638 */
1639 if ((dnw & 0x01F0) == 0 ||
1640 VM_PAGE_INQUEUE2(m, PQ_INACTIVE)) {
1641 if (m->act_count >= ACT_INIT)
1642 --m->act_count;
1643 return;
1644 }
1645
1646 if (m->dirty == 0 && pmap_is_modified(m))
1647 vm_page_dirty(m);
1648
1649 if (m->dirty || (dnw & 0x0070) == 0) {
1650 /*
1651 * Deactivate the page 3 times out of 32.
1652 */
1653 head = 0;
1654 } else {
1655 /*
1656 * Cache the page 28 times out of every 32. Note that
1657 * the page is deactivated instead of cached, but placed
1658 * at the head of the queue instead of the tail.
1659 */
1660 head = 1;
1661 }
1662 _vm_page_deactivate(m, head);
1663 }
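/*
 * The ratios in the comments above follow from the masks on "dnw", which
 * increments by one per call: (dnw & 0x01F0) == 0 holds for 16 of every
 * 512 values (1/32, the leave-alone case), and (dnw & 0x0070) == 0 holds
 * for 16 of every 128 values (4/32).  Subtracting the overlap with the
 * leave-alone case leaves roughly 3/32 tail deactivations and 28/32
 * head ("cache-like") deactivations for clean pages.
 */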
1664
1665 /*
1666 * Grab a page, waiting until we are woken up due to the page
1667 * changing state. We keep on waiting as long as the page continues
1668 * to be in the object. If the page doesn't exist, first allocate it
1669 * and then conditionally zero it.
1670 *
1671 * This routine may block.
1672 */
1673 vm_page_t
1674 vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
1675 {
1676 vm_page_t m;
1677
1678 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1679 retrylookup:
1680 if ((m = vm_page_lookup(object, pindex)) != NULL) {
1681 if (vm_page_sleep_if_busy(m, TRUE, "pgrbwt")) {
1682 if ((allocflags & VM_ALLOC_RETRY) == 0)
1683 return (NULL);
1684 goto retrylookup;
1685 } else {
1686 if ((allocflags & VM_ALLOC_WIRED) != 0) {
1687 vm_page_lock_queues();
1688 vm_page_wire(m);
1689 vm_page_unlock_queues();
1690 }
1691 if ((allocflags & VM_ALLOC_NOBUSY) == 0)
1692 vm_page_busy(m);
1693 return (m);
1694 }
1695 }
1696 m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
1697 if (m == NULL) {
1698 VM_OBJECT_UNLOCK(object);
1699 VM_WAIT;
1700 VM_OBJECT_LOCK(object);
1701 if ((allocflags & VM_ALLOC_RETRY) == 0)
1702 return (NULL);
1703 goto retrylookup;
1704 } else if (m->valid != 0)
1705 return (m);
1706 if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
1707 pmap_zero_page(m);
1708 return (m);
1709 }
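/*
 * Typical usage (illustrative only, with "object" locked by the caller):
 *
 *	m = vm_page_grab(object, pindex,
 *	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_ZERO);
 *
 * With VM_ALLOC_RETRY the routine loops internally instead of returning
 * NULL, so the caller gets back a busied page without writing its own
 * sleep/retry loop.
 */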
1710
1711 /*
1712 * Mapping function for valid bits or for dirty bits in
1713 * a page. May not block.
1714 *
1715 * Inputs are required to range within a page.
1716 */
1717 int
1718 vm_page_bits(int base, int size)
1719 {
1720 int first_bit;
1721 int last_bit;
1722
1723 KASSERT(
1724 base + size <= PAGE_SIZE,
1725 ("vm_page_bits: illegal base/size %d/%d", base, size)
1726 );
1727
1728 if (size == 0) /* handle degenerate case */
1729 return (0);
1730
1731 first_bit = base >> DEV_BSHIFT;
1732 last_bit = (base + size - 1) >> DEV_BSHIFT;
1733
1734 return ((2 << last_bit) - (1 << first_bit));
1735 }
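/*
 * Worked example (assuming PAGE_SIZE 4096 and DEV_BSIZE 512, so
 * DEV_BSHIFT is 9): vm_page_bits(1024, 1536) computes first_bit =
 * 1024 >> 9 = 2 and last_bit = (1024 + 1536 - 1) >> 9 = 4, giving
 * (2 << 4) - (1 << 2) = 32 - 4 = 0x1c, i.e. bits 2 through 4 set,
 * one bit per DEV_BSIZE chunk touched by the range.
 */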
1736
1737 /*
1738 * vm_page_set_validclean:
1739 *
1740 * Sets portions of a page valid and clean. The arguments are expected
1741 * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive
1742 * of any partial chunks touched by the range. The invalid portion of
1743 * such chunks will be zero'd.
1744 *
1745 * This routine may not block.
1746 *
1747 * (base + size) must be less than or equal to PAGE_SIZE.
1748 */
1749 void
1750 vm_page_set_validclean(vm_page_t m, int base, int size)
1751 {
1752 int pagebits;
1753 int frag;
1754 int endoff;
1755
1756 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1757 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1758 if (size == 0) /* handle degenerate case */
1759 return;
1760
1761 /*
1762 * If the base is not DEV_BSIZE aligned and the valid
1763 * bit is clear, we have to zero out a portion of the
1764 * first block.
1765 */
1766 if ((frag = base & ~(DEV_BSIZE - 1)) != base &&
1767 (m->valid & (1 << (base >> DEV_BSHIFT))) == 0)
1768 pmap_zero_page_area(m, frag, base - frag);
1769
1770 /*
1771 * If the ending offset is not DEV_BSIZE aligned and the
1772 * valid bit is clear, we have to zero out a portion of
1773 * the last block.
1774 */
1775 endoff = base + size;
1776 if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff &&
1777 (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0)
1778 pmap_zero_page_area(m, endoff,
1779 DEV_BSIZE - (endoff & (DEV_BSIZE - 1)));
1780
1781 /*
1782 * Set valid, clear dirty bits. If validating the entire
1783 * page we can safely clear the pmap modify bit. We also
1784 * use this opportunity to clear the VPO_NOSYNC flag. If a process
1785 * takes a write fault on a MAP_NOSYNC memory area the flag will
1786 * be set again.
1787 *
1788 * We set valid bits inclusive of any overlap, but we can only
1789 * clear dirty bits for DEV_BSIZE chunks that are fully within
1790 * the range.
1791 */
1792 pagebits = vm_page_bits(base, size);
1793 m->valid |= pagebits;
1794 #if 0 /* NOT YET */
1795 if ((frag = base & (DEV_BSIZE - 1)) != 0) {
1796 frag = DEV_BSIZE - frag;
1797 base += frag;
1798 size -= frag;
1799 if (size < 0)
1800 size = 0;
1801 }
1802 pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1));
1803 #endif
1804 m->dirty &= ~pagebits;
1805 if (base == 0 && size == PAGE_SIZE) {
1806 pmap_clear_modify(m);
1807 m->oflags &= ~VPO_NOSYNC;
1808 }
1809 }
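/*
 * Worked example (assuming DEV_BSIZE 512 and that the first chunk's
 * valid bit is initially clear): vm_page_set_validclean(m, 100, 200)
 * zeroes the partial head [0, 100) and partial tail [300, 512) of the
 * first DEV_BSIZE chunk, then sets valid bit 0 (vm_page_bits(100, 200)
 * == 0x1) and clears the corresponding dirty bit, so the chunk's
 * invalid portions are zeroed and the chunk is marked valid and clean.
 */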
1810
1811 void
1812 vm_page_clear_dirty(vm_page_t m, int base, int size)
1813 {
1814
1815 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1816 m->dirty &= ~vm_page_bits(base, size);
1817 }
1818
1819 /*
1820 * vm_page_set_invalid:
1821 *
1822 * Invalidates DEV_BSIZE'd chunks within a page. Both the
1823 * valid and dirty bits for the affected areas are cleared.
1824 *
1825 * May not block.
1826 */
1827 void
1828 vm_page_set_invalid(vm_page_t m, int base, int size)
1829 {
1830 int bits;
1831
1832 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1833 bits = vm_page_bits(base, size);
1834 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1835 if (m->valid == VM_PAGE_BITS_ALL && bits != 0)
1836 pmap_remove_all(m);
1837 m->valid &= ~bits;
1838 m->dirty &= ~bits;
1839 m->object->generation++;
1840 }
1841
1842 /*
1843 * vm_page_zero_invalid()
1844 *
1845 * The kernel assumes that the invalid portions of a page contain
1846 * garbage, but such pages can be mapped into memory by user code.
1847 * When this occurs, we must zero out the non-valid portions of the
1848 * page so user code sees what it expects.
1849 *
1850 * Pages are most often semi-valid when the end of a file is mapped
1851 * into memory and the file's size is not page aligned.
1852 */
1853 void
1854 vm_page_zero_invalid(vm_page_t m, boolean_t setvalid)
1855 {
1856 int b;
1857 int i;
1858
1859 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1860 /*
1861 * Scan the valid bits looking for invalid sections that
1862 * must be zeroed. Invalid sub-DEV_BSIZE'd areas (where the
1863 * valid bit may be set) have already been zeroed by
1864 * vm_page_set_validclean().
1865 */
1866 for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) {
1867 if (i == (PAGE_SIZE / DEV_BSIZE) ||
1868 (m->valid & (1 << i))
1869 ) {
1870 if (i > b) {
1871 pmap_zero_page_area(m,
1872 b << DEV_BSHIFT, (i - b) << DEV_BSHIFT);
1873 }
1874 b = i + 1;
1875 }
1876 }
1877
1878 /*
1879 * setvalid is TRUE when we can safely set the zero'd areas
1880 * as being valid. We can do this if there are no cache consistency
1881 * issues, e.g., it is OK to do with UFS, but not OK to do with NFS.
1882 */
1883 if (setvalid)
1884 m->valid = VM_PAGE_BITS_ALL;
1885 }
1886
1887 /*
1888 * vm_page_is_valid:
1889 *
1890 * Is (partial) page valid? Note that the case where size == 0
1891 * will return FALSE in the degenerate case where the page is
1892 * entirely invalid, and TRUE otherwise.
1893 *
1894 * May not block.
1895 */
1896 int
1897 vm_page_is_valid(vm_page_t m, int base, int size)
1898 {
1899 int bits = vm_page_bits(base, size);
1900
1901 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1902 if (m->valid && ((m->valid & bits) == bits))
1903 return 1;
1904 else
1905 return 0;
1906 }
1907
1908 /*
1909 * update dirty bits from pmap/mmu. May not block.
1910 */
1911 void
1912 vm_page_test_dirty(vm_page_t m)
1913 {
1914 if ((m->dirty != VM_PAGE_BITS_ALL) && pmap_is_modified(m)) {
1915 vm_page_dirty(m);
1916 }
1917 }
1918
1919 int so_zerocp_fullpage = 0;
1920
1921 /*
1922 * Replace the given page with a copy. The copied page assumes
1923 * the portion of the given page's "wire_count" that is not the
1924 * responsibility of this copy-on-write mechanism.
1925 *
1926 * The object containing the given page must have a non-zero
1927 * paging-in-progress count and be locked.
1928 */
1929 void
1930 vm_page_cowfault(vm_page_t m)
1931 {
1932 vm_page_t mnew;
1933 vm_object_t object;
1934 vm_pindex_t pindex;
1935
1936 object = m->object;
1937 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1938 KASSERT(object->paging_in_progress != 0,
1939 ("vm_page_cowfault: object %p's paging-in-progress count is zero.",
1940 object));
1941 pindex = m->pindex;
1942
1943 retry_alloc:
1944 pmap_remove_all(m);
1945 vm_page_remove(m);
1946 mnew = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY);
1947 if (mnew == NULL) {
1948 vm_page_insert(m, object, pindex);
1949 vm_page_unlock_queues();
1950 VM_OBJECT_UNLOCK(object);
1951 VM_WAIT;
1952 VM_OBJECT_LOCK(object);
1953 if (m == vm_page_lookup(object, pindex)) {
1954 vm_page_lock_queues();
1955 goto retry_alloc;
1956 } else {
1957 /*
1958 * Page disappeared during the wait.
1959 */
1960 vm_page_lock_queues();
1961 return;
1962 }
1963 }
1964
1965 if (m->cow == 0) {
1966 /*
1967 * check to see if we raced with an xmit complete when
1968 * waiting to allocate a page. If so, put things back
1969 * the way they were
1970 */
1971 vm_page_free(mnew);
1972 vm_page_insert(m, object, pindex);
1973 } else { /* clear COW & copy page */
1974 if (!so_zerocp_fullpage)
1975 pmap_copy_page(m, mnew);
1976 mnew->valid = VM_PAGE_BITS_ALL;
1977 vm_page_dirty(mnew);
1978 mnew->wire_count = m->wire_count - m->cow;
1979 m->wire_count = m->cow;
1980 }
1981 }
1982
1983 void
1984 vm_page_cowclear(vm_page_t m)
1985 {
1986
1987 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1988 if (m->cow) {
1989 m->cow--;
1990 /*
1991 * let vm_fault add back write permission lazily
1992 */
1993 }
1994 /*
1995 * sf_buf_free() will free the page, so we needn't do it here
1996 */
1997 }
1998
1999 void
2000 vm_page_cowsetup(vm_page_t m)
2001 {
2002
2003 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2004 m->cow++;
2005 pmap_remove_write(m);
2006 }
2007
2008 #include "opt_ddb.h"
2009 #ifdef DDB
2010 #include <sys/kernel.h>
2011
2012 #include <ddb/ddb.h>
2013
2014 DB_SHOW_COMMAND(page, vm_page_print_page_info)
2015 {
2016 db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
2017 db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
2018 db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
2019 db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
2020 db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
2021 db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
2022 db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
2023 db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
2024 db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
2025 db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
2026 }
2027
2028 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
2029 {
2030
2031 db_printf("PQ_FREE:");
2032 db_printf(" %d", cnt.v_free_count);
2033 db_printf("\n");
2034
2035 db_printf("PQ_CACHE:");
2036 db_printf(" %d", cnt.v_cache_count);
2037 db_printf("\n");
2038
2039 db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
2040 *vm_page_queues[PQ_ACTIVE].cnt,
2041 *vm_page_queues[PQ_INACTIVE].cnt);
2042 }
2043 #endif /* DDB */