sys/vm/vm_page.c
/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 */

/*-
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *	Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *	School of Computer Science
 *	Carnegie Mellon University
 *	Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	GENERAL RULES ON VM_PAGE MANIPULATION
 *
 *	- a pageq mutex is required when adding or removing a page from a
 *	  page queue (vm_page_queue[]), regardless of other mutexes or the
 *	  busy state of a page.
 *
 *	- The object mutex is held when inserting or removing
 *	  pages from an object (vm_page_insert() or vm_page_remove()).
 *
 */
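
/*
 * For illustration, a typical sequence obeying the rules above takes the
 * page queues lock before touching a queue, with the page lock already
 * held, mirroring what vm_page_activate() does later in this file:
 *
 *	vm_page_lock_queues();
 *	vm_page_queue_remove(queue, m);
 *	vm_page_enqueue(PQ_ACTIVE, m);
 *	vm_page_unlock_queues();
 */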

/*
 * Resident memory management module.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_reserv.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

#include <machine/md_var.h>

/*
 * Associated with each page of user-allocatable memory is a
 * page structure.
 */

struct vpgqueues vm_page_queues[PQ_COUNT];
struct vpglocks vm_page_queue_lock;
struct vpglocks vm_page_queue_free_lock;

struct vpglocks pa_lock[PA_LOCK_COUNT];

vm_page_t vm_page_array;
long vm_page_array_size;
long first_page;
int vm_page_zero_count;

static int boot_pages = UMA_BOOT_PAGES;
TUNABLE_INT("vm.boot_pages", &boot_pages);
SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RD, &boot_pages, 0,
	"number of pages allocated for bootstrapping the VM system");

int pa_tryrelock_restart;
SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
	&pa_tryrelock_restart, 0, "Number of tryrelock restarts");

static uma_zone_t fakepg_zone;

static struct vnode *vm_page_alloc_init(vm_page_t m);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
static void vm_page_queue_remove(int queue, vm_page_t m);
static void vm_page_enqueue(int queue, vm_page_t m);
static void vm_page_init_fakepg(void *dummy);

SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL);

static void
vm_page_init_fakepg(void *dummy)
{

	fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM);
}

/* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */
#if PAGE_SIZE == 32768
#ifdef CTASSERT
CTASSERT(sizeof(u_long) >= 8);
#endif
#endif

/*
 * Try to acquire a physical address lock while a pmap is locked.  If we
 * fail to trylock we unlock and lock the pmap directly and cache the
 * locked pa in *locked.  The caller should then restart their loop in case
 * the virtual to physical mapping has changed.
 */
int
vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked)
{
	vm_paddr_t lockpa;

	lockpa = *locked;
	*locked = pa;
	if (lockpa) {
		PA_LOCK_ASSERT(lockpa, MA_OWNED);
		if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa))
			return (0);
		PA_UNLOCK(lockpa);
	}
	if (PA_TRYLOCK(pa))
		return (0);
	PMAP_UNLOCK(pmap);
	atomic_add_int(&pa_tryrelock_restart, 1);
	PA_LOCK(pa);
	PMAP_LOCK(pmap);
	return (EAGAIN);
}
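
/*
 * A sketch of the retry pattern described above: the caller re-derives
 * the physical address each time around, since the mapping may have
 * changed while the pmap was unlocked ("va" and "locked_pa" are the
 * caller's own variables):
 *
 *	retry:
 *		pa = <physical address for va, read under the pmap lock>;
 *		if (vm_page_pa_tryrelock(pmap, pa, &locked_pa))
 *			goto retry;
 */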

/*
 * vm_set_page_size:
 *
 * Sets the page size, perhaps based upon the memory
 * size.  Must be called before any use of page-size
 * dependent functions.
 */
void
vm_set_page_size(void)
{
	if (cnt.v_page_size == 0)
		cnt.v_page_size = PAGE_SIZE;
	if (((cnt.v_page_size - 1) & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
}

/*
 * vm_page_blacklist_lookup:
 *
 * See if a physical address in this page has been listed
 * in the blacklist tunable.  Entries in the tunable are
 * separated by spaces or commas.  If an invalid integer is
 * encountered then the rest of the string is skipped.
 */
static int
vm_page_blacklist_lookup(char *list, vm_paddr_t pa)
{
	vm_paddr_t bad;
	char *cp, *pos;

	for (pos = list; *pos != '\0'; pos = cp) {
		bad = strtoq(pos, &cp, 0);
		if (*cp != '\0') {
			if (*cp == ' ' || *cp == ',') {
				cp++;
				if (cp == pos)
					continue;
			} else
				break;
		}
		if (pa == trunc_page(bad))
			return (1);
	}
	return (0);
}
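
/*
 * For example, setting the loader tunable (the addresses here are purely
 * illustrative)
 *
 *	vm.blacklist="0x7dd54000,0x7dd55000"
 *
 * keeps those two physical pages from being added to the free lists by
 * vm_page_startup() below.  Space-separated entries work equally well,
 * and strtoq() with a base of 0 also accepts decimal or octal values.
 */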

/*
 * vm_page_startup:
 *
 * Initializes the resident memory module.
 *
 * Allocates memory for the page cells, and
 * for the object/offset-to-page hash table headers.
 * Each page cell is initialized and placed on the free list.
 */
vm_offset_t
vm_page_startup(vm_offset_t vaddr)
{
	vm_offset_t mapped;
	vm_paddr_t page_range;
	vm_paddr_t new_end;
	int i;
	vm_paddr_t pa;
	vm_paddr_t last_pa;
	char *list;

	/* the biggest memory array is the second group of pages */
	vm_paddr_t end;
	vm_paddr_t biggestsize;
	vm_paddr_t low_water, high_water;
	int biggestone;

	biggestsize = 0;
	biggestone = 0;
	vaddr = round_page(vaddr);

	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}

	low_water = phys_avail[0];
	high_water = phys_avail[1];

	for (i = 0; phys_avail[i + 1]; i += 2) {
		vm_paddr_t size = phys_avail[i + 1] - phys_avail[i];

		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
		if (phys_avail[i] < low_water)
			low_water = phys_avail[i];
		if (phys_avail[i + 1] > high_water)
			high_water = phys_avail[i + 1];
	}

#ifdef XEN
	low_water = 0;
#endif

	end = phys_avail[biggestone+1];

	/*
	 * Initialize the page and queue locks.
	 */
	mtx_init(&vm_page_queue_mtx, "vm page queue", NULL, MTX_DEF |
	    MTX_RECURSE);
	mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF);
	for (i = 0; i < PA_LOCK_COUNT; i++)
		mtx_init(&pa_lock[i].data, "vm page", NULL, MTX_DEF);

	/*
	 * Initialize the queue headers for the hold queue, the active queue,
	 * and the inactive queue.
	 */
	for (i = 0; i < PQ_COUNT; i++)
		TAILQ_INIT(&vm_page_queues[i].pl);
	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
	vm_page_queues[PQ_HOLD].cnt = &cnt.v_active_count;

	/*
	 * Allocate memory for use when boot strapping the kernel memory
	 * allocator.
	 */
	new_end = end - (boot_pages * UMA_SLAB_SIZE);
	new_end = trunc_page(new_end);
	mapped = pmap_map(&vaddr, new_end, end,
	    VM_PROT_READ | VM_PROT_WRITE);
	bzero((void *)mapped, end - new_end);
	uma_startup((void *)mapped, boot_pages);

#if defined(__amd64__) || defined(__i386__) || defined(__arm__) || \
    defined(__mips__)
	/*
	 * Allocate a bitmap to indicate that a random physical page
	 * needs to be included in a minidump.
	 *
	 * The amd64 port needs this to indicate which direct map pages
	 * need to be dumped, via calls to dump_add_page()/dump_drop_page().
	 *
	 * However, i386 still needs this workspace internally within the
	 * minidump code.  In theory, they are not needed on i386, but are
	 * included should the sf_buf code decide to use them.
	 */
	last_pa = 0;
	for (i = 0; dump_avail[i + 1] != 0; i += 2)
		if (dump_avail[i + 1] > last_pa)
			last_pa = dump_avail[i + 1];
	page_range = last_pa / PAGE_SIZE;
	vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY);
	new_end -= vm_page_dump_size;
	vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
	    new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE);
	bzero((void *)vm_page_dump, vm_page_dump_size);
#endif
#ifdef __amd64__
	/*
	 * Request that the physical pages underlying the message buffer be
	 * included in a crash dump.  Since the message buffer is accessed
	 * through the direct map, they are not automatically included.
	 */
	pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr);
	last_pa = pa + round_page(msgbufsize);
	while (pa < last_pa) {
		dump_add_page(pa);
		pa += PAGE_SIZE;
	}
#endif
	/*
	 * Compute the number of pages of memory that will be available for
	 * use (taking into account the overhead of a page structure per
	 * page).
	 */
	first_page = low_water / PAGE_SIZE;
#ifdef VM_PHYSSEG_SPARSE
	page_range = 0;
	for (i = 0; phys_avail[i + 1] != 0; i += 2)
		page_range += atop(phys_avail[i + 1] - phys_avail[i]);
#elif defined(VM_PHYSSEG_DENSE)
	page_range = high_water / PAGE_SIZE - first_page;
#else
#error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
#endif
	end = new_end;

	/*
	 * Reserve an unmapped guard page to trap access to vm_page_array[-1].
	 */
	vaddr += PAGE_SIZE;

	/*
	 * Initialize the mem entry structures now, and put them in the free
	 * queue.
	 */
	new_end = trunc_page(end - page_range * sizeof(struct vm_page));
	mapped = pmap_map(&vaddr, new_end, end,
	    VM_PROT_READ | VM_PROT_WRITE);
	vm_page_array = (vm_page_t) mapped;
#if VM_NRESERVLEVEL > 0
	/*
	 * Allocate memory for the reservation management system's data
	 * structures.
	 */
	new_end = vm_reserv_startup(&vaddr, new_end, high_water);
#endif
#if defined(__amd64__) || defined(__mips__)
	/*
	 * pmap_map on amd64 and mips can come out of the direct-map, not kvm
	 * like i386, so the pages must be tracked for a crashdump to include
	 * this data.  This includes the vm_page_array and the early UMA
	 * bootstrap pages.
	 */
	for (pa = new_end; pa < phys_avail[biggestone + 1]; pa += PAGE_SIZE)
		dump_add_page(pa);
#endif
	phys_avail[biggestone + 1] = new_end;

	/*
	 * Clear all of the page structures
	 */
	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
	for (i = 0; i < page_range; i++)
		vm_page_array[i].order = VM_NFREEORDER;
	vm_page_array_size = page_range;

	/*
	 * Initialize the physical memory allocator.
	 */
	vm_phys_init();

	/*
	 * Add every available physical page that is not blacklisted to
	 * the free lists.
	 */
	cnt.v_page_count = 0;
	cnt.v_free_count = 0;
	list = getenv("vm.blacklist");
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		pa = phys_avail[i];
		last_pa = phys_avail[i + 1];
		while (pa < last_pa) {
			if (list != NULL &&
			    vm_page_blacklist_lookup(list, pa))
				printf("Skipping page with pa 0x%jx\n",
				    (uintmax_t)pa);
			else
				vm_phys_add_page(pa);
			pa += PAGE_SIZE;
		}
	}
	freeenv(list);
#if VM_NRESERVLEVEL > 0
	/*
	 * Initialize the reservation management system.
	 */
	vm_reserv_init();
#endif
	return (vaddr);
}

CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0);

void
vm_page_aflag_set(vm_page_t m, uint8_t bits)
{
	uint32_t *addr, val;

	/*
	 * The PGA_WRITEABLE flag can only be set if the page is managed and
	 * VPO_BUSY.  Currently, this flag is only set by pmap_enter().
	 */
	KASSERT((bits & PGA_WRITEABLE) == 0 ||
	    (m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY,
	    ("PGA_WRITEABLE and !VPO_BUSY"));

	/*
	 * We want to use atomic updates for m->aflags, which is a
	 * byte wide.  Not all architectures provide atomic operations
	 * on the single-byte destination.  Punt and access the whole
	 * 4-byte word with an atomic update.  Parallel non-atomic
	 * updates to the fields included in the update by proximity
	 * are handled properly by atomics.
	 */
	addr = (void *)&m->aflags;
	MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0);
	val = bits;
#if BYTE_ORDER == BIG_ENDIAN
	val <<= 24;
#endif
	atomic_set_32(addr, val);
}
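
/*
 * Concretely: the CTASSERT above guarantees that "aflags" is the
 * lowest-addressed byte of a 32-bit word.  On a little-endian machine
 * that byte already occupies bits 0-7 of the word, while on a big-endian
 * machine it occupies bits 24-31, hence the "val <<= 24" before the
 * atomic_set_32()/atomic_clear_32() calls in these two functions.
 */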

void
vm_page_aflag_clear(vm_page_t m, uint8_t bits)
{
	uint32_t *addr, val;

	/*
	 * The PGA_REFERENCED flag can only be cleared if the object
	 * containing the page is locked.
	 */
	KASSERT((bits & PGA_REFERENCED) == 0 || VM_OBJECT_LOCKED(m->object),
	    ("PGA_REFERENCED and !VM_OBJECT_LOCKED"));

	/*
	 * See the comment in vm_page_aflag_set().
	 */
	addr = (void *)&m->aflags;
	MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0);
	val = bits;
#if BYTE_ORDER == BIG_ENDIAN
	val <<= 24;
#endif
	atomic_clear_32(addr, val);
}

void
vm_page_reference(vm_page_t m)
{

	vm_page_aflag_set(m, PGA_REFERENCED);
}

void
vm_page_busy(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	KASSERT((m->oflags & VPO_BUSY) == 0,
	    ("vm_page_busy: page already busy!!!"));
	m->oflags |= VPO_BUSY;
}

/*
 * vm_page_flash:
 *
 * wakeup anyone waiting for the page.
 */
void
vm_page_flash(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if (m->oflags & VPO_WANTED) {
		m->oflags &= ~VPO_WANTED;
		wakeup(m);
	}
}

/*
 * vm_page_wakeup:
 *
 * clear the VPO_BUSY flag and wakeup anyone waiting for the
 * page.
 *
 */
void
vm_page_wakeup(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	KASSERT(m->oflags & VPO_BUSY, ("vm_page_wakeup: page not busy!!!"));
	m->oflags &= ~VPO_BUSY;
	vm_page_flash(m);
}

void
vm_page_io_start(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	m->busy++;
}

void
vm_page_io_finish(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	KASSERT(m->busy > 0, ("vm_page_io_finish: page %p is not busy", m));
	m->busy--;
	if (m->busy == 0)
		vm_page_flash(m);
}

/*
 * Keep the page from being freed by the page daemon.  This has much the
 * same effect as wiring, except that it has much lower overhead and
 * should be used only for *very* temporary holding ("wiring").
 */
void
vm_page_hold(vm_page_t mem)
{

	vm_page_lock_assert(mem, MA_OWNED);
	mem->hold_count++;
}

void
vm_page_unhold(vm_page_t mem)
{

	vm_page_lock_assert(mem, MA_OWNED);
	--mem->hold_count;
	KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!"));
	if (mem->hold_count == 0 && mem->queue == PQ_HOLD)
		vm_page_free_toq(mem);
}

/*
 * vm_page_unhold_pages:
 *
 * Unhold each of the pages that is referenced by the given array.
 */
void
vm_page_unhold_pages(vm_page_t *ma, int count)
{
	struct mtx *mtx, *new_mtx;

	mtx = NULL;
	for (; count != 0; count--) {
		/*
		 * Avoid releasing and reacquiring the same page lock.
		 */
		new_mtx = vm_page_lockptr(*ma);
		if (mtx != new_mtx) {
			if (mtx != NULL)
				mtx_unlock(mtx);
			mtx = new_mtx;
			mtx_lock(mtx);
		}
		vm_page_unhold(*ma);
		ma++;
	}
	if (mtx != NULL)
		mtx_unlock(mtx);
}

vm_page_t
PHYS_TO_VM_PAGE(vm_paddr_t pa)
{
	vm_page_t m;

#ifdef VM_PHYSSEG_SPARSE
	m = vm_phys_paddr_to_vm_page(pa);
	if (m == NULL)
		m = vm_phys_fictitious_to_vm_page(pa);
	return (m);
#elif defined(VM_PHYSSEG_DENSE)
	long pi;

	pi = atop(pa);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		m = &vm_page_array[pi - first_page];
		return (m);
	}
	return (vm_phys_fictitious_to_vm_page(pa));
#else
#error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
#endif
}

/*
 * vm_page_getfake:
 *
 * Create a fictitious page with the specified physical address and
 * memory attribute.  The memory attribute is the only machine-
 * dependent aspect of a fictitious page that must be initialized.
 */
vm_page_t
vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr)
{
	vm_page_t m;

	m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO);
	vm_page_initfake(m, paddr, memattr);
	return (m);
}
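
/*
 * For example, a driver exposing a page of device memory at physical
 * address "paddr" might create and later release a fictitious page for
 * it along these lines (a sketch, assuming the platform defines
 * VM_MEMATTR_UNCACHEABLE):
 *
 *	m = vm_page_getfake(paddr, VM_MEMATTR_UNCACHEABLE);
 *	...
 *	vm_page_putfake(m);
 */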

void
vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
{

	if ((m->flags & PG_FICTITIOUS) != 0) {
		/*
		 * The page's memattr might have changed since the
		 * previous initialization.  Update the pmap to the
		 * new memattr.
		 */
		goto memattr;
	}
	m->phys_addr = paddr;
	m->queue = PQ_NONE;
	/* Fictitious pages don't use "segind". */
	m->flags = PG_FICTITIOUS;
	/* Fictitious pages don't use "order" or "pool". */
	m->oflags = VPO_BUSY | VPO_UNMANAGED;
	m->wire_count = 1;
memattr:
	pmap_page_set_memattr(m, memattr);
}

/*
 * vm_page_putfake:
 *
 * Release a fictitious page.
 */
void
vm_page_putfake(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m));
	KASSERT((m->flags & PG_FICTITIOUS) != 0,
	    ("vm_page_putfake: bad page %p", m));
	uma_zfree(fakepg_zone, m);
}

/*
 * vm_page_updatefake:
 *
 * Update the given fictitious page to the specified physical address and
 * memory attribute.
 */
void
vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
{

	KASSERT((m->flags & PG_FICTITIOUS) != 0,
	    ("vm_page_updatefake: bad page %p", m));
	m->phys_addr = paddr;
	pmap_page_set_memattr(m, memattr);
}

/*
 * vm_page_free:
 *
 * Free a page.
 */
void
vm_page_free(vm_page_t m)
{

	m->flags &= ~PG_ZERO;
	vm_page_free_toq(m);
}

/*
 * vm_page_free_zero:
 *
 * Free a page to the zeroed-pages queue.
 */
void
vm_page_free_zero(vm_page_t m)
{

	m->flags |= PG_ZERO;
	vm_page_free_toq(m);
}

/*
 * Unbusy and handle the page queueing for a page from the VOP_GETPAGES()
 * array which is not the request page.
 */
void
vm_page_readahead_finish(vm_page_t m)
{

	if (m->valid != 0) {
		/*
		 * Since the page is not the requested page, whether
		 * it should be activated or deactivated is not
		 * obvious.  Empirical results have shown that
		 * deactivating the page is usually the best choice,
		 * unless the page is wanted by another thread.
		 */
		if (m->oflags & VPO_WANTED) {
			vm_page_lock(m);
			vm_page_activate(m);
			vm_page_unlock(m);
		} else {
			vm_page_lock(m);
			vm_page_deactivate(m);
			vm_page_unlock(m);
		}
		vm_page_wakeup(m);
	} else {
		/*
		 * Free the completely invalid page.  Such a page state
		 * occurs due to a short read operation that did not
		 * cover our page at all, or when a read error happens.
		 */
		vm_page_lock(m);
		vm_page_free(m);
		vm_page_unlock(m);
	}
}

/*
 * vm_page_sleep:
 *
 * Sleep and release the page and page queues locks.
 *
 * The object containing the given page must be locked.
 */
void
vm_page_sleep(vm_page_t m, const char *msg)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if (mtx_owned(&vm_page_queue_mtx))
		vm_page_unlock_queues();
	if (mtx_owned(vm_page_lockptr(m)))
		vm_page_unlock(m);

	/*
	 * It's possible that while we sleep, the page will get
	 * unbusied and freed.  If we are holding the object
	 * lock, we will assume we hold a reference to the object
	 * such that even if m->object changes, we can re-lock
	 * it.
	 */
	m->oflags |= VPO_WANTED;
	msleep(m, VM_OBJECT_MTX(m->object), PVM, msg, 0);
}
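
/*
 * Because the page may be freed or reused while the thread sleeps, a
 * typical caller re-looks the page up and re-tests its state afterwards.
 * A sketch of the pattern ("pgwait" is just an example wait message):
 *
 *	while ((m = vm_page_lookup(object, pindex)) != NULL &&
 *	    (m->oflags & VPO_BUSY) != 0)
 *		vm_page_sleep(m, "pgwait");
 */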

/*
 * vm_page_dirty:
 *
 * Set all bits in the page's dirty field.
 *
 * The object containing the specified page must be locked if the
 * call is made from the machine-independent layer.
 *
 * See vm_page_clear_dirty_mask().
 */
void
vm_page_dirty(vm_page_t m)
{

	KASSERT((m->flags & PG_CACHED) == 0,
	    ("vm_page_dirty: page in cache!"));
	KASSERT(!VM_PAGE_IS_FREE(m),
	    ("vm_page_dirty: page is free!"));
	KASSERT(m->valid == VM_PAGE_BITS_ALL,
	    ("vm_page_dirty: page is invalid!"));
	m->dirty = VM_PAGE_BITS_ALL;
}

/*
 * vm_page_splay:
 *
 * Implements Sleator and Tarjan's top-down splay algorithm.  Returns
 * the vm_page containing the given pindex.  If, however, that
 * pindex is not found in the vm_object, returns a vm_page that is
 * adjacent to the pindex, coming before or after it.
 */
vm_page_t
vm_page_splay(vm_pindex_t pindex, vm_page_t root)
{
	struct vm_page dummy;
	vm_page_t lefttreemax, righttreemin, y;

	if (root == NULL)
		return (root);
	lefttreemax = righttreemin = &dummy;
	for (;; root = y) {
		if (pindex < root->pindex) {
			if ((y = root->left) == NULL)
				break;
			if (pindex < y->pindex) {
				/* Rotate right. */
				root->left = y->right;
				y->right = root;
				root = y;
				if ((y = root->left) == NULL)
					break;
			}
			/* Link into the new root's right tree. */
			righttreemin->left = root;
			righttreemin = root;
		} else if (pindex > root->pindex) {
			if ((y = root->right) == NULL)
				break;
			if (pindex > y->pindex) {
				/* Rotate left. */
				root->right = y->left;
				y->left = root;
				root = y;
				if ((y = root->right) == NULL)
					break;
			}
			/* Link into the new root's left tree. */
			lefttreemax->right = root;
			lefttreemax = root;
		} else
			break;
	}
	/* Assemble the new root. */
	lefttreemax->right = root->left;
	righttreemin->left = root->right;
	root->left = dummy.right;
	root->right = dummy.left;
	return (root);
}
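
/*
 * Callers always install the returned page as the new tree root and then
 * test its pindex, as in this sketch of the lookup pattern used by
 * vm_page_lookup() and vm_page_cache_lookup() below:
 *
 *	root = vm_page_splay(pindex, object->root);
 *	object->root = root;
 *	if (root != NULL && root->pindex == pindex)
 *		... "pindex" is resident and now sits at the root ...
 */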

/*
 * vm_page_insert: [ internal use only ]
 *
 * Inserts the given mem entry into the object and object list.
 *
 * The object must be locked.
 */
void
vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t root;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if (m->object != NULL)
		panic("vm_page_insert: page already inserted");

	/*
	 * Record the object/offset pair in this page
	 */
	m->object = object;
	m->pindex = pindex;

	/*
	 * Now link into the object's ordered list of backed pages.
	 */
	root = object->root;
	if (root == NULL) {
		m->left = NULL;
		m->right = NULL;
		TAILQ_INSERT_TAIL(&object->memq, m, listq);
	} else {
		root = vm_page_splay(pindex, root);
		if (pindex < root->pindex) {
			m->left = root->left;
			m->right = root;
			root->left = NULL;
			TAILQ_INSERT_BEFORE(root, m, listq);
		} else if (pindex == root->pindex)
			panic("vm_page_insert: offset already allocated");
		else {
			m->right = root->right;
			m->left = root;
			root->right = NULL;
			TAILQ_INSERT_AFTER(&object->memq, root, m, listq);
		}
	}
	object->root = m;

	/*
	 * Show that the object has one more resident page.
	 */
	object->resident_page_count++;

	/*
	 * Hold the vnode until the last page is released.
	 */
	if (object->resident_page_count == 1 && object->type == OBJT_VNODE)
		vhold(object->handle);

	/*
	 * Since we are inserting a new and possibly dirty page,
	 * update the object's OBJ_MIGHTBEDIRTY flag.
	 */
	if (pmap_page_is_write_mapped(m))
		vm_object_set_writeable_dirty(object);
}

/*
 * vm_page_remove:
 *
 * Removes the given mem entry from the object/offset-page
 * table and the object page list, but does not invalidate/terminate
 * the backing store.
 *
 * The object must be locked.  The page must be locked if it is managed.
 */
void
vm_page_remove(vm_page_t m)
{
	vm_object_t object;
	vm_page_t next, prev, root;

	if ((m->oflags & VPO_UNMANAGED) == 0)
		vm_page_lock_assert(m, MA_OWNED);
	if ((object = m->object) == NULL)
		return;
	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if (m->oflags & VPO_BUSY) {
		m->oflags &= ~VPO_BUSY;
		vm_page_flash(m);
	}

	/*
	 * Now remove from the object's list of backed pages.
	 */
	if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) {
		/*
		 * Since the page's successor in the list is also its parent
		 * in the tree, its right subtree must be empty.
		 */
		next->left = m->left;
		KASSERT(m->right == NULL,
		    ("vm_page_remove: page %p has right child", m));
	} else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL &&
	    prev->right == m) {
		/*
		 * Since the page's predecessor in the list is also its parent
		 * in the tree, its left subtree must be empty.
		 */
		KASSERT(m->left == NULL,
		    ("vm_page_remove: page %p has left child", m));
		prev->right = m->right;
	} else {
		if (m != object->root)
			vm_page_splay(m->pindex, object->root);
		if (m->left == NULL)
			root = m->right;
		else if (m->right == NULL)
			root = m->left;
		else {
			/*
			 * Move the page's successor to the root, because
			 * pages are usually removed in ascending order.
			 */
			if (m->right != next)
				vm_page_splay(m->pindex, m->right);
			next->left = m->left;
			root = next;
		}
		object->root = root;
	}
	TAILQ_REMOVE(&object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */
	object->resident_page_count--;

	/*
	 * The vnode may now be recycled.
	 */
	if (object->resident_page_count == 0 && object->type == OBJT_VNODE)
		vdrop(object->handle);

	m->object = NULL;
}

/*
 * vm_page_lookup:
 *
 * Returns the page associated with the object/offset
 * pair specified; if none is found, NULL is returned.
 *
 * The object must be locked.
 */
vm_page_t
vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if ((m = object->root) != NULL && m->pindex != pindex) {
		m = vm_page_splay(pindex, m);
		if ((object->root = m)->pindex != pindex)
			m = NULL;
	}
	return (m);
}

/*
 * vm_page_find_least:
 *
 * Returns the page associated with the object with least pindex
 * greater than or equal to the parameter pindex, or NULL.
 *
 * The object must be locked.
 */
vm_page_t
vm_page_find_least(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if ((m = TAILQ_FIRST(&object->memq)) != NULL) {
		if (m->pindex < pindex) {
			m = vm_page_splay(pindex, object->root);
			if ((object->root = m)->pindex < pindex)
				m = TAILQ_NEXT(m, listq);
		}
	}
	return (m);
}

/*
 * Returns the given page's successor (by pindex) within the object if it is
 * resident; if none is found, NULL is returned.
 *
 * The object must be locked.
 */
vm_page_t
vm_page_next(vm_page_t m)
{
	vm_page_t next;

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((next = TAILQ_NEXT(m, listq)) != NULL &&
	    next->pindex != m->pindex + 1)
		next = NULL;
	return (next);
}

/*
 * Returns the given page's predecessor (by pindex) within the object if it is
 * resident; if none is found, NULL is returned.
 *
 * The object must be locked.
 */
vm_page_t
vm_page_prev(vm_page_t m)
{
	vm_page_t prev;

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL &&
	    prev->pindex != m->pindex - 1)
		prev = NULL;
	return (prev);
}

/*
 * vm_page_rename:
 *
 * Move the given memory entry from its
 * current object to the specified target object/offset.
 *
 * Note: swap associated with the page must be invalidated by the move.  We
 *       have to do this for several reasons: (1) we aren't freeing the
 *       page, (2) we are dirtying the page, (3) the VM system is probably
 *       moving the page from object A to B, and will then later move
 *       the backing store from A to B and we can't have a conflict.
 *
 * Note: we *always* dirty the page.  It is necessary both for the
 *       fact that we moved it, and because we may be invalidating
 *       swap.  If the page is on the cache, we have to deactivate it
 *       or vm_page_dirty() will panic.  Dirty pages are not allowed
 *       on the cache.
 *
 * The objects must be locked.  The page must be locked if it is managed.
 */
void
vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
{

	vm_page_remove(m);
	vm_page_insert(m, new_object, new_pindex);
	vm_page_dirty(m);
}

/*
 * Convert all of the given object's cached pages that have a
 * pindex within the given range into free pages.  If the value
 * zero is given for "end", then the range's upper bound is
 * infinity.  If the given object is backed by a vnode and it
 * transitions from having one or more cached pages to none, the
 * vnode's hold count is reduced.
 */
void
vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	vm_page_t m, m_next;
	boolean_t empty;

	mtx_lock(&vm_page_queue_free_mtx);
	if (__predict_false(object->cache == NULL)) {
		mtx_unlock(&vm_page_queue_free_mtx);
		return;
	}
	m = object->cache = vm_page_splay(start, object->cache);
	if (m->pindex < start) {
		if (m->right == NULL)
			m = NULL;
		else {
			m_next = vm_page_splay(start, m->right);
			m_next->left = m;
			m->right = NULL;
			m = object->cache = m_next;
		}
	}

	/*
	 * At this point, "m" is either (1) a reference to the page
	 * with the least pindex that is greater than or equal to
	 * "start" or (2) NULL.
	 */
	for (; m != NULL && (m->pindex < end || end == 0); m = m_next) {
		/*
		 * Find "m"'s successor and remove "m" from the
		 * object's cache.
		 */
		if (m->right == NULL) {
			object->cache = m->left;
			m_next = NULL;
		} else {
			m_next = vm_page_splay(start, m->right);
			m_next->left = m->left;
			object->cache = m_next;
		}
		/* Convert "m" to a free page. */
		m->object = NULL;
		m->valid = 0;
		/* Clear PG_CACHED and set PG_FREE. */
		m->flags ^= PG_CACHED | PG_FREE;
		KASSERT((m->flags & (PG_CACHED | PG_FREE)) == PG_FREE,
		    ("vm_page_cache_free: page %p has inconsistent flags", m));
		cnt.v_cache_count--;
		cnt.v_free_count++;
	}
	empty = object->cache == NULL;
	mtx_unlock(&vm_page_queue_free_mtx);
	if (object->type == OBJT_VNODE && empty)
		vdrop(object->handle);
}

/*
 * Returns the cached page that is associated with the given
 * object and offset.  If, however, none exists, returns NULL.
 *
 * The free page queue must be locked.
 */
static inline vm_page_t
vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	if ((m = object->cache) != NULL && m->pindex != pindex) {
		m = vm_page_splay(pindex, m);
		if ((object->cache = m)->pindex != pindex)
			m = NULL;
	}
	return (m);
}

/*
 * Remove the given cached page from its containing object's
 * collection of cached pages.
 *
 * The free page queue must be locked.
 */
void
vm_page_cache_remove(vm_page_t m)
{
	vm_object_t object;
	vm_page_t root;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	KASSERT((m->flags & PG_CACHED) != 0,
	    ("vm_page_cache_remove: page %p is not cached", m));
	object = m->object;
	if (m != object->cache) {
		root = vm_page_splay(m->pindex, object->cache);
		KASSERT(root == m,
		    ("vm_page_cache_remove: page %p is not cached in object %p",
		    m, object));
	}
	if (m->left == NULL)
		root = m->right;
	else if (m->right == NULL)
		root = m->left;
	else {
		root = vm_page_splay(m->pindex, m->left);
		root->right = m->right;
	}
	object->cache = root;
	m->object = NULL;
	cnt.v_cache_count--;
}

/*
 * Transfer all of the cached pages with offset greater than or
 * equal to 'offidxstart' from the original object's cache to the
 * new object's cache.  However, any cached pages with offset
 * greater than or equal to the new object's size are kept in the
 * original object.  Initially, the new object's cache must be
 * empty.  Offset 'offidxstart' in the original object must
 * correspond to offset zero in the new object.
 *
 * The new object must be locked.
 */
void
vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
    vm_object_t new_object)
{
	vm_page_t m, m_next;

	/*
	 * Insertion into an object's collection of cached pages
	 * requires the object to be locked.  In contrast, removal does
	 * not.
	 */
	VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
	KASSERT(new_object->cache == NULL,
	    ("vm_page_cache_transfer: object %p has cached pages",
	    new_object));
	mtx_lock(&vm_page_queue_free_mtx);
	if ((m = orig_object->cache) != NULL) {
		/*
		 * Transfer all of the pages with offset greater than or
		 * equal to 'offidxstart' from the original object's
		 * cache to the new object's cache.
		 */
		m = vm_page_splay(offidxstart, m);
		if (m->pindex < offidxstart) {
			orig_object->cache = m;
			new_object->cache = m->right;
			m->right = NULL;
		} else {
			orig_object->cache = m->left;
			new_object->cache = m;
			m->left = NULL;
		}
		while ((m = new_object->cache) != NULL) {
			if ((m->pindex - offidxstart) >= new_object->size) {
				/*
				 * Return all of the cached pages with
				 * offset greater than or equal to the
				 * new object's size to the original
				 * object's cache.
				 */
				new_object->cache = m->left;
				m->left = orig_object->cache;
				orig_object->cache = m;
				break;
			}
			m_next = vm_page_splay(m->pindex, m->right);
			/* Update the page's object and offset. */
			m->object = new_object;
			m->pindex -= offidxstart;
			if (m_next == NULL)
				break;
			m->right = NULL;
			m_next->left = m;
			new_object->cache = m_next;
		}
		KASSERT(new_object->cache == NULL ||
		    new_object->type == OBJT_SWAP,
		    ("vm_page_cache_transfer: object %p's type is incompatible"
		    " with cached pages", new_object));
	}
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Returns TRUE if a cached page is associated with the given object and
 * offset, and FALSE otherwise.
 *
 * The object must be locked.
 */
boolean_t
vm_page_is_cached(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	/*
	 * Insertion into an object's collection of cached pages requires the
	 * object to be locked.  Therefore, if the object is locked and the
	 * object's collection is empty, there is no need to acquire the free
	 * page queues lock in order to prove that the specified page doesn't
	 * exist.
	 */
	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if (object->cache == NULL)
		return (FALSE);
	mtx_lock(&vm_page_queue_free_mtx);
	m = vm_page_cache_lookup(object, pindex);
	mtx_unlock(&vm_page_queue_free_mtx);
	return (m != NULL);
}

/*
 * vm_page_alloc:
 *
 * Allocate and return a page that is associated with the specified
 * object and offset pair.  By default, this page has the flag VPO_BUSY
 * set.
 *
 * The caller must always specify an allocation class.
 *
 * allocation classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *
 * optional allocation flags:
 *	VM_ALLOC_COUNT(number)	the number of additional pages that the caller
 *				intends to allocate
 *	VM_ALLOC_IFCACHED	return page only if it is cached
 *	VM_ALLOC_IFNOTCACHED	return NULL, do not reactivate if the page
 *				is cached
 *	VM_ALLOC_NOBUSY		do not set the flag VPO_BUSY on the page
 *	VM_ALLOC_NOOBJ		page is not associated with an object and
 *				should not have the flag VPO_BUSY set
 *	VM_ALLOC_WIRED		wire the allocated page
 *	VM_ALLOC_ZERO		prefer a zeroed page
 *
 * This routine may not sleep.
 */
vm_page_t
vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
{
	struct vnode *vp = NULL;
	vm_object_t m_object;
	vm_page_t m;
	int flags, req_class;

	KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0),
	    ("vm_page_alloc: inconsistent object/req"));
	if (object != NULL)
		VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);

	req_class = req & VM_ALLOC_CLASS_MASK;

	/*
	 * The page daemon is allowed to dig deeper into the free page list.
	 */
	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
		req_class = VM_ALLOC_SYSTEM;

	mtx_lock(&vm_page_queue_free_mtx);
	if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
	    (req_class == VM_ALLOC_SYSTEM &&
	    cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
	    (req_class == VM_ALLOC_INTERRUPT &&
	    cnt.v_free_count + cnt.v_cache_count > 0)) {
		/*
		 * Allocate from the free queue if the number of free pages
		 * exceeds the minimum for the request class.
		 */
		if (object != NULL &&
		    (m = vm_page_cache_lookup(object, pindex)) != NULL) {
			if ((req & VM_ALLOC_IFNOTCACHED) != 0) {
				mtx_unlock(&vm_page_queue_free_mtx);
				return (NULL);
			}
			if (vm_phys_unfree_page(m))
				vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, 0);
#if VM_NRESERVLEVEL > 0
			else if (!vm_reserv_reactivate_page(m))
#else
			else
#endif
				panic("vm_page_alloc: cache page %p is missing"
				    " from the free queue", m);
		} else if ((req & VM_ALLOC_IFCACHED) != 0) {
			mtx_unlock(&vm_page_queue_free_mtx);
			return (NULL);
#if VM_NRESERVLEVEL > 0
		} else if (object == NULL || object->type == OBJT_DEVICE ||
		    object->type == OBJT_SG ||
		    (object->flags & OBJ_COLORED) == 0 ||
		    (m = vm_reserv_alloc_page(object, pindex)) == NULL) {
#else
		} else {
#endif
			m = vm_phys_alloc_pages(object != NULL ?
			    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
#if VM_NRESERVLEVEL > 0
			if (m == NULL && vm_reserv_reclaim_inactive()) {
				m = vm_phys_alloc_pages(object != NULL ?
				    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT,
				    0);
			}
#endif
		}
	} else {
		/*
		 * Not allocatable, give up.
		 */
		mtx_unlock(&vm_page_queue_free_mtx);
		atomic_add_int(&vm_pageout_deficit,
		    max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
		pagedaemon_wakeup();
		return (NULL);
	}

	/*
	 * At this point we had better have found a good page.
	 */
	KASSERT(m != NULL, ("vm_page_alloc: missing page"));
	KASSERT(m->queue == PQ_NONE,
	    ("vm_page_alloc: page %p has unexpected queue %d", m, m->queue));
	KASSERT(m->wire_count == 0, ("vm_page_alloc: page %p is wired", m));
	KASSERT(m->hold_count == 0, ("vm_page_alloc: page %p is held", m));
	KASSERT(m->busy == 0, ("vm_page_alloc: page %p is busy", m));
	KASSERT(m->dirty == 0, ("vm_page_alloc: page %p is dirty", m));
	KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
	    ("vm_page_alloc: page %p has unexpected memattr %d", m,
	    pmap_page_get_memattr(m)));
	if ((m->flags & PG_CACHED) != 0) {
		KASSERT((m->flags & PG_ZERO) == 0,
		    ("vm_page_alloc: cached page %p is PG_ZERO", m));
		KASSERT(m->valid != 0,
		    ("vm_page_alloc: cached page %p is invalid", m));
		if (m->object == object && m->pindex == pindex)
			cnt.v_reactivated++;
		else
			m->valid = 0;
		m_object = m->object;
		vm_page_cache_remove(m);
		if (m_object->type == OBJT_VNODE && m_object->cache == NULL)
			vp = m_object->handle;
	} else {
		KASSERT(VM_PAGE_IS_FREE(m),
		    ("vm_page_alloc: page %p is not free", m));
		KASSERT(m->valid == 0,
		    ("vm_page_alloc: free page %p is valid", m));
		cnt.v_free_count--;
	}

	/*
	 * Only the PG_ZERO flag is inherited.  The PG_CACHED or PG_FREE flag
	 * must be cleared before the free page queues lock is released.
	 */
	flags = 0;
	if (req & VM_ALLOC_NODUMP)
		flags |= PG_NODUMP;
	if (m->flags & PG_ZERO) {
		vm_page_zero_count--;
		if (req & VM_ALLOC_ZERO)
			flags = PG_ZERO;
	}
	m->flags = flags;
	mtx_unlock(&vm_page_queue_free_mtx);
	m->aflags = 0;
	if (object == NULL || object->type == OBJT_PHYS)
		m->oflags = VPO_UNMANAGED;
	else
		m->oflags = 0;
	if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ)) == 0)
		m->oflags |= VPO_BUSY;
	if (req & VM_ALLOC_WIRED) {
		/*
		 * The page lock is not required for wiring a page until that
		 * page is inserted into the object.
		 */
		atomic_add_int(&cnt.v_wire_count, 1);
		m->wire_count = 1;
	}
	m->act_count = 0;

	if (object != NULL) {
		/* Ignore device objects; the pager sets "memattr" for them. */
		if (object->memattr != VM_MEMATTR_DEFAULT &&
		    object->type != OBJT_DEVICE && object->type != OBJT_SG)
			pmap_page_set_memattr(m, object->memattr);
		vm_page_insert(m, object, pindex);
	} else
		m->pindex = pindex;

	/*
	 * The following call to vdrop() must come after the above call
	 * to vm_page_insert() in case both affect the same object and
	 * vnode.  Otherwise, the affected vnode's hold count could
	 * temporarily become zero.
	 */
	if (vp != NULL)
		vdrop(vp);

	/*
	 * Don't wakeup too often - wakeup the pageout daemon when
	 * we would be nearly out of memory.
	 */
	if (vm_paging_needed())
		pagedaemon_wakeup();

	return (m);
}
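
/*
 * Since this routine may not sleep, callers that can tolerate sleeping
 * typically wait for the page daemon and retry on failure, as in this
 * sketch of the common pattern built around the VM_WAIT macro:
 *
 *	while ((m = vm_page_alloc(object, pindex,
 *	    VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) {
 *		VM_OBJECT_UNLOCK(object);
 *		VM_WAIT;
 *		VM_OBJECT_LOCK(object);
 *	}
 */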

/*
 * vm_page_alloc_contig:
 *
 * Allocate a contiguous set of physical pages of the given size "npages"
 * from the free lists.  All of the physical pages must be at or above
 * the given physical address "low" and below the given physical address
 * "high".  The given value "alignment" determines the alignment of the
 * first physical page in the set.  If the given value "boundary" is
 * non-zero, then the set of physical pages cannot cross any physical
 * address boundary that is a multiple of that value.  Both "alignment"
 * and "boundary" must be a power of two.
 *
 * If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT,
 * then the memory attribute setting for the physical pages is configured
 * to the object's memory attribute setting.  Otherwise, the memory
 * attribute setting for the physical pages is configured to "memattr",
 * overriding the object's memory attribute setting.  However, if the
 * object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the
 * memory attribute setting for the physical pages cannot be configured
 * to VM_MEMATTR_DEFAULT.
 *
 * The caller must always specify an allocation class.
 *
 * allocation classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *
 * optional allocation flags:
 *	VM_ALLOC_NOBUSY		do not set the flag VPO_BUSY on the page
 *	VM_ALLOC_NOOBJ		page is not associated with an object and
 *				should not have the flag VPO_BUSY set
 *	VM_ALLOC_WIRED		wire the allocated page
 *	VM_ALLOC_ZERO		prefer a zeroed page
 *
 * This routine may not sleep.
 */
vm_page_t
vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    u_long boundary, vm_memattr_t memattr)
{
	struct vnode *drop;
	vm_page_t deferred_vdrop_list, m, m_ret;
	u_int flags, oflags;
	int req_class;

	KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0),
	    ("vm_page_alloc_contig: inconsistent object/req"));
	if (object != NULL) {
		VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
		KASSERT(object->type == OBJT_PHYS,
		    ("vm_page_alloc_contig: object %p isn't OBJT_PHYS",
		    object));
	}
	KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
	req_class = req & VM_ALLOC_CLASS_MASK;

	/*
	 * The page daemon is allowed to dig deeper into the free page list.
	 */
	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
		req_class = VM_ALLOC_SYSTEM;

	deferred_vdrop_list = NULL;
	mtx_lock(&vm_page_queue_free_mtx);
	if (cnt.v_free_count + cnt.v_cache_count >= npages +
	    cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM &&
	    cnt.v_free_count + cnt.v_cache_count >= npages +
	    cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT &&
	    cnt.v_free_count + cnt.v_cache_count >= npages)) {
#if VM_NRESERVLEVEL > 0
retry:
#endif
		m_ret = vm_phys_alloc_contig(npages, low, high, alignment,
		    boundary);
	} else {
		mtx_unlock(&vm_page_queue_free_mtx);
		atomic_add_int(&vm_pageout_deficit, npages);
		pagedaemon_wakeup();
		return (NULL);
	}
	if (m_ret != NULL)
		for (m = m_ret; m < &m_ret[npages]; m++) {
			drop = vm_page_alloc_init(m);
			if (drop != NULL) {
				/*
				 * Enqueue the vnode for deferred vdrop().
				 *
				 * Once the pages are removed from the free
				 * page list, "pageq" can be safely abused to
				 * construct a short-lived list of vnodes.
				 */
				m->pageq.tqe_prev = (void *)drop;
				m->pageq.tqe_next = deferred_vdrop_list;
				deferred_vdrop_list = m;
			}
		}
	else {
#if VM_NRESERVLEVEL > 0
		if (vm_reserv_reclaim_contig(npages << PAGE_SHIFT, low, high,
		    alignment, boundary))
			goto retry;
#endif
	}
	mtx_unlock(&vm_page_queue_free_mtx);
	if (m_ret == NULL)
		return (NULL);

	/*
	 * Initialize the pages.  Only the PG_ZERO flag is inherited.
	 */
	flags = 0;
	if ((req & VM_ALLOC_ZERO) != 0)
		flags = PG_ZERO;
	if ((req & VM_ALLOC_WIRED) != 0)
		atomic_add_int(&cnt.v_wire_count, npages);
	oflags = VPO_UNMANAGED;
	if (object != NULL) {
		if ((req & VM_ALLOC_NOBUSY) == 0)
			oflags |= VPO_BUSY;
		if (object->memattr != VM_MEMATTR_DEFAULT &&
		    memattr == VM_MEMATTR_DEFAULT)
			memattr = object->memattr;
	}
	for (m = m_ret; m < &m_ret[npages]; m++) {
		m->aflags = 0;
		m->flags &= flags;
		if ((req & VM_ALLOC_WIRED) != 0)
			m->wire_count = 1;
		/* Unmanaged pages don't use "act_count". */
		m->oflags = oflags;
		if (memattr != VM_MEMATTR_DEFAULT)
			pmap_page_set_memattr(m, memattr);
		if (object != NULL)
			vm_page_insert(m, object, pindex);
		else
			m->pindex = pindex;
		pindex++;
	}
	while (deferred_vdrop_list != NULL) {
		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
	}
	if (vm_paging_needed())
		pagedaemon_wakeup();
	return (m_ret);
}
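
/*
 * For example, a caller needing a wired 64KB buffer below 4GB, 64KB
 * aligned and not crossing a 64KB boundary, might request (a sketch;
 * 16 is 64KB / PAGE_SIZE on a machine with 4KB pages):
 *
 *	m = vm_page_alloc_contig(NULL, 0,
 *	    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED,
 *	    16, 0, 0xffffffff, 65536, 65536, VM_MEMATTR_DEFAULT);
 */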

/*
 * Initialize a page that has been freshly dequeued from a freelist.
 * The caller has to drop the vnode returned, if it is not NULL.
 *
 * This function may only be used to initialize unmanaged pages.
 *
 * To be called with vm_page_queue_free_mtx held.
 */
static struct vnode *
vm_page_alloc_init(vm_page_t m)
{
	struct vnode *drop;
	vm_object_t m_object;

	KASSERT(m->queue == PQ_NONE,
	    ("vm_page_alloc_init: page %p has unexpected queue %d",
	    m, m->queue));
	KASSERT(m->wire_count == 0,
	    ("vm_page_alloc_init: page %p is wired", m));
	KASSERT(m->hold_count == 0,
	    ("vm_page_alloc_init: page %p is held", m));
	KASSERT(m->busy == 0,
	    ("vm_page_alloc_init: page %p is busy", m));
	KASSERT(m->dirty == 0,
	    ("vm_page_alloc_init: page %p is dirty", m));
	KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
	    ("vm_page_alloc_init: page %p has unexpected memattr %d",
	    m, pmap_page_get_memattr(m)));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	drop = NULL;
	if ((m->flags & PG_CACHED) != 0) {
		KASSERT((m->flags & PG_ZERO) == 0,
		    ("vm_page_alloc_init: cached page %p is PG_ZERO", m));
		m->valid = 0;
		m_object = m->object;
		vm_page_cache_remove(m);
		if (m_object->type == OBJT_VNODE && m_object->cache == NULL)
			drop = m_object->handle;
	} else {
		KASSERT(VM_PAGE_IS_FREE(m),
		    ("vm_page_alloc_init: page %p is not free", m));
		KASSERT(m->valid == 0,
		    ("vm_page_alloc_init: free page %p is valid", m));
		cnt.v_free_count--;
		if ((m->flags & PG_ZERO) != 0)
			vm_page_zero_count--;
	}
	/* Don't clear the PG_ZERO flag; we'll need it later. */
	m->flags &= PG_ZERO;
	return (drop);
}

/*
 * vm_page_alloc_freelist:
 *
 * Allocate a physical page from the specified free page list.
 *
 * The caller must always specify an allocation class.
 *
 * allocation classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *
 * optional allocation flags:
 *	VM_ALLOC_COUNT(number)	the number of additional pages that the caller
 *				intends to allocate
 *	VM_ALLOC_WIRED		wire the allocated page
 *	VM_ALLOC_ZERO		prefer a zeroed page
 *
 * This routine may not sleep.
 */
vm_page_t
vm_page_alloc_freelist(int flind, int req)
{
	struct vnode *drop;
	vm_page_t m;
	u_int flags;
	int req_class;

	req_class = req & VM_ALLOC_CLASS_MASK;

	/*
	 * The page daemon is allowed to dig deeper into the free page list.
	 */
	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
		req_class = VM_ALLOC_SYSTEM;

	/*
	 * Do not allocate reserved pages unless the req has asked for it.
	 */
	mtx_lock(&vm_page_queue_free_mtx);
	if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
	    (req_class == VM_ALLOC_SYSTEM &&
	    cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
	    (req_class == VM_ALLOC_INTERRUPT &&
	    cnt.v_free_count + cnt.v_cache_count > 0))
		m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0);
	else {
		mtx_unlock(&vm_page_queue_free_mtx);
		atomic_add_int(&vm_pageout_deficit,
		    max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
		pagedaemon_wakeup();
		return (NULL);
	}
	if (m == NULL) {
		mtx_unlock(&vm_page_queue_free_mtx);
		return (NULL);
	}
	drop = vm_page_alloc_init(m);
	mtx_unlock(&vm_page_queue_free_mtx);

	/*
	 * Initialize the page.  Only the PG_ZERO flag is inherited.
	 */
	m->aflags = 0;
	flags = 0;
	if ((req & VM_ALLOC_ZERO) != 0)
		flags = PG_ZERO;
	m->flags &= flags;
	if ((req & VM_ALLOC_WIRED) != 0) {
		/*
		 * The page lock is not required for wiring a page that does
		 * not belong to an object.
		 */
		atomic_add_int(&cnt.v_wire_count, 1);
		m->wire_count = 1;
	}
	/* Unmanaged pages don't use "act_count". */
	m->oflags = VPO_UNMANAGED;
	if (drop != NULL)
		vdrop(drop);
	if (vm_paging_needed())
		pagedaemon_wakeup();
	return (m);
}

/*
 * vm_wait: (also see VM_WAIT macro)
 *
 * Sleep until free pages are available for allocation.
 * - Called in various places before memory allocations.
 */
void
vm_wait(void)
{

	mtx_lock(&vm_page_queue_free_mtx);
	if (curproc == pageproc) {
		vm_pageout_pages_needed = 1;
		msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx,
		    PDROP | PSWP, "VMWait", 0);
	} else {
		if (!vm_pages_needed) {
			vm_pages_needed = 1;
			wakeup(&vm_pages_needed);
		}
		msleep(&cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PVM,
		    "vmwait", 0);
	}
}

/*
 * vm_waitpfault: (also see VM_WAITPFAULT macro)
 *
 * Sleep until free pages are available for allocation.
 * - Called only in vm_fault so that processes page faulting
 *   can be easily tracked.
 * - Sleeps at a lower priority than vm_wait() so that vm_wait()ing
 *   processes will be able to grab memory first.  Do not change
 *   this balance without careful testing first.
 */
void
vm_waitpfault(void)
{

	mtx_lock(&vm_page_queue_free_mtx);
	if (!vm_pages_needed) {
		vm_pages_needed = 1;
		wakeup(&vm_pages_needed);
	}
	msleep(&cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PUSER,
	    "pfault", 0);
}
1912
1913 /*
1914 * vm_page_requeue:
1915 *
1916 * Move the given page to the tail of its present page queue.
1917 *
1918 * The page queues must be locked.
1919 */
1920 void
1921 vm_page_requeue(vm_page_t m)
1922 {
1923 struct vpgqueues *vpq;
1924 int queue;
1925
1926 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1927 queue = m->queue;
1928 KASSERT(queue != PQ_NONE,
1929 ("vm_page_requeue: page %p is not queued", m));
1930 vpq = &vm_page_queues[queue];
1931 TAILQ_REMOVE(&vpq->pl, m, pageq);
1932 TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
1933 }
1934
1935 /*
1936 * vm_page_queue_remove:
1937 *
1938 * Remove the given page from the specified queue.
1939 *
1940 * The page and page queues must be locked.
1941 */
1942 static __inline void
1943 vm_page_queue_remove(int queue, vm_page_t m)
1944 {
1945 struct vpgqueues *pq;
1946
1947 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1948 vm_page_lock_assert(m, MA_OWNED);
1949 pq = &vm_page_queues[queue];
1950 TAILQ_REMOVE(&pq->pl, m, pageq);
1951 (*pq->cnt)--;
1952 }
1953
1954 /*
1955 * vm_pageq_remove:
1956 *
1957 * Remove a page from its queue.
1958 *
1959 * The given page must be locked.
1960 */
1961 void
1962 vm_pageq_remove(vm_page_t m)
1963 {
1964 int queue;
1965
1966 vm_page_lock_assert(m, MA_OWNED);
1967 if ((queue = m->queue) != PQ_NONE) {
1968 vm_page_lock_queues();
1969 m->queue = PQ_NONE;
1970 vm_page_queue_remove(queue, m);
1971 vm_page_unlock_queues();
1972 }
1973 }
1974
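/*
 * Editor's note: illustrative sketch, not part of the original file.
 * vm_pageq_remove() takes the page queues lock itself, so a caller
 * needs only the page lock, and the call is harmless when the page
 * is on no queue (m->queue == PQ_NONE).
 */
#if 0
	vm_page_lock(m);
	vm_pageq_remove(m);
	vm_page_unlock(m);
#endif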
1975 /*
1976 * vm_page_enqueue:
1977 *
1978 * Add the given page to the specified queue.
1979 *
1980 * The page queues must be locked.
1981 */
1982 static void
1983 vm_page_enqueue(int queue, vm_page_t m)
1984 {
1985 struct vpgqueues *vpq;
1986
1987 vpq = &vm_page_queues[queue];
1988 m->queue = queue;
1989 TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
1990 ++*vpq->cnt;
1991 }
1992
1993 /*
1994 * vm_page_activate:
1995 *
1996 * Put the specified page on the active list (if appropriate).
1997 * Ensure that act_count is at least ACT_INIT but do not otherwise
1998 * mess with it.
1999 *
2000 * The page must be locked.
2001 */
2002 void
2003 vm_page_activate(vm_page_t m)
2004 {
2005 int queue;
2006
2007 vm_page_lock_assert(m, MA_OWNED);
2008 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2009 if ((queue = m->queue) != PQ_ACTIVE) {
2010 if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
2011 if (m->act_count < ACT_INIT)
2012 m->act_count = ACT_INIT;
2013 vm_page_lock_queues();
2014 if (queue != PQ_NONE)
2015 vm_page_queue_remove(queue, m);
2016 vm_page_enqueue(PQ_ACTIVE, m);
2017 vm_page_unlock_queues();
2018 } else
2019 KASSERT(queue == PQ_NONE,
2020 ("vm_page_activate: wired page %p is queued", m));
2021 } else {
2022 if (m->act_count < ACT_INIT)
2023 m->act_count = ACT_INIT;
2024 }
2025 }
2026
2027 /*
2028 * vm_page_free_wakeup:
2029 *
2030 * Helper routine for vm_page_free_toq() and vm_page_cache(). This
2031 * routine is called when a page has been added to the cache or free
2032 * queues.
2033 *
2034 * The page queues must be locked.
2035 */
2036 static inline void
2037 vm_page_free_wakeup(void)
2038 {
2039
2040 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
2041 /*
2042 * If the pageout daemon needs pages, then tell it that there are
2043 * some free.
2044 */
2045 if (vm_pageout_pages_needed &&
2046 cnt.v_cache_count + cnt.v_free_count >= cnt.v_pageout_free_min) {
2047 wakeup(&vm_pageout_pages_needed);
2048 vm_pageout_pages_needed = 0;
2049 }
2050 /*
2051 * Wake up processes that are waiting on memory if we hit a
2052 * high water mark, and wake up the scheduler process if we have
2053 * lots of memory; that process will swap in other processes.
2054 */
2055 if (vm_pages_needed && !vm_page_count_min()) {
2056 vm_pages_needed = 0;
2057 wakeup(&cnt.v_free_count);
2058 }
2059 }
2060
2061 /*
2062 * vm_page_free_toq:
2063 *
2064 * Returns the given page to the free list,
2065 * disassociating it with any VM object.
2066 *
2067 * The object must be locked. The page must be locked if it is managed.
2068 */
2069 void
2070 vm_page_free_toq(vm_page_t m)
2071 {
2072
2073 if ((m->oflags & VPO_UNMANAGED) == 0) {
2074 vm_page_lock_assert(m, MA_OWNED);
2075 KASSERT(!pmap_page_is_mapped(m),
2076 ("vm_page_free_toq: freeing mapped page %p", m));
2077 }
2078 PCPU_INC(cnt.v_tfree);
2079
2080 if (VM_PAGE_IS_FREE(m))
2081 panic("vm_page_free: freeing free page %p", m);
2082 else if (m->busy != 0)
2083 panic("vm_page_free: freeing busy page %p", m);
2084
2085 /*
2086 * Unqueue, then remove page. Note that we cannot destroy
2087 * the page here because we do not want to call the pager's
2088 * callback routine until after we've put the page on the
2089 * appropriate free queue.
2090 */
2091 if ((m->oflags & VPO_UNMANAGED) == 0)
2092 vm_pageq_remove(m);
2093 vm_page_remove(m);
2094
2095 /*
2096 * If the page is fictitious, we are done: the object association
2097 * was removed above and the page never enters the free queues.
2098 */
2099 if ((m->flags & PG_FICTITIOUS) != 0) {
2100 return;
2101 }
2102
2103 m->valid = 0;
2104 vm_page_undirty(m);
2105
2106 if (m->wire_count != 0)
2107 panic("vm_page_free: freeing wired page %p", m);
2108 if (m->hold_count != 0) {
2109 m->flags &= ~PG_ZERO;
2110 vm_page_lock_queues();
2111 vm_page_enqueue(PQ_HOLD, m);
2112 vm_page_unlock_queues();
2113 } else {
2114 /*
2115 * Restore the default memory attribute to the page.
2116 */
2117 if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
2118 pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
2119
2120 /*
2121 * Insert the page into the physical memory allocator's
2122 * cache/free page queues.
2123 */
2124 mtx_lock(&vm_page_queue_free_mtx);
2125 m->flags |= PG_FREE;
2126 cnt.v_free_count++;
2127 #if VM_NRESERVLEVEL > 0
2128 if (!vm_reserv_free_page(m))
2129 #else
2130 if (TRUE)
2131 #endif
2132 vm_phys_free_pages(m, 0);
2133 if ((m->flags & PG_ZERO) != 0)
2134 ++vm_page_zero_count;
2135 else
2136 vm_page_zero_idle_wakeup();
2137 vm_page_free_wakeup();
2138 mtx_unlock(&vm_page_queue_free_mtx);
2139 }
2140 }
2141
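/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Callers normally reach vm_page_free_toq() through the vm_page_free()
 * wrapper defined earlier in this file, holding the object lock and,
 * for a managed page, the page lock:
 */
#if 0
	VM_OBJECT_LOCK(object);
	vm_page_lock(m);
	vm_page_free(m);	/* Clears flags, then vm_page_free_toq(). */
	vm_page_unlock(m);
	VM_OBJECT_UNLOCK(object);
#endif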
2142 /*
2143 * vm_page_wire:
2144 *
2145 * Mark this page as wired down by yet
2146 * another map, removing it from paging queues
2147 * as necessary.
2148 *
2149 * If the page is fictitious, then its wire count must remain one.
2150 *
2151 * The page must be locked.
2152 */
2153 void
2154 vm_page_wire(vm_page_t m)
2155 {
2156
2157 /*
2158 * Only bump the wire statistics if the page is not already wired,
2159 * and only unqueue the page if it is on some queue (if it is unmanaged
2160 * it is already off the queues).
2161 */
2162 vm_page_lock_assert(m, MA_OWNED);
2163 if ((m->flags & PG_FICTITIOUS) != 0) {
2164 KASSERT(m->wire_count == 1,
2165 ("vm_page_wire: fictitious page %p's wire count isn't one",
2166 m));
2167 return;
2168 }
2169 if (m->wire_count == 0) {
2170 if ((m->oflags & VPO_UNMANAGED) == 0)
2171 vm_pageq_remove(m);
2172 atomic_add_int(&cnt.v_wire_count, 1);
2173 }
2174 m->wire_count++;
2175 KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m));
2176 }
2177
2178 /*
2179 * vm_page_unwire:
2180 *
2181 * Release one wiring of the specified page, potentially enabling it to be
2182 * paged again. If paging is enabled, then the value of the parameter
2183 * "activate" determines to which queue the page is added. If "activate" is
2184 * non-zero, then the page is added to the active queue. Otherwise, it is
2185 * added to the inactive queue.
2186 *
2187 * However, unless the page belongs to an object, it is not enqueued because
2188 * it cannot be paged out.
2189 *
2190 * If a page is fictitious, then its wire count must always be one.
2191 *
2192 * A managed page must be locked.
2193 */
2194 void
2195 vm_page_unwire(vm_page_t m, int activate)
2196 {
2197
2198 if ((m->oflags & VPO_UNMANAGED) == 0)
2199 vm_page_lock_assert(m, MA_OWNED);
2200 if ((m->flags & PG_FICTITIOUS) != 0) {
2201 KASSERT(m->wire_count == 1,
2202 ("vm_page_unwire: fictitious page %p's wire count isn't one", m));
2203 return;
2204 }
2205 if (m->wire_count > 0) {
2206 m->wire_count--;
2207 if (m->wire_count == 0) {
2208 atomic_subtract_int(&cnt.v_wire_count, 1);
2209 if ((m->oflags & VPO_UNMANAGED) != 0 ||
2210 m->object == NULL)
2211 return;
2212 if (!activate)
2213 m->flags &= ~PG_WINATCFLS;
2214 vm_page_lock_queues();
2215 vm_page_enqueue(activate ? PQ_ACTIVE : PQ_INACTIVE, m);
2216 vm_page_unlock_queues();
2217 }
2218 } else
2219 panic("vm_page_unwire: page %p's wire count is zero", m);
2220 }
2221
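/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Wiring pins a page for the duration of some operation; the matching
 * unwire re-enqueues the page, here onto the active queue since the
 * "activate" argument is non-zero.
 */
#if 0
	vm_page_lock(m);
	vm_page_wire(m);	/* Page leaves its paging queue. */
	vm_page_unlock(m);
	/* ... use the page without fear of it being paged out ... */
	vm_page_lock(m);
	vm_page_unwire(m, 1);	/* Back onto PQ_ACTIVE. */
	vm_page_unlock(m);
#endif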
2222 /*
2223 * Move the specified page to the inactive queue.
2224 *
2225 * Many pages placed on the inactive queue should actually go
2226 * into the cache, but it is difficult to figure out which. What
2227 * we do instead, if the inactive target is well met, is to put
2228 * clean pages at the head of the inactive queue instead of the tail.
2229 * This will cause them to be moved to the cache more quickly and
2230 * if not actively re-referenced, reclaimed more quickly. If we just
2231 * stick these pages at the end of the inactive queue, heavy filesystem
2232 * meta-data accesses can cause an unnecessary paging load on memory bound
2233 * processes. This optimization causes one-time-use metadata to be
2234 * reused more quickly.
2235 *
2236 * Normally athead is 0, resulting in LRU operation. athead is set
2237 * to 1 if we want this page to be 'as if it were placed in the cache',
2238 * except without unmapping it from the process address space.
2239 *
2240 * The page must be locked.
2241 */
2242 static inline void
2243 _vm_page_deactivate(vm_page_t m, int athead)
2244 {
2245 int queue;
2246
2247 vm_page_lock_assert(m, MA_OWNED);
2248
2249 /*
2250 * Ignore if already inactive.
2251 */
2252 if ((queue = m->queue) == PQ_INACTIVE)
2253 return;
2254 if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
2255 m->flags &= ~PG_WINATCFLS;
2256 vm_page_lock_queues();
2257 if (queue != PQ_NONE)
2258 vm_page_queue_remove(queue, m);
2259 if (athead)
2260 TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m,
2261 pageq);
2262 else
2263 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m,
2264 pageq);
2265 m->queue = PQ_INACTIVE;
2266 cnt.v_inactive_count++;
2267 vm_page_unlock_queues();
2268 }
2269 }
2270
2271 /*
2272 * Move the specified page to the inactive queue.
2273 *
2274 * The page must be locked.
2275 */
2276 void
2277 vm_page_deactivate(vm_page_t m)
2278 {
2279
2280 _vm_page_deactivate(m, 0);
2281 }
2282
2283 /*
2284 * vm_page_try_to_cache:
2285 *
2286 * Returns 0 on failure, 1 on success
2287 */
2288 int
2289 vm_page_try_to_cache(vm_page_t m)
2290 {
2291
2292 vm_page_lock_assert(m, MA_OWNED);
2293 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2294 if (m->dirty || m->hold_count || m->busy || m->wire_count ||
2295 (m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0)
2296 return (0);
2297 pmap_remove_all(m);
2298 if (m->dirty)
2299 return (0);
2300 vm_page_cache(m);
2301 return (1);
2302 }
2303
2304 /*
2305 * vm_page_try_to_free()
2306 *
2307 * Attempt to free the page. If we cannot free it, we do nothing.
2308 * 1 is returned on success, 0 on failure.
2309 */
2310 int
2311 vm_page_try_to_free(vm_page_t m)
2312 {
2313
2314 vm_page_lock_assert(m, MA_OWNED);
2315 if (m->object != NULL)
2316 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2317 if (m->dirty || m->hold_count || m->busy || m->wire_count ||
2318 (m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0)
2319 return (0);
2320 pmap_remove_all(m);
2321 if (m->dirty)
2322 return (0);
2323 vm_page_free(m);
2324 return (1);
2325 }
2326
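/*
 * Editor's note: illustrative sketch, not part of the original file.
 * Both vm_page_try_to_cache() and vm_page_try_to_free() are best
 * effort: they refuse busy, held, wired, or dirty pages, so a caller
 * (holding the page lock and the object lock) just tests the result.
 */
#if 0
	if (!vm_page_try_to_free(m))
		vm_page_deactivate(m);	/* Fall back to ordinary paging. */
#endif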
2327 /*
2328 * vm_page_cache
2329 *
2330 * Put the specified page onto the page cache queue (if appropriate).
2331 *
2332 * The object and page must be locked.
2333 */
2334 void
2335 vm_page_cache(vm_page_t m)
2336 {
2337 vm_object_t object;
2338 vm_page_t next, prev, root;
2339
2340 vm_page_lock_assert(m, MA_OWNED);
2341 object = m->object;
2342 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2343 if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) || m->busy ||
2344 m->hold_count || m->wire_count)
2345 panic("vm_page_cache: attempting to cache busy page");
2346 pmap_remove_all(m);
2347 if (m->dirty != 0)
2348 panic("vm_page_cache: page %p is dirty", m);
2349 if (m->valid == 0 || object->type == OBJT_DEFAULT ||
2350 (object->type == OBJT_SWAP &&
2351 !vm_pager_has_page(object, m->pindex, NULL, NULL))) {
2352 /*
2353 * Hypothesis: A cache-eligible page belonging to a
2354 * default object or swap object but without a backing
2355 * store must be zero filled.
2356 */
2357 vm_page_free(m);
2358 return;
2359 }
2360 KASSERT((m->flags & PG_CACHED) == 0,
2361 ("vm_page_cache: page %p is already cached", m));
2362 PCPU_INC(cnt.v_tcached);
2363
2364 /*
2365 * Remove the page from the paging queues.
2366 */
2367 vm_pageq_remove(m);
2368
2369 /*
2370 * Remove the page from the object's collection of resident
2371 * pages.
2372 */
2373 if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) {
2374 /*
2375 * Since the page's successor in the list is also its parent
2376 * in the tree, its right subtree must be empty.
2377 */
2378 next->left = m->left;
2379 KASSERT(m->right == NULL,
2380 ("vm_page_cache: page %p has right child", m));
2381 } else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL &&
2382 prev->right == m) {
2383 /*
2384 * Since the page's predecessor in the list is also its parent
2385 * in the tree, its left subtree must be empty.
2386 */
2387 KASSERT(m->left == NULL,
2388 ("vm_page_cache: page %p has left child", m));
2389 prev->right = m->right;
2390 } else {
2391 if (m != object->root)
2392 vm_page_splay(m->pindex, object->root);
2393 if (m->left == NULL)
2394 root = m->right;
2395 else if (m->right == NULL)
2396 root = m->left;
2397 else {
2398 /*
2399 * Move the page's successor to the root, because
2400 * pages are usually removed in ascending order.
2401 */
2402 if (m->right != next)
2403 vm_page_splay(m->pindex, m->right);
2404 next->left = m->left;
2405 root = next;
2406 }
2407 object->root = root;
2408 }
2409 TAILQ_REMOVE(&object->memq, m, listq);
2410 object->resident_page_count--;
2411
2412 /*
2413 * Restore the default memory attribute to the page.
2414 */
2415 if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
2416 pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
2417
2418 /*
2419 * Insert the page into the object's collection of cached pages
2420 * and the physical memory allocator's cache/free page queues.
2421 */
2422 m->flags &= ~PG_ZERO;
2423 mtx_lock(&vm_page_queue_free_mtx);
2424 m->flags |= PG_CACHED;
2425 cnt.v_cache_count++;
2426 root = object->cache;
2427 if (root == NULL) {
2428 m->left = NULL;
2429 m->right = NULL;
2430 } else {
2431 root = vm_page_splay(m->pindex, root);
2432 if (m->pindex < root->pindex) {
2433 m->left = root->left;
2434 m->right = root;
2435 root->left = NULL;
2436 } else if (__predict_false(m->pindex == root->pindex))
2437 panic("vm_page_cache: offset already cached");
2438 else {
2439 m->right = root->right;
2440 m->left = root;
2441 root->right = NULL;
2442 }
2443 }
2444 object->cache = m;
2445 #if VM_NRESERVLEVEL > 0
2446 if (!vm_reserv_free_page(m)) {
2447 #else
2448 if (TRUE) {
2449 #endif
2450 vm_phys_set_pool(VM_FREEPOOL_CACHE, m, 0);
2451 vm_phys_free_pages(m, 0);
2452 }
2453 vm_page_free_wakeup();
2454 mtx_unlock(&vm_page_queue_free_mtx);
2455
2456 /*
2457 * Increment the vnode's hold count if this is the object's only
2458 * cached page. Decrement the vnode's hold count if this was
2459 * the object's only resident page.
2460 */
2461 if (object->type == OBJT_VNODE) {
2462 if (root == NULL && object->resident_page_count != 0)
2463 vhold(object->handle);
2464 else if (root != NULL && object->resident_page_count == 0)
2465 vdrop(object->handle);
2466 }
2467 }
2468
2469 /*
2470 * vm_page_dontneed
2471 *
2472 * Cache, deactivate, or do nothing as appropriate. This routine
2473 * is typically used by madvise() MADV_DONTNEED.
2474 *
2475 * Generally speaking we want to move the page into the cache so
2476 * it gets reused quickly. However, this can result in a silly syndrome
2477 * due to the page recycling too quickly. Small objects will not be
2478 * fully cached. On the other hand, if we move the page to the inactive
2479 * queue we wind up with a problem whereby very large objects
2480 * unnecessarily blow away our inactive and cache queues.
2481 *
2482 * The solution is to move the pages based on a fixed weighting. We
2483 * either leave them alone, deactivate them, or move them to the cache,
2484 * where moving them to the cache has the highest weighting.
2485 * By forcing some pages into other queues we eventually force the
2486 * system to balance the queues, potentially recovering other unrelated
2487 * space from active. The idea is to not force this to happen too
2488 * often.
2489 *
2490 * The object and page must be locked.
2491 */
2492 void
2493 vm_page_dontneed(vm_page_t m)
2494 {
2495 int dnw;
2496 int head;
2497
2498 vm_page_lock_assert(m, MA_OWNED);
2499 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2500 dnw = PCPU_GET(dnweight);
2501 PCPU_INC(dnweight);
2502
2503 /*
2504 * Occasionally leave the page alone.
2505 */
2506 if ((dnw & 0x01F0) == 0 || m->queue == PQ_INACTIVE) {
2507 if (m->act_count >= ACT_INIT)
2508 --m->act_count;
2509 return;
2510 }
2511
2512 /*
2513 * Clear any references to the page. Otherwise, the page daemon will
2514 * immediately reactivate the page.
2515 *
2516 * Perform the pmap_clear_reference() first. Otherwise, a concurrent
2517 * pmap operation, such as pmap_remove(), could clear a reference in
2518 * the pmap and set PGA_REFERENCED on the page before the
2519 * pmap_clear_reference() had completed. Consequently, the page would
2520 * appear referenced based upon an old reference that occurred before
2521 * this function ran.
2522 */
2523 pmap_clear_reference(m);
2524 vm_page_aflag_clear(m, PGA_REFERENCED);
2525
2526 if (m->dirty == 0 && pmap_is_modified(m))
2527 vm_page_dirty(m);
2528
2529 if (m->dirty || (dnw & 0x0070) == 0) {
2530 /*
2531 * Deactivate the page 3 times out of 32.
2532 */
2533 head = 0;
2534 } else {
2535 /*
2536 * Cache the page 28 times out of every 32. Note that
2537 * the page is deactivated instead of cached, but placed
2538 * at the head of the queue instead of the tail.
2539 */
2540 head = 1;
2541 }
2542 _vm_page_deactivate(m, head);
2543 }
2544
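/*
 * Editor's note: illustrative arithmetic, not part of the original
 * file.  The weights above fall out of the counter masks: for a clean
 * page, (dnw & 0x01F0) == 0 selects 1 of every 32 counter values
 * (bits 4-8 all clear), and (dnw & 0x0070) == 0 selects 4 of every 32
 * (bits 4-6 clear), one of which the first test already consumed.
 * Hence, per 32 calls: 1 leave-alone, 3 deactivations at the tail,
 * and 28 head-of-inactive-queue placements.
 */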
2545 /*
2546 * Grab a page, waiting until we are woken up due to the page
2547 * changing state. We keep on waiting while the page remains
2548 * busy in the object. If the page doesn't exist, first allocate it
2549 * and then conditionally zero it.
2550 *
2551 * The caller must always specify the VM_ALLOC_RETRY flag. This is intended
2552 * to facilitate its eventual removal.
2553 *
2554 * This routine may sleep.
2555 *
2556 * The object must be locked on entry. The lock will, however, be released
2557 * and reacquired if the routine sleeps.
2558 */
2559 vm_page_t
2560 vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
2561 {
2562 vm_page_t m;
2563
2564 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2565 KASSERT((allocflags & VM_ALLOC_RETRY) != 0,
2566 ("vm_page_grab: VM_ALLOC_RETRY is required"));
2567 retrylookup:
2568 if ((m = vm_page_lookup(object, pindex)) != NULL) {
2569 if ((m->oflags & VPO_BUSY) != 0 ||
2570 ((allocflags & VM_ALLOC_IGN_SBUSY) == 0 && m->busy != 0)) {
2571 /*
2572 * Reference the page before unlocking and
2573 * sleeping so that the page daemon is less
2574 * likely to reclaim it.
2575 */
2576 vm_page_aflag_set(m, PGA_REFERENCED);
2577 vm_page_sleep(m, "pgrbwt");
2578 goto retrylookup;
2579 } else {
2580 if ((allocflags & VM_ALLOC_WIRED) != 0) {
2581 vm_page_lock(m);
2582 vm_page_wire(m);
2583 vm_page_unlock(m);
2584 }
2585 if ((allocflags & VM_ALLOC_NOBUSY) == 0)
2586 vm_page_busy(m);
2587 return (m);
2588 }
2589 }
2590 m = vm_page_alloc(object, pindex, allocflags & ~(VM_ALLOC_RETRY |
2591 VM_ALLOC_IGN_SBUSY));
2592 if (m == NULL) {
2593 VM_OBJECT_UNLOCK(object);
2594 VM_WAIT;
2595 VM_OBJECT_LOCK(object);
2596 goto retrylookup;
2597 } else if (m->valid != 0)
2598 return (m);
2599 if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
2600 pmap_zero_page(m);
2601 return (m);
2602 }
2603
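/*
 * Editor's note: illustrative sketch, not part of the original file.
 * vm_page_grab() returns a busied page unless VM_ALLOC_NOBUSY is
 * given; the caller clears the busy state with vm_page_wakeup() once
 * the page's contents are settled.
 */
#if 0
	VM_OBJECT_LOCK(object);
	m = vm_page_grab(object, pindex,
	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_ZERO);
	/* ... initialize or validate the page ... */
	vm_page_wakeup(m);	/* Clear VPO_BUSY and wake sleepers. */
	VM_OBJECT_UNLOCK(object);
#endif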
2604 /*
2605 * Mapping function for valid or dirty bits in a page.
2606 *
2607 * Inputs are required to range within a page.
2608 */
2609 vm_page_bits_t
2610 vm_page_bits(int base, int size)
2611 {
2612 int first_bit;
2613 int last_bit;
2614
2615 KASSERT(
2616 base + size <= PAGE_SIZE,
2617 ("vm_page_bits: illegal base/size %d/%d", base, size)
2618 );
2619
2620 if (size == 0) /* handle degenerate case */
2621 return (0);
2622
2623 first_bit = base >> DEV_BSHIFT;
2624 last_bit = (base + size - 1) >> DEV_BSHIFT;
2625
2626 return (((vm_page_bits_t)2 << last_bit) -
2627 ((vm_page_bits_t)1 << first_bit));
2628 }
2629
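/*
 * Editor's note: worked example, not part of the original file.
 * With DEV_BSIZE == 512 (DEV_BSHIFT == 9), vm_page_bits(512, 1024)
 * covers disk blocks 1 and 2 of the page:
 *
 *	first_bit = 512 >> 9 = 1
 *	last_bit = (512 + 1024 - 1) >> 9 = 1535 >> 9 = 2
 *	(2 << 2) - (1 << 1) = 8 - 2 = 6 = binary 110
 *
 * i.e. bits 1 and 2 are set and bit 0 (the first block) is clear.
 */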
2630 /*
2631 * vm_page_set_valid:
2632 *
2633 * Sets portions of a page valid. The arguments are expected
2634 * to be DEV_BSIZE aligned, but if they aren't, the bitmap is inclusive
2635 * of any partial chunks touched by the range. The invalid portion of
2636 * such chunks will be zeroed.
2637 *
2638 * (base + size) must be less than or equal to PAGE_SIZE.
2639 */
2640 void
2641 vm_page_set_valid(vm_page_t m, int base, int size)
2642 {
2643 int endoff, frag;
2644
2645 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2646 if (size == 0) /* handle degenerate case */
2647 return;
2648
2649 /*
2650 * If the base is not DEV_BSIZE aligned and the valid
2651 * bit is clear, we have to zero out a portion of the
2652 * first block.
2653 */
2654 if ((frag = base & ~(DEV_BSIZE - 1)) != base &&
2655 (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0)
2656 pmap_zero_page_area(m, frag, base - frag);
2657
2658 /*
2659 * If the ending offset is not DEV_BSIZE aligned and the
2660 * valid bit is clear, we have to zero out a portion of
2661 * the last block.
2662 */
2663 endoff = base + size;
2664 if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff &&
2665 (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0)
2666 pmap_zero_page_area(m, endoff,
2667 DEV_BSIZE - (endoff & (DEV_BSIZE - 1)));
2668
2669 /*
2670 * Assert that no previously invalid block that is now being validated
2671 * is already dirty.
2672 */
2673 KASSERT((~m->valid & vm_page_bits(base, size) & m->dirty) == 0,
2674 ("vm_page_set_valid: page %p is dirty", m));
2675
2676 /*
2677 * Set valid bits inclusive of any overlap.
2678 */
2679 m->valid |= vm_page_bits(base, size);
2680 }
2681
2682 /*
2683 * Clear the given bits from the specified page's dirty field.
2684 */
2685 static __inline void
2686 vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits)
2687 {
2688 uintptr_t addr;
2689 #if PAGE_SIZE < 16384
2690 int shift;
2691 #endif
2692
2693 /*
2694 * If the object is locked and the page is neither VPO_BUSY nor
2695 * write mapped, then the page's dirty field cannot possibly be
2696 * set by a concurrent pmap operation.
2697 */
2698 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2699 if ((m->oflags & VPO_BUSY) == 0 && !pmap_page_is_write_mapped(m))
2700 m->dirty &= ~pagebits;
2701 else {
2702 /*
2703 * The pmap layer can call vm_page_dirty() without
2704 * holding a distinguished lock. The combination of
2705 * the object's lock and an atomic operation suffices
2706 * to guarantee consistency of the page dirty field.
2707 *
2708 * In the PAGE_SIZE == 32768 case, the compiler already
2709 * properly aligns the dirty field, so no forcible
2710 * alignment is needed. atomic_clear_64 is required to
2711 * exist only when the page size is 32768.
2712 */
2713 addr = (uintptr_t)&m->dirty;
2714 #if PAGE_SIZE == 32768
2715 atomic_clear_64((uint64_t *)addr, pagebits);
2716 #elif PAGE_SIZE == 16384
2717 atomic_clear_32((uint32_t *)addr, pagebits);
2718 #else /* PAGE_SIZE <= 8192 */
2719 /*
2720 * Use a trick to perform a 32-bit atomic update on the
2721 * aligned word containing the dirty field, so as not to
2722 * depend on the existence of atomic_clear_{8, 16}.
2723 */
2724 shift = addr & (sizeof(uint32_t) - 1);
2725 #if BYTE_ORDER == BIG_ENDIAN
2726 shift = (sizeof(uint32_t) - sizeof(m->dirty) - shift) * NBBY;
2727 #else
2728 shift *= NBBY;
2729 #endif
2730 addr &= ~(sizeof(uint32_t) - 1);
2731 atomic_clear_32((uint32_t *)addr, pagebits << shift);
2732 #endif /* PAGE_SIZE */
2733 }
2734 }
2735
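/*
 * Editor's note: worked example, not part of the original file.
 * Suppose PAGE_SIZE == 4096, so m->dirty is one byte wide, and that
 * byte sits at offset 2 within its aligned 32-bit word.  On a
 * little-endian machine shift = 2 * NBBY = 16, so pagebits is cleared
 * from bits 16-23.  On a big-endian machine shift = (4 - 1 - 2) *
 * NBBY = 8, because byte 2 there holds bits 8-15 of the word.  Either
 * way atomic_clear_32() touches only the byte occupied by m->dirty.
 */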
2736 /*
2737 * vm_page_set_validclean:
2738 *
2739 * Sets portions of a page valid and clean. The arguments are expected
2740 * to be DEV_BSIZE aligned, but if they aren't, the bitmap is inclusive
2741 * of any partial chunks touched by the range. The invalid portion of
2742 * such chunks will be zeroed.
2743 *
2744 * (base + size) must be less than or equal to PAGE_SIZE.
2745 */
2746 void
2747 vm_page_set_validclean(vm_page_t m, int base, int size)
2748 {
2749 vm_page_bits_t oldvalid, pagebits;
2750 int endoff, frag;
2751
2752 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2753 if (size == 0) /* handle degenerate case */
2754 return;
2755
2756 /*
2757 * If the base is not DEV_BSIZE aligned and the valid
2758 * bit is clear, we have to zero out a portion of the
2759 * first block.
2760 */
2761 if ((frag = base & ~(DEV_BSIZE - 1)) != base &&
2762 (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0)
2763 pmap_zero_page_area(m, frag, base - frag);
2764
2765 /*
2766 * If the ending offset is not DEV_BSIZE aligned and the
2767 * valid bit is clear, we have to zero out a portion of
2768 * the last block.
2769 */
2770 endoff = base + size;
2771 if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff &&
2772 (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0)
2773 pmap_zero_page_area(m, endoff,
2774 DEV_BSIZE - (endoff & (DEV_BSIZE - 1)));
2775
2776 /*
2777 * Set valid, clear dirty bits. If validating the entire
2778 * page we can safely clear the pmap modify bit. We also
2779 * use this opportunity to clear the VPO_NOSYNC flag. If a process
2780 * takes a write fault on a MAP_NOSYNC memory area the flag will
2781 * be set again.
2782 *
2783 * We set valid bits inclusive of any overlap, but we can only
2784 * clear dirty bits for DEV_BSIZE chunks that are fully within
2785 * the range.
2786 */
2787 oldvalid = m->valid;
2788 pagebits = vm_page_bits(base, size);
2789 m->valid |= pagebits;
2790 #if 0 /* NOT YET */
2791 if ((frag = base & (DEV_BSIZE - 1)) != 0) {
2792 frag = DEV_BSIZE - frag;
2793 base += frag;
2794 size -= frag;
2795 if (size < 0)
2796 size = 0;
2797 }
2798 pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1));
2799 #endif
2800 if (base == 0 && size == PAGE_SIZE) {
2801 /*
2802 * The page can only be modified within the pmap if it is
2803 * mapped, and it can only be mapped if it was previously
2804 * fully valid.
2805 */
2806 if (oldvalid == VM_PAGE_BITS_ALL)
2807 /*
2808 * Perform the pmap_clear_modify() first. Otherwise,
2809 * a concurrent pmap operation, such as
2810 * pmap_protect(), could clear a modification in the
2811 * pmap and set the dirty field on the page before
2812 * pmap_clear_modify() had begun and after the dirty
2813 * field was cleared here.
2814 */
2815 pmap_clear_modify(m);
2816 m->dirty = 0;
2817 m->oflags &= ~VPO_NOSYNC;
2818 } else if (oldvalid != VM_PAGE_BITS_ALL)
2819 m->dirty &= ~pagebits;
2820 else
2821 vm_page_clear_dirty_mask(m, pagebits);
2822 }
2823
2824 void
2825 vm_page_clear_dirty(vm_page_t m, int base, int size)
2826 {
2827
2828 vm_page_clear_dirty_mask(m, vm_page_bits(base, size));
2829 }
2830
2831 /*
2832 * vm_page_set_invalid:
2833 *
2834 * Invalidates DEV_BSIZE'd chunks within a page. Both the
2835 * valid and dirty bits for the affected areas are cleared.
2836 */
2837 void
2838 vm_page_set_invalid(vm_page_t m, int base, int size)
2839 {
2840 vm_page_bits_t bits;
2841 vm_object_t object;
2842
2843 object = m->object;
2844 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2845 if (object->type == OBJT_VNODE && base == 0 && IDX_TO_OFF(m->pindex) +
2846 size >= object->un_pager.vnp.vnp_size)
2847 bits = VM_PAGE_BITS_ALL;
2848 else
2849 bits = vm_page_bits(base, size);
2850 if (m->valid == VM_PAGE_BITS_ALL && bits != 0)
2851 pmap_remove_all(m);
2852 KASSERT((bits == 0 && m->valid == VM_PAGE_BITS_ALL) ||
2853 !pmap_page_is_mapped(m),
2854 ("vm_page_set_invalid: page %p is mapped", m));
2855 m->valid &= ~bits;
2856 m->dirty &= ~bits;
2857 }
2858
2859 /*
2860 * vm_page_zero_invalid()
2861 *
2862 * The kernel assumes that the invalid portions of a page contain
2863 * garbage, but such pages can be mapped into memory by user code.
2864 * When this occurs, we must zero out the non-valid portions of the
2865 * page so user code sees what it expects.
2866 *
2867 * Pages are most often semi-valid when the end of a file is mapped
2868 * into memory and the file's size is not page aligned.
2869 */
2870 void
2871 vm_page_zero_invalid(vm_page_t m, boolean_t setvalid)
2872 {
2873 int b;
2874 int i;
2875
2876 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2877 /*
2878 * Scan the valid bits looking for invalid sections that
2879 * must be zeroed. Invalid sub-DEV_BSIZE'd areas (where the
2880 * valid bit may be set) have already been zeroed by
2881 * vm_page_set_validclean().
2882 */
2883 for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) {
2884 if (i == (PAGE_SIZE / DEV_BSIZE) ||
2885 (m->valid & ((vm_page_bits_t)1 << i))) {
2886 if (i > b) {
2887 pmap_zero_page_area(m,
2888 b << DEV_BSHIFT, (i - b) << DEV_BSHIFT);
2889 }
2890 b = i + 1;
2891 }
2892 }
2893
2894 /*
2895 * setvalid is TRUE when we can safely set the zero'd areas
2896 * as being valid. We can do this if there are no cache consistency
2897 * issues, e.g., it is ok to do with UFS, but not ok to do with NFS.
2898 */
2899 if (setvalid)
2900 m->valid = VM_PAGE_BITS_ALL;
2901 }
2902
2903 /*
2904 * vm_page_is_valid:
2905 *
2906 * Is (partial) page valid? Note that when size == 0 the check
2907 * returns FALSE in the degenerate case where the page is
2908 * entirely invalid, and TRUE otherwise.
2909 */
2910 int
2911 vm_page_is_valid(vm_page_t m, int base, int size)
2912 {
2913 vm_page_bits_t bits;
2914
2915 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2916 bits = vm_page_bits(base, size);
2917 if (m->valid && ((m->valid & bits) == bits))
2918 return (1);
2919 else
2920 return (0);
2921 }
2922
2923 /*
2924 * Set the page's dirty bits if the page is modified.
2925 */
2926 void
2927 vm_page_test_dirty(vm_page_t m)
2928 {
2929
2930 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2931 if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m))
2932 vm_page_dirty(m);
2933 }
2934
2935 void
2936 vm_page_lock_KBI(vm_page_t m, const char *file, int line)
2937 {
2938
2939 mtx_lock_flags_(vm_page_lockptr(m), 0, file, line);
2940 }
2941
2942 void
2943 vm_page_unlock_KBI(vm_page_t m, const char *file, int line)
2944 {
2945
2946 mtx_unlock_flags_(vm_page_lockptr(m), 0, file, line);
2947 }
2948
2949 int
2950 vm_page_trylock_KBI(vm_page_t m, const char *file, int line)
2951 {
2952
2953 return (mtx_trylock_flags_(vm_page_lockptr(m), 0, file, line));
2954 }
2955
2956 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
2957 void
2958 vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line)
2959 {
2960
2961 mtx_assert_(vm_page_lockptr(m), a, file, line);
2962 }
2963 #endif
2964
2965 int so_zerocp_fullpage = 0;
2966
2967 /*
2968 * Replace the given page with a copy. The copied page assumes
2969 * the portion of the given page's "wire_count" that is not the
2970 * responsibility of this copy-on-write mechanism.
2971 *
2972 * The object containing the given page must have a non-zero
2973 * paging-in-progress count and be locked.
2974 */
2975 void
2976 vm_page_cowfault(vm_page_t m)
2977 {
2978 vm_page_t mnew;
2979 vm_object_t object;
2980 vm_pindex_t pindex;
2981
2982 mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED);
2983 vm_page_lock_assert(m, MA_OWNED);
2984 object = m->object;
2985 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2986 KASSERT(object->paging_in_progress != 0,
2987 ("vm_page_cowfault: object %p's paging-in-progress count is zero.",
2988 object));
2989 pindex = m->pindex;
2990
2991 retry_alloc:
2992 pmap_remove_all(m);
2993 vm_page_remove(m);
2994 mnew = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY);
2995 if (mnew == NULL) {
2996 vm_page_insert(m, object, pindex);
2997 vm_page_unlock(m);
2998 VM_OBJECT_UNLOCK(object);
2999 VM_WAIT;
3000 VM_OBJECT_LOCK(object);
3001 if (m == vm_page_lookup(object, pindex)) {
3002 vm_page_lock(m);
3003 goto retry_alloc;
3004 } else {
3005 /*
3006 * Page disappeared during the wait.
3007 */
3008 return;
3009 }
3010 }
3011
3012 if (m->cow == 0) {
3013 /*
3014 * Check to see if we raced with an xmit completion when
3015 * waiting to allocate a page. If so, put things back
3016 * the way they were.
3017 */
3018 vm_page_unlock(m);
3019 vm_page_lock(mnew);
3020 vm_page_free(mnew);
3021 vm_page_unlock(mnew);
3022 vm_page_insert(m, object, pindex);
3023 } else { /* clear COW & copy page */
3024 if (!so_zerocp_fullpage)
3025 pmap_copy_page(m, mnew);
3026 mnew->valid = VM_PAGE_BITS_ALL;
3027 vm_page_dirty(mnew);
3028 mnew->wire_count = m->wire_count - m->cow;
3029 m->wire_count = m->cow;
3030 vm_page_unlock(m);
3031 }
3032 }
3033
3034 void
3035 vm_page_cowclear(vm_page_t m)
3036 {
3037
3038 vm_page_lock_assert(m, MA_OWNED);
3039 if (m->cow) {
3040 m->cow--;
3041 /*
3042 * let vm_fault add back write permission lazily
3043 */
3044 }
3045 /*
3046 * sf_buf_free() will free the page, so we needn't do it here
3047 */
3048 }
3049
3050 int
3051 vm_page_cowsetup(vm_page_t m)
3052 {
3053
3054 vm_page_lock_assert(m, MA_OWNED);
3055 if ((m->flags & PG_FICTITIOUS) != 0 ||
3056 (m->oflags & VPO_UNMANAGED) != 0 ||
3057 m->cow == USHRT_MAX - 1 || !VM_OBJECT_TRYLOCK(m->object))
3058 return (EBUSY);
3059 m->cow++;
3060 pmap_remove_write(m);
3061 VM_OBJECT_UNLOCK(m->object);
3062 return (0);
3063 }
3064
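/*
 * Editor's note: illustrative sketch, not part of the original file.
 * The zero-copy socket send path is the intended consumer: it marks a
 * user page copy-on-write before lending it to the network stack, and
 * the transmit-completion path calls vm_page_cowclear().
 */
#if 0
	vm_page_lock(m);
	error = vm_page_cowsetup(m);	/* EBUSY means fall back to copying. */
	vm_page_unlock(m);
	/* ... page is lent out; on transmit completion: */
	vm_page_lock(m);
	vm_page_cowclear(m);
	vm_page_unlock(m);
#endif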
3065 #ifdef INVARIANTS
3066 void
3067 vm_page_object_lock_assert(vm_page_t m)
3068 {
3069
3070 /*
3071 * Certain of the page's fields may only be modified by the
3072 * holder of the containing object's lock or the setter of the
3073 * page's VPO_BUSY flag. Unfortunately, the setter of the
3074 * VPO_BUSY flag is not recorded, and thus cannot be checked
3075 * here.
3076 */
3077 if (m->object != NULL && (m->oflags & VPO_BUSY) == 0)
3078 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
3079 }
3080 #endif
3081
3082 #include "opt_ddb.h"
3083 #ifdef DDB
3084 #include <sys/kernel.h>
3085
3086 #include <ddb/ddb.h>
3087
3088 DB_SHOW_COMMAND(page, vm_page_print_page_info)
3089 {
3090 db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
3091 db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
3092 db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
3093 db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
3094 db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
3095 db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
3096 db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
3097 db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
3098 db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
3099 db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
3100 }
3101
3102 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
3103 {
3104
3105 db_printf("PQ_FREE:");
3106 db_printf(" %d", cnt.v_free_count);
3107 db_printf("\n");
3108
3109 db_printf("PQ_CACHE:");
3110 db_printf(" %d", cnt.v_cache_count);
3111 db_printf("\n");
3112
3113 db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
3114 *vm_page_queues[PQ_ACTIVE].cnt,
3115 *vm_page_queues[PQ_INACTIVE].cnt);
3116 }
3117
3118 DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo)
3119 {
3120 vm_page_t m;
3121 boolean_t phys;
3122
3123 if (!have_addr) {
3124 db_printf("show pginfo addr\n");
3125 return;
3126 }
3127
3128 phys = strchr(modif, 'p') != NULL;
3129 if (phys)
3130 m = PHYS_TO_VM_PAGE(addr);
3131 else
3132 m = (vm_page_t)addr;
3133 db_printf(
3134 "page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n"
3135 " af 0x%x of 0x%x f 0x%x act %d busy %d valid 0x%x dirty 0x%x\n",
3136 m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr,
3137 m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags,
3138 m->flags, m->act_count, m->busy, m->valid, m->dirty);
3139 }
3140 #endif /* DDB */