sys/vm/vm_page.c
1 /*
2 * (MPSAFE)
3 *
4 * Copyright (c) 1991 Regents of the University of California.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * The Mach Operating System project at Carnegie-Mellon University.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91
35 * $FreeBSD: src/sys/vm/vm_page.c,v 1.147.2.18 2002/03/10 05:03:19 alc Exp $
36 */
37
38 /*
39 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64 /*
65 * Resident memory management module. The module manipulates 'VM pages'.
66 * A VM page is the core building block for memory management.
67 */
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/malloc.h>
72 #include <sys/proc.h>
73 #include <sys/vmmeter.h>
74 #include <sys/vnode.h>
75 #include <sys/kernel.h>
76 #include <sys/alist.h>
77 #include <sys/sysctl.h>
78
79 #include <vm/vm.h>
80 #include <vm/vm_param.h>
81 #include <sys/lock.h>
82 #include <vm/vm_kern.h>
83 #include <vm/pmap.h>
84 #include <vm/vm_map.h>
85 #include <vm/vm_object.h>
86 #include <vm/vm_page.h>
87 #include <vm/vm_pageout.h>
88 #include <vm/vm_pager.h>
89 #include <vm/vm_extern.h>
90 #include <vm/swap_pager.h>
91
92 #include <machine/inttypes.h>
93 #include <machine/md_var.h>
94 #include <machine/specialreg.h>
95
96 #include <vm/vm_page2.h>
97 #include <sys/spinlock2.h>
98
99 #define VMACTION_HSIZE 256
100 #define VMACTION_HMASK (VMACTION_HSIZE - 1)
101
102 static void vm_page_queue_init(void);
103 static void vm_page_free_wakeup(void);
104 static vm_page_t vm_page_select_cache(u_short pg_color);
105 static vm_page_t _vm_page_list_find2(int basequeue, int index);
106 static void _vm_page_deactivate_locked(vm_page_t m, int athead);
107
108 /*
109 * Array of tailq lists
110 */
111 __cachealign struct vpgqueues vm_page_queues[PQ_COUNT];
112
113 LIST_HEAD(vm_page_action_list, vm_page_action);
114 struct vm_page_action_list action_list[VMACTION_HSIZE];
115 static volatile int vm_pages_waiting;
116
117 static struct alist vm_contig_alist;
118 static struct almeta vm_contig_ameta[ALIST_RECORDS_65536];
119 static struct spinlock vm_contig_spin = SPINLOCK_INITIALIZER(&vm_contig_spin);
120
121 static u_long vm_dma_reserved = 0;
122 TUNABLE_ULONG("vm.dma_reserved", &vm_dma_reserved);
123 SYSCTL_ULONG(_vm, OID_AUTO, dma_reserved, CTLFLAG_RD, &vm_dma_reserved, 0,
124 "Memory reserved for DMA");
125 SYSCTL_UINT(_vm, OID_AUTO, dma_free_pages, CTLFLAG_RD,
126 &vm_contig_alist.bl_free, 0, "Memory reserved for DMA");
127
128 static int vm_contig_verbose = 0;
129 TUNABLE_INT("vm.contig_verbose", &vm_contig_verbose);
130
131 RB_GENERATE2(vm_page_rb_tree, vm_page, rb_entry, rb_vm_page_compare,
132 vm_pindex_t, pindex);
133
134 static void
135 vm_page_queue_init(void)
136 {
137 int i;
138
139 for (i = 0; i < PQ_L2_SIZE; i++)
140 vm_page_queues[PQ_FREE+i].cnt = &vmstats.v_free_count;
141 for (i = 0; i < PQ_L2_SIZE; i++)
142 vm_page_queues[PQ_CACHE+i].cnt = &vmstats.v_cache_count;
143 for (i = 0; i < PQ_L2_SIZE; i++)
144 vm_page_queues[PQ_INACTIVE+i].cnt = &vmstats.v_inactive_count;
145 for (i = 0; i < PQ_L2_SIZE; i++)
146 vm_page_queues[PQ_ACTIVE+i].cnt = &vmstats.v_active_count;
147 for (i = 0; i < PQ_L2_SIZE; i++)
148 vm_page_queues[PQ_HOLD+i].cnt = &vmstats.v_active_count;
149 /* PQ_NONE has no queue */
150
151 for (i = 0; i < PQ_COUNT; i++) {
152 TAILQ_INIT(&vm_page_queues[i].pl);
153 spin_init(&vm_page_queues[i].spin);
154 }
155
156 for (i = 0; i < VMACTION_HSIZE; i++)
157 LIST_INIT(&action_list[i]);
158 }
159
160 /*
161 * note: place in initialized data section? Is this necessary?
162 */
163 long first_page = 0;
164 int vm_page_array_size = 0;
165 int vm_page_zero_count = 0;
166 vm_page_t vm_page_array = NULL;
167 vm_paddr_t vm_low_phys_reserved;
168
169 /*
170 * (low level boot)
171 *
172 * Sets the page size, perhaps based upon the memory size.
173 * Must be called before any use of page-size dependent functions.
174 */
175 void
176 vm_set_page_size(void)
177 {
178 if (vmstats.v_page_size == 0)
179 vmstats.v_page_size = PAGE_SIZE;
180 if (((vmstats.v_page_size - 1) & vmstats.v_page_size) != 0)
181 panic("vm_set_page_size: page size not a power of two");
182 }
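/*
 * For example, with v_page_size == 4096 (0x1000) the check above computes
 * 0x0fff & 0x1000 == 0 and passes; a non-power-of-two such as 4097 would
 * leave overlapping bits set and trigger the panic.
 */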
183
184 /*
185 * (low level boot)
186 *
187 * Add a new page to the freelist for use by the system. New pages
188 * are added to both the head and tail of the associated free page
189 * queue in a bottom-up fashion, so both zero'd and non-zero'd page
190 * requests pull 'recent' adds (higher physical addresses) first.
191 *
192 * Beware that the page zeroing daemon will also be running soon after
193 * boot, moving pages from the head to the tail of the PQ_FREE queues.
194 *
195 * Must be called in a critical section.
196 */
197 static void
198 vm_add_new_page(vm_paddr_t pa)
199 {
200 struct vpgqueues *vpq;
201 vm_page_t m;
202
203 m = PHYS_TO_VM_PAGE(pa);
204 m->phys_addr = pa;
205 m->flags = 0;
206 m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
207 m->pat_mode = PAT_WRITE_BACK;
208 /*
209 * Twist for cpu localization in addition to page coloring, so
210 * different cpus selecting by m->queue get different page colors.
211 */
212 m->pc ^= ((pa >> PAGE_SHIFT) / PQ_L2_SIZE) & PQ_L2_MASK;
213 m->pc ^= ((pa >> PAGE_SHIFT) / (PQ_L2_SIZE * PQ_L2_SIZE)) & PQ_L2_MASK;
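/*
 * Illustration only, assuming PQ_L2_SIZE == 256 for the sake of the
 * numbers: two pages whose page frame numbers differ by exactly
 * PQ_L2_SIZE share the same base color, but the first XOR term above
 * differs by one, so they are generally spread across different free
 * queues instead of piling onto the same one.
 */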
214 /*
215 * Reserve a certain number of contiguous low memory pages for
216 * contigmalloc() to use.
217 */
218 if (pa < vm_low_phys_reserved) {
219 atomic_add_int(&vmstats.v_page_count, 1);
220 atomic_add_int(&vmstats.v_dma_pages, 1);
221 m->queue = PQ_NONE;
222 m->wire_count = 1;
223 atomic_add_int(&vmstats.v_wire_count, 1);
224 alist_free(&vm_contig_alist, pa >> PAGE_SHIFT, 1);
225 return;
226 }
227
228 /*
229 * General page
230 */
231 m->queue = m->pc + PQ_FREE;
232 KKASSERT(m->dirty == 0);
233
234 atomic_add_int(&vmstats.v_page_count, 1);
235 atomic_add_int(&vmstats.v_free_count, 1);
236 vpq = &vm_page_queues[m->queue];
237 if ((vpq->flipflop & 15) == 0) {
238 pmap_zero_page(VM_PAGE_TO_PHYS(m));
239 m->flags |= PG_ZERO;
240 TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
241 atomic_add_int(&vm_page_zero_count, 1);
242 } else {
243 TAILQ_INSERT_HEAD(&vpq->pl, m, pageq);
244 }
245 ++vpq->flipflop;
246 ++vpq->lcnt;
247 }
248
249 /*
250 * (low level boot)
251 *
252 * Initializes the resident memory module.
253 *
254 * Preallocates memory for critical VM structures and arrays prior to
255 * kernel_map becoming available.
256 *
257 * Memory is allocated from (virtual2_start, virtual2_end) if available,
258 * otherwise memory is allocated from (virtual_start, virtual_end).
259 *
260 * On x86-64 (virtual_start, virtual_end) is only 2GB and may not be
261 * large enough to hold vm_page_array & other structures for machines with
262 * large amounts of ram, so we want to use virtual2* when available.
263 */
264 void
265 vm_page_startup(void)
266 {
267 vm_offset_t vaddr = virtual2_start ? virtual2_start : virtual_start;
268 vm_offset_t mapped;
269 vm_size_t npages;
270 vm_paddr_t page_range;
271 vm_paddr_t new_end;
272 int i;
273 vm_paddr_t pa;
274 int nblocks;
275 vm_paddr_t last_pa;
276 vm_paddr_t end;
277 vm_paddr_t biggestone, biggestsize;
278 vm_paddr_t total;
279
280 total = 0;
281 biggestsize = 0;
282 biggestone = 0;
283 nblocks = 0;
284 vaddr = round_page(vaddr);
285
286 for (i = 0; phys_avail[i + 1]; i += 2) {
287 phys_avail[i] = round_page64(phys_avail[i]);
288 phys_avail[i + 1] = trunc_page64(phys_avail[i + 1]);
289 }
290
291 for (i = 0; phys_avail[i + 1]; i += 2) {
292 vm_paddr_t size = phys_avail[i + 1] - phys_avail[i];
293
294 if (size > biggestsize) {
295 biggestone = i;
296 biggestsize = size;
297 }
298 ++nblocks;
299 total += size;
300 }
301
302 end = phys_avail[biggestone+1];
303 end = trunc_page(end);
304
305 /*
306 * Initialize the queue headers for the free queue, the active queue
307 * and the inactive queue.
308 */
309 vm_page_queue_init();
310
311 #if !defined(_KERNEL_VIRTUAL)
312 /*
313 * VKERNELs don't support minidumps and as such don't need
314 * vm_page_dump
315 *
316 * Allocate a bitmap to indicate that a random physical page
317 * needs to be included in a minidump.
318 *
319 * The amd64 port needs this to indicate which direct map pages
320 * need to be dumped, via calls to dump_add_page()/dump_drop_page().
321 *
322 * However, i386 still needs this workspace internally within the
323 * minidump code. In theory, they are not needed on i386, but are
324 * included should the sf_buf code decide to use them.
325 */
326 page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;
327 vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY);
328 end -= vm_page_dump_size;
329 vm_page_dump = (void *)pmap_map(&vaddr, end, end + vm_page_dump_size,
330 VM_PROT_READ | VM_PROT_WRITE);
331 bzero((void *)vm_page_dump, vm_page_dump_size);
332 #endif
333 /*
334 * Compute the number of pages of memory that will be available for
335 * use (taking into account the overhead of a page structure per
336 * page).
337 */
338 first_page = phys_avail[0] / PAGE_SIZE;
339 page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE - first_page;
340 npages = (total - (page_range * sizeof(struct vm_page))) / PAGE_SIZE;
341
342 #ifndef _KERNEL_VIRTUAL
343 /*
344 * (only applies to real kernels)
345 *
346 * Initialize the contiguous reserve map. We initially reserve up
347 * to 1/4 available physical memory or 65536 pages (~256MB), whichever
348 * is lower.
349 *
350 * Once device initialization is complete we return most of the
351 * reserved memory back to the normal page queues but leave some
352 * in reserve for things like usb attachments.
353 */
354 vm_low_phys_reserved = (vm_paddr_t)65536 << PAGE_SHIFT;
355 if (vm_low_phys_reserved > total / 4)
356 vm_low_phys_reserved = total / 4;
357 if (vm_dma_reserved == 0) {
358 vm_dma_reserved = 16 * 1024 * 1024; /* 16MB */
359 if (vm_dma_reserved > total / 16)
360 vm_dma_reserved = total / 16;
361 }
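/*
 * e.g. on a machine with 512MB of usable memory, total/4 == 128MB is
 * below the 256MB ceiling so 128MB is reserved here, while
 * vm_dma_reserved defaults to 16MB (under total/16 == 32MB), which is
 * what vm_page_startup_finish() will later keep in reserve.
 */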
362 #endif
363 alist_init(&vm_contig_alist, 65536, vm_contig_ameta,
364 ALIST_RECORDS_65536);
365
366 /*
367 * Initialize the mem entry structures now, and put them in the free
368 * queue.
369 */
370 new_end = trunc_page(end - page_range * sizeof(struct vm_page));
371 mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE);
372 vm_page_array = (vm_page_t)mapped;
373
374 #if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL)
375 /*
376 * since pmap_map on amd64 returns stuff out of a direct-map region,
377 * we have to manually add these pages to the minidump tracking so
378 * that they can be dumped, including the vm_page_array.
379 */
380 for (pa = new_end; pa < phys_avail[biggestone + 1]; pa += PAGE_SIZE)
381 dump_add_page(pa);
382 #endif
383
384 /*
385 * Clear all of the page structures
386 */
387 bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
388 vm_page_array_size = page_range;
389
390 /*
391 * Construct the free queue(s) in ascending order (by physical
392 * address) so that the first 16MB of physical memory is allocated
393 * last rather than first. On large-memory machines, this avoids
394 * the exhaustion of low physical memory before isa_dmainit has run.
395 */
396 vmstats.v_page_count = 0;
397 vmstats.v_free_count = 0;
398 for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
399 pa = phys_avail[i];
400 if (i == biggestone)
401 last_pa = new_end;
402 else
403 last_pa = phys_avail[i + 1];
404 while (pa < last_pa && npages-- > 0) {
405 vm_add_new_page(pa);
406 pa += PAGE_SIZE;
407 }
408 }
409 if (virtual2_start)
410 virtual2_start = vaddr;
411 else
412 virtual_start = vaddr;
413 }
414
415 /*
416 * We tended to reserve a ton of memory for contigmalloc(). Now that most
417 * drivers have initialized we want to return most of the remaining free
418 * reserve back to the VM page queues so they can be used for normal
419 * allocations.
420 *
421 * We leave vm_dma_reserved bytes worth of free pages in the reserve pool.
422 */
423 static void
424 vm_page_startup_finish(void *dummy __unused)
425 {
426 alist_blk_t blk;
427 alist_blk_t rblk;
428 alist_blk_t count;
429 alist_blk_t xcount;
430 alist_blk_t bfree;
431 vm_page_t m;
432
433 spin_lock(&vm_contig_spin);
434 for (;;) {
435 bfree = alist_free_info(&vm_contig_alist, &blk, &count);
436 if (bfree <= vm_dma_reserved / PAGE_SIZE)
437 break;
438 if (count == 0)
439 break;
440
441 /*
442 * Figure out how much of the initial reserve we have to
443 * free in order to reach our target.
444 */
445 bfree -= vm_dma_reserved / PAGE_SIZE;
446 if (count > bfree) {
447 blk += count - bfree;
448 count = bfree;
449 }
450
451 /*
452 * Calculate the nearest power of 2 <= count.
453 */
454 for (xcount = 1; xcount <= count; xcount <<= 1)
455 ;
456 xcount >>= 1;
457 blk += count - xcount;
458 count = xcount;
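/*
 * e.g. count == 1000: the loop exits with xcount == 1024, the shift
 * brings it back to 512, and blk advances by 488, so the 512 pages
 * returned to the normal queues below are the highest-addressed
 * power-of-2-sized tail of the run.
 */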
459
460 /*
461 * Allocate the pages from the alist, then free them to
462 * the normal VM page queues.
463 *
464 * Pages allocated from the alist are wired. We have to
465 * busy, unwire, and free them. We must also adjust
466 * vm_low_phys_reserved before freeing any pages to prevent
467 * confusion.
468 */
469 rblk = alist_alloc(&vm_contig_alist, blk, count);
470 if (rblk != blk) {
471 kprintf("vm_page_startup_finish: Unable to return "
472 "dma space @0x%08x/%d -> 0x%08x\n",
473 blk, count, rblk);
474 break;
475 }
476 atomic_add_int(&vmstats.v_dma_pages, -count);
477 spin_unlock(&vm_contig_spin);
478
479 m = PHYS_TO_VM_PAGE((vm_paddr_t)blk << PAGE_SHIFT);
480 vm_low_phys_reserved = VM_PAGE_TO_PHYS(m);
481 while (count) {
482 vm_page_busy_wait(m, FALSE, "cpgfr");
483 vm_page_unwire(m, 0);
484 vm_page_free(m);
485 --count;
486 ++m;
487 }
488 spin_lock(&vm_contig_spin);
489 }
490 spin_unlock(&vm_contig_spin);
491
492 /*
493 * Print out how much DMA space drivers have already allocated and
494 * how much is left over.
495 */
496 kprintf("DMA space used: %jdk, remaining available: %jdk\n",
497 (intmax_t)(vmstats.v_dma_pages - vm_contig_alist.bl_free) *
498 (PAGE_SIZE / 1024),
499 (intmax_t)vm_contig_alist.bl_free * (PAGE_SIZE / 1024));
500 }
501 SYSINIT(vm_pgend, SI_SUB_PROC0_POST, SI_ORDER_ANY,
502 vm_page_startup_finish, NULL)
503
504
505 /*
506 * Scan comparison function for Red-Black tree scans. An inclusive
507 * (start,end) is expected. Other fields are not used.
508 */
509 int
510 rb_vm_page_scancmp(struct vm_page *p, void *data)
511 {
512 struct rb_vm_page_scan_info *info = data;
513
514 if (p->pindex < info->start_pindex)
515 return(-1);
516 if (p->pindex > info->end_pindex)
517 return(1);
518 return(0);
519 }
520
521 int
522 rb_vm_page_compare(struct vm_page *p1, struct vm_page *p2)
523 {
524 if (p1->pindex < p2->pindex)
525 return(-1);
526 if (p1->pindex > p2->pindex)
527 return(1);
528 return(0);
529 }
530
531 void
532 vm_page_init(vm_page_t m)
533 {
534 /* do nothing for now. Called from pmap_page_init() */
535 }
536
537 /*
538 * Each page queue has its own spin lock, which is fairly optimal for
539 * allocating and freeing pages at least.
540 *
541 * The caller must hold the vm_page_spin_lock() before locking a vm_page's
542 * queue spinlock via this function. Also note that m->queue cannot change
543 * unless both the page and queue are locked.
544 */
545 static __inline
546 void
547 _vm_page_queue_spin_lock(vm_page_t m)
548 {
549 u_short queue;
550
551 queue = m->queue;
552 if (queue != PQ_NONE) {
553 spin_lock(&vm_page_queues[queue].spin);
554 KKASSERT(queue == m->queue);
555 }
556 }
557
558 static __inline
559 void
560 _vm_page_queue_spin_unlock(vm_page_t m)
561 {
562 u_short queue;
563
564 queue = m->queue;
565 cpu_ccfence();
566 if (queue != PQ_NONE)
567 spin_unlock(&vm_page_queues[queue].spin);
568 }
569
570 static __inline
571 void
572 _vm_page_queues_spin_lock(u_short queue)
573 {
574 cpu_ccfence();
575 if (queue != PQ_NONE)
576 spin_lock(&vm_page_queues[queue].spin);
577 }
578
579
580 static __inline
581 void
582 _vm_page_queues_spin_unlock(u_short queue)
583 {
584 cpu_ccfence();
585 if (queue != PQ_NONE)
586 spin_unlock(&vm_page_queues[queue].spin);
587 }
588
589 void
590 vm_page_queue_spin_lock(vm_page_t m)
591 {
592 _vm_page_queue_spin_lock(m);
593 }
594
595 void
596 vm_page_queues_spin_lock(u_short queue)
597 {
598 _vm_page_queues_spin_lock(queue);
599 }
600
601 void
602 vm_page_queue_spin_unlock(vm_page_t m)
603 {
604 _vm_page_queue_spin_unlock(m);
605 }
606
607 void
608 vm_page_queues_spin_unlock(u_short queue)
609 {
610 _vm_page_queues_spin_unlock(queue);
611 }
612
613 /*
614 * This locks the specified vm_page and its queue in the proper order
615 * (page first, then queue). The queue may change so the caller must
616 * recheck on return.
617 */
618 static __inline
619 void
620 _vm_page_and_queue_spin_lock(vm_page_t m)
621 {
622 vm_page_spin_lock(m);
623 _vm_page_queue_spin_lock(m);
624 }
625
626 static __inline
627 void
628 _vm_page_and_queue_spin_unlock(vm_page_t m)
629 {
630 _vm_page_queues_spin_unlock(m->queue);
631 vm_page_spin_unlock(m);
632 }
633
634 void
635 vm_page_and_queue_spin_unlock(vm_page_t m)
636 {
637 _vm_page_and_queue_spin_unlock(m);
638 }
639
640 void
641 vm_page_and_queue_spin_lock(vm_page_t m)
642 {
643 _vm_page_and_queue_spin_lock(m);
644 }
645
646 /*
647 * Helper function removes vm_page from its current queue.
648 * Returns the base queue the page used to be on.
649 *
650 * The vm_page and the queue must be spinlocked.
651 * This function will unlock the queue but leave the page spinlocked.
652 */
653 static __inline u_short
654 _vm_page_rem_queue_spinlocked(vm_page_t m)
655 {
656 struct vpgqueues *pq;
657 u_short queue;
658
659 queue = m->queue;
660 if (queue != PQ_NONE) {
661 pq = &vm_page_queues[queue];
662 TAILQ_REMOVE(&pq->pl, m, pageq);
663 atomic_add_int(pq->cnt, -1);
664 pq->lcnt--;
665 m->queue = PQ_NONE;
666 vm_page_queues_spin_unlock(queue);
667 if ((queue - m->pc) == PQ_FREE && (m->flags & PG_ZERO))
668 atomic_subtract_int(&vm_page_zero_count, 1);
669 if ((queue - m->pc) == PQ_CACHE || (queue - m->pc) == PQ_FREE)
670 return (queue - m->pc);
671 }
672 return queue;
673 }
674
675 /*
676 * Helper function places the vm_page on the specified queue.
677 *
678 * The vm_page must be spinlocked.
679 * This function will return with both the page and the queue locked.
680 */
681 static __inline void
682 _vm_page_add_queue_spinlocked(vm_page_t m, u_short queue, int athead)
683 {
684 struct vpgqueues *pq;
685
686 KKASSERT(m->queue == PQ_NONE);
687
688 if (queue != PQ_NONE) {
689 vm_page_queues_spin_lock(queue);
690 pq = &vm_page_queues[queue];
691 ++pq->lcnt;
692 atomic_add_int(pq->cnt, 1);
693 m->queue = queue;
694
695 /*
696 * Put zero'd pages on the end ( where we look for zero'd pages
697 * first ) and non-zero'd pages at the head.
698 */
699 if (queue - m->pc == PQ_FREE) {
700 if (m->flags & PG_ZERO) {
701 TAILQ_INSERT_TAIL(&pq->pl, m, pageq);
702 atomic_add_int(&vm_page_zero_count, 1);
703 } else {
704 TAILQ_INSERT_HEAD(&pq->pl, m, pageq);
705 }
706 } else if (athead) {
707 TAILQ_INSERT_HEAD(&pq->pl, m, pageq);
708 } else {
709 TAILQ_INSERT_TAIL(&pq->pl, m, pageq);
710 }
711 /* leave the queue spinlocked */
712 }
713 }
714
715 /*
716 * Wait until page is no longer PG_BUSY or (if also_m_busy is TRUE)
717 * m->busy is zero. Only one sleep call will be made before
718 * returning.
719 *
720 * This function does NOT busy the page and on return the page is not
721 * guaranteed to be available.
722 */
723 void
724 vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg)
725 {
726 u_int32_t flags;
727
728 for (;;) {
729 flags = m->flags;
730 cpu_ccfence();
731
732 if ((flags & PG_BUSY) == 0 &&
733 (also_m_busy == 0 || (flags & PG_SBUSY) == 0)) {
734 break;
735 }
736 tsleep_interlock(m, 0);
737 if (atomic_cmpset_int(&m->flags, flags,
738 flags | PG_WANTED | PG_REFERENCED)) {
739 tsleep(m, PINTERLOCKED, msg, 0);
740 break;
741 }
742 }
743 }
744
745 /*
746 * Wait until PG_BUSY can be set, then set it. If also_m_busy is TRUE we
747 * also wait for m->busy to become 0 before setting PG_BUSY.
748 */
749 void
750 VM_PAGE_DEBUG_EXT(vm_page_busy_wait)(vm_page_t m,
751 int also_m_busy, const char *msg
752 VM_PAGE_DEBUG_ARGS)
753 {
754 u_int32_t flags;
755
756 for (;;) {
757 flags = m->flags;
758 cpu_ccfence();
759 if (flags & PG_BUSY) {
760 tsleep_interlock(m, 0);
761 if (atomic_cmpset_int(&m->flags, flags,
762 flags | PG_WANTED | PG_REFERENCED)) {
763 tsleep(m, PINTERLOCKED, msg, 0);
764 }
765 } else if (also_m_busy && (flags & PG_SBUSY)) {
766 tsleep_interlock(m, 0);
767 if (atomic_cmpset_int(&m->flags, flags,
768 flags | PG_WANTED | PG_REFERENCED)) {
769 tsleep(m, PINTERLOCKED, msg, 0);
770 }
771 } else {
772 if (atomic_cmpset_int(&m->flags, flags,
773 flags | PG_BUSY)) {
774 #ifdef VM_PAGE_DEBUG
775 m->busy_func = func;
776 m->busy_line = lineno;
777 #endif
778 break;
779 }
780 }
781 }
782 }
783
784 /*
785 * Attempt to set PG_BUSY. If also_m_busy is TRUE we only succeed if m->busy
786 * is also 0.
787 *
788 * Returns non-zero on failure.
789 */
790 int
791 VM_PAGE_DEBUG_EXT(vm_page_busy_try)(vm_page_t m, int also_m_busy
792 VM_PAGE_DEBUG_ARGS)
793 {
794 u_int32_t flags;
795
796 for (;;) {
797 flags = m->flags;
798 cpu_ccfence();
799 if (flags & PG_BUSY)
800 return TRUE;
801 if (also_m_busy && (flags & PG_SBUSY))
802 return TRUE;
803 if (atomic_cmpset_int(&m->flags, flags, flags | PG_BUSY)) {
804 #ifdef VM_PAGE_DEBUG
805 m->busy_func = func;
806 m->busy_line = lineno;
807 #endif
808 return FALSE;
809 }
810 }
811 }
812
813 /*
814 * Clear the PG_BUSY flag and return non-zero to indicate to the caller
815 * that a wakeup() should be performed.
816 *
817 * The vm_page must be spinlocked and will remain spinlocked on return.
818 * The related queue must NOT be spinlocked (which could deadlock us).
819 *
820 * (inline version)
821 */
822 static __inline
823 int
824 _vm_page_wakeup(vm_page_t m)
825 {
826 u_int32_t flags;
827
828 for (;;) {
829 flags = m->flags;
830 cpu_ccfence();
831 if (atomic_cmpset_int(&m->flags, flags,
832 flags & ~(PG_BUSY | PG_WANTED))) {
833 break;
834 }
835 }
836 return(flags & PG_WANTED);
837 }
838
839 /*
840 * Clear the PG_BUSY flag and wakeup anyone waiting for the page. This
841 * is typically the last call you make on a page before moving onto
842 * other things.
843 */
844 void
845 vm_page_wakeup(vm_page_t m)
846 {
847 KASSERT(m->flags & PG_BUSY, ("vm_page_wakeup: page not busy!!!"));
848 vm_page_spin_lock(m);
849 if (_vm_page_wakeup(m)) {
850 vm_page_spin_unlock(m);
851 wakeup(m);
852 } else {
853 vm_page_spin_unlock(m);
854 }
855 }
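/*
 * The usual busy/unbusy pairing used throughout this file looks like the
 * following sketch ("xmpl" is just an arbitrary sleep message):
 *
 *	vm_page_busy_wait(m, TRUE, "xmpl");
 *	...page is exclusively busied, safe to manipulate...
 *	vm_page_wakeup(m);
 */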
856
857 /*
858 * Holding a page keeps it from being reused. Other parts of the system
859 * can still disassociate the page from its current object and free it, or
860 * perform read or write I/O on it and/or otherwise manipulate the page,
861 * but if the page is held the VM system will leave the page and its data
862 * intact and not reuse the page for other purposes until the last hold
863 * reference is released. (see vm_page_wire() if you want to prevent the
864 * page from being disassociated from its object too).
865 *
866 * The caller must still validate the contents of the page and, if necessary,
867 * wait for any pending I/O (e.g. vm_page_sleep_busy() loop) to complete
868 * before manipulating the page.
869 *
870 * XXX get vm_page_spin_lock() here and move FREE->HOLD if necessary
871 */
872 void
873 vm_page_hold(vm_page_t m)
874 {
875 vm_page_spin_lock(m);
876 atomic_add_int(&m->hold_count, 1);
877 if (m->queue - m->pc == PQ_FREE) {
878 _vm_page_queue_spin_lock(m);
879 _vm_page_rem_queue_spinlocked(m);
880 _vm_page_add_queue_spinlocked(m, PQ_HOLD + m->pc, 0);
881 _vm_page_queue_spin_unlock(m);
882 }
883 vm_page_spin_unlock(m);
884 }
885
886 /*
887 * The opposite of vm_page_hold(). A page can be freed while being held,
888 * which places it on the PQ_HOLD queue. If we are able to busy the page
889 * after the hold count drops to zero we will move the page to the
890 * appropriate PQ_FREE queue by calling vm_page_free_toq().
891 */
892 void
893 vm_page_unhold(vm_page_t m)
894 {
895 vm_page_spin_lock(m);
896 atomic_add_int(&m->hold_count, -1);
897 if (m->hold_count == 0 && m->queue - m->pc == PQ_HOLD) {
898 _vm_page_queue_spin_lock(m);
899 _vm_page_rem_queue_spinlocked(m);
900 _vm_page_add_queue_spinlocked(m, PQ_FREE + m->pc, 0);
901 _vm_page_queue_spin_unlock(m);
902 }
903 vm_page_spin_unlock(m);
904 }
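/*
 * Typical hold/unhold pattern (sketch): keep the page's identity and data
 * stable across a blocking operation without busying it:
 *
 *	vm_page_hold(m);
 *	...block, e.g. wait out pending I/O via vm_page_sleep_busy()...
 *	vm_page_unhold(m);
 */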
905
906 /*
907 * vm_page_initfake:
908 *
909 * Create a fictitious page with the specified physical address and
910 * memory attribute. The memory attribute is the only machine-
911 * dependent aspect of a fictitious page that must be initialized.
912 */
913
914 void
915 vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
916 {
917
918 if ((m->flags & PG_FICTITIOUS) != 0) {
919 /*
920 * The page's memattr might have changed since the
921 * previous initialization. Update the pmap to the
922 * new memattr.
923 */
924 goto memattr;
925 }
926 m->phys_addr = paddr;
927 m->queue = PQ_NONE;
928 /* Fictitious pages don't use "segind". */
929 /* Fictitious pages don't use "order" or "pool". */
930 m->flags = PG_FICTITIOUS | PG_UNMANAGED | PG_BUSY;
931 m->wire_count = 1;
932 pmap_page_init(m);
933 memattr:
934 pmap_page_set_memattr(m, memattr);
935 }
936
937 /*
938 * Inserts the given vm_page into the object and object list.
939 *
940 * The pagetables are not updated but will presumably fault the page
941 * in if necessary, or if a kernel page the caller will at some point
942 * enter the page into the kernel's pmap. We are not allowed to block
943 * here so we *can't* do this anyway.
944 *
945 * This routine may not block.
946 * This routine must be called with the vm_object held.
947 * This routine must be called with a critical section held.
948 *
949 * This routine returns TRUE if the page was inserted into the object
950 * successfully, and FALSE if the page already exists in the object.
951 */
952 int
953 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
954 {
955 ASSERT_LWKT_TOKEN_HELD_EXCL(vm_object_token(object));
956 if (m->object != NULL)
957 panic("vm_page_insert: already inserted");
958
959 object->generation++;
960
961 /*
962 * Record the object/offset pair in this page and add the
963 * pv_list_count of the page to the object.
964 *
965 * The vm_page spin lock is required for interactions with the pmap.
966 */
967 vm_page_spin_lock(m);
968 m->object = object;
969 m->pindex = pindex;
970 if (vm_page_rb_tree_RB_INSERT(&object->rb_memq, m)) {
971 m->object = NULL;
972 m->pindex = 0;
973 vm_page_spin_unlock(m);
974 return FALSE;
975 }
976 ++object->resident_page_count;
977 ++mycpu->gd_vmtotal.t_rm;
978 /* atomic_add_int(&object->agg_pv_list_count, m->md.pv_list_count); */
979 vm_page_spin_unlock(m);
980
981 /*
982 * Since we are inserting a new and possibly dirty page,
983 * update the object's OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY flags.
984 */
985 if ((m->valid & m->dirty) ||
986 (m->flags & (PG_WRITEABLE | PG_NEED_COMMIT)))
987 vm_object_set_writeable_dirty(object);
988
989 /*
990 * Checks for a swap assignment and sets PG_SWAPPED if appropriate.
991 */
992 swap_pager_page_inserted(m);
993 return TRUE;
994 }
995
996 /*
997 * Removes the given vm_page_t from the (object,index) table
998 *
999 * The underlying pmap entry (if any) is NOT removed here.
1000 * This routine may not block.
1001 *
1002 * The page must be BUSY and will remain BUSY on return.
1003 * No other requirements.
1004 *
1005 * NOTE: FreeBSD side effect was to unbusy the page on return. We leave
1006 * it busy.
1007 */
1008 void
1009 vm_page_remove(vm_page_t m)
1010 {
1011 vm_object_t object;
1012
1013 if (m->object == NULL) {
1014 return;
1015 }
1016
1017 if ((m->flags & PG_BUSY) == 0)
1018 panic("vm_page_remove: page not busy");
1019
1020 object = m->object;
1021
1022 vm_object_hold(object);
1023
1024 /*
1025 * Remove the page from the object and update the object.
1026 *
1027 * The vm_page spin lock is required for interactions with the pmap.
1028 */
1029 vm_page_spin_lock(m);
1030 vm_page_rb_tree_RB_REMOVE(&object->rb_memq, m);
1031 --object->resident_page_count;
1032 --mycpu->gd_vmtotal.t_rm;
1033 /* atomic_add_int(&object->agg_pv_list_count, -m->md.pv_list_count); */
1034 m->object = NULL;
1035 vm_page_spin_unlock(m);
1036
1037 object->generation++;
1038
1039 vm_object_drop(object);
1040 }
1041
1042 /*
1043 * Locate and return the page at (object, pindex), or NULL if the
1044 * page could not be found.
1045 *
1046 * The caller must hold the vm_object token.
1047 */
1048 vm_page_t
1049 vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
1050 {
1051 vm_page_t m;
1052
1053 /*
1054 * Search the hash table for this object/offset pair
1055 */
1056 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
1057 m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex);
1058 KKASSERT(m == NULL || (m->object == object && m->pindex == pindex));
1059 return(m);
1060 }
1061
1062 vm_page_t
1063 VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_wait)(struct vm_object *object,
1064 vm_pindex_t pindex,
1065 int also_m_busy, const char *msg
1066 VM_PAGE_DEBUG_ARGS)
1067 {
1068 u_int32_t flags;
1069 vm_page_t m;
1070
1071 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
1072 m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex);
1073 while (m) {
1074 KKASSERT(m->object == object && m->pindex == pindex);
1075 flags = m->flags;
1076 cpu_ccfence();
1077 if (flags & PG_BUSY) {
1078 tsleep_interlock(m, 0);
1079 if (atomic_cmpset_int(&m->flags, flags,
1080 flags | PG_WANTED | PG_REFERENCED)) {
1081 tsleep(m, PINTERLOCKED, msg, 0);
1082 m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq,
1083 pindex);
1084 }
1085 } else if (also_m_busy && (flags & PG_SBUSY)) {
1086 tsleep_interlock(m, 0);
1087 if (atomic_cmpset_int(&m->flags, flags,
1088 flags | PG_WANTED | PG_REFERENCED)) {
1089 tsleep(m, PINTERLOCKED, msg, 0);
1090 m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq,
1091 pindex);
1092 }
1093 } else if (atomic_cmpset_int(&m->flags, flags,
1094 flags | PG_BUSY)) {
1095 #ifdef VM_PAGE_DEBUG
1096 m->busy_func = func;
1097 m->busy_line = lineno;
1098 #endif
1099 break;
1100 }
1101 }
1102 return m;
1103 }
1104
1105 /*
1106 * Attempt to lookup and busy a page.
1107 *
1108 * Returns NULL if the page could not be found
1109 *
1110 * Returns a vm_page and error == TRUE if the page exists but could not
1111 * be busied.
1112 *
1113 * Returns a vm_page and error == FALSE on success.
1114 */
1115 vm_page_t
1116 VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_try)(struct vm_object *object,
1117 vm_pindex_t pindex,
1118 int also_m_busy, int *errorp
1119 VM_PAGE_DEBUG_ARGS)
1120 {
1121 u_int32_t flags;
1122 vm_page_t m;
1123
1124 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
1125 m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex);
1126 *errorp = FALSE;
1127 while (m) {
1128 KKASSERT(m->object == object && m->pindex == pindex);
1129 flags = m->flags;
1130 cpu_ccfence();
1131 if (flags & PG_BUSY) {
1132 *errorp = TRUE;
1133 break;
1134 }
1135 if (also_m_busy && (flags & PG_SBUSY)) {
1136 *errorp = TRUE;
1137 break;
1138 }
1139 if (atomic_cmpset_int(&m->flags, flags, flags | PG_BUSY)) {
1140 #ifdef VM_PAGE_DEBUG
1141 m->busy_func = func;
1142 m->busy_line = lineno;
1143 #endif
1144 break;
1145 }
1146 }
1147 return m;
1148 }
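/*
 * Callers usually handle the busy case by sleeping and retrying; a sketch:
 *
 *	m = vm_page_lookup_busy_try(object, pindex, TRUE, &error);
 *	if (m && error) {
 *		...page exists but is busy, sleep (e.g. via
 *		   vm_page_sleep_busy()) and retry the lookup...
 *	}
 */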
1149
1150 /*
1151 * Caller must hold the related vm_object
1152 */
1153 vm_page_t
1154 vm_page_next(vm_page_t m)
1155 {
1156 vm_page_t next;
1157
1158 next = vm_page_rb_tree_RB_NEXT(m);
1159 if (next && next->pindex != m->pindex + 1)
1160 next = NULL;
1161 return (next);
1162 }
1163
1164 /*
1165 * vm_page_rename()
1166 *
1167 * Move the given vm_page from its current object to the specified
1168 * target object/offset. The page must be busy and will remain so
1169 * on return.
1170 *
1171 * new_object must be held.
1172 * This routine might block. XXX ?
1173 *
1174 * NOTE: Swap associated with the page must be invalidated by the move. We
1175 * have to do this for several reasons: (1) we aren't freeing the
1176 * page, (2) we are dirtying the page, (3) the VM system is probably
1177 * moving the page from object A to B, and will then later move
1178 * the backing store from A to B and we can't have a conflict.
1179 *
1180 * NOTE: We *always* dirty the page. It is necessary both for the
1181 * fact that we moved it, and because we may be invalidating
1182 * swap. If the page is on the cache, we have to deactivate it
1183 * or vm_page_dirty() will panic. Dirty pages are not allowed
1184 * on the cache.
1185 */
1186 void
1187 vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
1188 {
1189 KKASSERT(m->flags & PG_BUSY);
1190 ASSERT_LWKT_TOKEN_HELD_EXCL(vm_object_token(new_object));
1191 if (m->object) {
1192 ASSERT_LWKT_TOKEN_HELD_EXCL(vm_object_token(m->object));
1193 vm_page_remove(m);
1194 }
1195 if (vm_page_insert(m, new_object, new_pindex) == FALSE) {
1196 panic("vm_page_rename: target exists (%p,%"PRIu64")",
1197 new_object, new_pindex);
1198 }
1199 if (m->queue - m->pc == PQ_CACHE)
1200 vm_page_deactivate(m);
1201 vm_page_dirty(m);
1202 }
1203
1204 /*
1205 * vm_page_unqueue() without any wakeup. This routine is used when a page
1206 * is being moved between queues or otherwise is to remain BUSY'd by the
1207 * caller.
1208 *
1209 * This routine may not block.
1210 */
1211 void
1212 vm_page_unqueue_nowakeup(vm_page_t m)
1213 {
1214 vm_page_and_queue_spin_lock(m);
1215 (void)_vm_page_rem_queue_spinlocked(m);
1216 vm_page_spin_unlock(m);
1217 }
1218
1219 /*
1220 * vm_page_unqueue() - Remove a page from its queue, wakeup the pagedaemon
1221 * if necessary.
1222 *
1223 * This routine may not block.
1224 */
1225 void
1226 vm_page_unqueue(vm_page_t m)
1227 {
1228 u_short queue;
1229
1230 vm_page_and_queue_spin_lock(m);
1231 queue = _vm_page_rem_queue_spinlocked(m);
1232 if (queue == PQ_FREE || queue == PQ_CACHE) {
1233 vm_page_spin_unlock(m);
1234 pagedaemon_wakeup();
1235 } else {
1236 vm_page_spin_unlock(m);
1237 }
1238 }
1239
1240 /*
1241 * vm_page_list_find()
1242 *
1243 * Find a page on the specified queue with color optimization.
1244 *
1245 * The page coloring optimization attempts to locate a page that does
1246 * not overload other nearby pages in the object in the cpu's L1 or L2
1247 * caches. We need this optimization because cpu caches tend to be
1248 * physical caches, while object spaces tend to be virtual.
1249 *
1250 * On MP systems each PQ_FREE and PQ_CACHE color queue has its own spinlock
1251 * and the algorithm is adjusted to localize allocations on a per-core basis.
1252 * This is done by 'twisting' the colors.
1253 *
1254 * The page is returned spinlocked and removed from its queue (it will
1255 * be on PQ_NONE), or NULL. The page is not PG_BUSY'd. The caller
1256 * is responsible for dealing with the busy-page case (usually by
1257 * deactivating the page and looping).
1258 *
1259 * NOTE: This routine is carefully inlined. A non-inlined version
1260 * is available for outside callers but the only critical path is
1261 * from within this source file.
1262 *
1263 * NOTE: This routine assumes that the vm_pages found in PQ_CACHE and PQ_FREE
1264 * represent stable storage, allowing us to order our locks vm_page
1265 * first, then queue.
1266 */
1267 static __inline
1268 vm_page_t
1269 _vm_page_list_find(int basequeue, int index, boolean_t prefer_zero)
1270 {
1271 vm_page_t m;
1272
1273 for (;;) {
1274 if (prefer_zero)
1275 m = TAILQ_LAST(&vm_page_queues[basequeue+index].pl, pglist);
1276 else
1277 m = TAILQ_FIRST(&vm_page_queues[basequeue+index].pl);
1278 if (m == NULL) {
1279 m = _vm_page_list_find2(basequeue, index);
1280 return(m);
1281 }
1282 vm_page_and_queue_spin_lock(m);
1283 if (m->queue == basequeue + index) {
1284 _vm_page_rem_queue_spinlocked(m);
1285 /* vm_page_t spin held, no queue spin */
1286 break;
1287 }
1288 vm_page_and_queue_spin_unlock(m);
1289 }
1290 return(m);
1291 }
1292
1293 static vm_page_t
1294 _vm_page_list_find2(int basequeue, int index)
1295 {
1296 int i;
1297 vm_page_t m = NULL;
1298 struct vpgqueues *pq;
1299
1300 pq = &vm_page_queues[basequeue];
1301
1302 /*
1303 * Note that for the first loop, index+i and index-i wind up at the
1304 * same place. Even though this is not totally optimal, we've already
1305 * blown it by missing the cache case so we do not care.
1306 */
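/*
 * e.g. with index == 10 the first probe (largest i) lands on the color
 * diametrically opposite 10 in the ring, and successive iterations step
 * inward until i == 1 probes colors 11 and 9.
 */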
1307 for (i = PQ_L2_SIZE / 2; i > 0; --i) {
1308 for (;;) {
1309 m = TAILQ_FIRST(&pq[(index + i) & PQ_L2_MASK].pl);
1310 if (m) {
1311 _vm_page_and_queue_spin_lock(m);
1312 if (m->queue ==
1313 basequeue + ((index + i) & PQ_L2_MASK)) {
1314 _vm_page_rem_queue_spinlocked(m);
1315 return(m);
1316 }
1317 _vm_page_and_queue_spin_unlock(m);
1318 continue;
1319 }
1320 m = TAILQ_FIRST(&pq[(index - i) & PQ_L2_MASK].pl);
1321 if (m) {
1322 _vm_page_and_queue_spin_lock(m);
1323 if (m->queue ==
1324 basequeue + ((index - i) & PQ_L2_MASK)) {
1325 _vm_page_rem_queue_spinlocked(m);
1326 return(m);
1327 }
1328 _vm_page_and_queue_spin_unlock(m);
1329 continue;
1330 }
1331 break; /* next i */
1332 }
1333 }
1334 return(m);
1335 }
1336
1337 /*
1338 * Returns a vm_page candidate for allocation. The page is not busied so
1339 * it can move around. The caller must busy the page (and typically
1340 * deactivate it if it cannot be busied!)
1341 *
1342 * Returns a spinlocked vm_page that has been removed from its queue.
1343 */
1344 vm_page_t
1345 vm_page_list_find(int basequeue, int index, boolean_t prefer_zero)
1346 {
1347 return(_vm_page_list_find(basequeue, index, prefer_zero));
1348 }
1349
1350 /*
1351 * Find a page on the cache queue with color optimization, remove it
1352 * from the queue, and busy it. The returned page will not be spinlocked.
1353 *
1354 * A candidate can fail, for example because it is busied by someone
1355 * else, in which case it is deactivated and the scan continues.
1356 *
1357 * This routine may not block.
1358 *
1359 */
1360 static vm_page_t
1361 vm_page_select_cache(u_short pg_color)
1362 {
1363 vm_page_t m;
1364
1365 for (;;) {
1366 m = _vm_page_list_find(PQ_CACHE, pg_color & PQ_L2_MASK, FALSE);
1367 if (m == NULL)
1368 break;
1369 /*
1370 * (m) has been removed from its queue and spinlocked
1371 */
1372 if (vm_page_busy_try(m, TRUE)) {
1373 _vm_page_deactivate_locked(m, 0);
1374 vm_page_spin_unlock(m);
1375 } else {
1376 /*
1377 * We successfully busied the page
1378 */
1379 if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0 &&
1380 m->hold_count == 0 &&
1381 m->wire_count == 0 &&
1382 (m->dirty & m->valid) == 0) {
1383 vm_page_spin_unlock(m);
1384 pagedaemon_wakeup();
1385 return(m);
1386 }
1387
1388 /*
1389 * The page cannot be recycled, deactivate it.
1390 */
1391 _vm_page_deactivate_locked(m, 0);
1392 if (_vm_page_wakeup(m)) {
1393 vm_page_spin_unlock(m);
1394 wakeup(m);
1395 } else {
1396 vm_page_spin_unlock(m);
1397 }
1398 }
1399 }
1400 return (m);
1401 }
1402
1403 /*
1404 * Find a free or zero page, with specified preference. We attempt to
1405 * inline the nominal case and fall back to _vm_page_select_free()
1406 * otherwise. A busied page is removed from the queue and returned.
1407 *
1408 * This routine may not block.
1409 */
1410 static __inline vm_page_t
1411 vm_page_select_free(u_short pg_color, boolean_t prefer_zero)
1412 {
1413 vm_page_t m;
1414
1415 for (;;) {
1416 m = _vm_page_list_find(PQ_FREE, pg_color & PQ_L2_MASK,
1417 prefer_zero);
1418 if (m == NULL)
1419 break;
1420 if (vm_page_busy_try(m, TRUE)) {
1421 /*
1422 * Various mechanisms such as a pmap_collect can
1423 * result in a busy page on the free queue. We
1424 * have to move the page out of the way so we can
1425 * retry the allocation. If the other thread is not
1426 * allocating the page then m->valid will remain 0 and
1427 * the pageout daemon will free the page later on.
1428 *
1429 * Since we could not busy the page, however, we
1430 * cannot make assumptions as to whether the page
1431 * will be allocated by the other thread or not,
1432 * so all we can do is deactivate it to move it out
1433 * of the way. In particular, if the other thread
1434 * wires the page it may wind up on the inactive
1435 * queue and the pageout daemon will have to deal
1436 * with that case too.
1437 */
1438 _vm_page_deactivate_locked(m, 0);
1439 vm_page_spin_unlock(m);
1440 } else {
1441 /*
1442 * Theoretically if we are able to busy the page
1443 * atomic with the queue removal (using the vm_page
1444 * lock) nobody else should be able to mess with the
1445 * page before us.
1446 */
1447 KKASSERT((m->flags & (PG_UNMANAGED |
1448 PG_NEED_COMMIT)) == 0);
1449 KKASSERT(m->hold_count == 0);
1450 KKASSERT(m->wire_count == 0);
1451 vm_page_spin_unlock(m);
1452 pagedaemon_wakeup();
1453
1454 /* return busied and removed page */
1455 return(m);
1456 }
1457 }
1458 return(m);
1459 }
1460
1461 /*
1462 * This implements a per-cpu cache of free, zero'd, ready-to-go pages.
1463 * The idea is to populate this cache prior to acquiring any locks so
1464 * we don't wind up potentially zeroing VM pages (under heavy loads) while
1465 * holding potentially contending locks.
1466 *
1467 * Note that we allocate the page uninserted into anything and use a pindex
1468 * of 0; vm_page_alloc() will effectively add gd_cpuid so these
1469 * allocations should wind up being uncontended. However, we still want
1470 * to rove across PQ_L2_SIZE.
1471 */
1472 void
1473 vm_page_pcpu_cache(void)
1474 {
1475 #if 0
1476 globaldata_t gd = mycpu;
1477 vm_page_t m;
1478
1479 if (gd->gd_vmpg_count < GD_MINVMPG) {
1480 crit_enter_gd(gd);
1481 while (gd->gd_vmpg_count < GD_MAXVMPG) {
1482 m = vm_page_alloc(NULL, ticks & ~ncpus2_mask,
1483 VM_ALLOC_NULL_OK | VM_ALLOC_NORMAL |
1484 VM_ALLOC_NULL_OK | VM_ALLOC_ZERO);
1485 if (gd->gd_vmpg_count < GD_MAXVMPG) {
1486 if ((m->flags & PG_ZERO) == 0) {
1487 pmap_zero_page(VM_PAGE_TO_PHYS(m));
1488 vm_page_flag_set(m, PG_ZERO);
1489 }
1490 gd->gd_vmpg_array[gd->gd_vmpg_count++] = m;
1491 } else {
1492 vm_page_free(m);
1493 }
1494 }
1495 crit_exit_gd(gd);
1496 }
1497 #endif
1498 }
1499
1500 /*
1501 * vm_page_alloc()
1502 *
1503 * Allocate and return a memory cell associated with this VM object/offset
1504 * pair. If object is NULL an unassociated page will be allocated.
1505 *
1506 * The returned page will be busied and removed from its queues. This
1507 * routine can block and may return NULL if a race occurs and the page
1508 * is found to already exist at the specified (object, pindex).
1509 *
1510 * VM_ALLOC_NORMAL allow use of cache pages, nominal free drain
1511 * VM_ALLOC_QUICK like normal but cannot use cache
1512 * VM_ALLOC_SYSTEM greater free drain
1513 * VM_ALLOC_INTERRUPT allow free list to be completely drained
1514 * VM_ALLOC_ZERO advisory request for pre-zero'd page only
1515 * VM_ALLOC_FORCE_ZERO advisory request for pre-zero'd page only
1516 * VM_ALLOC_NULL_OK ok to return NULL on insertion collision
1517 * (see vm_page_grab())
1518 * VM_ALLOC_USE_GD ok to use per-gd cache
1519 *
1520 * The object must be held if not NULL
1521 * This routine may not block
1522 *
1523 * Additional special handling is required when called from an interrupt
1524 * (VM_ALLOC_INTERRUPT). We are not allowed to mess with the page cache
1525 * in this case.
1526 */
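/*
 * Typical call pattern (sketch; 'obj' and 'pindex' stand in for the
 * caller's object/offset pair):
 *
 *	m = vm_page_alloc(obj, pindex,
 *			  VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_NULL_OK);
 *	if (m == NULL) {
 *		...insertion collision or memory shortage, look the page
 *		   up again or retry...
 *	} else {
 *		...initialize or map the page...
 *		vm_page_wakeup(m);	...drops PG_BUSY when done...
 *	}
 */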
1527 vm_page_t
1528 vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int page_req)
1529 {
1530 globaldata_t gd = mycpu;
1531 vm_object_t obj;
1532 vm_page_t m;
1533 u_short pg_color;
1534
1535 #if 0
1536 /*
1537 * Special per-cpu free VM page cache. The pages are pre-busied
1538 * and pre-zero'd for us.
1539 */
1540 if (gd->gd_vmpg_count && (page_req & VM_ALLOC_USE_GD)) {
1541 crit_enter_gd(gd);
1542 if (gd->gd_vmpg_count) {
1543 m = gd->gd_vmpg_array[--gd->gd_vmpg_count];
1544 crit_exit_gd(gd);
1545 goto done;
1546 }
1547 crit_exit_gd(gd);
1548 }
1549 #endif
1550 m = NULL;
1551
1552 /*
1553 * Cpu twist - cpu localization algorithm
1554 */
1555 if (object) {
1556 pg_color = gd->gd_cpuid + (pindex & ~ncpus_fit_mask) +
1557 (object->pg_color & ~ncpus_fit_mask);
1558 } else {
1559 pg_color = gd->gd_cpuid + (pindex & ~ncpus_fit_mask);
1560 }
1561 KKASSERT(page_req &
1562 (VM_ALLOC_NORMAL|VM_ALLOC_QUICK|
1563 VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM));
1564
1565 /*
1566 * Certain system threads (pageout daemon, buf_daemon's) are
1567 * allowed to eat deeper into the free page list.
1568 */
1569 if (curthread->td_flags & TDF_SYSTHREAD)
1570 page_req |= VM_ALLOC_SYSTEM;
1571
1572 loop:
1573 if (vmstats.v_free_count > vmstats.v_free_reserved ||
1574 ((page_req & VM_ALLOC_INTERRUPT) && vmstats.v_free_count > 0) ||
1575 ((page_req & VM_ALLOC_SYSTEM) && vmstats.v_cache_count == 0 &&
1576 vmstats.v_free_count > vmstats.v_interrupt_free_min)
1577 ) {
1578 /*
1579 * The free queue has sufficient free pages to take one out.
1580 */
1581 if (page_req & (VM_ALLOC_ZERO | VM_ALLOC_FORCE_ZERO))
1582 m = vm_page_select_free(pg_color, TRUE);
1583 else
1584 m = vm_page_select_free(pg_color, FALSE);
1585 } else if (page_req & VM_ALLOC_NORMAL) {
1586 /*
1587 * Allocatable from the cache (non-interrupt only). On
1588 * success, we must free the page and try again, thus
1589 * ensuring that vmstats.v_*_free_min counters are replenished.
1590 */
1591 #ifdef INVARIANTS
1592 if (curthread->td_preempted) {
1593 kprintf("vm_page_alloc(): warning, attempt to allocate"
1594 " cache page from preempting interrupt\n");
1595 m = NULL;
1596 } else {
1597 m = vm_page_select_cache(pg_color);
1598 }
1599 #else
1600 m = vm_page_select_cache(pg_color);
1601 #endif
1602 /*
1603 * On success move the page into the free queue and loop.
1604 *
1605 * Only do this if we can safely acquire the vm_object lock,
1606 * because this is effectively a random page and the caller
1607 * might be holding the lock shared, we don't want to
1608 * deadlock.
1609 */
1610 if (m != NULL) {
1611 KASSERT(m->dirty == 0,
1612 ("Found dirty cache page %p", m));
1613 if ((obj = m->object) != NULL) {
1614 if (vm_object_hold_try(obj)) {
1615 vm_page_protect(m, VM_PROT_NONE);
1616 vm_page_free(m);
1617 /* m->object NULL here */
1618 vm_object_drop(obj);
1619 } else {
1620 vm_page_deactivate(m);
1621 vm_page_wakeup(m);
1622 }
1623 } else {
1624 vm_page_protect(m, VM_PROT_NONE);
1625 vm_page_free(m);
1626 }
1627 goto loop;
1628 }
1629
1630 /*
1631 * On failure return NULL
1632 */
1633 #if defined(DIAGNOSTIC)
1634 if (vmstats.v_cache_count > 0)
1635 kprintf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", vmstats.v_cache_count);
1636 #endif
1637 vm_pageout_deficit++;
1638 pagedaemon_wakeup();
1639 return (NULL);
1640 } else {
1641 /*
1642 * No pages available, wakeup the pageout daemon and give up.
1643 */
1644 vm_pageout_deficit++;
1645 pagedaemon_wakeup();
1646 return (NULL);
1647 }
1648
1649 /*
1650 * v_free_count can race so loop if we don't find the expected
1651 * page.
1652 */
1653 if (m == NULL)
1654 goto loop;
1655
1656 /*
1657 * Good page found. The page has already been busied for us and
1658 * removed from its queues.
1659 */
1660 KASSERT(m->dirty == 0,
1661 ("vm_page_alloc: free/cache page %p was dirty", m));
1662 KKASSERT(m->queue == PQ_NONE);
1663
1664 #if 0
1665 done:
1666 #endif
1667 /*
1668 * Initialize the structure, inheriting some flags but clearing
1669 * all the rest. The page has already been busied for us.
1670 */
1671 vm_page_flag_clear(m, ~(PG_ZERO | PG_BUSY | PG_SBUSY));
1672 KKASSERT(m->wire_count == 0);
1673 KKASSERT(m->busy == 0);
1674 m->act_count = 0;
1675 m->valid = 0;
1676
1677 /*
1678 * Caller must be holding the object lock (asserted by
1679 * vm_page_insert()).
1680 *
1681 * NOTE: Inserting a page here does not insert it into any pmaps
1682 * (which could cause us to block allocating memory).
1683 *
1684 * NOTE: If no object an unassociated page is allocated, m->pindex
1685 * can be used by the caller for any purpose.
1686 */
1687 if (object) {
1688 if (vm_page_insert(m, object, pindex) == FALSE) {
1689 vm_page_free(m);
1690 if ((page_req & VM_ALLOC_NULL_OK) == 0)
1691 panic("PAGE RACE %p[%ld]/%p",
1692 object, (long)pindex, m);
1693 m = NULL;
1694 }
1695 } else {
1696 m->pindex = pindex;
1697 }
1698
1699 /*
1700 * Don't wakeup too often - wakeup the pageout daemon when
1701 * we would be nearly out of memory.
1702 */
1703 pagedaemon_wakeup();
1704
1705 /*
1706 * A PG_BUSY page is returned.
1707 */
1708 return (m);
1709 }
1710
1711 /*
1712 * Attempt to allocate contiguous physical memory with the specified
1713 * requirements.
1714 */
1715 vm_page_t
1716 vm_page_alloc_contig(vm_paddr_t low, vm_paddr_t high,
1717 unsigned long alignment, unsigned long boundary,
1718 unsigned long size, vm_memattr_t memattr)
1719 {
1720 alist_blk_t blk;
1721 vm_page_t m;
1722 int i;
1723
1724 alignment >>= PAGE_SHIFT;
1725 if (alignment == 0)
1726 alignment = 1;
1727 boundary >>= PAGE_SHIFT;
1728 if (boundary == 0)
1729 boundary = 1;
1730 size = (size + PAGE_MASK) >> PAGE_SHIFT;
1731
1732 spin_lock(&vm_contig_spin);
1733 blk = alist_alloc(&vm_contig_alist, 0, size);
1734 if (blk == ALIST_BLOCK_NONE) {
1735 spin_unlock(&vm_contig_spin);
1736 if (bootverbose) {
1737 kprintf("vm_page_alloc_contig: %ldk nospace\n",
1738 (size + PAGE_MASK) * (PAGE_SIZE / 1024));
1739 }
1740 return(NULL);
1741 }
1742 if (high && ((vm_paddr_t)(blk + size) << PAGE_SHIFT) > high) {
1743 alist_free(&vm_contig_alist, blk, size);
1744 spin_unlock(&vm_contig_spin);
1745 if (bootverbose) {
1746 kprintf("vm_page_alloc_contig: %ldk high "
1747 "%016jx failed\n",
1748 (size + PAGE_MASK) * (PAGE_SIZE / 1024),
1749 (intmax_t)high);
1750 }
1751 return(NULL);
1752 }
1753 spin_unlock(&vm_contig_spin);
1754 if (vm_contig_verbose) {
1755 kprintf("vm_page_alloc_contig: %016jx/%ldk\n",
1756 (intmax_t)(vm_paddr_t)blk << PAGE_SHIFT,
1757 (size + PAGE_MASK) * (PAGE_SIZE / 1024));
1758 }
1759
1760 m = PHYS_TO_VM_PAGE((vm_paddr_t)blk << PAGE_SHIFT);
1761 if (memattr != VM_MEMATTR_DEFAULT)
1762 for (i = 0; i < size; i++)
1763 pmap_page_set_memattr(&m[i], memattr);
1764 return m;
1765 }
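/*
 * Illustrative use with hypothetical values: a 64KB physically contiguous
 * buffer below 4GB, 64KB-aligned, default cache attributes:
 *
 *	m = vm_page_alloc_contig(0, 0xFFFFFFFFUL, 64 * 1024, 0,
 *				 64 * 1024, VM_MEMATTR_DEFAULT);
 *	if (m == NULL)
 *		...the DMA reserve had no suitable run...
 *	...and later: vm_page_free_contig(m, 64 * 1024);
 */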
1766
1767 /*
1768 * Free contiguously allocated pages. The pages will be wired but not busy.
1769 * When freeing to the alist we leave them wired and not busy.
1770 */
1771 void
1772 vm_page_free_contig(vm_page_t m, unsigned long size)
1773 {
1774 vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1775 vm_pindex_t start = pa >> PAGE_SHIFT;
1776 vm_pindex_t pages = (size + PAGE_MASK) >> PAGE_SHIFT;
1777
1778 if (vm_contig_verbose) {
1779 kprintf("vm_page_free_contig: %016jx/%ldk\n",
1780 (intmax_t)pa, size / 1024);
1781 }
1782 if (pa < vm_low_phys_reserved) {
1783 KKASSERT(pa + size <= vm_low_phys_reserved);
1784 spin_lock(&vm_contig_spin);
1785 alist_free(&vm_contig_alist, start, pages);
1786 spin_unlock(&vm_contig_spin);
1787 } else {
1788 while (pages) {
1789 vm_page_busy_wait(m, FALSE, "cpgfr");
1790 vm_page_unwire(m, 0);
1791 vm_page_free(m);
1792 --pages;
1793 ++m;
1794 }
1795
1796 }
1797 }
1798
1799
1800 /*
1801 * Wait for sufficient free memory for nominal heavy memory use kernel
1802 * operations.
1803 *
1804 * WARNING! Be sure never to call this in any vm_pageout code path, which
1805 * will trivially deadlock the system.
1806 */
1807 void
1808 vm_wait_nominal(void)
1809 {
1810 while (vm_page_count_min(0))
1811 vm_wait(0);
1812 }
1813
1814 /*
1815 * Test if vm_wait_nominal() would block.
1816 */
1817 int
1818 vm_test_nominal(void)
1819 {
1820 if (vm_page_count_min(0))
1821 return(1);
1822 return(0);
1823 }
1824
1825 /*
1826 * Block until free pages are available for allocation, called in various
1827 * places before memory allocations.
1828 *
1829 * The caller may loop if vm_page_count_min() == FALSE so we cannot be
1830 * more generous than that.
1831 */
1832 void
1833 vm_wait(int timo)
1834 {
1835 /*
1836 * never wait forever
1837 */
1838 if (timo == 0)
1839 timo = hz;
1840 lwkt_gettoken(&vm_token);
1841
1842 if (curthread == pagethread) {
1843 /*
1844 * The pageout daemon itself needs pages, this is bad.
1845 */
1846 if (vm_page_count_min(0)) {
1847 vm_pageout_pages_needed = 1;
1848 tsleep(&vm_pageout_pages_needed, 0, "VMWait", timo);
1849 }
1850 } else {
1851 /*
1852 * Wakeup the pageout daemon if necessary and wait.
1853 *
1854 * Do not wait indefinitely for the target to be reached,
1855 * as load might prevent it from being reached any time soon.
1856 * But wait a little to try to slow down page allocations
1857 * and to give more important threads (the pagedaemon)
1858 * allocation priority.
1859 */
1860 if (vm_page_count_target()) {
1861 if (vm_pages_needed == 0) {
1862 vm_pages_needed = 1;
1863 wakeup(&vm_pages_needed);
1864 }
1865 ++vm_pages_waiting; /* SMP race ok */
1866 tsleep(&vmstats.v_free_count, 0, "vmwait", timo);
1867 }
1868 }
1869 lwkt_reltoken(&vm_token);
1870 }
1871
1872 /*
1873 * Block until free pages are available for allocation
1874 *
1875 * Called only from vm_fault so that processes page faulting can be
1876 * easily tracked.
1877 */
1878 void
1879 vm_wait_pfault(void)
1880 {
1881 /*
1882 * Wakeup the pageout daemon if necessary and wait.
1883 *
1884 * Do not wait indefinitely for the target to be reached,
1885 * as load might prevent it from being reached any time soon.
1886 * But wait a little to try to slow down page allocations
1887 * and to give more important threads (the pagedaemon)
1888 * allocation priority.
1889 */
1890 if (vm_page_count_min(0)) {
1891 lwkt_gettoken(&vm_token);
1892 while (vm_page_count_severe()) {
1893 if (vm_page_count_target()) {
1894 if (vm_pages_needed == 0) {
1895 vm_pages_needed = 1;
1896 wakeup(&vm_pages_needed);
1897 }
1898 ++vm_pages_waiting; /* SMP race ok */
1899 tsleep(&vmstats.v_free_count, 0, "pfault", hz);
1900 }
1901 }
1902 lwkt_reltoken(&vm_token);
1903 }
1904 }
1905
1906 /*
1907 * Put the specified page on the active list (if appropriate). Ensure
1908 * that act_count is at least ACT_INIT but do not otherwise mess with it.
1909 *
1910 * The caller should be holding the page busied ? XXX
1911 * This routine may not block.
1912 */
1913 void
1914 vm_page_activate(vm_page_t m)
1915 {
1916 u_short oqueue;
1917
1918 vm_page_spin_lock(m);
1919 if (m->queue - m->pc != PQ_ACTIVE) {
1920 _vm_page_queue_spin_lock(m);
1921 oqueue = _vm_page_rem_queue_spinlocked(m);
1922 /* page is left spinlocked, queue is unlocked */
1923
1924 if (oqueue == PQ_CACHE)
1925 mycpu->gd_cnt.v_reactivated++;
1926 if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
1927 if (m->act_count < ACT_INIT)
1928 m->act_count = ACT_INIT;
1929 _vm_page_add_queue_spinlocked(m, PQ_ACTIVE + m->pc, 0);
1930 }
1931 _vm_page_and_queue_spin_unlock(m);
1932 if (oqueue == PQ_CACHE || oqueue == PQ_FREE)
1933 pagedaemon_wakeup();
1934 } else {
1935 if (m->act_count < ACT_INIT)
1936 m->act_count = ACT_INIT;
1937 vm_page_spin_unlock(m);
1938 }
1939 }
1940
1941 /*
1942 * Helper routine for vm_page_free_toq() and vm_page_cache(). This
1943 * routine is called when a page has been added to the cache or free
1944 * queues.
1945 *
1946 * This routine may not block.
1947 */
1948 static __inline void
1949 vm_page_free_wakeup(void)
1950 {
1951 /*
1952 * If the pageout daemon itself needs pages, then tell it that
1953 * there are some free.
1954 */
1955 if (vm_pageout_pages_needed &&
1956 vmstats.v_cache_count + vmstats.v_free_count >=
1957 vmstats.v_pageout_free_min
1958 ) {
1959 vm_pageout_pages_needed = 0;
1960 wakeup(&vm_pageout_pages_needed);
1961 }
1962
1963 /*
1964 * Wakeup processes that are waiting on memory.
1965 *
1966 * Generally speaking we want to wakeup stuck processes as soon as
1967 * possible. !vm_page_count_min(0) is the absolute minimum point
1968 * where we can do this. Wait a bit longer to reduce degenerate
1969 * re-blocking (vm_page_free_hysteresis). The target check is just
1970 * to make sure the min-check w/hysteresis does not exceed the
1971 * normal target.
1972 */
1973 if (vm_pages_waiting) {
1974 if (!vm_page_count_min(vm_page_free_hysteresis) ||
1975 !vm_page_count_target()) {
1976 vm_pages_waiting = 0;
1977 wakeup(&vmstats.v_free_count);
1978 ++mycpu->gd_cnt.v_ppwakeups;
1979 }
1980 #if 0
1981 if (!vm_page_count_target()) {
1982 /*
1983 * Plenty of pages are free, wakeup everyone.
1984 */
1985 vm_pages_waiting = 0;
1986 wakeup(&vmstats.v_free_count);
1987 ++mycpu->gd_cnt.v_ppwakeups;
1988 } else if (!vm_page_count_min(0)) {
1989 /*
1990 * Some pages are free, wakeup someone.
1991 */
1992 int wcount = vm_pages_waiting;
1993 if (wcount > 0)
1994 --wcount;
1995 vm_pages_waiting = wcount;
1996 wakeup_one(&vmstats.v_free_count);
1997 ++mycpu->gd_cnt.v_ppwakeups;
1998 }
1999 #endif
2000 }
2001 }
2002
2003 /*
2004 * Returns the given page to the PQ_FREE or PQ_HOLD list and disassociates
2005 * it from its VM object.
2006 *
2007 * The vm_page must be PG_BUSY on entry. PG_BUSY will be released on
2008 * return (the page will have been freed).
2009 */
2010 void
2011 vm_page_free_toq(vm_page_t m)
2012 {
2013 mycpu->gd_cnt.v_tfree++;
2014 KKASSERT((m->flags & PG_MAPPED) == 0);
2015 KKASSERT(m->flags & PG_BUSY);
2016
2017 if (m->busy || ((m->queue - m->pc) == PQ_FREE)) {
2018 kprintf("vm_page_free: pindex(%lu), busy(%d), "
2019 "PG_BUSY(%d), hold(%d)\n",
2020 (u_long)m->pindex, m->busy,
2021 ((m->flags & PG_BUSY) ? 1 : 0), m->hold_count);
2022 if ((m->queue - m->pc) == PQ_FREE)
2023 panic("vm_page_free: freeing free page");
2024 else
2025 panic("vm_page_free: freeing busy page");
2026 }
2027
2028 /*
2029 * Remove from object, spinlock the page and its queues and
2030 * remove from any queue. No queue spinlock will be held
2031 * after this section (because the page was removed from any
2032 * queue).
2033 */
2034 vm_page_remove(m);
2035 vm_page_and_queue_spin_lock(m);
2036 _vm_page_rem_queue_spinlocked(m);
2037
2038 /*
2039 * No further management of fictitious pages occurs beyond object
2040 * and queue removal.
2041 */
2042 if ((m->flags & PG_FICTITIOUS) != 0) {
2043 vm_page_spin_unlock(m);
2044 vm_page_wakeup(m);
2045 return;
2046 }
2047
2048 m->valid = 0;
2049 vm_page_undirty(m);
2050
2051 if (m->wire_count != 0) {
2052 if (m->wire_count > 1) {
2053 panic(
2054 "vm_page_free: invalid wire count (%d), pindex: 0x%lx",
2055 m->wire_count, (long)m->pindex);
2056 }
2057 panic("vm_page_free: freeing wired page");
2058 }
2059
2060 /*
2061 * Clear the UNMANAGED flag when freeing an unmanaged page.
2062 * Clear the NEED_COMMIT flag
2063 */
2064 if (m->flags & PG_UNMANAGED)
2065 vm_page_flag_clear(m, PG_UNMANAGED);
2066 if (m->flags & PG_NEED_COMMIT)
2067 vm_page_flag_clear(m, PG_NEED_COMMIT);
2068
2069 if (m->hold_count != 0) {
2070 vm_page_flag_clear(m, PG_ZERO);
2071 _vm_page_add_queue_spinlocked(m, PQ_HOLD + m->pc, 0);
2072 } else {
2073 _vm_page_add_queue_spinlocked(m, PQ_FREE + m->pc, 0);
2074 }
2075
2076 /*
2077 * This sequence allows us to clear PG_BUSY while still holding
2078 * its spin lock, which reduces contention vs allocators. We
2079 * must not leave the queue locked or _vm_page_wakeup() may
2080 * deadlock.
2081 */
2082 _vm_page_queue_spin_unlock(m);
2083 if (_vm_page_wakeup(m)) {
2084 vm_page_spin_unlock(m);
2085 wakeup(m);
2086 } else {
2087 vm_page_spin_unlock(m);
2088 }
2089 vm_page_free_wakeup();
2090 }
2091
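/*
 * Example (editor's sketch, not part of the original source): a typical
 * teardown path removes all pmap mappings first so PG_MAPPED is clear,
 * then frees the busied page.  vm_page_free() is assumed here to be a
 * thin wrapper around vm_page_free_toq(); the helper is hypothetical.
 */
#if 0
static void
example_discard_page(vm_page_t m)
{
	/* page is held PG_BUSY by the caller */
	vm_page_protect(m, VM_PROT_NONE);	/* drop all mappings */
	vm_page_free(m);			/* releases PG_BUSY via vm_page_free_toq() */
}
#endif
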
2092 /*
2093 * vm_page_free_fromq_fast()
2094 *
2095 * Remove a page that has not been pre-zeroed (PG_ZERO clear) from one of the
2096 * free queues; the page is being removed so it can be zeroed, so no wakeup is issued.
2097 */
2098 vm_page_t
2099 vm_page_free_fromq_fast(void)
2100 {
2101 static int qi;
2102 vm_page_t m;
2103 int i;
2104
2105 for (i = 0; i < PQ_L2_SIZE; ++i) {
2106 m = vm_page_list_find(PQ_FREE, qi, FALSE);
2107 /* page is returned spinlocked and removed from its queue */
2108 if (m) {
2109 if (vm_page_busy_try(m, TRUE)) {
2110 /*
2111 * We were unable to busy the page, deactivate
2112 * it and loop.
2113 */
2114 _vm_page_deactivate_locked(m, 0);
2115 vm_page_spin_unlock(m);
2116 } else if (m->flags & PG_ZERO) {
2117 /*
2118 * The page is PG_ZERO, requeue it and loop
2119 */
2120 _vm_page_add_queue_spinlocked(m,
2121 PQ_FREE + m->pc,
2122 0);
2123 vm_page_queue_spin_unlock(m);
2124 if (_vm_page_wakeup(m)) {
2125 vm_page_spin_unlock(m);
2126 wakeup(m);
2127 } else {
2128 vm_page_spin_unlock(m);
2129 }
2130 } else {
2131 /*
2132 * The page is not PG_ZERO'd so return it.
2133 */
2134 vm_page_spin_unlock(m);
2135 KKASSERT((m->flags & (PG_UNMANAGED |
2136 PG_NEED_COMMIT)) == 0);
2137 KKASSERT(m->hold_count == 0);
2138 KKASSERT(m->wire_count == 0);
2139 break;
2140 }
2141 m = NULL;
2142 }
2143 qi = (qi + PQ_PRIME2) & PQ_L2_MASK;
2144 }
2145 return (m);
2146 }
2147
2148 /*
2149 * vm_page_unmanage()
2150 *
2151 * Prevent PV management from being done on the page. The page is
2152 * removed from the paging queues as if it were wired, and as a
2153 * consequence of no longer being managed the pageout daemon will not
2154 * touch it (since there is no way to locate the pte mappings for the
2155 * page). madvise() calls that mess with the pmap will also no longer
2156 * operate on the page.
2157 *
2158 * Beyond that the page is still reasonably 'normal'. Freeing the page
2159 * will clear the flag.
2160 *
2161 * This routine is used by OBJT_PHYS objects - objects using unswappable
2162 * physical memory as backing store rather than swap-backed memory and
2163 * will eventually be extended to support 4MB unmanaged physical
2164 * mappings.
2165 *
2166 * Caller must be holding the page busy.
2167 */
2168 void
2169 vm_page_unmanage(vm_page_t m)
2170 {
2171 KKASSERT(m->flags & PG_BUSY);
2172 if ((m->flags & PG_UNMANAGED) == 0) {
2173 if (m->wire_count == 0)
2174 vm_page_unqueue(m);
2175 }
2176 vm_page_flag_set(m, PG_UNMANAGED);
2177 }
2178
2179 /*
2180 * Mark this page as wired down by yet another map, removing it from
2181 * paging queues as necessary.
2182 *
2183 * Caller must be holding the page busy.
2184 */
2185 void
2186 vm_page_wire(vm_page_t m)
2187 {
2188 /*
2189 * Only bump the wire statistics if the page is not already wired,
2190 * and only unqueue the page if it is on some queue (if it is unmanaged
2191 * it is already off the queues). Don't do anything with fictitious
2192 * pages because they are always wired.
2193 */
2194 KKASSERT(m->flags & PG_BUSY);
2195 if ((m->flags & PG_FICTITIOUS) == 0) {
2196 if (atomic_fetchadd_int(&m->wire_count, 1) == 0) {
2197 if ((m->flags & PG_UNMANAGED) == 0)
2198 vm_page_unqueue(m);
2199 atomic_add_int(&vmstats.v_wire_count, 1);
2200 }
2201 KASSERT(m->wire_count != 0,
2202 ("vm_page_wire: wire_count overflow m=%p", m));
2203 }
2204 }
2205
2206 /*
2207 * Release one wiring of this page, potentially enabling it to be paged again.
2208 *
2209 * Many pages placed on the inactive queue should actually go
2210 * into the cache, but it is difficult to figure out which. What
2211 * we do instead, if the inactive target is well met, is to put
2212 * clean pages at the head of the inactive queue instead of the tail.
2213 * This will cause them to be moved to the cache more quickly and
2214 * if not actively re-referenced, freed more quickly. If we just
2215 * stick these pages at the end of the inactive queue, heavy filesystem
2216 * meta-data accesses can cause an unnecessary paging load on memory bound
2217 * processes. This optimization causes one-time-use metadata to be
2218 * reused more quickly.
2219 *
2220 * Pages marked PG_NEED_COMMIT are always activated and never placed on
2221 * the inactive queue. This helps the pageout daemon determine memory
2222 * pressure and act on out-of-memory situations more quickly.
2223 *
2224 * BUT, if we are in a low-memory situation we have no choice but to
2225 * put clean pages on the cache queue.
2226 *
2227 * A number of routines use vm_page_unwire() to guarantee that the page
2228 * will go into either the inactive or active queues, and will NEVER
2229 * be placed in the cache - for example, just after dirtying a page.
2230 * Dirty pages in the cache are not allowed.
2231 *
2232 * The page queues must be locked.
2233 * This routine may not block.
2234 */
2235 void
2236 vm_page_unwire(vm_page_t m, int activate)
2237 {
2238 KKASSERT(m->flags & PG_BUSY);
2239 if (m->flags & PG_FICTITIOUS) {
2240 /* do nothing */
2241 } else if (m->wire_count <= 0) {
2242 panic("vm_page_unwire: invalid wire count: %d", m->wire_count);
2243 } else {
2244 if (atomic_fetchadd_int(&m->wire_count, -1) == 1) {
2245 atomic_add_int(&vmstats.v_wire_count, -1);
2246 if (m->flags & PG_UNMANAGED) {
2247 ;
2248 } else if (activate || (m->flags & PG_NEED_COMMIT)) {
2249 vm_page_spin_lock(m);
2250 _vm_page_add_queue_spinlocked(m,
2251 PQ_ACTIVE + m->pc, 0);
2252 _vm_page_and_queue_spin_unlock(m);
2253 } else {
2254 vm_page_spin_lock(m);
2255 vm_page_flag_clear(m, PG_WINATCFLS);
2256 _vm_page_add_queue_spinlocked(m,
2257 PQ_INACTIVE + m->pc, 0);
2258 ++vm_swapcache_inactive_heuristic;
2259 _vm_page_and_queue_spin_unlock(m);
2260 }
2261 }
2262 }
2263 }
2264
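/*
 * Example (editor's sketch, not part of the original source): wiring
 * brackets a period during which the page must stay resident.  The page
 * must be busied across both calls; the helper is hypothetical.
 */
#if 0
static void
example_pin_page(vm_page_t m)
{
	/* page is held PG_BUSY by the caller */
	vm_page_wire(m);		/* off the paging queues, v_wire_count++ */
	/* ... operate on the page while it cannot be paged out ... */
	vm_page_unwire(m, 1);		/* last wiring requeues it on PQ_ACTIVE */
}
#endif
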
2265 /*
2266 * Move the specified page to the inactive queue. If the page has
2267 * any associated swap, the swap is deallocated.
2268 *
2269 * Normally athead is 0 resulting in LRU operation. athead is set
2270 * to 1 if we want this page to be 'as if it were placed in the cache',
2271 * except without unmapping it from the process address space.
2272 *
2273 * vm_page's spinlock must be held on entry and will remain held on return.
2274 * This routine may not block.
2275 */
2276 static void
2277 _vm_page_deactivate_locked(vm_page_t m, int athead)
2278 {
2279 u_short oqueue;
2280
2281 /*
2282 * Ignore if already inactive.
2283 */
2284 if (m->queue - m->pc == PQ_INACTIVE)
2285 return;
2286 _vm_page_queue_spin_lock(m);
2287 oqueue = _vm_page_rem_queue_spinlocked(m);
2288
2289 if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
2290 if (oqueue == PQ_CACHE)
2291 mycpu->gd_cnt.v_reactivated++;
2292 vm_page_flag_clear(m, PG_WINATCFLS);
2293 _vm_page_add_queue_spinlocked(m, PQ_INACTIVE + m->pc, athead);
2294 if (athead == 0)
2295 ++vm_swapcache_inactive_heuristic;
2296 }
2297 _vm_page_queue_spin_unlock(m);
2298 /* leaves vm_page spinlocked */
2299 }
2300
2301 /*
2302 * Attempt to deactivate a page.
2303 *
2304 * No requirements.
2305 */
2306 void
2307 vm_page_deactivate(vm_page_t m)
2308 {
2309 vm_page_spin_lock(m);
2310 _vm_page_deactivate_locked(m, 0);
2311 vm_page_spin_unlock(m);
2312 }
2313
2314 void
2315 vm_page_deactivate_locked(vm_page_t m)
2316 {
2317 _vm_page_deactivate_locked(m, 0);
2318 }
2319
2320 /*
2321 * Attempt to move a page to PQ_CACHE.
2322 *
2323 * Returns 0 on failure, 1 on success
2324 *
2325 * The page should NOT be busied by the caller. This function will validate
2326 * whether the page can be safely moved to the cache.
2327 */
2328 int
2329 vm_page_try_to_cache(vm_page_t m)
2330 {
2331 vm_page_spin_lock(m);
2332 if (vm_page_busy_try(m, TRUE)) {
2333 vm_page_spin_unlock(m);
2334 return(0);
2335 }
2336 if (m->dirty || m->hold_count || m->wire_count ||
2337 (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT))) {
2338 if (_vm_page_wakeup(m)) {
2339 vm_page_spin_unlock(m);
2340 wakeup(m);
2341 } else {
2342 vm_page_spin_unlock(m);
2343 }
2344 return(0);
2345 }
2346 vm_page_spin_unlock(m);
2347
2348 /*
2349 * Page busied by us and no longer spinlocked. Dirty pages cannot
2350 * be moved to the cache.
2351 */
2352 vm_page_test_dirty(m);
2353 if (m->dirty || (m->flags & PG_NEED_COMMIT)) {
2354 vm_page_wakeup(m);
2355 return(0);
2356 }
2357 vm_page_cache(m);
2358 return(1);
2359 }
2360
2361 /*
2362 * Attempt to free the page. If we cannot free it, we do nothing.
2363 * 1 is returned on success, 0 on failure.
2364 *
2365 * No requirements.
2366 */
2367 int
2368 vm_page_try_to_free(vm_page_t m)
2369 {
2370 vm_page_spin_lock(m);
2371 if (vm_page_busy_try(m, TRUE)) {
2372 vm_page_spin_unlock(m);
2373 return(0);
2374 }
2375
2376 /*
2377 * The page can be in any state, including already being on the free
2378 * queue. Check to see if it really can be freed.
2379 */
2380 if (m->dirty || /* can't free if it is dirty */
2381 m->hold_count || /* or held (XXX may be wrong) */
2382 m->wire_count || /* or wired */
2383 (m->flags & (PG_UNMANAGED | /* or unmanaged */
2384 PG_NEED_COMMIT)) || /* or needs a commit */
2385 m->queue - m->pc == PQ_FREE || /* already on PQ_FREE */
2386 m->queue - m->pc == PQ_HOLD) { /* already on PQ_HOLD */
2387 if (_vm_page_wakeup(m)) {
2388 vm_page_spin_unlock(m);
2389 wakeup(m);
2390 } else {
2391 vm_page_spin_unlock(m);
2392 }
2393 return(0);
2394 }
2395 vm_page_spin_unlock(m);
2396
2397 /*
2398 * We can probably free the page.
2399 *
2400 * Page busied by us and no longer spinlocked. Dirty pages will
2401 * not be freed by this function. We have to re-test the
2402 * dirty bit after cleaning out the pmaps.
2403 */
2404 vm_page_test_dirty(m);
2405 if (m->dirty || (m->flags & PG_NEED_COMMIT)) {
2406 vm_page_wakeup(m);
2407 return(0);
2408 }
2409 vm_page_protect(m, VM_PROT_NONE);
2410 if (m->dirty || (m->flags & PG_NEED_COMMIT)) {
2411 vm_page_wakeup(m);
2412 return(0);
2413 }
2414 vm_page_free(m);
2415 return(1);
2416 }
2417
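/*
 * Example (editor's sketch, not part of the original source): a
 * reclaim-style scan can try to free a page outright and fall back to
 * caching it; both helpers busy the page internally and return 0 when
 * the page cannot be moved.  The function below is hypothetical.
 */
#if 0
static int
example_reclaim_one(vm_page_t m)
{
	if (vm_page_try_to_free(m))	/* clean and idle, now on PQ_FREE */
		return (1);
	if (vm_page_try_to_cache(m))	/* clean, now on PQ_CACHE */
		return (1);
	return (0);			/* busy, dirty, wired, held, or unmanaged */
}
#endif
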
2418 /*
2419 * vm_page_cache
2420 *
2421 * Put the specified page onto the page cache queue (if appropriate).
2422 *
2423 * The page must be busy, and this routine will release the busy and
2424 * possibly even free the page.
2425 */
2426 void
2427 vm_page_cache(vm_page_t m)
2428 {
2429 if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) ||
2430 m->busy || m->wire_count || m->hold_count) {
2431 kprintf("vm_page_cache: attempting to cache busy/held page\n");
2432 vm_page_wakeup(m);
2433 return;
2434 }
2435
2436 /*
2437 * Already in the cache (and thus not mapped)
2438 */
2439 if ((m->queue - m->pc) == PQ_CACHE) {
2440 KKASSERT((m->flags & PG_MAPPED) == 0);
2441 vm_page_wakeup(m);
2442 return;
2443 }
2444
2445 /*
2446 * Caller is required to test m->dirty, but note that the act of
2447 * removing the page from its maps can cause it to become dirty
2448 * on an SMP system due to another cpu running in usermode.
2449 */
2450 if (m->dirty) {
2451 panic("vm_page_cache: caching a dirty page, pindex: %ld",
2452 (long)m->pindex);
2453 }
2454
2455 /*
2456 * Remove all pmaps and indicate that the page is not
2457 * writeable or mapped. Our vm_page_protect() call may
2458 * have blocked (especially w/ VM_PROT_NONE), so recheck
2459 * everything.
2460 */
2461 vm_page_protect(m, VM_PROT_NONE);
2462 if ((m->flags & (PG_UNMANAGED | PG_MAPPED)) ||
2463 m->busy || m->wire_count || m->hold_count) {
2464 vm_page_wakeup(m);
2465 } else if (m->dirty || (m->flags & PG_NEED_COMMIT)) {
2466 vm_page_deactivate(m);
2467 vm_page_wakeup(m);
2468 } else {
2469 _vm_page_and_queue_spin_lock(m);
2470 _vm_page_rem_queue_spinlocked(m);
2471 _vm_page_add_queue_spinlocked(m, PQ_CACHE + m->pc, 0);
2472 _vm_page_queue_spin_unlock(m);
2473 if (_vm_page_wakeup(m)) {
2474 vm_page_spin_unlock(m);
2475 wakeup(m);
2476 } else {
2477 vm_page_spin_unlock(m);
2478 }
2479 vm_page_free_wakeup();
2480 }
2481 }
2482
2483 /*
2484 * vm_page_dontneed()
2485 *
2486 * Cache, deactivate, or do nothing as appropriate. This routine
2487 * is typically used by madvise() MADV_DONTNEED.
2488 *
2489 * Generally speaking we want to move the page into the cache so
2490 * it gets reused quickly. However, this can result in a silly syndrome
2491 * due to the page recycling too quickly. Small objects will not be
2492 * fully cached. On the other hand, if we move the page to the inactive
2493 * queue we wind up with a problem whereby very large objects
2494 * unnecessarily blow away our inactive and cache queues.
2495 *
2496 * The solution is to move the pages based on a fixed weighting. We
2497 * either leave them alone, deactivate them, or move them to the cache,
2498 * where moving them to the cache has the highest weighting.
2499 * By forcing some pages into other queues we eventually force the
2500 * system to balance the queues, potentially recovering other unrelated
2501 * space from active. The idea is to not force this to happen too
2502 * often.
2503 *
2504 * The page must be busied.
2505 */
2506 void
2507 vm_page_dontneed(vm_page_t m)
2508 {
2509 static int dnweight;
2510 int dnw;
2511 int head;
2512
2513 dnw = ++dnweight;
2514
2515 /*
2516 * occassionally leave the page alone
2517 */
2518 if ((dnw & 0x01F0) == 0 ||
2519 m->queue - m->pc == PQ_INACTIVE ||
2520 m->queue - m->pc == PQ_CACHE
2521 ) {
2522 if (m->act_count >= ACT_INIT)
2523 --m->act_count;
2524 return;
2525 }
2526
2527 /*
2528 * If vm_page_dontneed() is inactivating a page, it must clear
2529 * the referenced flag; otherwise the pagedaemon will see references
2530 * on the page in the inactive queue and reactivate it. Until the
2531 * page can move to the cache queue, madvise's job is not done.
2532 */
2533 vm_page_flag_clear(m, PG_REFERENCED);
2534 pmap_clear_reference(m);
2535
2536 if (m->dirty == 0)
2537 vm_page_test_dirty(m);
2538
2539 if (m->dirty || (dnw & 0x0070) == 0) {
2540 /*
2541 * Deactivate the page 3 times out of 32.
2542 */
2543 head = 0;
2544 } else {
2545 /*
2546 * Cache the page 28 times out of every 32. Note that
2547 * the page is deactivated instead of cached, but placed
2548 * at the head of the queue instead of the tail.
2549 */
2550 head = 1;
2551 }
2552 vm_page_spin_lock(m);
2553 _vm_page_deactivate_locked(m, head);
2554 vm_page_spin_unlock(m);
2555 }
2556
2557 /*
2558 * These routines manipulate the 'soft busy' count for a page. A soft busy
2559 * is almost like PG_BUSY except that it allows certain compatible operations
2560 * to occur on the page while it is busy. For example, a page undergoing a
2561 * write can still be mapped read-only.
2562 *
2563 * Because vm_pages can overlap buffers m->busy can be > 1. m->busy is only
2564 * adjusted while the vm_page is PG_BUSY so the flash will occur when the
2565 * busy bit is cleared.
2566 */
2567 void
2568 vm_page_io_start(vm_page_t m)
2569 {
2570 KASSERT(m->flags & PG_BUSY, ("vm_page_io_start: page not busy!!!"));
2571 atomic_add_char(&m->busy, 1);
2572 vm_page_flag_set(m, PG_SBUSY);
2573 }
2574
2575 void
2576 vm_page_io_finish(vm_page_t m)
2577 {
2578 KASSERT(m->flags & PG_BUSY, ("vm_page_io_finish: page not busy!!!"));
2579 atomic_subtract_char(&m->busy, 1);
2580 if (m->busy == 0)
2581 vm_page_flag_clear(m, PG_SBUSY);
2582 }
2583
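/*
 * Example (editor's sketch, not part of the original source): the soft
 * busy count brackets I/O issued while the hard PG_BUSY bit is held,
 * allowing compatible access (e.g. read-only mappings) to continue.
 * The helper is hypothetical.
 */
#if 0
static void
example_page_write(vm_page_t m)
{
	/* page is held PG_BUSY by the caller */
	vm_page_io_start(m);	/* m->busy++, PG_SBUSY set */
	/* ... start the write; the completion path calls vm_page_io_finish(m) ... */
}
#endif
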
2584 /*
2585 * Indicate that a clean VM page requires a filesystem commit and cannot
2586 * be reused. Used by tmpfs.
2587 */
2588 void
2589 vm_page_need_commit(vm_page_t m)
2590 {
2591 vm_page_flag_set(m, PG_NEED_COMMIT);
2592 vm_object_set_writeable_dirty(m->object);
2593 }
2594
2595 void
2596 vm_page_clear_commit(vm_page_t m)
2597 {
2598 vm_page_flag_clear(m, PG_NEED_COMMIT);
2599 }
2600
2601 /*
2602 * Grab a page, blocking if it is busy and allocating a page if necessary.
2603 * A busy page is returned or NULL. The page may or may not be valid and
2604 * might not be on a queue (the caller is responsible for the disposition of
2605 * the page).
2606 *
2607 * If VM_ALLOC_ZERO is specified and the grab must allocate a new page, the
2608 * page will be zero'd and marked valid.
2609 *
2610 * If VM_ALLOC_FORCE_ZERO is specified the page will be zero'd and marked
2611 * valid even if it already exists.
2612 *
2613 * If VM_ALLOC_RETRY is specified this routine will never return NULL. Also
2614 * note that VM_ALLOC_NORMAL must be specified if VM_ALLOC_RETRY is specified.
2615 * VM_ALLOC_NULL_OK is implied when VM_ALLOC_RETRY is specified.
2616 *
2617 * This routine may block, but if VM_ALLOC_RETRY is not set then NULL is
2618 * always returned if we had blocked.
2619 *
2620 * This routine may not be called from an interrupt.
2621 *
2622 * PG_ZERO is *ALWAYS* cleared by this routine.
2623 *
2624 * No other requirements.
2625 */
2626 vm_page_t
2627 vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
2628 {
2629 vm_page_t m;
2630 int error;
2631 int shared = 1;
2632
2633 KKASSERT(allocflags &
2634 (VM_ALLOC_NORMAL|VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM));
2635 vm_object_hold_shared(object);
2636 for (;;) {
2637 m = vm_page_lookup_busy_try(object, pindex, TRUE, &error);
2638 if (error) {
2639 vm_page_sleep_busy(m, TRUE, "pgrbwt");
2640 if ((allocflags & VM_ALLOC_RETRY) == 0) {
2641 m = NULL;
2642 break;
2643 }
2644 /* retry */
2645 } else if (m == NULL) {
2646 if (shared) {
2647 vm_object_upgrade(object);
2648 shared = 0;
2649 }
2650 if (allocflags & VM_ALLOC_RETRY)
2651 allocflags |= VM_ALLOC_NULL_OK;
2652 m = vm_page_alloc(object, pindex,
2653 allocflags & ~VM_ALLOC_RETRY);
2654 if (m)
2655 break;
2656 vm_wait(0);
2657 if ((allocflags & VM_ALLOC_RETRY) == 0)
2658 goto failed;
2659 } else {
2660 /* m found */
2661 break;
2662 }
2663 }
2664
2665 /*
2666 * If VM_ALLOC_ZERO an invalid page will be zero'd and set valid.
2667 *
2668 * If VM_ALLOC_FORCE_ZERO the page is unconditionally zero'd and set
2669 * valid even if already valid.
2670 */
2671 if (m->valid == 0) {
2672 if (allocflags & (VM_ALLOC_ZERO | VM_ALLOC_FORCE_ZERO)) {
2673 if ((m->flags & PG_ZERO) == 0)
2674 pmap_zero_page(VM_PAGE_TO_PHYS(m));
2675 m->valid = VM_PAGE_BITS_ALL;
2676 }
2677 } else if (allocflags & VM_ALLOC_FORCE_ZERO) {
2678 pmap_zero_page(VM_PAGE_TO_PHYS(m));
2679 m->valid = VM_PAGE_BITS_ALL;
2680 }
2681 vm_page_flag_clear(m, PG_ZERO);
2682 failed:
2683 vm_object_drop(object);
2684 return(m);
2685 }
2686
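/*
 * Example (editor's sketch, not part of the original source): a common
 * pattern grabs a page at a given object index, retrying until one is
 * available, zeroing it if it was not already valid, and releases the
 * hard busy when done.  The helper is hypothetical.
 */
#if 0
static void
example_grab_and_touch(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	/* VM_ALLOC_RETRY implies VM_ALLOC_NULL_OK and never returns NULL */
	m = vm_page_grab(object, pindex,
			 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_ZERO);
	/* ... use the busied page, which is valid at this point ... */
	vm_page_wakeup(m);	/* release PG_BUSY */
}
#endif
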
2687 /*
2688 * Mapping function for valid bits or for dirty bits in
2689 * a page. May not block.
2690 *
2691 * Inputs are required to range within a page.
2692 *
2693 * No requirements.
2694 * Non blocking.
2695 */
2696 int
2697 vm_page_bits(int base, int size)
2698 {
2699 int first_bit;
2700 int last_bit;
2701
2702 KASSERT(
2703 base + size <= PAGE_SIZE,
2704 ("vm_page_bits: illegal base/size %d/%d", base, size)
2705 );
2706
2707 if (size == 0) /* handle degenerate case */
2708 return(0);
2709
2710 first_bit = base >> DEV_BSHIFT;
2711 last_bit = (base + size - 1) >> DEV_BSHIFT;
2712
2713 return ((2 << last_bit) - (1 << first_bit));
2714 }
2715
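/*
 * Worked example (editor's note, assuming DEV_BSIZE = 512 and a 4K
 * PAGE_SIZE): vm_page_bits(512, 1024) gives first_bit = 1 and
 * last_bit = (512 + 1024 - 1) >> 9 = 2, so the result is
 * (2 << 2) - (1 << 1) = 6, i.e. bits 1 and 2 -- the two 512-byte
 * chunks covered by the range.  vm_page_bits(0, PAGE_SIZE) yields
 * (2 << 7) - 1 = 0xff, which is VM_PAGE_BITS_ALL for such pages.
 */
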
2716 /*
2717 * Sets portions of a page valid and clean. The arguments are expected
2718 * to be DEV_BSIZE aligned, but if they aren't, the bitmap is inclusive
2719 * of any partial chunks touched by the range. The invalid portion of
2720 * such chunks will be zero'd.
2721 *
2722 * NOTE: When truncating a buffer vnode_pager_setsize() will automatically
2723 * align base to DEV_BSIZE so as not to mark clean a partially
2724 * truncated device block. Otherwise the dirty page status might be
2725 * lost.
2726 *
2727 * This routine may not block.
2728 *
2729 * (base + size) must be less than or equal to PAGE_SIZE.
2730 */
2731 static void
2732 _vm_page_zero_valid(vm_page_t m, int base, int size)
2733 {
2734 int frag;
2735 int endoff;
2736
2737 if (size == 0) /* handle degenerate case */
2738 return;
2739
2740 /*
2741 * If the base is not DEV_BSIZE aligned and the valid
2742 * bit is clear, we have to zero out a portion of the
2743 * first block.
2744 */
2745
2746 if ((frag = base & ~(DEV_BSIZE - 1)) != base &&
2747 (m->valid & (1 << (base >> DEV_BSHIFT))) == 0
2748 ) {
2749 pmap_zero_page_area(
2750 VM_PAGE_TO_PHYS(m),
2751 frag,
2752 base - frag
2753 );
2754 }
2755
2756 /*
2757 * If the ending offset is not DEV_BSIZE aligned and the
2758 * valid bit is clear, we have to zero out a portion of
2759 * the last block.
2760 */
2761
2762 endoff = base + size;
2763
2764 if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff &&
2765 (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0
2766 ) {
2767 pmap_zero_page_area(
2768 VM_PAGE_TO_PHYS(m),
2769 endoff,
2770 DEV_BSIZE - (endoff & (DEV_BSIZE - 1))
2771 );
2772 }
2773 }
2774
2775 /*
2776 * Set valid, clear dirty bits. If validating the entire
2777 * page we can safely clear the pmap modify bit. We also
2778 * use this opportunity to clear the PG_NOSYNC flag. If a process
2779 * takes a write fault on a MAP_NOSYNC memory area the flag will
2780 * be set again.
2781 *
2782 * We set valid bits inclusive of any overlap, but we can only
2783 * clear dirty bits for DEV_BSIZE chunks that are fully within
2784 * the range.
2785 *
2786 * Page must be busied?
2787 * No other requirements.
2788 */
2789 void
2790 vm_page_set_valid(vm_page_t m, int base, int size)
2791 {
2792 _vm_page_zero_valid(m, base, size);
2793 m->valid |= vm_page_bits(base, size);
2794 }
2795
2796
2797 /*
2798 * Set valid bits and clear dirty bits.
2799 *
2800 * NOTE: This function does not clear the pmap modified bit.
2801 * Also note that e.g. NFS may use a byte-granular base
2802 * and size.
2803 *
2804 * WARNING: Page must be busied? But vfs_clean_one_page() will call
2805 * this without necessarily busying the page (via bdwrite()).
2806 * So for now vm_token must also be held.
2807 *
2808 * No other requirements.
2809 */
2810 void
2811 vm_page_set_validclean(vm_page_t m, int base, int size)
2812 {
2813 int pagebits;
2814
2815 _vm_page_zero_valid(m, base, size);
2816 pagebits = vm_page_bits(base, size);
2817 m->valid |= pagebits;
2818 m->dirty &= ~pagebits;
2819 if (base == 0 && size == PAGE_SIZE) {
2820 /*pmap_clear_modify(m);*/
2821 vm_page_flag_clear(m, PG_NOSYNC);
2822 }
2823 }
2824
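/*
 * Example (editor's sketch, not part of the original source): after
 * filling only part of a page from backing store, mark just that byte
 * range valid and clean; the remainder stays invalid and can later be
 * zeroed via vm_page_zero_invalid() before the page is exposed to user
 * code.  The helper is hypothetical.
 */
#if 0
static void
example_finish_partial_read(vm_page_t m, int bytes_read)
{
	/* busy/vm_token rules from the comment above are assumed to hold */
	vm_page_set_validclean(m, 0, bytes_read);
}
#endif
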
2825 /*
2826 * Set valid & dirty. Used by buwrite()
2827 *
2828 * WARNING: Page must be busied? But vfs_dirty_one_page() will
2829 * call this function in buwrite() so for now vm_token must
2830 * be held.
2831 *
2832 * No other requirements.
2833 */
2834 void
2835 vm_page_set_validdirty(vm_page_t m, int base, int size)
2836 {
2837 int pagebits;
2838
2839 pagebits = vm_page_bits(base, size);
2840 m->valid |= pagebits;
2841 m->dirty |= pagebits;
2842 if (m->object)
2843 vm_object_set_writeable_dirty(m->object);
2844 }
2845
2846 /*
2847 * Clear dirty bits.
2848 *
2849 * NOTE: This function does not clear the pmap modified bit.
2850 * Also note that e.g. NFS may use a byte-granular base
2851 * and size.
2852 *
2853 * Page must be busied?
2854 * No other requirements.
2855 */
2856 void
2857 vm_page_clear_dirty(vm_page_t m, int base, int size)
2858 {
2859 m->dirty &= ~vm_page_bits(base, size);
2860 if (base == 0 && size == PAGE_SIZE) {
2861 /*pmap_clear_modify(m);*/
2862 vm_page_flag_clear(m, PG_NOSYNC);
2863 }
2864 }
2865
2866 /*
2867 * Make the page all-dirty.
2868 *
2869 * Also make sure the related object and vnode reflect the fact that the
2870 * object may now contain a dirty page.
2871 *
2872 * Page must be busied?
2873 * No other requirements.
2874 */
2875 void
2876 vm_page_dirty(vm_page_t m)
2877 {
2878 #ifdef INVARIANTS
2879 int pqtype = m->queue - m->pc;
2880 #endif
2881 KASSERT(pqtype != PQ_CACHE && pqtype != PQ_FREE,
2882 ("vm_page_dirty: page in free/cache queue!"));
2883 if (m->dirty != VM_PAGE_BITS_ALL) {
2884 m->dirty = VM_PAGE_BITS_ALL;
2885 if (m->object)
2886 vm_object_set_writeable_dirty(m->object);
2887 }
2888 }
2889
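/*
 * Example (editor's sketch, not part of the original source): after
 * modifying a page's contents through a kernel mapping, mark the whole
 * page dirty so the pageout daemon knows it must be written back.  The
 * helper is hypothetical.
 */
#if 0
static void
example_modify_page(vm_page_t m, char *kva)
{
	/* page is busied and mapped at kva by the caller */
	bzero(kva, PAGE_SIZE);		/* modify the contents */
	vm_page_dirty(m);		/* record that every DEV_BSIZE chunk is dirty */
}
#endif
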
2890 /*
2891 * Invalidates DEV_BSIZE'd chunks within a page. Both the
2892 * valid and dirty bits for the affected areas are cleared.
2893 *
2894 * Page must be busied?
2895 * Does not block.
2896 * No other requirements.
2897 */
2898 void
2899 vm_page_set_invalid(vm_page_t m, int base, int size)
2900 {
2901 int bits;
2902
2903 bits = vm_page_bits(base, size);
2904 m->valid &= ~bits;
2905 m->dirty &= ~bits;
2906 m->object->generation++;
2907 }
2908
2909 /*
2910 * The kernel assumes that the invalid portions of a page contain
2911 * garbage, but such pages can be mapped into memory by user code.
2912 * When this occurs, we must zero out the non-valid portions of the
2913 * page so user code sees what it expects.
2914 *
2915 * Pages are most often semi-valid when the end of a file is mapped
2916 * into memory and the file's size is not page aligned.
2917 *
2918 * Page must be busied?
2919 * No other requirements.
2920 */
2921 void
2922 vm_page_zero_invalid(vm_page_t m, boolean_t setvalid)
2923 {
2924 int b;
2925 int i;
2926
2927 /*
2928 * Scan the valid bits looking for invalid sections that
2929 * must be zeroed. Invalid sub-DEV_BSIZE'd areas (where the
2930 * valid bit may be set) have already been zeroed by
2931 * vm_page_set_validclean().
2932 */
2933 for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) {
2934 if (i == (PAGE_SIZE / DEV_BSIZE) ||
2935 (m->valid & (1 << i))
2936 ) {
2937 if (i > b) {
2938 pmap_zero_page_area(
2939 VM_PAGE_TO_PHYS(m),
2940 b << DEV_BSHIFT,
2941 (i - b) << DEV_BSHIFT
2942 );
2943 }
2944 b = i + 1;
2945 }
2946 }
2947
2948 /*
2949 * setvalid is TRUE when we can safely set the zero'd areas
2950 * as being valid. We can do this if there are no cache consistency
2951 * issues, e.g. it is ok to do with UFS, but not ok to do with NFS.
2952 */
2953 if (setvalid)
2954 m->valid = VM_PAGE_BITS_ALL;
2955 }
2956
2957 /*
2958 * Is a (partial) page valid? Note that in the degenerate case where
2959 * size == 0, FALSE is returned if the page is entirely invalid and
2960 * TRUE otherwise.
2961 *
2962 * Does not block.
2963 * No other requirements.
2964 */
2965 int
2966 vm_page_is_valid(vm_page_t m, int base, int size)
2967 {
2968 int bits = vm_page_bits(base, size);
2969
2970 if (m->valid && ((m->valid & bits) == bits))
2971 return 1;
2972 else
2973 return 0;
2974 }
2975
2976 /*
2977 * Update dirty bits from pmap/mmu. May not block.
2978 *
2979 * Caller must hold the page busy
2980 */
2981 void
2982 vm_page_test_dirty(vm_page_t m)
2983 {
2984 if ((m->dirty != VM_PAGE_BITS_ALL) && pmap_is_modified(m)) {
2985 vm_page_dirty(m);
2986 }
2987 }
2988
2989 /*
2990 * Register an action, associating it with its vm_page
2991 */
2992 void
2993 vm_page_register_action(vm_page_action_t action, vm_page_event_t event)
2994 {
2995 struct vm_page_action_list *list;
2996 int hv;
2997
2998 hv = (int)((intptr_t)action->m >> 8) & VMACTION_HMASK;
2999 list = &action_list[hv];
3000
3001 lwkt_gettoken(&vm_token);
3002 vm_page_flag_set(action->m, PG_ACTIONLIST);
3003 action->event = event;
3004 LIST_INSERT_HEAD(list, action, entry);
3005 lwkt_reltoken(&vm_token);
3006 }
3007
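/*
 * Example (editor's sketch, not part of the original source): a
 * subsystem wanting a callback when a page event fires fills in a
 * vm_page_action and registers it; vm_page_event_internal() later
 * invokes and unlinks it.  The field initialization and the
 * VMEVENT_COW event value shown here are assumptions, and the helpers
 * are hypothetical.
 */
#if 0
static void
example_page_callback(vm_page_t m, vm_page_action_t action)
{
	/* called from vm_page_event_internal() when the event is issued */
}

static void
example_watch_page(vm_page_t m, struct vm_page_action *action)
{
	action->m = m;				/* page of interest (assumed init) */
	action->func = example_page_callback;	/* callback (assumed init) */
	vm_page_register_action(action, VMEVENT_COW);
}
#endif
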
3008 /*
3009 * Unregister an action, disassociating it from its related vm_page
3010 */
3011 void
3012 vm_page_unregister_action(vm_page_action_t action)
3013 {
3014 struct vm_page_action_list *list;
3015 int hv;
3016
3017 lwkt_gettoken(&vm_token);
3018 if (action->event != VMEVENT_NONE) {
3019 action->event = VMEVENT_NONE;
3020 LIST_REMOVE(action, entry);
3021
3022 hv = (int)((intptr_t)action->m >> 8) & VMACTION_HMASK;
3023 list = &action_list[hv];
3024 if (LIST_EMPTY(list))
3025 vm_page_flag_clear(action->m, PG_ACTIONLIST);
3026 }
3027 lwkt_reltoken(&vm_token);
3028 }
3029
3030 /*
3031 * Issue an event on a VM page. Corresponding action structures are
3032 * removed from the page's list and called.
3033 *
3034 * If the vm_page has no more pending action events we clear its
3035 * PG_ACTIONLIST flag.
3036 */
3037 void
3038 vm_page_event_internal(vm_page_t m, vm_page_event_t event)
3039 {
3040 struct vm_page_action_list *list;
3041 struct vm_page_action *scan;
3042 struct vm_page_action *next;
3043 int hv;
3044 int all;
3045
3046 hv = (int)((intptr_t)m >> 8) & VMACTION_HMASK;
3047 list = &action_list[hv];
3048 all = 1;
3049
3050 lwkt_gettoken(&vm_token);
3051 LIST_FOREACH_MUTABLE(scan, list, entry, next) {
3052 if (scan->m == m) {
3053 if (scan->event == event) {
3054 scan->event = VMEVENT_NONE;
3055 LIST_REMOVE(scan, entry);
3056 scan->func(m, scan);
3057 /* XXX */
3058 } else {
3059 all = 0;
3060 }
3061 }
3062 }
3063 if (all)
3064 vm_page_flag_clear(m, PG_ACTIONLIST);
3065 lwkt_reltoken(&vm_token);
3066 }
3067
3068 #include "opt_ddb.h"
3069 #ifdef DDB
3070 #include <sys/kernel.h>
3071
3072 #include <ddb/ddb.h>
3073
3074 DB_SHOW_COMMAND(page, vm_page_print_page_info)
3075 {
3076 db_printf("vmstats.v_free_count: %d\n", vmstats.v_free_count);
3077 db_printf("vmstats.v_cache_count: %d\n", vmstats.v_cache_count);
3078 db_printf("vmstats.v_inactive_count: %d\n", vmstats.v_inactive_count);
3079 db_printf("vmstats.v_active_count: %d\n", vmstats.v_active_count);
3080 db_printf("vmstats.v_wire_count: %d\n", vmstats.v_wire_count);
3081 db_printf("vmstats.v_free_reserved: %d\n", vmstats.v_free_reserved);
3082 db_printf("vmstats.v_free_min: %d\n", vmstats.v_free_min);
3083 db_printf("vmstats.v_free_target: %d\n", vmstats.v_free_target);
3084 db_printf("vmstats.v_cache_min: %d\n", vmstats.v_cache_min);
3085 db_printf("vmstats.v_inactive_target: %d\n", vmstats.v_inactive_target);
3086 }
3087
3088 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
3089 {
3090 int i;
3091 db_printf("PQ_FREE:");
3092 for(i=0;i<PQ_L2_SIZE;i++) {
3093 db_printf(" %d", vm_page_queues[PQ_FREE + i].lcnt);
3094 }
3095 db_printf("\n");
3096
3097 db_printf("PQ_CACHE:");
3098 for(i=0;i<PQ_L2_SIZE;i++) {
3099 db_printf(" %d", vm_page_queues[PQ_CACHE + i].lcnt);
3100 }
3101 db_printf("\n");
3102
3103 db_printf("PQ_ACTIVE:");
3104 for(i=0;i<PQ_L2_SIZE;i++) {
3105 db_printf(" %d", vm_page_queues[PQ_ACTIVE + i].lcnt);
3106 }
3107 db_printf("\n");
3108
3109 db_printf("PQ_INACTIVE:");
3110 for(i=0;i<PQ_L2_SIZE;i++) {
3111 db_printf(" %d", vm_page_queues[PQ_INACTIVE + i].lcnt);
3112 }
3113 db_printf("\n");
3114 }
3115 #endif /* DDB */