
FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_page.c


    1 /*
    2  * Copyright (c) 1991 Regents of the University of California.
    3  * All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * The Mach Operating System project at Carnegie-Mellon University.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by the University of
   19  *      California, Berkeley and its contributors.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      from: @(#)vm_page.c     7.4 (Berkeley) 5/7/91
   37  * $FreeBSD$
   38  */
   39 
   40 /*
   41  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   42  * All rights reserved.
   43  *
   44  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   45  *
   46  * Permission to use, copy, modify and distribute this software and
   47  * its documentation is hereby granted, provided that both the copyright
   48  * notice and this permission notice appear in all copies of the
   49  * software, derivative works or modified versions, and any portions
   50  * thereof, and that both notices appear in supporting documentation.
   51  *
   52  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   53  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   54  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   55  *
   56  * Carnegie Mellon requests users of this software to return to
   57  *
   58  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   59  *  School of Computer Science
   60  *  Carnegie Mellon University
   61  *  Pittsburgh PA 15213-3890
   62  *
   63  * any improvements or extensions that they make and grant Carnegie the
   64  * rights to redistribute these changes.
   65  */
   66 
   67 /*
   68  *      Resident memory management module.
   69  */
   70 
   71 #include <sys/param.h>
   72 #include <sys/systm.h>
   73 #include <sys/malloc.h>
   74 #include <sys/proc.h>
   75 #include <sys/vmmeter.h>
   76 #include <sys/vnode.h>
   77 
   78 #include <vm/vm.h>
   79 #include <vm/vm_param.h>
   80 #include <vm/vm_prot.h>
   81 #include <sys/lock.h>
   82 #include <vm/vm_kern.h>
   83 #include <vm/vm_object.h>
   84 #include <vm/vm_page.h>
   85 #include <vm/vm_pageout.h>
   86 #include <vm/vm_extern.h>
   87 
   88 static void     vm_page_queue_init __P((void));
   89 static vm_page_t vm_page_select_free __P((vm_object_t object,
   90                         vm_pindex_t pindex, int prefqueue));
   91 static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t));
   92 
   93 /*
    94  *      Associated with each page of user-allocatable memory is a
    95  *      page structure.
   96  */
   97 
   98 static struct pglist *vm_page_buckets;  /* Array of buckets */
   99 static int vm_page_bucket_count;        /* How big is array? */
  100 static int vm_page_hash_mask;           /* Mask for hash function */
  101 static volatile int vm_page_bucket_generation;
  102 
  103 struct pglist vm_page_queue_free[PQ_L2_SIZE] = {0};
  104 struct pglist vm_page_queue_zero[PQ_L2_SIZE] = {0};
  105 struct pglist vm_page_queue_active = {0};
  106 struct pglist vm_page_queue_inactive = {0};
  107 struct pglist vm_page_queue_cache[PQ_L2_SIZE] = {0};
  108 
  109 static int no_queue=0;
  110 
  111 struct vpgqueues vm_page_queues[PQ_COUNT] = {0};
  112 static int pqcnt[PQ_COUNT] = {0};
  113 
  114 static void
  115 vm_page_queue_init(void) {
  116         int i;
  117 
  118         vm_page_queues[PQ_NONE].pl = NULL;
  119         vm_page_queues[PQ_NONE].cnt = &no_queue;
  120         for(i=0;i<PQ_L2_SIZE;i++) {
  121                 vm_page_queues[PQ_FREE+i].pl = &vm_page_queue_free[i];
  122                 vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
  123         }
  124         for(i=0;i<PQ_L2_SIZE;i++) {
  125                 vm_page_queues[PQ_ZERO+i].pl = &vm_page_queue_zero[i];
  126                 vm_page_queues[PQ_ZERO+i].cnt = &cnt.v_free_count;
  127         }
  128         vm_page_queues[PQ_INACTIVE].pl = &vm_page_queue_inactive;
  129         vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
  130 
  131         vm_page_queues[PQ_ACTIVE].pl = &vm_page_queue_active;
  132         vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
  133         for(i=0;i<PQ_L2_SIZE;i++) {
  134                 vm_page_queues[PQ_CACHE+i].pl = &vm_page_queue_cache[i];
  135                 vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count;
  136         }
  137         for(i=0;i<PQ_COUNT;i++) {
  138                 if (vm_page_queues[i].pl) {
  139                         TAILQ_INIT(vm_page_queues[i].pl);
  140                 } else if (i != 0) {
  141                         panic("vm_page_queue_init: queue %d is null", i);
  142                 }
  143                 vm_page_queues[i].lcnt = &pqcnt[i];
  144         }
  145 }
  146 
  147 vm_page_t vm_page_array = 0;
  148 static int vm_page_array_size = 0;
  149 long first_page = 0;
  150 static long last_page;
  151 static vm_size_t page_mask;
  152 static int page_shift;
  153 int vm_page_zero_count = 0;
  154 
  155 /*
  156  * map of contiguous valid DEV_BSIZE chunks in a page
   157  * (this list is valid for page sizes up to 16*DEV_BSIZE)
  158  */
  159 static u_short vm_page_dev_bsize_chunks[] = {
  160         0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
  161         0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
  162 };
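
       /*
        * Editorial note, not part of the original source: a minimal worked
        * example of how this table is used, assuming DEV_BSIZE is 512 and
        * PAGE_SIZE is 4096 (eight chunks per page, VM_PAGE_BITS_ALL == 0xff).
        * Entry n is simply the mask (1 << n) - 1, i.e. n contiguous valid
        * chunks starting at bit 0:
        *
        *      vm_page_dev_bsize_chunks[0] == 0x00    no valid chunks
        *      vm_page_dev_bsize_chunks[3] == 0x07    three 512-byte chunks
        *      vm_page_dev_bsize_chunks[8] == 0xff    a fully valid 4K page
        *
        * vm_page_bits() below shifts one of these masks left by the starting
        * chunk index to build the valid/dirty mask for an arbitrary byte range.
        */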
  163 
  164 static __inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex));
  165 static int vm_page_freechk_and_unqueue __P((vm_page_t m));
  166 static void vm_page_free_wakeup __P((void));
  167 
  168 /*
  169  *      vm_set_page_size:
  170  *
  171  *      Sets the page size, perhaps based upon the memory
  172  *      size.  Must be called before any use of page-size
  173  *      dependent functions.
  174  *
  175  *      Sets page_shift and page_mask from cnt.v_page_size.
  176  */
  177 void
  178 vm_set_page_size()
  179 {
  180 
  181         if (cnt.v_page_size == 0)
  182                 cnt.v_page_size = PAGE_SIZE;
  183         page_mask = cnt.v_page_size - 1;
  184         if ((page_mask & cnt.v_page_size) != 0)
  185                 panic("vm_set_page_size: page size not a power of two");
  186         for (page_shift = 0;; page_shift++)
  187                 if ((1 << page_shift) == cnt.v_page_size)
  188                         break;
  189 }
  190 
  191 /*
  192  *      vm_page_startup:
  193  *
  194  *      Initializes the resident memory module.
  195  *
  196  *      Allocates memory for the page cells, and
  197  *      for the object/offset-to-page hash table headers.
  198  *      Each page cell is initialized and placed on the free list.
  199  */
  200 
  201 vm_offset_t
  202 vm_page_startup(starta, enda, vaddr)
  203         register vm_offset_t starta;
  204         vm_offset_t enda;
  205         register vm_offset_t vaddr;
  206 {
  207         register vm_offset_t mapped;
  208         register vm_page_t m;
  209         register struct pglist *bucket;
  210         vm_size_t npages, page_range;
  211         register vm_offset_t new_start;
  212         int i;
  213         vm_offset_t pa;
  214         int nblocks;
  215         vm_offset_t first_managed_page;
  216 
  217         /* the biggest memory array is the second group of pages */
  218         vm_offset_t start;
  219         vm_offset_t biggestone, biggestsize;
  220 
  221         vm_offset_t total;
  222 
  223         total = 0;
  224         biggestsize = 0;
  225         biggestone = 0;
  226         nblocks = 0;
  227         vaddr = round_page(vaddr);
  228 
  229         for (i = 0; phys_avail[i + 1]; i += 2) {
  230                 phys_avail[i] = round_page(phys_avail[i]);
  231                 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
  232         }
  233 
  234         for (i = 0; phys_avail[i + 1]; i += 2) {
  235                 int size = phys_avail[i + 1] - phys_avail[i];
  236 
  237                 if (size > biggestsize) {
  238                         biggestone = i;
  239                         biggestsize = size;
  240                 }
  241                 ++nblocks;
  242                 total += size;
  243         }
  244 
  245         start = phys_avail[biggestone];
  246 
  247         /*
  248          * Initialize the queue headers for the free queue, the active queue
  249          * and the inactive queue.
  250          */
  251 
  252         vm_page_queue_init();
  253 
  254         /*
  255          * Allocate (and initialize) the hash table buckets.
  256          *
  257          * The number of buckets MUST BE a power of 2, and the actual value is
  258          * the next power of 2 greater than the number of physical pages in
  259          * the system.
  260          *
  261          * Note: This computation can be tweaked if desired.
  262          */
  263         vm_page_buckets = (struct pglist *) vaddr;
  264         bucket = vm_page_buckets;
  265         if (vm_page_bucket_count == 0) {
  266                 vm_page_bucket_count = 1;
  267                 while (vm_page_bucket_count < atop(total))
  268                         vm_page_bucket_count <<= 1;
  269         }
  270         vm_page_hash_mask = vm_page_bucket_count - 1;
  271 
  272         /*
  273          * Validate these addresses.
  274          */
  275 
  276         new_start = start + vm_page_bucket_count * sizeof(struct pglist);
  277         new_start = round_page(new_start);
  278         mapped = round_page(vaddr);
  279         vaddr = pmap_map(mapped, start, new_start,
  280             VM_PROT_READ | VM_PROT_WRITE);
  281         start = new_start;
  282         vaddr = round_page(vaddr);
  283         bzero((caddr_t) mapped, vaddr - mapped);
  284 
  285         for (i = 0; i < vm_page_bucket_count; i++) {
  286                 TAILQ_INIT(bucket);
  287                 bucket++;
  288         }
  289 
  290         /*
  291          * Compute the number of pages of memory that will be available for
  292          * use (taking into account the overhead of a page structure per
  293          * page).
  294          */
  295 
  296         first_page = phys_avail[0] / PAGE_SIZE;
  297         last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;
  298 
  299         page_range = last_page - (phys_avail[0] / PAGE_SIZE);
  300         npages = (total - (page_range * sizeof(struct vm_page)) -
  301             (start - phys_avail[biggestone])) / PAGE_SIZE;
  302 
  303         /*
  304          * Initialize the mem entry structures now, and put them in the free
  305          * queue.
  306          */
  307         vm_page_array = (vm_page_t) vaddr;
  308         mapped = vaddr;
  309 
  310         /*
  311          * Validate these addresses.
  312          */
  313         new_start = round_page(start + page_range * sizeof(struct vm_page));
  314         mapped = pmap_map(mapped, start, new_start,
  315             VM_PROT_READ | VM_PROT_WRITE);
  316         start = new_start;
  317 
  318         first_managed_page = start / PAGE_SIZE;
  319 
  320         /*
  321          * Clear all of the page structures
  322          */
  323         bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
  324         vm_page_array_size = page_range;
  325 
  326         /*
  327          * Construct the free queue(s) in descending order (by physical
  328          * address) so that the first 16MB of physical memory is allocated
  329          * last rather than first.  On large-memory machines, this avoids
  330          * the exhaustion of low physical memory before isa_dmainit has run.
  331          */
  332         cnt.v_page_count = 0;
  333         cnt.v_free_count = 0;
  334         for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
  335                 if (i == biggestone)
  336                         pa = ptoa(first_managed_page);
  337                 else
  338                         pa = phys_avail[i];
  339                 while (pa < phys_avail[i + 1] && npages-- > 0) {
  340                         ++cnt.v_page_count;
  341                         ++cnt.v_free_count;
  342                         m = PHYS_TO_VM_PAGE(pa);
  343                         m->phys_addr = pa;
  344                         m->flags = 0;
  345                         m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
  346                         m->queue = m->pc + PQ_FREE;
  347                         TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m, pageq);
  348                         ++(*vm_page_queues[m->queue].lcnt);
  349                         pa += PAGE_SIZE;
  350                 }
  351         }
  352         return (mapped);
  353 }
  354 
  355 /*
  356  *      vm_page_hash:
  357  *
  358  *      Distributes the object/offset key pair among hash buckets.
  359  *
   360  *      NOTE:  This routine depends on vm_page_bucket_count being a power of 2.
  361  *      This routine may not block.
  362  */
  363 static __inline int
  364 vm_page_hash(object, pindex)
  365         vm_object_t object;
  366         vm_pindex_t pindex;
  367 {
  368         return ((((uintptr_t) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask;
  369 }
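
       /*
        * Editorial example, not in the original file: with a hypothetical
        * vm_page_hash_mask of 0xff, an object at address 0x1234560 and a
        * pindex of 10 hash to
        *
        *      ((0x1234560 >> 5) + (10 >> 1)) & 0xff
        *    = (0x91a2b + 5) & 0xff
        *    = 0x30
        *
        * so adjacent pindexes of one object tend to share a bucket in pairs,
        * while different objects are spread by their (shifted) addresses.
        */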
  370 
  371 /*
  372  *      vm_page_insert:         [ internal use only ]
  373  *
  374  *      Inserts the given mem entry into the object and object list.
  375  *
   376  *      The page tables are not updated: the page will presumably be
   377  *      faulted in when needed or, for a kernel page, the caller will at
   378  *      some point enter the page into the kernel's pmap.  We are not
   379  *      allowed to block here, so we could not do the pmap update anyway.
  380  *
  381  *      The object and page must be locked, and must be splhigh.
  382  *      This routine may not block.
  383  */
  384 
  385 void
  386 vm_page_insert(m, object, pindex)
  387         register vm_page_t m;
  388         register vm_object_t object;
  389         register vm_pindex_t pindex;
  390 {
  391         register struct pglist *bucket;
  392 
  393         if (m->object != NULL)
  394                 panic("vm_page_insert: already inserted");
  395 
  396         /*
  397          * Record the object/offset pair in this page
  398          */
  399 
  400         m->object = object;
  401         m->pindex = pindex;
  402 
  403         /*
   404  *      Insert it into the object/offset hash table
  405          */
  406 
  407         bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
  408         TAILQ_INSERT_TAIL(bucket, m, hashq);
  409         vm_page_bucket_generation++;
  410 
  411         /*
  412          * Now link into the object's list of backed pages.
  413          */
  414 
  415         TAILQ_INSERT_TAIL(&object->memq, m, listq);
  416         m->object->page_hint = m;
  417         m->object->generation++;
  418 
  419         /*
  420          * And show that the object has one more resident page.
  421          */
  422 
  423         object->resident_page_count++;
  424 }
  425 
  426 /*
  427  *      vm_page_remove:         [ internal use only ]
  428  *                              NOTE: used by device pager as well -wfj
  429  *
  430  *      Removes the given mem entry from the object/offset-page
  431  *      table and the object page list.
  432  *
  433  *      The object and page must be locked, and at splhigh.
  434  *      This routine may not block.
  435  *
  436  *      I do not think the underlying pmap entry (if any) is removed here.
  437  */
  438 
  439 void
  440 vm_page_remove(m)
  441         register vm_page_t m;
  442 {
  443         register struct pglist *bucket;
  444         vm_object_t object;
  445 
  446         if (m->object == NULL)
  447                 return;
  448 
  449 #if !defined(MAX_PERF)
  450         if ((m->flags & PG_BUSY) == 0) {
  451                 panic("vm_page_remove: page not busy");
  452         }
  453 #endif
  454         
  455         vm_page_flag_clear(m, PG_BUSY);
  456         if (m->flags & PG_WANTED) {
  457                 vm_page_flag_clear(m, PG_WANTED);
  458                 wakeup(m);
  459         }
  460 
  461         object = m->object;
  462         if (object->page_hint == m)
  463                 object->page_hint = NULL;
  464 
  465         /*
   466  * Remove from the object/offset hash table
  467          */
  468 
  469         bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
  470         TAILQ_REMOVE(bucket, m, hashq);
  471         vm_page_bucket_generation++;
  472 
  473         /*
  474          * Now remove from the object's list of backed pages.
  475          */
  476 
  477         TAILQ_REMOVE(&object->memq, m, listq);
  478 
  479         /*
  480          * And show that the object has one fewer resident page.
  481          */
  482 
  483         object->resident_page_count--;
  484         object->generation++;
  485 
  486         m->object = NULL;
  487 }
  488 
  489 /*
  490  *      vm_page_lookup:
  491  *
  492  *      Returns the page associated with the object/offset
  493  *      pair specified; if none is found, NULL is returned.
  494  *
  495  *      The object must be locked.  No side effects.
  496  *      This routine may not block.
  497  */
  498 
  499 vm_page_t
  500 vm_page_lookup(object, pindex)
  501         register vm_object_t object;
  502         register vm_pindex_t pindex;
  503 {
  504         register vm_page_t m;
  505         register struct pglist *bucket;
  506         int generation;
  507 
  508         /*
  509          * Search the hash table for this object/offset pair
  510          */
  511 
  512         if (object->page_hint && (object->page_hint->pindex == pindex) &&
  513                 (object->page_hint->object == object))
  514                 return object->page_hint;
  515 
  516 retry:
  517         generation = vm_page_bucket_generation;
  518         bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
  519         for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) {
  520                 if ((m->object == object) && (m->pindex == pindex)) {
  521                         if (vm_page_bucket_generation != generation)
  522                                 goto retry;
  523                         m->object->page_hint = m;
  524                         return (m);
  525                 }
  526         }
  527         if (vm_page_bucket_generation != generation)
  528                 goto retry;
  529         return (NULL);
  530 }
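
       /*
        * Editorial sketch, not part of the original source: the typical
        * caller pattern, with a hypothetical object and pindex, is simply
        *
        *      m = vm_page_lookup(object, pindex);
        *      if (m != NULL)
        *              ... the page is resident in the object ...
        *
        * The vm_page_bucket_generation check above restarts the scan if
        * another context inserts or removes a page while the bucket list
        * is being walked.
        */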
  531 
  532 /*
  533  *      vm_page_rename:
  534  *
  535  *      Move the given memory entry from its
  536  *      current object to the specified target object/offset.
  537  *
  538  *      The object must be locked.
  539  *      This routine may not block.
  540  *
  541  *      Note: this routine will raise itself to splvm(), the caller need not. 
  542  */
  543 
  544 void
  545 vm_page_rename(m, new_object, new_pindex)
  546         register vm_page_t m;
  547         register vm_object_t new_object;
  548         vm_pindex_t new_pindex;
  549 {
  550         int s;
  551 
  552         s = splvm();
  553         vm_page_remove(m);
  554         vm_page_insert(m, new_object, new_pindex);
  555         splx(s);
  556 }
  557 
  558 /*
  559  * vm_page_unqueue_nowakeup:
  560  *
  561  *      vm_page_unqueue() without any wakeup
  562  *
  563  *      This routine must be called at splhigh().
  564  *      This routine may not block.
  565  */
  566 
  567 void
  568 vm_page_unqueue_nowakeup(m)
  569         vm_page_t m;
  570 {
  571         int queue = m->queue;
  572         struct vpgqueues *pq;
  573         if (queue != PQ_NONE) {
  574                 pq = &vm_page_queues[queue];
  575                 m->queue = PQ_NONE;
  576                 TAILQ_REMOVE(pq->pl, m, pageq);
  577                 (*pq->cnt)--;
  578                 (*pq->lcnt)--;
  579         }
  580 }
  581 
  582 /*
  583  * vm_page_unqueue:
  584  *
  585  *      Remove a page from its queue.
  586  *
  587  *      This routine must be called at splhigh().
  588  *      This routine may not block.
  589  */
  590 
  591 void
  592 vm_page_unqueue(m)
  593         vm_page_t m;
  594 {
  595         int queue = m->queue;
  596         struct vpgqueues *pq;
  597         if (queue != PQ_NONE) {
  598                 m->queue = PQ_NONE;
  599                 pq = &vm_page_queues[queue];
  600                 TAILQ_REMOVE(pq->pl, m, pageq);
  601                 (*pq->cnt)--;
  602                 (*pq->lcnt)--;
  603                 if ((queue - m->pc) == PQ_CACHE) {
  604                         if ((cnt.v_cache_count + cnt.v_free_count) <
  605                                 (cnt.v_free_reserved + cnt.v_cache_min))
  606                                 pagedaemon_wakeup();
  607                 }
  608         }
  609 }
  610 
  611 /*
  612  *      vm_page_list_find:
  613  *
  614  *      Find a page on the specified queue with color optimization.
  615  *
  616  *      This routine must be called at splvm().
  617  *      This routine may not block.
  618  */
  619 vm_page_t
  620 vm_page_list_find(basequeue, index)
  621         int basequeue, index;
  622 {
  623 #if PQ_L2_SIZE > 1
  624 
  625         int i,j;
  626         vm_page_t m;
  627         int hindex;
  628         struct vpgqueues *pq;
  629 
  630         pq = &vm_page_queues[basequeue];
  631 
  632         m = TAILQ_FIRST(pq[index].pl);
  633         if (m)
  634                 return m;
  635 
  636         for(j = 0; j < PQ_L1_SIZE; j++) {
  637                 int ij;
  638                 for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
  639                         (ij = i + j) > 0;
  640                         i -= PQ_L1_SIZE) {
  641 
  642                         hindex = index + ij;
  643                         if (hindex >= PQ_L2_SIZE)
  644                                 hindex -= PQ_L2_SIZE;
  645                         if (m = TAILQ_FIRST(pq[hindex].pl))
  646                                 return m;
  647 
  648                         hindex = index - ij;
  649                         if (hindex < 0)
  650                                 hindex += PQ_L2_SIZE;
  651                         if (m = TAILQ_FIRST(pq[hindex].pl))
  652                                 return m;
  653                 }
  654         }
  655 
  656         hindex = index + PQ_L2_SIZE / 2;
  657         if (hindex >= PQ_L2_SIZE)
  658                 hindex -= PQ_L2_SIZE;
  659         m = TAILQ_FIRST(pq[hindex].pl);
  660         if (m)
  661                 return m;
  662 
  663         return NULL;
  664 #else
  665         return TAILQ_FIRST(vm_page_queues[basequeue].pl);
  666 #endif
  667 
  668 }
  669 
  670 /*
  671  *      vm_page_select:
  672  *
  673  *      Find a page on the specified queue with color optimization.
  674  *
  675  *      This routine must be called at splvm().
  676  *      This routine may not block.
  677  */
  678 vm_page_t
  679 vm_page_select(object, pindex, basequeue)
  680         vm_object_t object;
  681         vm_pindex_t pindex;
  682         int basequeue;
  683 {
  684 
  685 #if PQ_L2_SIZE > 1
  686         int index;
  687         index = (pindex + object->pg_color) & PQ_L2_MASK;
  688         return vm_page_list_find(basequeue, index);
  689 
  690 #else
  691         return TAILQ_FIRST(vm_page_queues[basequeue].pl);
  692 #endif
  693 
  694 }
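
       /*
        * Editorial example, not in the original file: with a hypothetical
        * PQ_L2_SIZE of 16 (PQ_L2_MASK == 15), an object whose pg_color is 3
        * asking for pindex 21 computes
        *
        *      index = (21 + 3) & 15 == 8
        *
        * so the search starts in queue 8 of the base queue, and
        * vm_page_list_find() widens outward from that color if it is empty.
        */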
  695 
  696 /*
  697  *      vm_page_select_cache:
  698  *
  699  *      Find a page on the cache queue with color optimization.  As pages
   700  *      Find a page on the cache queue with color optimization.  Pages
   701  *      that are found but not usable are deactivated.  This keeps us
   702  *      from using potentially busy cached pages.
  703  *      This routine must be called at splvm().
  704  *      This routine may not block.
  705  */
  706 vm_page_t
  707 vm_page_select_cache(object, pindex)
  708         vm_object_t object;
  709         vm_pindex_t pindex;
  710 {
  711         vm_page_t m;
  712 
  713         while (TRUE) {
  714 #if PQ_L2_SIZE > 1
  715                 int index;
  716                 index = (pindex + object->pg_color) & PQ_L2_MASK;
  717                 m = vm_page_list_find(PQ_CACHE, index);
  718 
  719 #else
  720                 m = TAILQ_FIRST(vm_page_queues[PQ_CACHE].pl);
  721 #endif
  722                 if (m && ((m->flags & PG_BUSY) || m->busy ||
  723                                m->hold_count || m->wire_count)) {
  724                         vm_page_deactivate(m);
  725                         continue;
  726                 }
  727                 return m;
  728         }
  729 }
  730 
  731 /*
  732  *      vm_page_select_free:
  733  *
  734  *      Find a free or zero page, with specified preference.
  735  *
  736  *      This routine must be called at splvm().
  737  *      This routine may not block.
  738  */
  739 
  740 static vm_page_t
  741 vm_page_select_free(object, pindex, prefqueue)
  742         vm_object_t object;
  743         vm_pindex_t pindex;
  744         int prefqueue;
  745 {
  746 #if PQ_L2_SIZE > 1
  747         int i,j;
  748         int index, hindex;
  749 #endif
  750         vm_page_t m, mh;
  751         int oqueuediff;
  752         struct vpgqueues *pq;
  753 
  754         if (prefqueue == PQ_ZERO)
  755                 oqueuediff = PQ_FREE - PQ_ZERO;
  756         else
  757                 oqueuediff = PQ_ZERO - PQ_FREE;
  758 
  759         if (mh = object->page_hint) {
  760                  if (mh->pindex == (pindex - 1)) {
  761                         if ((mh->flags & PG_FICTITIOUS) == 0) {
  762                                 if ((mh < &vm_page_array[cnt.v_page_count-1]) &&
  763                                         (mh >= &vm_page_array[0])) {
  764                                         int queue;
  765                                         m = mh + 1;
  766                                         if (VM_PAGE_TO_PHYS(m) == (VM_PAGE_TO_PHYS(mh) + PAGE_SIZE)) {
  767                                                 queue = m->queue - m->pc;
  768                                                 if (queue == PQ_FREE || queue == PQ_ZERO) {
  769                                                         return m;
  770                                                 }
  771                                         }
  772                                 }
  773                         }
  774                 }
  775         }
  776 
  777         pq = &vm_page_queues[prefqueue];
  778 
  779 #if PQ_L2_SIZE > 1
  780 
  781         index = (pindex + object->pg_color) & PQ_L2_MASK;
  782 
  783         if (m = TAILQ_FIRST(pq[index].pl))
  784                 return m;
  785         if (m = TAILQ_FIRST(pq[index + oqueuediff].pl))
  786                 return m;
  787 
  788         for(j = 0; j < PQ_L1_SIZE; j++) {
  789                 int ij;
  790                 for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
  791                         (ij = i + j) >= 0;
  792                         i -= PQ_L1_SIZE) {
  793 
  794                         hindex = index + ij;
  795                         if (hindex >= PQ_L2_SIZE)
  796                                 hindex -= PQ_L2_SIZE;
  797                         if (m = TAILQ_FIRST(pq[hindex].pl)) 
  798                                 return m;
  799                         if (m = TAILQ_FIRST(pq[hindex + oqueuediff].pl))
  800                                 return m;
  801 
  802                         hindex = index - ij;
  803                         if (hindex < 0)
  804                                 hindex += PQ_L2_SIZE;
  805                         if (m = TAILQ_FIRST(pq[hindex].pl)) 
  806                                 return m;
  807                         if (m = TAILQ_FIRST(pq[hindex + oqueuediff].pl))
  808                                 return m;
  809                 }
  810         }
  811 
  812         hindex = index + PQ_L2_SIZE / 2;
  813         if (hindex >= PQ_L2_SIZE)
  814                 hindex -= PQ_L2_SIZE;
  815         if (m = TAILQ_FIRST(pq[hindex].pl))
  816                 return m;
  817         if (m = TAILQ_FIRST(pq[hindex+oqueuediff].pl))
  818                 return m;
  819 
  820 #else
  821         if (m = TAILQ_FIRST(pq[0].pl))
  822                 return m;
  823         else
  824                 return TAILQ_FIRST(pq[oqueuediff].pl);
  825 #endif
  826 
  827         return NULL;
  828 }
  829 
  830 /*
  831  *      vm_page_alloc:
  832  *
  833  *      Allocate and return a memory cell associated
  834  *      with this VM object/offset pair.
  835  *
  836  *      page_req classes:
  837  *      VM_ALLOC_NORMAL         normal process request
  838  *      VM_ALLOC_SYSTEM         system *really* needs a page
  839  *      VM_ALLOC_INTERRUPT      interrupt time request
  840  *      VM_ALLOC_ZERO           zero page
  841  *
  842  *      Object must be locked.
  843  *      This routine may not block.
  844  *
  845  *      Additional special handling is required when called from an
  846  *      interrupt (VM_ALLOC_INTERRUPT).  We are not allowed to mess with
  847  *      the page cache in this case.
  848  */
  849 vm_page_t
  850 vm_page_alloc(object, pindex, page_req)
  851         vm_object_t object;
  852         vm_pindex_t pindex;
  853         int page_req;
  854 {
  855         register vm_page_t m;
  856         struct vpgqueues *pq;
  857         vm_object_t oldobject;
  858         int queue, qtype;
  859         int s;
  860 
  861         KASSERT(!vm_page_lookup(object, pindex),
  862                 ("vm_page_alloc: page already allocated"));
  863 
  864         if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
  865                 page_req = VM_ALLOC_SYSTEM;
  866         };
  867 
  868         s = splvm();
  869 
  870         switch (page_req) {
  871 
  872         case VM_ALLOC_NORMAL:
  873                 if (cnt.v_free_count >= cnt.v_free_reserved) {
  874                         m = vm_page_select_free(object, pindex, PQ_FREE);
  875                         KASSERT(m != NULL, ("vm_page_alloc(NORMAL): missing page on free queue\n"));
  876                 } else {
  877                         m = vm_page_select_cache(object, pindex);
  878                         if (m == NULL) {
  879                                 splx(s);
  880 #if defined(DIAGNOSTIC)
  881                                 if (cnt.v_cache_count > 0)
  882                                         printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count);
  883 #endif
  884                                 vm_pageout_deficit++;
  885                                 pagedaemon_wakeup();
  886                                 return (NULL);
  887                         }
  888                 }
  889                 break;
  890 
  891         case VM_ALLOC_ZERO:
  892                 if (cnt.v_free_count >= cnt.v_free_reserved) {
  893                         m = vm_page_select_free(object, pindex, PQ_ZERO);
  894                         KASSERT(m != NULL, ("vm_page_alloc(ZERO): missing page on free queue\n"));
  895                 } else {
  896                         m = vm_page_select_cache(object, pindex);
  897                         if (m == NULL) {
  898                                 splx(s);
  899 #if defined(DIAGNOSTIC)
  900                                 if (cnt.v_cache_count > 0)
  901                                         printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count);
  902 #endif
  903                                 vm_pageout_deficit++;
  904                                 pagedaemon_wakeup();
  905                                 return (NULL);
  906                         }
  907                 }
  908                 break;
  909 
  910         case VM_ALLOC_SYSTEM:
  911                 if ((cnt.v_free_count >= cnt.v_free_reserved) ||
  912                     ((cnt.v_cache_count == 0) &&
  913                     (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
  914                         m = vm_page_select_free(object, pindex, PQ_FREE);
  915                         KASSERT(m != NULL, ("vm_page_alloc(SYSTEM): missing page on free queue\n"));
  916                 } else {
  917                         m = vm_page_select_cache(object, pindex);
  918                         if (m == NULL) {
  919                                 splx(s);
  920 #if defined(DIAGNOSTIC)
  921                                 if (cnt.v_cache_count > 0)
  922                                         printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count);
  923 #endif
  924                                 vm_pageout_deficit++;
  925                                 pagedaemon_wakeup();
  926                                 return (NULL);
  927                         }
  928                 }
  929                 break;
  930 
  931         case VM_ALLOC_INTERRUPT:
  932                 if (cnt.v_free_count > 0) {
  933                         m = vm_page_select_free(object, pindex, PQ_FREE);
  934                         KASSERT(m != NULL, ("vm_page_alloc(INTERRUPT): missing page on free queue\n"));
  935                 } else {
  936                         splx(s);
  937                         vm_pageout_deficit++;
  938                         pagedaemon_wakeup();
  939                         return (NULL);
  940                 }
  941                 break;
  942 
  943         default:
  944                 m = NULL;
  945 #if !defined(MAX_PERF)
  946                 panic("vm_page_alloc: invalid allocation class");
  947 #endif
  948         }
  949 
  950         queue = m->queue;
  951         qtype = queue - m->pc;
  952         if (qtype == PQ_ZERO)
  953                 vm_page_zero_count--;
  954         pq = &vm_page_queues[queue];
  955         TAILQ_REMOVE(pq->pl, m, pageq);
  956         (*pq->cnt)--;
  957         (*pq->lcnt)--;
  958         oldobject = NULL;
  959         if (qtype == PQ_ZERO) {
  960                 m->flags = PG_ZERO | PG_BUSY;
  961         } else if (qtype == PQ_CACHE) {
  962                 oldobject = m->object;
  963                 vm_page_busy(m);
  964                 vm_page_remove(m);
  965                 m->flags = PG_BUSY;
  966         } else {
  967                 m->flags = PG_BUSY;
  968         }
  969         m->wire_count = 0;
  970         m->hold_count = 0;
  971         m->act_count = 0;
  972         m->busy = 0;
  973         m->valid = 0;
  974         m->dirty = 0;
  975         m->queue = PQ_NONE;
  976 
  977         /*
  978          * vm_page_insert() is safe prior to the splx().  Note also that
  979          * inserting a page here does not insert it into the pmap (which
  980          * could cause us to block allocating memory).  We cannot block 
  981          * anywhere.
  982          */
  983 
  984         vm_page_insert(m, object, pindex);
  985 
  986         /*
   987          * Don't wake up the pageout daemon too often - only when we
   988          * would be nearly out of memory.
  989          */
  990         if (((cnt.v_free_count + cnt.v_cache_count) <
  991                 (cnt.v_free_reserved + cnt.v_cache_min)) ||
  992                         (cnt.v_free_count < cnt.v_pageout_free_min))
  993                 pagedaemon_wakeup();
  994 
  995         if ((qtype == PQ_CACHE) &&
  996                 ((page_req == VM_ALLOC_NORMAL) || (page_req == VM_ALLOC_ZERO)) &&
  997                 oldobject && (oldobject->type == OBJT_VNODE) &&
  998                 ((oldobject->flags & OBJ_DEAD) == 0)) {
  999                 struct vnode *vp;
 1000                 vp = (struct vnode *) oldobject->handle;
 1001                 if (vp && VSHOULDFREE(vp)) {
 1002                         if ((vp->v_flag & (VFREE|VTBFREE|VDOOMED)) == 0) {
 1003                                 TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
 1004                                 vp->v_flag |= VTBFREE;
 1005                         }
 1006                 }
 1007         }
 1008         splx(s);
 1009 
 1010         return (m);
 1011 }
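
       /*
        * Editorial sketch, not part of the original source: the common
        * allocation pattern, shown with a hypothetical object and pindex.
        * VM_ALLOC_NORMAL may be replaced by VM_ALLOC_ZERO, VM_ALLOC_SYSTEM
        * or VM_ALLOC_INTERRUPT as described above; a NULL return means the
        * caller should wait for the pageout daemon and retry, as
        * vm_page_grab() below does:
        *
        *      vm_page_t m;
        *
        *  retry:
        *      m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
        *      if (m == NULL) {
        *              VM_WAIT;
        *              goto retry;
        *      }
        *      ... m is returned busy (PG_BUSY) with no valid contents ...
        */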
 1012 
 1013 /*
 1014  *      vm_wait:        (also see VM_WAIT macro)
 1015  *
 1016  *      Block until free pages are available for allocation
 1017  */
 1018 
 1019 void
 1020 vm_wait()
 1021 {
 1022         int s;
 1023 
 1024         s = splvm();
 1025         if (curproc == pageproc) {
 1026                 vm_pageout_pages_needed = 1;
 1027                 tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
 1028         } else {
 1029                 if (!vm_pages_needed) {
 1030                         vm_pages_needed++;
 1031                         wakeup(&vm_pages_needed);
 1032                 }
 1033                 tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
 1034         }
 1035         splx(s);
 1036 }
 1037 
 1038 /*
 1039  *      vm_page_sleep:
 1040  *
 1041  *      Block until page is no longer busy.
 1042  */
 1043 
 1044 int
 1045 vm_page_sleep(vm_page_t m, char *msg, char *busy) {
 1046         int slept = 0;
 1047         if ((busy && *busy) || (m->flags & PG_BUSY)) {
 1048                 int s;
 1049                 s = splvm();
 1050                 if ((busy && *busy) || (m->flags & PG_BUSY)) {
 1051                         vm_page_flag_set(m, PG_WANTED);
 1052                         tsleep(m, PVM, msg, 0);
 1053                         slept = 1;
 1054                 }
 1055                 splx(s);
 1056         }
 1057         return slept;
 1058 }
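
       /*
        * Editorial note, not in the original file: callers typically use the
        * return value to restart their scan after sleeping, as
        * contigmalloc1() does below:
        *
        *      if (vm_page_sleep(m, "vpctw0", &m->busy))
        *              goto again1;
        */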
 1059 
 1060 /*
 1061  *      vm_page_activate:
 1062  *
 1063  *      Put the specified page on the active list (if appropriate).
 1064  *
 1065  *      The page queues must be locked.
 1066  *      This routine may not block.
 1067  */
 1068 void
 1069 vm_page_activate(m)
 1070         register vm_page_t m;
 1071 {
 1072         int s;
 1073 
 1074         s = splvm();
 1075         if (m->queue != PQ_ACTIVE) {
 1076                 if ((m->queue - m->pc) == PQ_CACHE)
 1077                         cnt.v_reactivated++;
 1078 
 1079                 vm_page_unqueue(m);
 1080 
 1081                 if (m->wire_count == 0) {
 1082                         m->queue = PQ_ACTIVE;
 1083                         ++(*vm_page_queues[PQ_ACTIVE].lcnt);
 1084                         TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
 1085                         if (m->act_count < ACT_INIT)
 1086                                 m->act_count = ACT_INIT;
 1087                         cnt.v_active_count++;
 1088                 }
 1089         } else {
 1090                 if (m->act_count < ACT_INIT)
 1091                         m->act_count = ACT_INIT;
 1092         }
 1093 
 1094         splx(s);
 1095 }
 1096 
 1097 /*
 1098  * helper routine for vm_page_free and vm_page_free_zero.
 1099  *
 1100  * This routine may not block.
 1101  */
 1102 static int
 1103 vm_page_freechk_and_unqueue(m)
 1104         vm_page_t m;
 1105 {
 1106         vm_object_t oldobject;
 1107 
 1108         oldobject = m->object;
 1109 
 1110 #if !defined(MAX_PERF)
 1111         if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
 1112                 (m->hold_count != 0)) {
 1113                 printf(
 1114                 "vm_page_free: pindex(%lu), busy(%d), PG_BUSY(%d), hold(%d)\n",
 1115                     (u_long)m->pindex, m->busy, (m->flags & PG_BUSY) ? 1 : 0,
 1116                     m->hold_count);
 1117                 if ((m->queue - m->pc) == PQ_FREE)
 1118                         panic("vm_page_free: freeing free page");
 1119                 else
 1120                         panic("vm_page_free: freeing busy page");
 1121         }
 1122 #endif
 1123 
 1124         vm_page_unqueue_nowakeup(m);
 1125         vm_page_remove(m);
 1126 
 1127         if ((m->flags & PG_FICTITIOUS) != 0) {
 1128                 return 0;
 1129         }
 1130 
 1131         m->valid = 0;
 1132 
 1133         if (m->wire_count != 0) {
 1134 #if !defined(MAX_PERF)
 1135                 if (m->wire_count > 1) {
 1136                         panic("vm_page_free: invalid wire count (%d), pindex: 0x%x",
 1137                                 m->wire_count, m->pindex);
 1138                 }
 1139 #endif
 1140                 printf("vm_page_free: freeing wired page\n");
 1141                 m->wire_count = 0;
 1142                 cnt.v_wire_count--;
 1143         }
 1144 
 1145         if (oldobject && (oldobject->type == OBJT_VNODE) &&
 1146                 ((oldobject->flags & OBJ_DEAD) == 0)) {
 1147                 struct vnode *vp;
 1148                 vp = (struct vnode *) oldobject->handle;
 1149                 if (vp && VSHOULDFREE(vp)) {
 1150                         if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) {
 1151                                 TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
 1152                                 vp->v_flag |= VTBFREE;
 1153                         }
 1154                 }
 1155         }
 1156 
 1157 #ifdef __alpha__
 1158         pmap_page_is_free(m);
 1159 #endif
 1160 
 1161         return 1;
 1162 }
 1163 
 1164 /*
 1165  * helper routine for vm_page_free and vm_page_free_zero.
 1166  *
 1167  * This routine may not block.
 1168  */
 1169 static __inline void
 1170 vm_page_free_wakeup()
 1171 {
 1172         
 1173 /*
  1174  * If the pageout daemon needs pages, tell it that some are now
  1175  * free.
 1176  */
 1177         if (vm_pageout_pages_needed) {
 1178                 wakeup(&vm_pageout_pages_needed);
 1179                 vm_pageout_pages_needed = 0;
 1180         }
 1181         /*
  1182          * Wake up processes that are waiting on memory if we hit the
  1183          * high water mark, and wake up the scheduler process if we have
  1184          * lots of memory; that process will swap in processes.
 1185          */
 1186         if (vm_pages_needed &&
 1187                 ((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
 1188                 wakeup(&cnt.v_free_count);
 1189                 vm_pages_needed = 0;
 1190         }
 1191 }
 1192 
 1193 /*
 1194  *      vm_page_free:
 1195  *
 1196  *      Returns the given page to the free list,
 1197  *      disassociating it with any VM object.
 1198  *
 1199  *      Object and page must be locked prior to entry.
 1200  *      This routine may not block.
 1201  */
 1202 void
 1203 vm_page_free(m)
 1204         register vm_page_t m;
 1205 {
 1206         int s;
 1207         struct vpgqueues *pq;
 1208 
 1209         s = splvm();
 1210 
 1211         cnt.v_tfree++;
 1212 
 1213         if (!vm_page_freechk_and_unqueue(m)) {
 1214                 splx(s);
 1215                 return;
 1216         }
 1217 
 1218         m->queue = PQ_FREE + m->pc;
 1219         pq = &vm_page_queues[m->queue];
 1220         ++(*pq->lcnt);
 1221         ++(*pq->cnt);
 1222         /*
 1223          * If the pageout process is grabbing the page, it is likely
 1224          * that the page is NOT in the cache.  It is more likely that
 1225          * the page will be partially in the cache if it is being
 1226          * explicitly freed.
 1227          */
 1228         if (curproc == pageproc) {
 1229                 TAILQ_INSERT_TAIL(pq->pl, m, pageq);
 1230         } else {
 1231                 TAILQ_INSERT_HEAD(pq->pl, m, pageq);
 1232         }
 1233 
 1234         vm_page_free_wakeup();
 1235         splx(s);
 1236 }
 1237 
 1238 void
 1239 vm_page_free_zero(m)
 1240         register vm_page_t m;
 1241 {
 1242         int s;
 1243         struct vpgqueues *pq;
 1244 
 1245         s = splvm();
 1246 
 1247         cnt.v_tfree++;
 1248 
 1249         if (!vm_page_freechk_and_unqueue(m)) {
 1250                 splx(s);
 1251                 return;
 1252         }
 1253 
 1254         m->queue = PQ_ZERO + m->pc;
 1255         pq = &vm_page_queues[m->queue];
 1256         ++(*pq->lcnt);
 1257         ++(*pq->cnt);
 1258 
 1259         TAILQ_INSERT_HEAD(pq->pl, m, pageq);
 1260         ++vm_page_zero_count;
 1261         vm_page_free_wakeup();
 1262         splx(s);
 1263 }
 1264 
 1265 /*
 1266  *      vm_page_wire:
 1267  *
 1268  *      Mark this page as wired down by yet
 1269  *      another map, removing it from paging queues
 1270  *      as necessary.
 1271  *
 1272  *      The page queues must be locked.
 1273  *      This routine may not block.
 1274  */
 1275 void
 1276 vm_page_wire(m)
 1277         register vm_page_t m;
 1278 {
 1279         int s;
 1280 
 1281         s = splvm();
 1282         if (m->wire_count == 0) {
 1283                 vm_page_unqueue(m);
 1284                 cnt.v_wire_count++;
 1285         }
 1286         m->wire_count++;
 1287         splx(s);
 1288         (*vm_page_queues[PQ_NONE].lcnt)++;
 1289         vm_page_flag_set(m, PG_MAPPED);
 1290 }
 1291 
 1292 /*
 1293  *      vm_page_unwire:
 1294  *
 1295  *      Release one wiring of this page, potentially
 1296  *      enabling it to be paged again.
 1297  *
 1298  *      The page queues must be locked.
 1299  *      This routine may not block.
 1300  */
 1301 void
 1302 vm_page_unwire(m, activate)
 1303         register vm_page_t m;
 1304         int activate;
 1305 {
 1306         int s;
 1307 
 1308         s = splvm();
 1309 
 1310         if (m->wire_count > 0) {
 1311                 m->wire_count--;
 1312                 if (m->wire_count == 0) {
 1313                         cnt.v_wire_count--;
 1314                         if (activate) {
 1315                                 TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
 1316                                 m->queue = PQ_ACTIVE;
 1317                                 (*vm_page_queues[PQ_ACTIVE].lcnt)++;
 1318                                 cnt.v_active_count++;
 1319                         } else {
 1320                                 TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
 1321                                 m->queue = PQ_INACTIVE;
 1322                                 (*vm_page_queues[PQ_INACTIVE].lcnt)++;
 1323                                 cnt.v_inactive_count++;
 1324                         }
 1325                 }
 1326         } else {
 1327 #if !defined(MAX_PERF)
 1328                 panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count);
 1329 #endif
 1330         }
 1331         splx(s);
 1332 }
 1333 
 1334 
 1335 /*
 1336  * Move the specified page to the inactive queue.
 1337  *
 1338  * This routine may not block.
 1339  */
 1340 void
 1341 vm_page_deactivate(m)
 1342         register vm_page_t m;
 1343 {
 1344         int s;
 1345 
 1346         /*
 1347          * Ignore if already inactive.
 1348          */
 1349         if (m->queue == PQ_INACTIVE)
 1350                 return;
 1351 
 1352         s = splvm();
 1353         if (m->wire_count == 0) {
 1354                 if ((m->queue - m->pc) == PQ_CACHE)
 1355                         cnt.v_reactivated++;
 1356                 vm_page_unqueue(m);
 1357                 TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
 1358                 m->queue = PQ_INACTIVE;
 1359                 ++(*vm_page_queues[PQ_INACTIVE].lcnt);
 1360                 cnt.v_inactive_count++;
 1361         }
 1362         splx(s);
 1363 }
 1364 
 1365 /*
 1366  * vm_page_cache
 1367  *
 1368  * Put the specified page onto the page cache queue (if appropriate). 
 1369  * This routine may not block.
 1370  */
 1371 void
 1372 vm_page_cache(m)
 1373         register vm_page_t m;
 1374 {
 1375         int s;
 1376 
 1377 #if !defined(MAX_PERF)
 1378         if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
 1379                 printf("vm_page_cache: attempting to cache busy page\n");
 1380                 return;
 1381         }
 1382 #endif
 1383         if ((m->queue - m->pc) == PQ_CACHE)
 1384                 return;
 1385 
 1386         vm_page_protect(m, VM_PROT_NONE);
 1387 #if !defined(MAX_PERF)
 1388         if (m->dirty != 0) {
 1389                 panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex);
 1390         }
 1391 #endif
 1392         s = splvm();
 1393         vm_page_unqueue_nowakeup(m);
 1394         m->queue = PQ_CACHE + m->pc;
 1395         (*vm_page_queues[m->queue].lcnt)++;
 1396         TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
 1397         cnt.v_cache_count++;
 1398         vm_page_free_wakeup();
 1399         splx(s);
 1400 }
 1401 
 1402 /*
  1403  * Grab a page, waiting until we are woken up due to the page
  1404  * changing state.  We keep on waiting if the page continues
 1405  * to be in the object.  If the page doesn't exist, allocate it.
 1406  *
 1407  * This routine may block.
 1408  */
 1409 vm_page_t
 1410 vm_page_grab(object, pindex, allocflags)
 1411         vm_object_t object;
 1412         vm_pindex_t pindex;
 1413         int allocflags;
 1414 {
 1415 
 1416         vm_page_t m;
 1417         int s, generation;
 1418 
 1419 retrylookup:
 1420         if ((m = vm_page_lookup(object, pindex)) != NULL) {
 1421                 if (m->busy || (m->flags & PG_BUSY)) {
 1422                         generation = object->generation;
 1423 
 1424                         s = splvm();
 1425                         while ((object->generation == generation) &&
 1426                                         (m->busy || (m->flags & PG_BUSY))) {
 1427                                 vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
 1428                                 tsleep(m, PVM, "pgrbwt", 0);
 1429                                 if ((allocflags & VM_ALLOC_RETRY) == 0) {
 1430                                         splx(s);
 1431                                         return NULL;
 1432                                 }
 1433                         }
 1434                         splx(s);
 1435                         goto retrylookup;
 1436                 } else {
 1437                         vm_page_busy(m);
 1438                         return m;
 1439                 }
 1440         }
 1441 
 1442         m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
 1443         if (m == NULL) {
 1444                 VM_WAIT;
 1445                 if ((allocflags & VM_ALLOC_RETRY) == 0)
 1446                         return NULL;
 1447                 goto retrylookup;
 1448         }
 1449 
 1450         return m;
 1451 }
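
       /*
        * Editorial sketch, not part of the original source: a hypothetical
        * caller that wants the page returned busy and is willing to wait
        * for it would use
        *
        *      m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
        *
        * Without VM_ALLOC_RETRY the routine may return NULL after sleeping
        * or after a failed allocation, so the caller must retry on its own.
        */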
 1452 
 1453 /*
 1454  * mapping function for valid bits or for dirty bits in
 1455  * a page.  May not block.
 1456  */
 1457 __inline int
 1458 vm_page_bits(int base, int size)
 1459 {
 1460         u_short chunk;
 1461 
 1462         if ((base == 0) && (size >= PAGE_SIZE))
 1463                 return VM_PAGE_BITS_ALL;
 1464 
 1465         size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
 1466         base &= PAGE_MASK;
 1467         if (size > PAGE_SIZE - base) {
 1468                 size = PAGE_SIZE - base;
 1469         }
 1470 
 1471         base = base / DEV_BSIZE;
 1472         chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
 1473         return (chunk << base) & VM_PAGE_BITS_ALL;
 1474 }
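
       /*
        * Editorial example, not in the original file: assuming DEV_BSIZE is
        * 512 and PAGE_SIZE is 4096, a request with base 512 and size 1024
        * works out as
        *
        *      size    rounded up to 1024 (two DEV_BSIZE chunks)
        *      base    chunk index 512 / 512 == 1
        *      chunk   vm_page_dev_bsize_chunks[1024 / 512] == 0x3
        *      result  (0x3 << 1) & VM_PAGE_BITS_ALL == 0x6
        *
        * i.e. the second and third DEV_BSIZE chunks of the page are marked.
        */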
 1475 
 1476 /*
 1477  * set a page valid and clean.  May not block.
 1478  */
 1479 void
 1480 vm_page_set_validclean(m, base, size)
 1481         vm_page_t m;
 1482         int base;
 1483         int size;
 1484 {
 1485         int pagebits = vm_page_bits(base, size);
 1486         m->valid |= pagebits;
 1487         m->dirty &= ~pagebits;
 1488         if( base == 0 && size == PAGE_SIZE)
 1489                 pmap_clear_modify(VM_PAGE_TO_PHYS(m));
 1490 }
 1491 
 1492 /*
 1493  * set a page (partially) invalid.  May not block.
 1494  */
 1495 void
 1496 vm_page_set_invalid(m, base, size)
 1497         vm_page_t m;
 1498         int base;
 1499         int size;
 1500 {
 1501         int bits;
 1502 
 1503         m->valid &= ~(bits = vm_page_bits(base, size));
 1504         if (m->valid == 0)
 1505                 m->dirty &= ~bits;
 1506         m->object->generation++;
 1507 }
 1508 
 1509 /*
 1510  * is (partial) page valid?  May not block.
 1511  */
 1512 int
 1513 vm_page_is_valid(m, base, size)
 1514         vm_page_t m;
 1515         int base;
 1516         int size;
 1517 {
 1518         int bits = vm_page_bits(base, size);
 1519 
 1520         if (m->valid && ((m->valid & bits) == bits))
 1521                 return 1;
 1522         else
 1523                 return 0;
 1524 }
 1525 
 1526 /*
 1527  * update dirty bits from pmap/mmu.  May not block.
 1528  */
 1529 
 1530 void
 1531 vm_page_test_dirty(m)
 1532         vm_page_t m;
 1533 {
 1534         if ((m->dirty != VM_PAGE_BITS_ALL) &&
 1535             pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
 1536                 m->dirty = VM_PAGE_BITS_ALL;
 1537         }
 1538 }
 1539 
 1540 /*
 1541  * This interface is for merging with malloc() someday.
 1542  * Even if we never implement compaction so that contiguous allocation
 1543  * works after initialization time, malloc()'s data structures are good
 1544  * for statistics and for allocations of less than a page.
 1545  */
 1546 void *
 1547 contigmalloc1(size, type, flags, low, high, alignment, boundary, map)
 1548         unsigned long size;     /* should be size_t here and for malloc() */
 1549         struct malloc_type *type;
 1550         int flags;
 1551         unsigned long low;
 1552         unsigned long high;
 1553         unsigned long alignment;
 1554         unsigned long boundary;
 1555         vm_map_t map;
 1556 {
 1557         int i, s, start;
 1558         vm_offset_t addr, phys, tmp_addr;
 1559         int pass;
 1560         vm_page_t pga = vm_page_array;
 1561 
 1562         size = round_page(size);
 1563 #if !defined(MAX_PERF)
 1564         if (size == 0)
 1565                 panic("contigmalloc1: size must not be 0");
 1566         if ((alignment & (alignment - 1)) != 0)
 1567                 panic("contigmalloc1: alignment must be a power of 2");
 1568         if ((boundary & (boundary - 1)) != 0)
 1569                 panic("contigmalloc1: boundary must be a power of 2");
 1570 #endif
 1571 
 1572         start = 0;
 1573         for (pass = 0; pass <= 1; pass++) {
 1574                 s = splvm();
 1575 again:
 1576                 /*
 1577                  * Find first page in array that is free, within range, aligned, and
 1578                  * such that the boundary won't be crossed.
 1579                  */
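                /*
                 * The last condition below is terse: it checks that the
                 * first and last byte of the candidate region fall in the
                 * same boundary-sized window, i.e. that the region never
                 * crosses a "boundary" line.  For example, assuming
                 * boundary == 0x10000 (a 64KB ISA-style limit) and
                 * size == 0x2000:
                 *
                 *   phys == 0x0f000: the last byte is 0x10fff, which lies
                 *   in the next 64KB window, so the XOR leaves high bits
                 *   set and the page is skipped;
                 *
                 *   phys == 0x10000: first and last byte share the window
                 *   starting at 0x10000, so the test passes.
                 *
                 * A boundary of 0 disables the test, since ~(0ul - 1) == 0.
                 */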
 1580                 for (i = start; i < cnt.v_page_count; i++) {
 1581                         int pqtype;
 1582                         phys = VM_PAGE_TO_PHYS(&pga[i]);
 1583                         pqtype = pga[i].queue - pga[i].pc;
 1584                         if (((pqtype == PQ_ZERO) || (pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
 1585                             (phys >= low) && (phys < high) &&
 1586                             ((phys & (alignment - 1)) == 0) &&
 1587                             (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
 1588                                 break;
 1589                 }
 1590 
 1591                 /*
 1592                  * If the above failed, or we would exceed the upper bound, reclaim pages and retry.
 1593                  */
 1594                 if ((i == cnt.v_page_count) ||
 1595                         ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
 1596                         vm_page_t m, next;
 1597 
 1598 again1:
 1599                         for (m = TAILQ_FIRST(&vm_page_queue_inactive);
 1600                                 m != NULL;
 1601                                 m = next) {
 1602 
 1603                                 if (m->queue != PQ_INACTIVE) {
 1604                                         break;
 1605                                 }
 1606 
 1607                                 next = TAILQ_NEXT(m, pageq);
 1608                                 if (vm_page_sleep(m, "vpctw0", &m->busy))
 1609                                         goto again1;
 1610                                 vm_page_test_dirty(m);
 1611                                 if (m->dirty) {
 1612                                         if (m->object->type == OBJT_VNODE) {
 1613                                                 vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
 1614                                                 vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
 1615                                                 VOP_UNLOCK(m->object->handle, 0, curproc);
 1616                                                 goto again1;
 1617                                         } else if (m->object->type == OBJT_SWAP ||
 1618                                                                 m->object->type == OBJT_DEFAULT) {
 1619                                                 vm_pageout_flush(&m, 1, 0);
 1620                                                 goto again1;
 1621                                         }
 1622                                 }
 1623                                 if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
 1624                                         vm_page_cache(m);
 1625                         }
 1626 
 1627                         for (m = TAILQ_FIRST(&vm_page_queue_active);
 1628                                 m != NULL;
 1629                                 m = next) {
 1630 
 1631                                 if (m->queue != PQ_ACTIVE) {
 1632                                         break;
 1633                                 }
 1634 
 1635                                 next = TAILQ_NEXT(m, pageq);
 1636                                 if (vm_page_sleep(m, "vpctw1", &m->busy))
 1637                                         goto again1;
 1638                                 vm_page_test_dirty(m);
 1639                                 if (m->dirty) {
 1640                                         if (m->object->type == OBJT_VNODE) {
 1641                                                 vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
 1642                                                 vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
 1643                                                 VOP_UNLOCK(m->object->handle, 0, curproc);
 1644                                                 goto again1;
 1645                                         } else if (m->object->type == OBJT_SWAP ||
 1646                                                                 m->object->type == OBJT_DEFAULT) {
 1647                                                 vm_pageout_flush(&m, 1, 0);
 1648                                                 goto again1;
 1649                                         }
 1650                                 }
 1651                                 if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
 1652                                         vm_page_cache(m);
 1653                         }
 1654 
 1655                         splx(s);
 1656                         continue;
 1657                 }
 1658                 start = i;
 1659 
 1660                 /*
 1661                  * Check successive pages for contiguous and free.
 1662                  * Check that the successive pages are physically contiguous and free.
 1663                 for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
 1664                         int pqtype;
 1665                         pqtype = pga[i].queue - pga[i].pc;
 1666                         if ((VM_PAGE_TO_PHYS(&pga[i]) !=
 1667                             (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
 1668                             ((pqtype != PQ_ZERO) && (pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
 1669                                 start++;
 1670                                 goto again;
 1671                         }
 1672                 }
 1673 
 1674                 for (i = start; i < (start + size / PAGE_SIZE); i++) {
 1675                         int pqtype;
 1676                         vm_page_t m = &pga[i];
 1677 
 1678                         pqtype = m->queue - m->pc;
 1679                         if (pqtype == PQ_CACHE) {
 1680                                 vm_page_busy(m);
 1681                                 vm_page_free(m);
 1682                         }
 1683 
 1684                         TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
 1685                         (*vm_page_queues[m->queue].lcnt)--;
 1686                         cnt.v_free_count--;
 1687                         m->valid = VM_PAGE_BITS_ALL;
 1688                         m->flags = 0;
 1689                         m->dirty = 0;
 1690                         m->wire_count = 0;
 1691                         m->busy = 0;
 1692                         m->queue = PQ_NONE;
 1693                         m->object = NULL;
 1694                         vm_page_wire(m);
 1695                 }
 1696 
 1697                 /*
 1698                  * We've found a contiguous chunk that meets our requirements.
 1699                  * Allocate kernel VM, map the physical pages into it, and
 1700                  * return the kernel VM pointer.
 1701                  */
 1702                 tmp_addr = addr = kmem_alloc_pageable(map, size);
 1703                 if (addr == 0) {
 1704                         /*
 1705                          * XXX We almost never run out of kernel virtual
 1706                          * space, so we don't bother releasing the pages
 1707                          * claimed above back to the free queues.
 1708                          */
 1709                         splx(s);
 1710                         return (NULL);
 1711                 }
 1712 
 1713                 for (i = start; i < (start + size / PAGE_SIZE); i++) {
 1714                         vm_page_t m = &pga[i];
 1715                         vm_page_insert(m, kernel_object,
 1716                                 OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
 1717                         pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
 1718                         tmp_addr += PAGE_SIZE;
 1719                 }
 1720 
 1721                 splx(s);
 1722                 return ((void *)addr);
 1723         }
 1724         return NULL;
 1725 }
 1726 
 1727 void *
 1728 contigmalloc(size, type, flags, low, high, alignment, boundary)
 1729         unsigned long size;     /* should be size_t here and for malloc() */
 1730         struct malloc_type *type;
 1731         int flags;
 1732         unsigned long low;
 1733         unsigned long high;
 1734         unsigned long alignment;
 1735         unsigned long boundary;
 1736 {
 1737         return contigmalloc1(size, type, flags, low, high, alignment, boundary,
 1738                              kernel_map);
 1739 }
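/*
 * A minimal usage sketch for contigmalloc() (hypothetical driver code,
 * not part of this file): allocate a 64KB DMA buffer that must live in
 * physical memory below 16MB, be page aligned, and never cross a 64KB
 * boundary, the classic ISA DMA constraints.
 */
#ifdef notdef
static void *
example_isa_dma_buffer(void)
{
        return contigmalloc(64 * 1024, M_DEVBUF, M_NOWAIT,
            0ul,                        /* low physical bound */
            0x1000000ul,                /* high physical bound: 16MB */
            PAGE_SIZE,                  /* alignment */
            0x10000ul);                 /* never cross a 64KB boundary */
}
#endif /* notdef */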
 1740 
 1741 vm_offset_t
 1742 vm_page_alloc_contig(size, low, high, alignment)
 1743         vm_offset_t size;
 1744         vm_offset_t low;
 1745         vm_offset_t high;
 1746         vm_offset_t alignment;
 1747 {
 1748         return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
 1749                                           alignment, 0ul, kernel_map));
 1750 }
 1751 
 1752 #include "opt_ddb.h"
 1753 #ifdef DDB
 1754 #include <sys/kernel.h>
 1755 
 1756 #include <ddb/ddb.h>
 1757 
 1758 DB_SHOW_COMMAND(page, vm_page_print_page_info)
 1759 {
 1760         db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
 1761         db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
 1762         db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
 1763         db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
 1764         db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
 1765         db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
 1766         db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
 1767         db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
 1768         db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
 1769         db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
 1770 }
 1771 
 1772 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
 1773 {
 1774         int i;
 1775         db_printf("PQ_FREE:");
 1776         for (i = 0; i < PQ_L2_SIZE; i++) {
 1777                 db_printf(" %d", *vm_page_queues[PQ_FREE + i].lcnt);
 1778         }
 1779         db_printf("\n");
 1780                 
 1781         db_printf("PQ_CACHE:");
 1782         for (i = 0; i < PQ_L2_SIZE; i++) {
 1783                 db_printf(" %d", *vm_page_queues[PQ_CACHE + i].lcnt);
 1784         }
 1785         db_printf("\n");
 1786 
 1787         db_printf("PQ_ZERO:");
 1788         for (i = 0; i < PQ_L2_SIZE; i++) {
 1789                 db_printf(" %d", *vm_page_queues[PQ_ZERO + i].lcnt);
 1790         }
 1791         db_printf("\n");
 1792 
 1793         db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
 1794                 *vm_page_queues[PQ_ACTIVE].lcnt,
 1795                 *vm_page_queues[PQ_INACTIVE].lcnt);
 1796 }
 1797 #endif /* DDB */
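/*
 * The two DDB commands above are invoked from the in-kernel debugger as
 * "show page" and "show pageq".  A sketch of a session (the numbers are
 * illustrative only):
 *
 *   db> show page
 *   cnt.v_free_count: 12345
 *   cnt.v_cache_count: 678
 *   ...
 *   db> show pageq
 *   PQ_FREE: 23 17 41 9 ...
 *   PQ_CACHE: 3 0 11 5 ...
 *   PQ_ZERO: 8 12 0 2 ...
 *   PQ_ACTIVE: 4021, PQ_INACTIVE: 1933
 */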
