


FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_resident.c


    1 /* 
    2  * Mach Operating System
    3  * Copyright (c) 1987-1993 Carnegie Mellon University
    4  * All Rights Reserved.
    5  * 
    6  * Permission to use, copy, modify and distribute this software and its
    7  * documentation is hereby granted, provided that both the copyright
    8  * notice and this permission notice appear in all copies of the
    9  * software, derivative works or modified versions, and any portions
   10  * thereof, and that both notices appear in supporting documentation.
   11  * 
   12  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   13  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
   14  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   15  * 
   16  * Carnegie Mellon requests users of this software to return to
   17  * 
   18  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   19  *  School of Computer Science
   20  *  Carnegie Mellon University
   21  *  Pittsburgh PA 15213-3890
   22  * 
   23  * any improvements or extensions that they make and grant Carnegie Mellon
   24  * the rights to redistribute these changes.
   25  */
   26 /*
   27  * HISTORY
   28  * $Log:        vm_resident.c,v $
   29  * Revision 2.26  93/03/09  10:58:45  danner
   30  *      Added typecast to hash macro to quiet GCC.
   31  *      [93/03/06            af]
   32  * 
   33  * Revision 2.25  93/01/14  18:02:15  danner
   34  *      Added ANSI function prototypes.
   35  *      [92/12/30            dbg]
   36  *      Added vm_page_grab_contiguous_pages.
   37  *      [92/12/23            af]
   38  *      64bit cleanup.
   39  *      [92/12/10  20:51:23  af]
   40  *      64bit cleanup.
   41  *      [92/12/10  20:51:23  af]
   42  * 
   43  * Revision 2.24  92/08/03  18:02:21  jfriedl
   44  *      removed silly prototypes
   45  *      [92/08/02            jfriedl]
   46  * 
   47  * Revision 2.23  92/05/21  17:26:53  jfriedl
   48  *      Cleanup to quiet gcc warnings.
   49  *      Moved MACH_DEBUG includes above prototypes.
   50  *      [92/05/16            jfriedl]
   51  * 
   52  * Revision 2.22  92/02/20  13:09:34  elf
   53  *      Added changes to pmap_startup() to leave the free physical
   54  *      pages in ascending address order.
   55  *      [92/02/20            elf]
   56  * 
   57  * Revision 2.21  92/01/14  16:48:32  rpd
   58  *      Changed vm_page_info for CountInOut.
   59  *      [92/01/14            rpd]
   60  * 
   61  * Revision 2.20  91/10/09  16:20:54  af
   62  *      Added vm_page_deactivate_behind.
   63  *      [91/10/05            rpd]
   64  * 
   65  * Revision 2.19  91/08/28  11:19:00  jsb
   66  *      Added vm_page_free_count_minimum.
   67  *      Fixed divergence between vm_page_free and vm_page_replace.
   68  *      Fixed vm_page_deactivate to handle absent/fictitious pages properly.
   69  *      [91/08/07            rpd]
   70  *      Replaced divergent, expanded vm_page_free code in vm_page_replace
   71  *      with a call to vm_page_free itself.
   72  *      [91/08/15  18:49:59  jsb]
   73  * 
   74  * Revision 2.18  91/07/31  18:22:15  dbg
   75  *      Redefine 'private' to mean private memory, not private page
   76  *      structure.  Calling vm_page_free on a private page frees the
   77  *      page structure but not the page.
   78  *      [91/07/30  17:27:50  dbg]
   79  * 
   80  * Revision 2.17  91/07/01  08:28:24  jsb
   81  *      Removed accidently merged hack.
   82  *      [91/06/29  17:47:21  jsb]
   83  * 
   84  *      20-Jun-91 David L. Black (dlb) at Open Software Foundation
   85  *      Need vm_page_replace in all configurations.
   86  *      [91/06/29  16:37:31  jsb]
   87  * 
   88  * Revision 2.16  91/06/20  07:33:45  rvb
   89  *      Add pmap_page_grap_phys_addr() so that we don't have to
   90  *      export vm_page_t.
   91  * 
   92  * Revision 2.15  91/06/17  15:49:43  jsb
   93  *      Renamed NORMA conditionals. Fixed vm_page_rename implementation.
   94  *      [91/06/17  11:25:16  jsb]
   95  * 
   96  * Revision 2.14  91/06/06  17:08:43  jsb
   97  *      NORMA_IPC: added vm_page_replace.
   98  *      [91/05/14  09:40:19  jsb]
   99  * 
  100  * Revision 2.13  91/05/18  14:42:01  rpd
  101  *      Renamed vm_page_fictitious_zone to vm_page_zone.
  102  *      [91/05/16            rpd]
  103  * 
  104  *      Moved deactivate-behind code from vm_page_alloc to vm_page_insert.
  105  *      [91/04/21            rpd]
  106  * 
  107  *      Fixed vm_page_deactivate as suggested by rfr,
  108  *      to clear the reference bit on inactive/referenced pages.
  109  *      [91/04/20            rpd]
  110  * 
  111  *      Added vm_page_fictitious_addr.
  112  *      [91/04/10            rpd]
  113  * 
  114  *      Restored vm_page_laundry_count.
  115  *      [91/04/07            rpd]
  116  * 
  117  *      Changed vm_page_release to use thread_wakeup_one.
  118  *      [91/04/05            rpd]
  119  *      Added vm_page_grab_fictitious, etc.
  120  *      [91/03/29            rpd]
  121  *      Added vm_page_bootstrap, pmap_startup, pmap_steal_memory.
  122  *      [91/03/25            rpd]
  123  * 
  124  * Revision 2.12  91/05/14  17:51:19  mrt
  125  *      Correcting copyright
  126  * 
  127  * Revision 2.11  91/03/16  15:07:02  rpd
  128  *      Reverted to the previous version of vm_page_deactivate,
  129  *      which doesn't look at the busy bit.  Changed vm_page_alloc
  130  *      to not deactivate busy pages.
  131  *      [91/03/11            rpd]
  132  * 
  133  *      Fixed simple-locking typo.
  134  *      [91/03/09            rpd]
  135  *      Added continuation argument to vm_page_wait.
  136  *      [91/02/05            rpd]
  137  * 
  138  * Revision 2.10  91/02/05  18:00:27  mrt
  139  *      Changed to new Mach copyright
  140  *      [91/02/01  16:34:44  mrt]
  141  * 
  142  * Revision 2.9  91/01/08  16:46:06  rpd
  143  *      Changed to singly-linked VP bucket chains.
  144  *      [91/01/03            rpd]
  145  * 
  146  *      Removed count field from VP buckets.
  147  *      Added vm_page_info.
  148  *      [91/01/02            rpd]
  149  *      Added vm_page_grab, vm_page_release.
  150  *      Changed vm_wait/VM_WAIT to vm_page_wait/VM_PAGE_WAIT.
  151  *      [90/12/09  17:41:15  rpd]
  152  * 
  153  * Revision 2.8  90/11/05  14:35:12  rpd
  154  *      Changed vm_page_deactivate to remove busy pages from the page queues.
  155  *      Now it requires that the page's object be locked.
  156  *      [90/11/04            rpd]
  157  * 
  158  * Revision 2.7  90/10/25  14:50:50  rwd
  159  *      Made vm_page_alloc_deactivate_behind TRUE.
  160  *      [90/10/24            rwd]
  161  * 
  162  *      Removed the max_mapping field of pages.
  163  *      [90/10/22            rpd]
  164  * 
  165  * Revision 2.6  90/10/12  13:07:03  rpd
  166  *      Initialize vm_page_template's software reference bit.
  167  *      In vm_page_deactivate, clear the software reference bit
  168  *      in addition to using pmap_clear_reference.
  169  *      [90/10/08            rpd]
  170  * 
  171  * Revision 2.5  90/08/27  22:16:11  dbg
  172  *      Fixed vm_page_free, vm_page_wire, vm_page_unwire
  173  *      to only modify vm_page_wire_count for real pages.
  174  *      [90/08/23            rpd]
  175  * 
  176  * Revision 2.4  90/02/22  20:06:53  dbg
  177  *      Fix vm_page_deactivate to work for pages that are wired or
  178  *      already inactive.
  179  *      [90/02/09            dbg]
  180  *              PAGE_WAKEUP --> PAGE_WAKEUP_DONE in vm_page_free() to reflect
  181  *              the fact that it clears the busy flag.  Remove PAGE_WAKEUP from
  182  *              vm_page_unwire; callers are responsible for this, and it didn't
  183  *              work right if the page was wired more than once.
  184  *              [89/12/13            dlb]
  185  * 
  186  * Revision 2.3  90/01/11  11:48:34  dbg
  187  *      Removed all spl protection from VM system.
  188  *      Removed vm_page_free_synchronized.
  189  *      [90/01/03            dbg]
  190  * 
  191  *      Added changes from mainline:
  192  * 
  193  *              Retract special preemption technology for pageout daemon.
  194  *              [89/10/10            mwyoung]
  195  * 
  196  *              Add documentation of global variables.
  197  *              Declare vm_page_bucket_t for VP table; add count and
  198  *              lock fields.
  199  *              [89/04/29            mwyoung]
  200  * 
  201  *              Separate "private" from "fictitious" page attributes.
  202  *              [89/04/22            mwyoung]
  203  * 
  204  *              Made the deactivate-behind optimization conditional on
  205  *              vm_page_alloc_deactivate_behind, which is FALSE for now.
  206  *              [89/08/31  19:32:59  rpd]
  207  * 
  208  *              Increased zdata_size to allow for more zones.
  209  *              [89/07/31  17:13:06  jsb]
  210  * 
  211  *              Changed from 8 to 15 the threshold that triggers invocation of
  212  *              Debugger() in vm_page_alloc().  On the M500 with few buffers
  213  *              was causing trouble. [af]
  214  * 
  215  * Revision 2.2  89/09/08  11:29:00  dbg
  216  *      Fixed vm_page_free to decrement vm_page_laundry_count
  217  *      only if the freed page was in the laundry.  Also
  218  *      made vm_page_free fix vm_page_wire_count when freeing
  219  *      a wired page.
  220  * 
  221  *      Revision 2.16  89/06/12  14:53:18  jsb
  222  *              Picked up bug fix (missing splimp) from Sequent via dlb.
  223  *              [89/06/12  14:38:34  jsb]
  224  * 
  225  *      Revision 2.15  89/06/02  11:38:00  rvb
  226  *              Changed from 8 to 15 the threshold that triggers invocation of
  227  *              Debugger() in vm_page_alloc().  On the M500 with few buffers
  228  *              was causing trouble. [af]
  229  * 
  230  * Revision 2.14  89/04/18  21:29:17  mwyoung
  231  *      Recent history:
  232  *              Add vm_page_fictitious_zone.
  233  *              Handle absent pages in vm_page_free().
  234  *              Eliminate use of owner and clean fields to vm_page_t.
  235  *      History condensation:
  236  *              Reorganize vm_page_startup to avoid bad physical addresses.
  237  *              Use a template for initialization [mwyoung].
  238  *              Provide separate vm_page_init() function for outside use [mwyoung].
  239  *              Split up page system lock [dbg].
  240  *              Initial external memory management integration [bolosky, mwyoung].
  241  *              Plenty of bug fixes [dbg, avie, mwyoung].
  242  *              Converted to active/inactive/free list queues [avie].
  243  *              Created [avie].
  244  *      [89/04/18            mwyoung]
  245  * 
  246  */
  247 /*
  248  *      File:   vm/vm_page.c
  249  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  250  *
  251  *      Resident memory management module.
  252  */
  253 #include <cpus.h>
  254 
  255 #include <mach/vm_prot.h>
  256 #include <kern/counters.h>
  257 #include <kern/sched_prim.h>
  258 #include <kern/task.h>
  259 #include <kern/thread.h>
  260 #include <mach/vm_statistics.h>
  261 #include <kern/xpr.h>
  262 #include <kern/zalloc.h>
  263 #include <vm/pmap.h>
  264 #include <vm/vm_map.h>
  265 #include <vm/vm_page.h>
  266 #include <vm/vm_pageout.h>
  267 
  268 #include <mach_vm_debug.h>
  269 #if     MACH_VM_DEBUG
  270 #include <mach/kern_return.h>
  271 #include <mach_debug/hash_info.h>
  272 #include <vm/vm_kern.h>
  273 #include <vm/vm_user.h>
  274 #endif
  275 
  276 
  277 /*
   278  *      Associated with each page of user-allocatable memory is a
  279  *      page structure.
  280  */
  281 
  282 /*
  283  *      These variables record the values returned by vm_page_bootstrap,
  284  *      for debugging purposes.  The implementation of pmap_steal_memory
  285  *      and pmap_startup here also uses them internally.
  286  */
  287 
  288 vm_offset_t virtual_space_start;
  289 vm_offset_t virtual_space_end;
  290 
  291 /*
  292  *      The vm_page_lookup() routine, which provides for fast
  293  *      (virtual memory object, offset) to page lookup, employs
  294  *      the following hash table.  The vm_page_{insert,remove}
  295  *      routines install and remove associations in the table.
  296  *      [This table is often called the virtual-to-physical,
  297  *      or VP, table.]
  298  */
  299 typedef struct {
  300         decl_simple_lock_data(,lock)
  301         vm_page_t pages;
  302 } vm_page_bucket_t;
  303 
  304 vm_page_bucket_t *vm_page_buckets;              /* Array of buckets */
  305 unsigned int    vm_page_bucket_count = 0;       /* How big is array? */
  306 unsigned int    vm_page_hash_mask;              /* Mask for hash function */
  307 
  308 /*
  309  *      The virtual page size is currently implemented as a runtime
  310  *      variable, but is constant once initialized using vm_set_page_size.
  311  *      This initialization must be done in the machine-dependent
  312  *      bootstrap sequence, before calling other machine-independent
  313  *      initializations.
  314  *
  315  *      All references to the virtual page size outside this
  316  *      module must use the PAGE_SIZE constant.
  317  */
  318 vm_size_t       page_size  = 4096;
  319 vm_size_t       page_mask  = 4095;
  320 int             page_shift = 12;
  321 
  322 /*
  323  *      Resident page structures are initialized from
  324  *      a template (see vm_page_alloc).
  325  *
  326  *      When adding a new field to the virtual memory
  327  *      object structure, be sure to add initialization
  328  *      (see vm_page_bootstrap).
  329  */
  330 struct vm_page  vm_page_template;
  331 
  332 /*
  333  *      Resident pages that represent real memory
  334  *      are allocated from a free list.
  335  */
  336 vm_page_t       vm_page_queue_free;
  337 vm_page_t       vm_page_queue_fictitious;
  338 decl_simple_lock_data(,vm_page_queue_free_lock)
  339 unsigned int    vm_page_free_wanted;
  340 int             vm_page_free_count;
  341 int             vm_page_fictitious_count;
  342 
  343 unsigned int    vm_page_free_count_minimum;     /* debugging */
  344 
  345 /*
  346  *      Occasionally, the virtual memory system uses
  347  *      resident page structures that do not refer to
  348  *      real pages, for example to leave a page with
  349  *      important state information in the VP table.
  350  *
  351  *      These page structures are allocated the way
  352  *      most other kernel structures are.
  353  */
  354 zone_t  vm_page_zone;
  355 
  356 /*
  357  *      Fictitious pages don't have a physical address,
  358  *      but we must initialize phys_addr to something.
  359  *      For debugging, this should be a strange value
  360  *      that the pmap module can recognize in assertions.
  361  */
  362 vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
  363 
  364 /*
  365  *      Resident page structures are also chained on
  366  *      queues that are used by the page replacement
  367  *      system (pageout daemon).  These queues are
  368  *      defined here, but are shared by the pageout
  369  *      module.
  370  */
  371 queue_head_t    vm_page_queue_active;
  372 queue_head_t    vm_page_queue_inactive;
  373 decl_simple_lock_data(,vm_page_queue_lock)
  374 int     vm_page_active_count;
  375 int     vm_page_inactive_count;
  376 int     vm_page_wire_count;
  377 
  378 /*
  379  *      Several page replacement parameters are also
  380  *      shared with this module, so that page allocation
  381  *      (done here in vm_page_alloc) can trigger the
  382  *      pageout daemon.
  383  */
  384 int     vm_page_free_target = 0;
  385 int     vm_page_free_min = 0;
  386 int     vm_page_inactive_target = 0;
  387 int     vm_page_free_reserved = 0;
  388 int     vm_page_laundry_count = 0;
  389 
  390 /*
  391  *      The VM system has a couple of heuristics for deciding
  392  *      that pages are "uninteresting" and should be placed
  393  *      on the inactive queue as likely candidates for replacement.
  394  *      These variables let the heuristics be controlled at run-time
  395  *      to make experimentation easier.
  396  */
  397 
  398 boolean_t vm_page_deactivate_behind = TRUE;
  399 boolean_t vm_page_deactivate_hint = TRUE;
  400 
  401 /*
  402  *      vm_set_page_size:
  403  *
  404  *      Sets the page size, perhaps based upon the memory
  405  *      size.  Must be called before any use of page-size
  406  *      dependent functions.
  407  *
  408  *      Sets page_shift and page_mask from page_size.
  409  */
  410 void vm_set_page_size(void)
  411 {
  412         page_mask = page_size - 1;
  413 
  414         if ((page_mask & page_size) != 0)
  415                 panic("vm_set_page_size: page size not a power of two");
  416 
  417         for (page_shift = 0; ; page_shift++)
  418                 if ((1 << page_shift) == page_size)
  419                         break;
  420 }
  421 
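/*
 *      Editor's note -- an illustrative sketch, not part of the original
 *      source: with the default page_size of 4096, vm_set_page_size above
 *      leaves
 *
 *              page_mask  = 4095   (0x0fff)
 *              page_shift = 12     (1 << 12 == 4096)
 *
 *      For any power-of-two page_size exactly one bit is set, so
 *      (page_mask & page_size) is zero; a value such as 3000 fails that
 *      test and the routine panics.
 */
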
  422 /*
  423  *      vm_page_bootstrap:
  424  *
  425  *      Initializes the resident memory module.
  426  *
  427  *      Allocates memory for the page cells, and
  428  *      for the object/offset-to-page hash table headers.
  429  *      Each page cell is initialized and placed on the free list.
  430  *      Returns the range of available kernel virtual memory.
  431  */
  432 
  433 void vm_page_bootstrap(
  434         vm_offset_t *startp,
  435         vm_offset_t *endp)
  436 {
  437         register vm_page_t m;
  438         int i;
  439 
  440         /*
  441          *      Initialize the vm_page template.
  442          */
  443 
  444         m = &vm_page_template;
  445         m->object = VM_OBJECT_NULL;     /* reset later */
  446         m->offset = 0;                  /* reset later */
  447         m->wire_count = 0;
  448 
  449         m->inactive = FALSE;
  450         m->active = FALSE;
  451         m->laundry = FALSE;
  452         m->free = FALSE;
  453 
  454         m->busy = TRUE;
  455         m->wanted = FALSE;
  456         m->tabled = FALSE;
  457         m->fictitious = FALSE;
  458         m->private = FALSE;
  459         m->absent = FALSE;
  460         m->error = FALSE;
  461         m->dirty = FALSE;
  462         m->precious = FALSE;
  463         m->reference = FALSE;
  464 
  465         m->phys_addr = 0;               /* reset later */
  466 
  467         m->page_lock = VM_PROT_NONE;
  468         m->unlock_request = VM_PROT_NONE;
  469 
  470         /*
  471          *      Initialize the page queues.
  472          */
  473 
  474         simple_lock_init(&vm_page_queue_free_lock);
  475         simple_lock_init(&vm_page_queue_lock);
  476 
  477         vm_page_queue_free = VM_PAGE_NULL;
  478         vm_page_queue_fictitious = VM_PAGE_NULL;
  479         queue_init(&vm_page_queue_active);
  480         queue_init(&vm_page_queue_inactive);
  481 
  482         vm_page_free_wanted = 0;
  483 
  484         /*
  485          *      Steal memory for the zone system.
  486          */
  487 
  488         kentry_data_size = kentry_count * sizeof(struct vm_map_entry);
  489         kentry_data = pmap_steal_memory(kentry_data_size);
  490 
  491         zdata = pmap_steal_memory(zdata_size);
  492 
  493         /*
  494          *      Allocate (and initialize) the virtual-to-physical
  495          *      table hash buckets.
  496          *
  497          *      The number of buckets should be a power of two to
  498          *      get a good hash function.  The following computation
  499          *      chooses the first power of two that is greater
  500          *      than the number of physical pages in the system.
  501          */
  502 
  503         if (vm_page_bucket_count == 0) {
  504                 unsigned int npages = pmap_free_pages();
  505 
  506                 vm_page_bucket_count = 1;
  507                 while (vm_page_bucket_count < npages)
  508                         vm_page_bucket_count <<= 1;
  509         }
  510 
  511         vm_page_hash_mask = vm_page_bucket_count - 1;
  512 
  513         if (vm_page_hash_mask & vm_page_bucket_count)
  514                 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
  515 
  516         vm_page_buckets = (vm_page_bucket_t *)
  517                 pmap_steal_memory(vm_page_bucket_count *
  518                                   sizeof(vm_page_bucket_t));
  519 
  520         for (i = 0; i < vm_page_bucket_count; i++) {
  521                 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
  522 
  523                 bucket->pages = VM_PAGE_NULL;
  524                 simple_lock_init(&bucket->lock);
  525         }
  526 
  527         /*
  528          *      Machine-dependent code allocates the resident page table.
  529          *      It uses vm_page_init to initialize the page frames.
  530          *      The code also returns to us the virtual space available
  531          *      to the kernel.  We don't trust the pmap module
  532          *      to get the alignment right.
  533          */
  534 
  535         pmap_startup(&virtual_space_start, &virtual_space_end);
  536         virtual_space_start = round_page(virtual_space_start);
  537         virtual_space_end = trunc_page(virtual_space_end);
  538 
  539         *startp = virtual_space_start;
  540         *endp = virtual_space_end;
  541 
  542         printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);
  543         vm_page_free_count_minimum = vm_page_free_count;
  544 }
  545 
  546 #ifndef MACHINE_PAGES
  547 /*
  548  *      We implement pmap_steal_memory and pmap_startup with the help
  549  *      of two simpler functions, pmap_virtual_space and pmap_next_page.
  550  */
  551 
  552 vm_offset_t pmap_steal_memory(
  553         vm_size_t size)
  554 {
  555         vm_offset_t addr, vaddr, paddr;
  556 
  557         /*
   558          *      We round the size up to an integer multiple of four bytes.
  559          */
  560 
  561         size = (size + 3) &~ 3;
  562 
  563         /*
  564          *      If this is the first call to pmap_steal_memory,
   565          *      we have to initialize ourselves.
  566          */
  567 
  568         if (virtual_space_start == virtual_space_end) {
  569                 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
  570 
  571                 /*
  572                  *      The initial values must be aligned properly, and
  573                  *      we don't trust the pmap module to do it right.
  574                  */
  575 
  576                 virtual_space_start = round_page(virtual_space_start);
  577                 virtual_space_end = trunc_page(virtual_space_end);
  578         }
  579 
  580         /*
  581          *      Allocate virtual memory for this request.
  582          */
  583 
  584         addr = virtual_space_start;
  585         virtual_space_start += size;
  586 
  587         /*
  588          *      Allocate and map physical pages to back new virtual pages.
  589          */
  590 
  591         for (vaddr = round_page(addr);
  592              vaddr < addr + size;
  593              vaddr += PAGE_SIZE) {
  594                 if (!pmap_next_page(&paddr))
  595                         panic("pmap_steal_memory");
  596 
  597                 /*
  598                  *      XXX Logically, these mappings should be wired,
  599                  *      but some pmap modules barf if they are.
  600                  */
  601 
  602                 pmap_enter(kernel_pmap, vaddr, paddr,
  603                            VM_PROT_READ|VM_PROT_WRITE, FALSE);
  604         }
  605 
  606         return addr;
  607 }
  608 
  609 void pmap_startup(
  610         vm_offset_t *startp,
  611         vm_offset_t *endp)
  612 {
  613         unsigned int i, npages, pages_initialized;
  614         vm_page_t pages;
  615         vm_offset_t paddr;
  616 
  617         /*
  618          *      We calculate how many page frames we will have
  619          *      and then allocate the page structures in one chunk.
  620          */
  621 
  622         npages = ((PAGE_SIZE * pmap_free_pages() +
  623                    (round_page(virtual_space_start) - virtual_space_start)) /
  624                   (PAGE_SIZE + sizeof *pages));
  625 
  626         pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages);
  627 
  628         /*
  629          *      Initialize the page frames.
  630          */
  631 
  632         for (i = 0, pages_initialized = 0; i < npages; i++) {
  633                 if (!pmap_next_page(&paddr))
  634                         break;
  635 
  636                 vm_page_init(&pages[i], paddr);
  637                 pages_initialized++;
  638         }
  639 
  640         /*
  641          * Release pages in reverse order so that physical pages
  642          * initially get allocated in ascending addresses. This keeps
  643          * the devices (which must address physical memory) happy if
  644          * they require several consecutive pages.
  645          */
  646 
  647         for (i = pages_initialized; i > 0; i--) {
  648                 vm_page_release(&pages[i - 1]);
  649         }
  650 
  651         /*
  652          *      We have to re-align virtual_space_start,
  653          *      because pmap_steal_memory has been using it.
  654          */
  655 
  656         virtual_space_start = round_page(virtual_space_start);
  657 
  658         *startp = virtual_space_start;
  659         *endp = virtual_space_end;
  660 }
  661 #endif  /* MACHINE_PAGES */
  662 
  663 /*
  664  *      Routine:        vm_page_module_init
  665  *      Purpose:
  666  *              Second initialization pass, to be done after
  667  *              the basic VM system is ready.
  668  */
  669 void            vm_page_module_init(void)
  670 {
  671         vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
  672                              VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
  673                              PAGE_SIZE,
  674                              FALSE, "vm pages");
  675 }
  676 
  677 /*
  678  *      Routine:        vm_page_create
  679  *      Purpose:
  680  *              After the VM system is up, machine-dependent code
  681  *              may stumble across more physical memory.  For example,
  682  *              memory that it was reserving for a frame buffer.
  683  *              vm_page_create turns this memory into available pages.
  684  */
  685 
  686 void vm_page_create(
  687         vm_offset_t     start,
  688         vm_offset_t     end)
  689 {
  690         vm_offset_t paddr;
  691         vm_page_t m;
  692 
  693         for (paddr = round_page(start);
  694              paddr < trunc_page(end);
  695              paddr += PAGE_SIZE) {
  696                 m = (vm_page_t) zalloc(vm_page_zone);
  697                 if (m == VM_PAGE_NULL)
  698                         panic("vm_page_create");
  699 
  700                 vm_page_init(m, paddr);
  701                 vm_page_release(m);
  702         }
  703 }
  704 
  705 /*
  706  *      vm_page_hash:
  707  *
  708  *      Distributes the object/offset key pair among hash buckets.
  709  *
  710  *      NOTE:   To get a good hash function, the bucket count should
  711  *              be a power of two.
  712  */
   713 #define vm_page_hash(object, offset) \
   714         (((unsigned int)(vm_offset_t)(object) + (unsigned int)atop(offset)) \
   715                 & vm_page_hash_mask)
  716 
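/*
 *      Editor's note -- an illustrative sketch, not part of the original
 *      source: atop() converts the byte offset to a page number, so with
 *      4K pages and a hypothetical vm_page_bucket_count of 1024
 *      (vm_page_hash_mask == 0x3ff), an object at address 0xC0123400 and
 *      offset 0x5000 hashes to
 *
 *              ((0xC0123400 + (0x5000 >> 12)) & 0x3ff) == 0x005
 *
 *      Every object/offset pair maps to exactly one bucket, which is why
 *      vm_page_lookup only has to walk a single chain.
 */
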
  717 /*
  718  *      vm_page_insert:         [ internal use only ]
  719  *
   720  *      Inserts the given mem entry into the object/offset-page
  721  *      table and object list.
  722  *
  723  *      The object and page must be locked.
  724  */
  725 
  726 void vm_page_insert(
  727         register vm_page_t      mem,
  728         register vm_object_t    object,
  729         register vm_offset_t    offset)
  730 {
  731         register vm_page_bucket_t *bucket;
  732 
  733         VM_PAGE_CHECK(mem);
  734 
  735         if (mem->tabled)
  736                 panic("vm_page_insert");
  737 
  738         /*
  739          *      Record the object/offset pair in this page
  740          */
  741 
  742         mem->object = object;
  743         mem->offset = offset;
  744 
  745         /*
   746  *      Insert it into the object/offset hash table
  747          */
  748 
  749         bucket = &vm_page_buckets[vm_page_hash(object, offset)];
  750         simple_lock(&bucket->lock);
  751         mem->next = bucket->pages;
  752         bucket->pages = mem;
  753         simple_unlock(&bucket->lock);
  754 
  755         /*
  756          *      Now link into the object's list of backed pages.
  757          */
  758 
  759         queue_enter(&object->memq, mem, vm_page_t, listq);
  760         mem->tabled = TRUE;
  761 
  762         /*
  763          *      Show that the object has one more resident page.
  764          */
  765 
  766         object->resident_page_count++;
  767 
  768         /*
  769          *      Detect sequential access and inactivate previous page.
  770          *      We ignore busy pages.
  771          */
  772 
  773         if (vm_page_deactivate_behind &&
  774             (offset == object->last_alloc + PAGE_SIZE)) {
  775                 vm_page_t       last_mem;
  776 
  777                 last_mem = vm_page_lookup(object, object->last_alloc);
  778                 if ((last_mem != VM_PAGE_NULL) && !last_mem->busy)
  779                         vm_page_deactivate(last_mem);
  780         }
  781         object->last_alloc = offset;
  782 }
  783 
  784 /*
  785  *      vm_page_replace:
  786  *
  787  *      Exactly like vm_page_insert, except that we first
  788  *      remove any existing page at the given offset in object
  789  *      and we don't do deactivate-behind.
  790  *
  791  *      The object and page must be locked.
  792  */
  793 
  794 void vm_page_replace(
  795         register vm_page_t      mem,
  796         register vm_object_t    object,
  797         register vm_offset_t    offset)
  798 {
  799         register vm_page_bucket_t *bucket;
  800 
  801         VM_PAGE_CHECK(mem);
  802 
  803         if (mem->tabled)
  804                 panic("vm_page_replace");
  805 
  806         /*
  807          *      Record the object/offset pair in this page
  808          */
  809 
  810         mem->object = object;
  811         mem->offset = offset;
  812 
  813         /*
   814  *      Insert it into the object/offset hash table,
  815          *      replacing any page that might have been there.
  816          */
  817 
  818         bucket = &vm_page_buckets[vm_page_hash(object, offset)];
  819         simple_lock(&bucket->lock);
  820         if (bucket->pages) {
  821                 vm_page_t *mp = &bucket->pages;
  822                 register vm_page_t m = *mp;
  823                 do {
  824                         if (m->object == object && m->offset == offset) {
  825                                 /*
  826                                  * Remove page from bucket and from object,
  827                                  * and return it to the free list.
  828                                  */
  829                                 *mp = m->next;
  830                                 queue_remove(&object->memq, m, vm_page_t,
  831                                              listq);
  832                                 m->tabled = FALSE;
  833                                 object->resident_page_count--;
  834 
  835                                 /*
  836                                  * Return page to the free list.
  837                                  * Note the page is not tabled now, so this
  838                                  * won't self-deadlock on the bucket lock.
  839                                  */
  840 
  841                                 vm_page_free(m);
  842                                 break;
  843                         }
  844                         mp = &m->next;
   845                 } while ((m = *mp) != VM_PAGE_NULL);
  846                 mem->next = bucket->pages;
  847         } else {
  848                 mem->next = VM_PAGE_NULL;
  849         }
  850         bucket->pages = mem;
  851         simple_unlock(&bucket->lock);
  852 
  853         /*
  854          *      Now link into the object's list of backed pages.
  855          */
  856 
  857         queue_enter(&object->memq, mem, vm_page_t, listq);
  858         mem->tabled = TRUE;
  859 
  860         /*
  861          *      And show that the object has one more resident
  862          *      page.
  863          */
  864 
  865         object->resident_page_count++;
  866 }
  867 
  868 /*
  869  *      vm_page_remove:         [ internal use only ]
  870  *
  871  *      Removes the given mem entry from the object/offset-page
  872  *      table and the object page list.
  873  *
  874  *      The object and page must be locked.
  875  */
  876 
  877 void vm_page_remove(
  878         register vm_page_t      mem)
  879 {
  880         register vm_page_bucket_t       *bucket;
  881         register vm_page_t      this;
  882 
  883         assert(mem->tabled);
  884         VM_PAGE_CHECK(mem);
  885 
  886         /*
   887  *      Remove from the object/offset hash table
  888          */
  889 
  890         bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
  891         simple_lock(&bucket->lock);
  892         if ((this = bucket->pages) == mem) {
  893                 /* optimize for common case */
  894 
  895                 bucket->pages = mem->next;
  896         } else {
  897                 register vm_page_t      *prev;
  898 
  899                 for (prev = &this->next;
  900                      (this = *prev) != mem;
  901                      prev = &this->next)
  902                         continue;
  903                 *prev = this->next;
  904         }
  905         simple_unlock(&bucket->lock);
  906 
  907         /*
  908          *      Now remove from the object's list of backed pages.
  909          */
  910 
  911         queue_remove(&mem->object->memq, mem, vm_page_t, listq);
  912 
  913         /*
  914          *      And show that the object has one fewer resident
  915          *      page.
  916          */
  917 
  918         mem->object->resident_page_count--;
  919 
  920         mem->tabled = FALSE;
  921 }
  922 
  923 /*
  924  *      vm_page_lookup:
  925  *
  926  *      Returns the page associated with the object/offset
  927  *      pair specified; if none is found, VM_PAGE_NULL is returned.
  928  *
  929  *      The object must be locked.  No side effects.
  930  */
  931 
  932 vm_page_t vm_page_lookup(
  933         register vm_object_t    object,
  934         register vm_offset_t    offset)
  935 {
  936         register vm_page_t      mem;
  937         register vm_page_bucket_t *bucket;
  938 
  939         /*
  940          *      Search the hash table for this object/offset pair
  941          */
  942 
  943         bucket = &vm_page_buckets[vm_page_hash(object, offset)];
  944 
  945         simple_lock(&bucket->lock);
  946         for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
  947                 VM_PAGE_CHECK(mem);
  948                 if ((mem->object == object) && (mem->offset == offset))
  949                         break;
  950         }
  951         simple_unlock(&bucket->lock);
  952         return mem;
  953 }
  954 
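/*
 *      Editor's sketch (hypothetical caller, not part of the original
 *      source): since the object must be locked across the lookup, a
 *      typical use looks like
 *
 *              vm_object_lock(object);
 *              m = vm_page_lookup(object, trunc_page(offset));
 *              if (m != VM_PAGE_NULL) {
 *                      ... operate on the resident page ...
 *              }
 *              vm_object_unlock(object);
 *
 *      vm_object_lock/vm_object_unlock are assumed to be the usual object
 *      locking primitives declared elsewhere in the VM system.
 */
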
  955 /*
  956  *      vm_page_rename:
  957  *
  958  *      Move the given memory entry from its
  959  *      current object to the specified target object/offset.
  960  *
  961  *      The object must be locked.
  962  */
  963 void vm_page_rename(
  964         register vm_page_t      mem,
  965         register vm_object_t    new_object,
  966         vm_offset_t             new_offset)
  967 {
  968         /*
  969          *      Changes to mem->object require the page lock because
  970          *      the pageout daemon uses that lock to get the object.
  971          */
  972 
  973         vm_page_lock_queues();
  974         vm_page_remove(mem);
  975         vm_page_insert(mem, new_object, new_offset);
  976         vm_page_unlock_queues();
  977 }
  978 
  979 /*
  980  *      vm_page_init:
  981  *
  982  *      Initialize the fields in a new page.
  983  *      This takes a structure with random values and initializes it
  984  *      so that it can be given to vm_page_release or vm_page_insert.
  985  */
  986 void vm_page_init(
  987         vm_page_t       mem,
  988         vm_offset_t     phys_addr)
  989 {
  990         *mem = vm_page_template;
  991         mem->phys_addr = phys_addr;
  992 }
  993 
  994 /*
  995  *      vm_page_grab_fictitious:
  996  *
  997  *      Remove a fictitious page from the free list.
  998  *      Returns VM_PAGE_NULL if there are no free pages.
  999  */
 1000 
 1001 vm_page_t vm_page_grab_fictitious(void)
 1002 {
 1003         register vm_page_t m;
 1004 
 1005         simple_lock(&vm_page_queue_free_lock);
 1006         m = vm_page_queue_fictitious;
 1007         if (m != VM_PAGE_NULL) {
 1008                 vm_page_fictitious_count--;
 1009                 vm_page_queue_fictitious = (vm_page_t) m->pageq.next;
 1010                 m->free = FALSE;
 1011         }
 1012         simple_unlock(&vm_page_queue_free_lock);
 1013 
 1014         return m;
 1015 }
 1016 
 1017 /*
 1018  *      vm_page_release_fictitious:
 1019  *
 1020  *      Release a fictitious page to the free list.
 1021  */
 1022 
 1023 void vm_page_release_fictitious(
 1024         register vm_page_t m)
 1025 {
 1026         simple_lock(&vm_page_queue_free_lock);
 1027         if (m->free)
 1028                 panic("vm_page_release_fictitious");
 1029         m->free = TRUE;
 1030         m->pageq.next = (queue_entry_t) vm_page_queue_fictitious;
 1031         vm_page_queue_fictitious = m;
 1032         vm_page_fictitious_count++;
 1033         simple_unlock(&vm_page_queue_free_lock);
 1034 }
 1035 
 1036 /*
 1037  *      vm_page_more_fictitious:
 1038  *
 1039  *      Add more fictitious pages to the free list.
 1040  *      Allowed to block.
 1041  */
 1042 
 1043 int vm_page_fictitious_quantum = 5;
 1044 
 1045 void vm_page_more_fictitious(void)
 1046 {
 1047         register vm_page_t m;
 1048         int i;
 1049 
 1050         for (i = 0; i < vm_page_fictitious_quantum; i++) {
 1051                 m = (vm_page_t) zalloc(vm_page_zone);
 1052                 if (m == VM_PAGE_NULL)
 1053                         panic("vm_page_more_fictitious");
 1054 
 1055                 vm_page_init(m, vm_page_fictitious_addr);
 1056                 m->fictitious = TRUE;
 1057                 vm_page_release_fictitious(m);
 1058         }
 1059 }
 1060 
 1061 /*
 1062  *      vm_page_convert:
 1063  *
 1064  *      Attempt to convert a fictitious page into a real page.
 1065  */
 1066 
 1067 boolean_t vm_page_convert(
 1068         register vm_page_t m)
 1069 {
 1070         register vm_page_t real_m;
 1071 
 1072         real_m = vm_page_grab();
 1073         if (real_m == VM_PAGE_NULL)
 1074                 return FALSE;
 1075 
 1076         m->phys_addr = real_m->phys_addr;
 1077         m->fictitious = FALSE;
 1078 
 1079         real_m->phys_addr = vm_page_fictitious_addr;
 1080         real_m->fictitious = TRUE;
 1081 
 1082         vm_page_release_fictitious(real_m);
 1083         return TRUE;
 1084 }
 1085 
 1086 /*
 1087  *      vm_page_grab:
 1088  *
 1089  *      Remove a page from the free list.
 1090  *      Returns VM_PAGE_NULL if the free list is too small.
 1091  */
 1092 
 1093 vm_page_t vm_page_grab(void)
 1094 {
 1095         register vm_page_t      mem;
 1096 
 1097         simple_lock(&vm_page_queue_free_lock);
 1098 
 1099         /*
 1100          *      Only let privileged threads (involved in pageout)
 1101          *      dip into the reserved pool.
 1102          */
 1103 
 1104         if ((vm_page_free_count < vm_page_free_reserved) &&
 1105             !current_thread()->vm_privilege) {
 1106                 simple_unlock(&vm_page_queue_free_lock);
 1107                 return VM_PAGE_NULL;
 1108         }
 1109 
 1110         if (vm_page_queue_free == VM_PAGE_NULL)
 1111                 panic("vm_page_grab");
 1112 
 1113         if (--vm_page_free_count < vm_page_free_count_minimum)
 1114                 vm_page_free_count_minimum = vm_page_free_count;
 1115         mem = vm_page_queue_free;
 1116         vm_page_queue_free = (vm_page_t) mem->pageq.next;
 1117         mem->free = FALSE;
 1118         simple_unlock(&vm_page_queue_free_lock);
 1119 
 1120         /*
 1121          *      Decide if we should poke the pageout daemon.
 1122          *      We do this if the free count is less than the low
 1123          *      water mark, or if the free count is less than the high
 1124          *      water mark (but above the low water mark) and the inactive
 1125          *      count is less than its target.
 1126          *
 1127          *      We don't have the counts locked ... if they change a little,
 1128          *      it doesn't really matter.
 1129          */
 1130 
 1131         if ((vm_page_free_count < vm_page_free_min) ||
 1132             ((vm_page_free_count < vm_page_free_target) &&
 1133              (vm_page_inactive_count < vm_page_inactive_target)))
 1134                 thread_wakeup((event_t) &vm_page_free_wanted);
 1135 
 1136         return mem;
 1137 }
 1138 
 1139 vm_offset_t vm_page_grab_phys_addr(void)
 1140 {
 1141         vm_page_t p = vm_page_grab();
 1142         if (p == VM_PAGE_NULL)
 1143                 return -1;
 1144         else
 1145                 return p->phys_addr;
 1146 }
 1147 
 1148 /*
 1149  *      vm_page_grab_contiguous_pages:
 1150  *
  1151  *      Take N pages off the free list; the pages should
 1152  *      cover a contiguous range of physical addresses.
 1153  *      [Used by device drivers to cope with DMA limitations]
 1154  *
  1155  *      Returns the page descriptors in ascending order,
  1156  *      or KERN_RESOURCE_SHORTAGE if it could not.
 1157  */
 1158 
 1159 /* Biggest phys page number for the pages we handle in VM */
 1160 
 1161 vm_size_t       vm_page_big_pagenum = 0;        /* Set this before call! */
 1162 
 1163 kern_return_t
 1164 vm_page_grab_contiguous_pages(
 1165         int             npages,
 1166         vm_page_t       pages[],
 1167         natural_t       *bits)
 1168 {
 1169         register int    first_set;
 1170         int             size, alloc_size;
 1171         kern_return_t   ret;
 1172         vm_page_t       mem, prevmem;
 1173 
 1174 #ifndef NBBY
  1175 #define NBBY    8       /* size in bits of sizeof()'s unity */
 1176 #endif
 1177 
 1178 #define NBPEL   (sizeof(natural_t)*NBBY)
 1179 
 1180         size = (vm_page_big_pagenum + NBPEL - 1)
 1181                 & ~(NBPEL - 1);                         /* in bits */
 1182 
 1183         size = size / NBBY;                             /* in bytes */
 1184 
 1185         /*
 1186          * If we are called before the VM system is fully functional
 1187          * the invoker must provide us with the work space. [one bit
 1188          * per page starting at phys 0 and up to vm_page_big_pagenum]
 1189          */
 1190         if (bits == 0) {
 1191                 alloc_size = round_page(size);
 1192                 if (kmem_alloc_wired( kernel_map, &bits, alloc_size) !=
 1193                         KERN_SUCCESS)
 1194                         return KERN_RESOURCE_SHORTAGE;
 1195         } else
 1196                 alloc_size = 0;
 1197 
 1198         bzero(bits, size);
 1199 
 1200         /*
  1201          * A very large granularity call; it's rare, so that is ok
 1202          */
 1203         simple_lock(&vm_page_queue_free_lock);
 1204 
 1205         /*
 1206          *      Do not dip into the reserved pool.
 1207          */
 1208 
 1209         if (vm_page_free_count < vm_page_free_reserved) {
 1210                 simple_unlock(&vm_page_queue_free_lock);
 1211                 return KERN_RESOURCE_SHORTAGE;
 1212         }
 1213 
 1214         /*
 1215          *      First pass through, build a big bit-array of
 1216          *      the pages that are free.  It is not going to
  1217          *      be too large anyway; in 4k we can fit info
 1218          *      for 32k pages.
 1219          */
 1220         mem = vm_page_queue_free;
 1221         while (mem) {
 1222                 register int word_index, bit_index;
 1223 
 1224                 bit_index = (mem->phys_addr >> page_shift);
 1225                 word_index = bit_index / NBPEL;
 1226                 bit_index = bit_index - (word_index * NBPEL);
 1227                 bits[word_index] |= 1 << bit_index;
 1228 
 1229                 mem = (vm_page_t) mem->pageq.next;
 1230         }
 1231 
 1232         /*
 1233          *      Second loop. Scan the bit array for NPAGES
 1234          *      contiguous bits.  That gives us, if any,
 1235          *      the range of pages we will be grabbing off
 1236          *      the free list.
 1237          */
 1238         {
 1239             register int        bits_so_far = 0, i;
 1240 
 1241                 first_set = 0;
 1242 
 1243                 for (i = 0; i < size; i += sizeof(natural_t)) {
 1244 
 1245                     register natural_t  v = bits[i / sizeof(natural_t)];
 1246                     register int        bitpos;
 1247 
 1248                     /*
 1249                      * Bitscan this one word
 1250                      */
 1251                     if (v) {
 1252                         /*
 1253                          * keep counting them beans ?
 1254                          */
 1255                         bitpos = 0;
 1256 
 1257                         if (bits_so_far) {
 1258 count_ones:
 1259                             while (v & 1) {
 1260                                 bitpos++;
 1261                                 /*
 1262                                  * got enough beans ?
 1263                                  */
 1264                                 if (++bits_so_far == npages)
 1265                                     goto found_em;
 1266                                 v >>= 1;
 1267                             }
 1268                             /* if we are being lucky, roll again */
 1269                             if (bitpos == NBPEL)
 1270                                 continue;
 1271                         }
 1272 
 1273                         /*
 1274                          * search for beans here
 1275                          */
 1276                         bits_so_far = 0;
 1277 count_zeroes:
 1278                         while ((bitpos < NBPEL) && ((v & 1) == 0)) {
 1279                             bitpos++;
 1280                             v >>= 1;
 1281                         }
 1282                         if (v & 1) {
 1283                             first_set = (i * NBBY) + bitpos;
 1284                             goto count_ones;
 1285                         }
 1286                     }
 1287                     /*
 1288                      * No luck
 1289                      */
 1290                     bits_so_far = 0;
 1291                 }
 1292         }
 1293 
 1294         /*
 1295          *      We could not find enough contiguous pages.
 1296          */
 1297 not_found_em:
 1298         simple_unlock(&vm_page_queue_free_lock);
 1299 
 1300         ret = KERN_RESOURCE_SHORTAGE;
 1301         goto out;
 1302 
 1303         /*
 1304          *      Final pass. Now we know which pages we want.
 1305          *      Scan the list until we find them all, grab
 1306          *      pages as we go.  FIRST_SET tells us where
 1307          *      in the bit-array our pages start.
 1308          */
 1309 found_em:
 1310         vm_page_free_count -= npages;
 1311         if (vm_page_free_count < vm_page_free_count_minimum)
 1312                 vm_page_free_count_minimum = vm_page_free_count;
 1313 
 1314         {
 1315             register vm_offset_t        first_phys, last_phys;
 1316 
 1317             /* cache values for compare */
 1318             first_phys = first_set << page_shift;
 1319             last_phys = first_phys + (npages << page_shift);/* not included */
 1320 
 1321             /* running pointers */
 1322             mem = vm_page_queue_free;
 1323             prevmem = VM_PAGE_NULL;
 1324 
 1325             while (mem) {
 1326 
 1327                 register vm_offset_t    addr;
 1328 
 1329                 addr = mem->phys_addr;
 1330 
 1331                 if ((addr >= first_phys) &&
 1332                     (addr <  last_phys)) {
 1333                     if (prevmem)
 1334                         prevmem->pageq.next = mem->pageq.next;
 1335                     pages[(addr - first_phys) >> page_shift] = mem;
 1336                     mem->free = FALSE;
 1337                     /*
 1338                      * Got them all ?
 1339                      */
 1340                     if (--npages == 0) break;
 1341                 } else
 1342                     prevmem = mem;
 1343 
 1344                 mem = (vm_page_t) mem->pageq.next;
 1345             }
 1346         }
 1347 
 1348         simple_unlock(&vm_page_queue_free_lock);
 1349 
 1350         /*
 1351          *      Decide if we should poke the pageout daemon.
 1352          *      We do this if the free count is less than the low
 1353          *      water mark, or if the free count is less than the high
 1354          *      water mark (but above the low water mark) and the inactive
 1355          *      count is less than its target.
 1356          *
 1357          *      We don't have the counts locked ... if they change a little,
 1358          *      it doesn't really matter.
 1359          */
 1360 
 1361         if ((vm_page_free_count < vm_page_free_min) ||
 1362             ((vm_page_free_count < vm_page_free_target) &&
 1363              (vm_page_inactive_count < vm_page_inactive_target)))
 1364                 thread_wakeup(&vm_page_free_wanted);
 1365 
 1366         ret = KERN_SUCCESS;
 1367 out:
 1368         if (alloc_size)
 1369                 kmem_free( kernel_map, bits, alloc_size);
 1370 
 1371         return ret;
 1372 }
 1373 
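/*
 *      Editor's sketch (hypothetical driver use, not part of the original
 *      source): a DMA-limited driver might restrict the search to the low
 *      16MB and ask for four physically contiguous pages:
 *
 *              vm_page_t       run[4];
 *              kern_return_t   kr;
 *
 *              vm_page_big_pagenum = atop(16 * 1024 * 1024);
 *              kr = vm_page_grab_contiguous_pages(4, run, (natural_t *) 0);
 *
 *      Passing a null "bits" pointer lets the routine allocate its own
 *      scratch bitmap with kmem_alloc_wired; callers running before the
 *      VM system is fully up must supply one bit of workspace per
 *      physical page themselves.
 */
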
 1374 /*
 1375  *      vm_page_release:
 1376  *
 1377  *      Return a page to the free list.
 1378  */
 1379 
 1380 void vm_page_release(
 1381         register vm_page_t      mem)
 1382 {
 1383         simple_lock(&vm_page_queue_free_lock);
 1384         if (mem->free)
 1385                 panic("vm_page_release");
 1386         mem->free = TRUE;
 1387         mem->pageq.next = (queue_entry_t) vm_page_queue_free;
 1388         vm_page_queue_free = mem;
 1389         vm_page_free_count++;
 1390 
 1391         /*
 1392          *      Check if we should wake up someone waiting for page.
 1393          *      But don't bother waking them unless they can allocate.
 1394          *
 1395          *      We wakeup only one thread, to prevent starvation.
 1396          *      Because the scheduling system handles wait queues FIFO,
 1397          *      if we wakeup all waiting threads, one greedy thread
 1398          *      can starve multiple niceguy threads.  When the threads
  1399  *      all wake up, the greedy thread runs first, grabs the page,
 1400          *      and waits for another page.  It will be the first to run
 1401          *      when the next page is freed.
 1402          *
 1403          *      However, there is a slight danger here.
 1404          *      The thread we wake might not use the free page.
 1405          *      Then the other threads could wait indefinitely
 1406          *      while the page goes unused.  To forestall this,
 1407          *      the pageout daemon will keep making free pages
 1408          *      as long as vm_page_free_wanted is non-zero.
 1409          */
 1410 
 1411         if ((vm_page_free_wanted > 0) &&
 1412             (vm_page_free_count >= vm_page_free_reserved)) {
 1413                 vm_page_free_wanted--;
 1414                 thread_wakeup_one((event_t) &vm_page_free_count);
 1415         }
 1416 
 1417         simple_unlock(&vm_page_queue_free_lock);
 1418 }
 1419 
 1420 /*
 1421  *      vm_page_wait:
 1422  *
 1423  *      Wait for a page to become available.
 1424  *      If there are plenty of free pages, then we don't sleep.
 1425  */
 1426 
 1427 void vm_page_wait(
 1428         void (*continuation)(void))
 1429 {
 1430         /*
 1431          *      We can't use vm_page_free_reserved to make this
 1432          *      determination.  Consider: some thread might
 1433          *      need to allocate two pages.  The first allocation
 1434          *      succeeds, the second fails.  After the first page is freed,
 1435          *      a call to vm_page_wait must really block.
 1436          */
 1437 
 1438         simple_lock(&vm_page_queue_free_lock);
 1439         if (vm_page_free_count < vm_page_free_target) {
 1440                 if (vm_page_free_wanted++ == 0)
 1441                         thread_wakeup((event_t)&vm_page_free_wanted);
 1442                 assert_wait((event_t)&vm_page_free_count, FALSE);
 1443                 simple_unlock(&vm_page_queue_free_lock);
 1444                 if (continuation != 0) {
 1445                         counter(c_vm_page_wait_block_user++);
 1446                         thread_block(continuation);
 1447                 } else {
 1448                         counter(c_vm_page_wait_block_kernel++);
 1449                         thread_block((void (*)(void)) 0);
 1450                 }
 1451         } else
 1452                 simple_unlock(&vm_page_queue_free_lock);
 1453 }
 1454 
 1455 /*
 1456  *      vm_page_alloc:
 1457  *
 1458  *      Allocate and return a memory cell associated
 1459  *      with this VM object/offset pair.
 1460  *
 1461  *      Object must be locked.
 1462  */
 1463 
 1464 vm_page_t vm_page_alloc(
 1465         vm_object_t     object,
 1466         vm_offset_t     offset)
 1467 {
 1468         register vm_page_t      mem;
 1469 
 1470         mem = vm_page_grab();
 1471         if (mem == VM_PAGE_NULL)
 1472                 return VM_PAGE_NULL;
 1473 
 1474         vm_page_lock_queues();
 1475         vm_page_insert(mem, object, offset);
 1476         vm_page_unlock_queues();
 1477 
 1478         return mem;
 1479 }
 1480 
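/*
 *      Editor's sketch (hypothetical caller, not part of the original
 *      source): vm_page_alloc returns VM_PAGE_NULL when vm_page_grab
 *      finds the free list too small, so callers typically retry after
 *      waiting for the pageout daemon, dropping the object lock around
 *      the potentially blocking wait:
 *
 *              while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
 *                      vm_object_unlock(object);
 *                      vm_page_wait((void (*)(void)) 0);
 *                      vm_object_lock(object);
 *              }
 */
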
 1481 /*
 1482  *      vm_page_free:
 1483  *
 1484  *      Returns the given page to the free list,
  1485  *      disassociating it from any VM object.
 1486  *
 1487  *      Object and page queues must be locked prior to entry.
 1488  */
 1489 void vm_page_free(
 1490         register vm_page_t      mem)
 1491 {
 1492         if (mem->free)
 1493                 panic("vm_page_free");
 1494 
 1495         if (mem->tabled)
 1496                 vm_page_remove(mem);
 1497         VM_PAGE_QUEUES_REMOVE(mem);
 1498 
 1499         if (mem->wire_count != 0) {
 1500                 if (!mem->private && !mem->fictitious)
 1501                         vm_page_wire_count--;
 1502                 mem->wire_count = 0;
 1503         }
 1504 
 1505         if (mem->laundry) {
 1506                 vm_page_laundry_count--;
 1507                 mem->laundry = FALSE;
 1508         }
 1509 
 1510         PAGE_WAKEUP_DONE(mem);
 1511 
 1512         if (mem->absent)
 1513                 vm_object_absent_release(mem->object);
 1514 
 1515         /*
 1516          *      XXX The calls to vm_page_init here are
 1517          *      really overkill.
 1518          */
 1519 
 1520         if (mem->private || mem->fictitious) {
 1521                 vm_page_init(mem, vm_page_fictitious_addr);
 1522                 mem->fictitious = TRUE;
 1523                 vm_page_release_fictitious(mem);
 1524         } else {
 1525                 vm_page_init(mem, mem->phys_addr);
 1526                 vm_page_release(mem);
 1527         }
 1528 }
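
/*
 *      [Editorial sketch, not part of vm_resident.c]  The locking
 *      protocol a caller is expected to follow around vm_page_free():
 *      both the page's object and the page queues must be locked.
 *      vm_object_lock()/vm_object_unlock() are assumed to be the usual
 *      Mach object locking macros; the function name is hypothetical.
 */
static void
example_free_page(
        vm_object_t     object,
        vm_page_t       m)
{
        vm_object_lock(object);
        vm_page_lock_queues();
        vm_page_free(m);                /* removes, unwires, and releases m */
        vm_page_unlock_queues();
        vm_object_unlock(object);
}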
 1529 
 1530 /*
 1531  *      vm_page_wire:
 1532  *
 1533  *      Mark this page as wired down by yet
 1534  *      another map, removing it from paging queues
 1535  *      as necessary.
 1536  *
 1537  *      The page's object and the page queues must be locked.
 1538  */
 1539 void vm_page_wire(
 1540         register vm_page_t      mem)
 1541 {
 1542         VM_PAGE_CHECK(mem);
 1543 
 1544         if (mem->wire_count == 0) {
 1545                 VM_PAGE_QUEUES_REMOVE(mem);
 1546                 if (!mem->private && !mem->fictitious)
 1547                         vm_page_wire_count++;
 1548         }
 1549         mem->wire_count++;
 1550 }
 1551 
 1552 /*
 1553  *      vm_page_unwire:
 1554  *
 1555  *      Release one wiring of this page, potentially
 1556  *      enabling it to be paged again.
 1557  *
 1558  *      The page's object and the page queues must be locked.
 1559  */
 1560 void vm_page_unwire(
 1561         register vm_page_t      mem)
 1562 {
 1563         VM_PAGE_CHECK(mem);
 1564 
 1565         if (--mem->wire_count == 0) {
 1566                 queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
 1567                 vm_page_active_count++;
 1568                 mem->active = TRUE;
 1569                 if (!mem->private && !mem->fictitious)
 1570                         vm_page_wire_count--;
 1571         }
 1572 }
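
/*
 *      [Editorial sketch, not part of vm_resident.c]  Wiring a page
 *      around work that must not be paged out, then releasing the
 *      wiring.  The page's object lock is assumed to be held by the
 *      caller (not shown); the function name is hypothetical.
 */
static void
example_wire_for_work(
        vm_page_t       m)
{
        vm_page_lock_queues();
        vm_page_wire(m);                /* taken off the paging queues */
        vm_page_unlock_queues();

        /* ... perform the work that requires the page to stay resident ... */

        vm_page_lock_queues();
        vm_page_unwire(m);              /* back onto the active queue */
        vm_page_unlock_queues();
}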
 1573 
 1574 /*
 1575  *      vm_page_deactivate:
 1576  *
 1577  *      Returns the given page to the inactive list,
 1578  *      indicating that no physical maps have access
 1579  *      to this page.  [Used by the physical mapping system.]
 1580  *
 1581  *      The page queues must be locked.
 1582  */
 1583 void vm_page_deactivate(
 1584         register vm_page_t      m)
 1585 {
 1586         VM_PAGE_CHECK(m);
 1587 
 1588         /*
 1589          *      This page is no longer very interesting.  If it was
 1590          *      interesting (active or inactive/referenced), then we
 1591          *      clear the reference bit and (re)enter it in the
 1592          *      inactive queue.  Note wired pages should not have
 1593          *      their reference bit cleared.
 1594          */
 1595 
 1596         if (m->active || (m->inactive && m->reference)) {
 1597                 if (!m->fictitious && !m->absent)
 1598                         pmap_clear_reference(m->phys_addr);
 1599                 m->reference = FALSE;
 1600                 VM_PAGE_QUEUES_REMOVE(m);
 1601         }
 1602         if (m->wire_count == 0 && !m->inactive) {
 1603                 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
 1604                 m->inactive = TRUE;
 1605                 vm_page_inactive_count++;
 1606         }
 1607 }
 1608 
 1609 /*
 1610  *      vm_page_activate:
 1611  *
 1612  *      Put the specified page on the active list (if appropriate).
 1613  *
 1614  *      The page queues must be locked.
 1615  */
 1616 
 1617 void vm_page_activate(
 1618         register vm_page_t      m)
 1619 {
 1620         VM_PAGE_CHECK(m);
 1621 
 1622         if (m->inactive) {
 1623                 queue_remove(&vm_page_queue_inactive, m, vm_page_t,
 1624                                                 pageq);
 1625                 vm_page_inactive_count--;
 1626                 m->inactive = FALSE;
 1627         }
 1628         if (m->wire_count == 0) {
 1629                 if (m->active)
 1630                         panic("vm_page_activate: already active");
 1631 
 1632                 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
 1633                 m->active = TRUE;
 1634                 vm_page_active_count++;
 1635         }
 1636 }
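
/*
 *      [Editorial sketch, not part of vm_resident.c]  Moving a page
 *      between the active and inactive queues under the page queues
 *      lock.  Note that vm_page_activate() panics if the page is
 *      already active, so the sketch checks m->active first.  The
 *      function name is hypothetical.
 */
static void
example_requeue_page(
        vm_page_t       m,
        boolean_t       make_active)
{
        vm_page_lock_queues();
        if (make_active && !m->active)
                vm_page_activate(m);
        else if (!make_active)
                vm_page_deactivate(m);
        vm_page_unlock_queues();
}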
 1637 
 1638 /*
 1639  *      vm_page_zero_fill:
 1640  *
 1641  *      Zero-fill the specified page.
 1642  */
 1643 void vm_page_zero_fill(
 1644         vm_page_t       m)
 1645 {
 1646         VM_PAGE_CHECK(m);
 1647 
 1648         pmap_zero_page(m->phys_addr);
 1649 }
 1650 
 1651 /*
 1652  *      vm_page_copy:
 1653  *
 1654  *      Copy one page to another
 1655  */
 1656 
 1657 void vm_page_copy(
 1658         vm_page_t       src_m,
 1659         vm_page_t       dest_m)
 1660 {
 1661         VM_PAGE_CHECK(src_m);
 1662         VM_PAGE_CHECK(dest_m);
 1663 
 1664         pmap_copy_page(src_m->phys_addr, dest_m->phys_addr);
 1665 }
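
/*
 *      [Editorial sketch, not part of vm_resident.c]  Producing a
 *      private copy of a source page inside a locked object, in the
 *      style of a copy-on-write handler.  Error handling is reduced to
 *      returning VM_PAGE_NULL when no page can be allocated; the
 *      function name is hypothetical.
 */
static vm_page_t
example_copy_into_object(
        vm_object_t     object,         /* must be locked by the caller */
        vm_offset_t     offset,
        vm_page_t       src_m)
{
        vm_page_t       dst_m;

        dst_m = vm_page_alloc(object, offset);
        if (dst_m == VM_PAGE_NULL)
                return VM_PAGE_NULL;

        vm_page_copy(src_m, dst_m);     /* pmap-level physical copy */
        return dst_m;
}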
 1666 
 1667 #if     MACH_VM_DEBUG
 1668 /*
 1669  *      Routine:        vm_page_info
 1670  *      Purpose:
 1671  *              Return information about the global VP table.
 1672  *              Fills the buffer with as much information as possible
 1673  *              and returns the desired size of the buffer.
 1674  *      Conditions:
 1675  *              Nothing locked.  The caller should provide
 1676  *              possibly-pageable memory.
 1677  */
 1678 
 1679 unsigned int
 1680 vm_page_info(
 1681         hash_info_bucket_t *info,
 1682         unsigned int    count)
 1683 {
 1684         int i;
 1685 
 1686         if (vm_page_bucket_count < count)
 1687                 count = vm_page_bucket_count;
 1688 
 1689         for (i = 0; i < count; i++) {
 1690                 vm_page_bucket_t *bucket = &vm_page_buckets[i];
 1691                 unsigned int bucket_count = 0;
 1692                 vm_page_t m;
 1693 
 1694                 simple_lock(&bucket->lock);
 1695                 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
 1696                         bucket_count++;
 1697                 simple_unlock(&bucket->lock);
 1698 
 1699                 /* don't touch pageable memory while holding locks */
 1700                 info[i].hib_count = bucket_count;
 1701         }
 1702 
 1703         return vm_page_bucket_count;
 1704 }
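
/*
 *      [Editorial sketch, not part of vm_resident.c]  Calling
 *      vm_page_info() with a fixed-size buffer: the return value is
 *      the total number of buckets, so a caller can detect that its
 *      buffer was too small.  The buffer size and function name are
 *      arbitrary and hypothetical.
 */
static void
example_sample_bucket_sizes(void)
{
        hash_info_bucket_t      info[64];
        unsigned int            want;

        want = vm_page_info(info, 64);
        if (want > 64) {
                /* Only the first 64 buckets were filled in; a real
                 * caller would retry with a buffer of 'want' entries. */
        }
}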
 1705 #endif  /* MACH_VM_DEBUG */
 1706 
 1707 #include <mach_kdb.h>
 1708 #if     MACH_KDB
 1709 #define printf  kdbprintf
 1710 
 1711 /*
 1712  *      Routine:        vm_page_print [exported]
 1713  */
 1714 void            vm_page_print(p)
 1715         vm_page_t       p;
 1716 {
 1717         iprintf("Page 0x%X: object 0x%X,", (vm_offset_t) p, (vm_offset_t) p->object);
 1718          printf(" offset 0x%X,", (vm_offset_t) p->offset);
 1719          printf(" wire_count %d,", p->wire_count);
 1720          printf(" %s",
 1721                 (p->active ? "active" : (p->inactive ? "inactive" : "loose")));
 1722          printf("%s",
 1723                 (p->free ? " free" : ""));
 1724          printf("%s ",
 1725                 (p->laundry ? " laundry" : ""));
 1726          printf("%s",
 1727                 (p->dirty ? "dirty" : "clean"));
 1728          printf("%s",
 1729                 (p->busy ? " busy" : ""));
 1730          printf("%s",
 1731                 (p->absent ? " absent" : ""));
 1732          printf("%s",
 1733                 (p->error ? " error" : ""));
 1734          printf("%s",
 1735                 (p->fictitious ? " fictitious" : ""));
 1736          printf("%s",
 1737                 (p->private ? " private" : ""));
 1738          printf("%s",
 1739                 (p->wanted ? " wanted" : ""));
 1740          printf("%s, ",
 1741                 (p->tabled ? "" : " not_tabled"));
 1742          printf("phys_addr = 0x%X, lock = 0x%X, unlock_request = 0x%X\n",
 1743                 (vm_offset_t) p->phys_addr,
 1744                 (vm_offset_t) p->page_lock,
 1745                 (vm_offset_t) p->unlock_request);
 1746 }
 1747 #endif  /* MACH_KDB */
