FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_resident.c


    1 /* 
    2  * Mach Operating System
    3  * Copyright (c) 1993-1987 Carnegie Mellon University
    4  * All Rights Reserved.
    5  * 
    6  * Permission to use, copy, modify and distribute this software and its
    7  * documentation is hereby granted, provided that both the copyright
    8  * notice and this permission notice appear in all copies of the
    9  * software, derivative works or modified versions, and any portions
   10  * thereof, and that both notices appear in supporting documentation.
   11  * 
   12  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   13  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
   14  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   15  * 
   16  * Carnegie Mellon requests users of this software to return to
   17  * 
   18  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   19  *  School of Computer Science
   20  *  Carnegie Mellon University
   21  *  Pittsburgh PA 15213-3890
   22  * 
   23  * any improvements or extensions that they make and grant Carnegie Mellon
   24  * the rights to redistribute these changes.
   25  */
   26 /*
   27  * HISTORY
   28  * $Log:        vm_resident.c,v $
   29  * Revision 2.27  93/11/17  18:57:38  dbg
   30  *      Always include vm/vm_kern.h, since it is needed by
   31  *      vm_page_grab_contiguous_pages.
   32  *      [93/08/26            dbg]
   33  * 
   34  *      Cleaned up lint.
   35  *      [93/06/16            dbg]
   36  * 
   37  * Revision 2.26  93/03/09  10:58:45  danner
   38  *      Added typecast to hash macro to quiet GCC.
   39  *      [93/03/06            af]
   40  * 
   41  * Revision 2.25  93/01/14  18:02:15  danner
   42  *      Added ANSI function prototypes.
   43  *      [92/12/30            dbg]
   44  *      Added vm_page_grab_contiguous_pages.
   45  *      [92/12/23            af]
   46  *      64bit cleanup.
   47  *      [92/12/10  20:51:23  af]
   50  * 
   51  * Revision 2.24  92/08/03  18:02:21  jfriedl
   52  *      removed silly prototypes
   53  *      [92/08/02            jfriedl]
   54  * 
   55  * Revision 2.23  92/05/21  17:26:53  jfriedl
   56  *      Cleanup to quiet gcc warnings.
   57  *      Moved MACH_DEBUG includes above prototypes.
   58  *      [92/05/16            jfriedl]
   59  * 
   60  * Revision 2.22  92/02/20  13:09:34  elf
   61  *      Added changes to pmap_startup() to leave the free physical
   62  *      pages in ascending address order.
   63  *      [92/02/20            elf]
   64  * 
   65  * Revision 2.21  92/01/14  16:48:32  rpd
   66  *      Changed vm_page_info for CountInOut.
   67  *      [92/01/14            rpd]
   68  * 
   69  * Revision 2.20  91/10/09  16:20:54  af
   70  *      Added vm_page_deactivate_behind.
   71  *      [91/10/05            rpd]
   72  * 
   73  * Revision 2.19  91/08/28  11:19:00  jsb
   74  *      Added vm_page_free_count_minimum.
   75  *      Fixed divergence between vm_page_free and vm_page_replace.
   76  *      Fixed vm_page_deactivate to handle absent/fictitious pages properly.
   77  *      [91/08/07            rpd]
   78  *      Replaced divergent, expanded vm_page_free code in vm_page_replace
   79  *      with a call to vm_page_free itself.
   80  *      [91/08/15  18:49:59  jsb]
   81  * 
   82  * Revision 2.18  91/07/31  18:22:15  dbg
   83  *      Redefine 'private' to mean private memory, not private page
   84  *      structure.  Calling vm_page_free on a private page frees the
   85  *      page structure but not the page.
   86  *      [91/07/30  17:27:50  dbg]
   87  * 
   88  * Revision 2.17  91/07/01  08:28:24  jsb
    89  *      Removed accidentally merged hack.
   90  *      [91/06/29  17:47:21  jsb]
   91  * 
   92  *      20-Jun-91 David L. Black (dlb) at Open Software Foundation
   93  *      Need vm_page_replace in all configurations.
   94  *      [91/06/29  16:37:31  jsb]
   95  * 
   96  * Revision 2.16  91/06/20  07:33:45  rvb
    97  *      Add pmap_page_grab_phys_addr() so that we don't have to
   98  *      export vm_page_t.
   99  * 
  100  * Revision 2.15  91/06/17  15:49:43  jsb
  101  *      Renamed NORMA conditionals. Fixed vm_page_rename implementation.
  102  *      [91/06/17  11:25:16  jsb]
  103  * 
  104  * Revision 2.14  91/06/06  17:08:43  jsb
  105  *      NORMA_IPC: added vm_page_replace.
  106  *      [91/05/14  09:40:19  jsb]
  107  * 
  108  * Revision 2.13  91/05/18  14:42:01  rpd
  109  *      Renamed vm_page_fictitious_zone to vm_page_zone.
  110  *      [91/05/16            rpd]
  111  * 
  112  *      Moved deactivate-behind code from vm_page_alloc to vm_page_insert.
  113  *      [91/04/21            rpd]
  114  * 
  115  *      Fixed vm_page_deactivate as suggested by rfr,
  116  *      to clear the reference bit on inactive/referenced pages.
  117  *      [91/04/20            rpd]
  118  * 
  119  *      Added vm_page_fictitious_addr.
  120  *      [91/04/10            rpd]
  121  * 
  122  *      Restored vm_page_laundry_count.
  123  *      [91/04/07            rpd]
  124  * 
  125  *      Changed vm_page_release to use thread_wakeup_one.
  126  *      [91/04/05            rpd]
  127  *      Added vm_page_grab_fictitious, etc.
  128  *      [91/03/29            rpd]
  129  *      Added vm_page_bootstrap, pmap_startup, pmap_steal_memory.
  130  *      [91/03/25            rpd]
  131  * 
  132  * Revision 2.12  91/05/14  17:51:19  mrt
  133  *      Correcting copyright
  134  * 
  135  * Revision 2.11  91/03/16  15:07:02  rpd
  136  *      Reverted to the previous version of vm_page_deactivate,
  137  *      which doesn't look at the busy bit.  Changed vm_page_alloc
  138  *      to not deactivate busy pages.
  139  *      [91/03/11            rpd]
  140  * 
  141  *      Fixed simple-locking typo.
  142  *      [91/03/09            rpd]
  143  *      Added continuation argument to vm_page_wait.
  144  *      [91/02/05            rpd]
  145  * 
  146  * Revision 2.10  91/02/05  18:00:27  mrt
  147  *      Changed to new Mach copyright
  148  *      [91/02/01  16:34:44  mrt]
  149  * 
  150  * Revision 2.9  91/01/08  16:46:06  rpd
  151  *      Changed to singly-linked VP bucket chains.
  152  *      [91/01/03            rpd]
  153  * 
  154  *      Removed count field from VP buckets.
  155  *      Added vm_page_info.
  156  *      [91/01/02            rpd]
  157  *      Added vm_page_grab, vm_page_release.
  158  *      Changed vm_wait/VM_WAIT to vm_page_wait/VM_PAGE_WAIT.
  159  *      [90/12/09  17:41:15  rpd]
  160  * 
  161  * Revision 2.8  90/11/05  14:35:12  rpd
  162  *      Changed vm_page_deactivate to remove busy pages from the page queues.
  163  *      Now it requires that the page's object be locked.
  164  *      [90/11/04            rpd]
  165  * 
  166  * Revision 2.7  90/10/25  14:50:50  rwd
  167  *      Made vm_page_alloc_deactivate_behind TRUE.
  168  *      [90/10/24            rwd]
  169  * 
  170  *      Removed the max_mapping field of pages.
  171  *      [90/10/22            rpd]
  172  * 
  173  * Revision 2.6  90/10/12  13:07:03  rpd
  174  *      Initialize vm_page_template's software reference bit.
  175  *      In vm_page_deactivate, clear the software reference bit
  176  *      in addition to using pmap_clear_reference.
  177  *      [90/10/08            rpd]
  178  * 
  179  * Revision 2.5  90/08/27  22:16:11  dbg
  180  *      Fixed vm_page_free, vm_page_wire, vm_page_unwire
  181  *      to only modify vm_page_wire_count for real pages.
  182  *      [90/08/23            rpd]
  183  * 
  184  * Revision 2.4  90/02/22  20:06:53  dbg
  185  *      Fix vm_page_deactivate to work for pages that are wired or
  186  *      already inactive.
  187  *      [90/02/09            dbg]
  188  *              PAGE_WAKEUP --> PAGE_WAKEUP_DONE in vm_page_free() to reflect
  189  *              the fact that it clears the busy flag.  Remove PAGE_WAKEUP from
  190  *              vm_page_unwire; callers are responsible for this, and it didn't
  191  *              work right if the page was wired more than once.
  192  *              [89/12/13            dlb]
  193  * 
  194  * Revision 2.3  90/01/11  11:48:34  dbg
  195  *      Removed all spl protection from VM system.
  196  *      Removed vm_page_free_synchronized.
  197  *      [90/01/03            dbg]
  198  * 
  199  *      Added changes from mainline:
  200  * 
  201  *              Retract special preemption technology for pageout daemon.
  202  *              [89/10/10            mwyoung]
  203  * 
  204  *              Add documentation of global variables.
  205  *              Declare vm_page_bucket_t for VP table; add count and
  206  *              lock fields.
  207  *              [89/04/29            mwyoung]
  208  * 
  209  *              Separate "private" from "fictitious" page attributes.
  210  *              [89/04/22            mwyoung]
  211  * 
  212  *              Made the deactivate-behind optimization conditional on
  213  *              vm_page_alloc_deactivate_behind, which is FALSE for now.
  214  *              [89/08/31  19:32:59  rpd]
  215  * 
  216  *              Increased zdata_size to allow for more zones.
  217  *              [89/07/31  17:13:06  jsb]
  218  * 
  219  *              Changed from 8 to 15 the threshold that triggers invocation of
  220  *              Debugger() in vm_page_alloc().  On the M500 with few buffers
  221  *              was causing trouble. [af]
  222  * 
  223  * Revision 2.2  89/09/08  11:29:00  dbg
  224  *      Fixed vm_page_free to decrement vm_page_laundry_count
  225  *      only if the freed page was in the laundry.  Also
  226  *      made vm_page_free fix vm_page_wire_count when freeing
  227  *      a wired page.
  228  * 
  229  *      Revision 2.16  89/06/12  14:53:18  jsb
  230  *              Picked up bug fix (missing splimp) from Sequent via dlb.
  231  *              [89/06/12  14:38:34  jsb]
  232  * 
  233  *      Revision 2.15  89/06/02  11:38:00  rvb
  234  *              Changed from 8 to 15 the threshold that triggers invocation of
  235  *              Debugger() in vm_page_alloc().  On the M500 with few buffers
  236  *              was causing trouble. [af]
  237  * 
  238  * Revision 2.14  89/04/18  21:29:17  mwyoung
  239  *      Recent history:
  240  *              Add vm_page_fictitious_zone.
  241  *              Handle absent pages in vm_page_free().
  242  *              Eliminate use of owner and clean fields to vm_page_t.
  243  *      History condensation:
  244  *              Reorganize vm_page_startup to avoid bad physical addresses.
  245  *              Use a template for initialization [mwyoung].
  246  *              Provide separate vm_page_init() function for outside use [mwyoung].
  247  *              Split up page system lock [dbg].
  248  *              Initial external memory management integration [bolosky, mwyoung].
  249  *              Plenty of bug fixes [dbg, avie, mwyoung].
  250  *              Converted to active/inactive/free list queues [avie].
  251  *              Created [avie].
  252  *      [89/04/18            mwyoung]
  253  * 
  254  */
  255 /*
  256  *      File:   vm/vm_page.c
  257  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  258  *
  259  *      Resident memory management module.
  260  */
  261 #include <cpus.h>
  262 
  263 #include <mach/vm_prot.h>
  264 #include <kern/counters.h>
  265 #include <kern/kern_io.h>
  266 #include <kern/memory.h>
  267 #include <kern/sched_prim.h>
  268 #include <kern/task.h>
  269 #include <kern/thread.h>
  270 #include <mach/vm_statistics.h>
  271 #include <kern/xpr.h>
  272 #include <kern/zalloc.h>
  273 #include <vm/pmap.h>
  274 #include <vm/vm_map.h>
  275 #include <vm/vm_page.h>
  276 #include <vm/vm_pageout.h>
  277 #include <vm/vm_kern.h>
  278 
  279 #include <mach_vm_debug.h>
  280 #if     MACH_VM_DEBUG
  281 #include <mach/kern_return.h>
  282 #include <mach_debug/hash_info.h>
  283 #include <vm/vm_user.h>
  284 #endif
  285 
  286 
  287 /*
   288  *      Associated with each page of user-allocatable memory is a
  289  *      page structure.
  290  */
  291 
  292 /*
  293  *      These variables record the values returned by vm_page_bootstrap,
  294  *      for debugging purposes.  The implementation of pmap_steal_memory
  295  *      and pmap_startup here also uses them internally.
  296  */
  297 
  298 vm_offset_t virtual_space_start;
  299 vm_offset_t virtual_space_end;
  300 
  301 /*
  302  *      The vm_page_lookup() routine, which provides for fast
  303  *      (virtual memory object, offset) to page lookup, employs
  304  *      the following hash table.  The vm_page_{insert,remove}
  305  *      routines install and remove associations in the table.
  306  *      [This table is often called the virtual-to-physical,
  307  *      or VP, table.]
  308  */
  309 typedef struct {
  310         decl_simple_lock_data(,lock)
  311         vm_page_t pages;
  312 } vm_page_bucket_t;
  313 
  314 vm_page_bucket_t *vm_page_buckets;              /* Array of buckets */
  315 unsigned int    vm_page_bucket_count = 0;       /* How big is array? */
  316 unsigned int    vm_page_hash_mask;              /* Mask for hash function */
  317 
  318 /*
  319  *      The virtual page size is currently implemented as a runtime
  320  *      variable, but is constant once initialized using vm_set_page_size.
  321  *      This initialization must be done in the machine-dependent
  322  *      bootstrap sequence, before calling other machine-independent
  323  *      initializations.
  324  *
  325  *      All references to the virtual page size outside this
  326  *      module must use the PAGE_SIZE constant.
  327  */
  328 vm_size_t       page_size  = 4096;
  329 vm_size_t       page_mask  = 4095;
  330 int             page_shift = 12;
  331 
  332 /*
  333  *      Resident page structures are initialized from
  334  *      a template (see vm_page_alloc).
  335  *
  336  *      When adding a new field to the virtual memory
   337  *      page structure, be sure to add initialization
  338  *      (see vm_page_bootstrap).
  339  */
  340 struct vm_page  vm_page_template;
  341 
  342 /*
  343  *      Resident pages that represent real memory
  344  *      are allocated from a free list.
  345  */
  346 vm_page_t       vm_page_queue_free;
  347 vm_page_t       vm_page_queue_fictitious;
  348 decl_simple_lock_data(,vm_page_queue_free_lock)
  349 unsigned int    vm_page_free_wanted;
  350 int             vm_page_free_count;
  351 int             vm_page_fictitious_count;
  352 
  353 unsigned int    vm_page_free_count_minimum;     /* debugging */
  354 
  355 /*
  356  *      Occasionally, the virtual memory system uses
  357  *      resident page structures that do not refer to
  358  *      real pages, for example to leave a page with
  359  *      important state information in the VP table.
  360  *
  361  *      These page structures are allocated the way
  362  *      most other kernel structures are.
  363  */
  364 zone_t  vm_page_zone;
  365 
  366 /*
  367  *      Fictitious pages don't have a physical address,
  368  *      but we must initialize phys_addr to something.
  369  *      For debugging, this should be a strange value
  370  *      that the pmap module can recognize in assertions.
  371  */
  372 vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
  373 
  374 /*
  375  *      Resident page structures are also chained on
  376  *      queues that are used by the page replacement
  377  *      system (pageout daemon).  These queues are
  378  *      defined here, but are shared by the pageout
  379  *      module.
  380  */
  381 queue_head_t    vm_page_queue_active;
  382 queue_head_t    vm_page_queue_inactive;
  383 decl_simple_lock_data(,vm_page_queue_lock)
  384 int     vm_page_active_count;
  385 int     vm_page_inactive_count;
  386 int     vm_page_wire_count;
  387 
  388 /*
  389  *      Several page replacement parameters are also
  390  *      shared with this module, so that page allocation
  391  *      (done here in vm_page_alloc) can trigger the
  392  *      pageout daemon.
  393  */
  394 int     vm_page_free_target = 0;
  395 int     vm_page_free_min = 0;
  396 int     vm_page_inactive_target = 0;
  397 int     vm_page_free_reserved = 0;
  398 int     vm_page_laundry_count = 0;
  399 
  400 /*
  401  *      The VM system has a couple of heuristics for deciding
  402  *      that pages are "uninteresting" and should be placed
  403  *      on the inactive queue as likely candidates for replacement.
  404  *      These variables let the heuristics be controlled at run-time
  405  *      to make experimentation easier.
  406  */
  407 
  408 boolean_t vm_page_deactivate_behind = TRUE;
  409 boolean_t vm_page_deactivate_hint = TRUE;
  410 
  411 /*
  412  *      vm_set_page_size:
  413  *
  414  *      Sets the page size, perhaps based upon the memory
  415  *      size.  Must be called before any use of page-size
  416  *      dependent functions.
  417  *
  418  *      Sets page_shift and page_mask from page_size.
  419  */
  420 void vm_set_page_size(void)
  421 {
  422         page_mask = page_size - 1;
  423 
  424         if ((page_mask & page_size) != 0)
  425                 panic("vm_set_page_size: page size not a power of two");
  426 
  427         for (page_shift = 0; ; page_shift++)
  428                 if ((1 << page_shift) == page_size)
  429                         break;
  430 }
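/*
 *      For illustration: with the default 4K page size above, this routine
 *      leaves page_mask == 0xfff and page_shift == 12.  A machine configured
 *      with 8K pages would instead end up with page_mask == 0x1fff and
 *      page_shift == 13.  (The 8K figures are merely an example.)
 */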
  431 
  432 /*
  433  *      vm_page_bootstrap:
  434  *
  435  *      Initializes the resident memory module.
  436  *
  437  *      Allocates memory for the page cells, and
  438  *      for the object/offset-to-page hash table headers.
  439  *      Each page cell is initialized and placed on the free list.
  440  *      Returns the range of available kernel virtual memory.
  441  */
  442 
  443 void vm_page_bootstrap(
  444         vm_offset_t *startp,
  445         vm_offset_t *endp)
  446 {
  447         register vm_page_t m;
  448         int i;
  449 
  450         /*
  451          *      Initialize the vm_page template.
  452          */
  453 
  454         m = &vm_page_template;
  455         m->object = VM_OBJECT_NULL;     /* reset later */
  456         m->offset = 0;                  /* reset later */
  457         m->wire_count = 0;
  458 
  459         m->inactive = FALSE;
  460         m->active = FALSE;
  461         m->laundry = FALSE;
  462         m->free = FALSE;
  463 
  464         m->busy = TRUE;
  465         m->wanted = FALSE;
  466         m->tabled = FALSE;
  467         m->fictitious = FALSE;
  468         m->private = FALSE;
  469         m->absent = FALSE;
  470         m->error = FALSE;
  471         m->dirty = FALSE;
  472         m->precious = FALSE;
  473         m->reference = FALSE;
  474 
  475         m->phys_addr = 0;               /* reset later */
  476 
  477         m->page_lock = VM_PROT_NONE;
  478         m->unlock_request = VM_PROT_NONE;
  479 
  480         /*
  481          *      Initialize the page queues.
  482          */
  483 
  484         simple_lock_init(&vm_page_queue_free_lock);
  485         simple_lock_init(&vm_page_queue_lock);
  486 
  487         vm_page_queue_free = VM_PAGE_NULL;
  488         vm_page_queue_fictitious = VM_PAGE_NULL;
  489         queue_init(&vm_page_queue_active);
  490         queue_init(&vm_page_queue_inactive);
  491 
  492         vm_page_free_wanted = 0;
  493 
  494         /*
   495          *      Steal memory for the kernel map entries and the zone system.
  496          */
  497 
  498         kentry_data_size = kentry_count * sizeof(struct vm_map_entry);
  499         kentry_data = pmap_steal_memory(kentry_data_size);
  500 
  501         zdata = pmap_steal_memory(zdata_size);
  502 
  503         /*
  504          *      Allocate (and initialize) the virtual-to-physical
  505          *      table hash buckets.
  506          *
  507          *      The number of buckets should be a power of two to
  508          *      get a good hash function.  The following computation
   509          *      chooses the smallest power of two that is not less
   510          *      than the number of physical pages in the system.
  511          */
  512 
  513         if (vm_page_bucket_count == 0) {
  514                 unsigned int npages = pmap_free_pages();
  515 
  516                 vm_page_bucket_count = 1;
  517                 while (vm_page_bucket_count < npages)
  518                         vm_page_bucket_count <<= 1;
  519         }
  520 
  521         vm_page_hash_mask = vm_page_bucket_count - 1;
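        /*
         *      As an illustrative example only: a machine with roughly
         *      12000 physical pages would leave vm_page_bucket_count
         *      == 16384 and vm_page_hash_mask == 0x3fff here.
         */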
  522 
  523         if (vm_page_hash_mask & vm_page_bucket_count)
  524                 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
  525 
  526         vm_page_buckets = (vm_page_bucket_t *)
  527                 pmap_steal_memory(vm_page_bucket_count *
  528                                   sizeof(vm_page_bucket_t));
  529 
  530         for (i = 0; i < vm_page_bucket_count; i++) {
  531                 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
  532 
  533                 bucket->pages = VM_PAGE_NULL;
  534                 simple_lock_init(&bucket->lock);
  535         }
  536 
  537         /*
  538          *      Machine-dependent code allocates the resident page table.
  539          *      It uses vm_page_init to initialize the page frames.
  540          *      The code also returns to us the virtual space available
  541          *      to the kernel.  We don't trust the pmap module
  542          *      to get the alignment right.
  543          */
  544 
  545         pmap_startup(&virtual_space_start, &virtual_space_end);
  546         virtual_space_start = round_page(virtual_space_start);
  547         virtual_space_end = trunc_page(virtual_space_end);
  548 
  549         *startp = virtual_space_start;
  550         *endp = virtual_space_end;
  551 
  552         printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);
  553         vm_page_free_count_minimum = vm_page_free_count;
  554 }
  555 
  556 #ifndef MACHINE_PAGES
  557 /*
  558  *      We implement pmap_steal_memory and pmap_startup with the help
  559  *      of two simpler functions, pmap_virtual_space and pmap_next_page.
  560  */
  561 
  562 vm_offset_t pmap_steal_memory(
  563         vm_size_t size)
  564 {
  565         vm_offset_t addr, vaddr, paddr;
  566 
  567         /*
   568          *      We round the size up to a 4-byte (integer) multiple.
  569          */
  570 
  571         size = (size + 3) &~ 3;
  572 
  573         /*
  574          *      If this is the first call to pmap_steal_memory,
  575          *      we have to initialize ourself.
  576          */
  577 
  578         if (virtual_space_start == virtual_space_end) {
  579                 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
  580 
  581                 /*
  582                  *      The initial values must be aligned properly, and
  583                  *      we don't trust the pmap module to do it right.
  584                  */
  585 
  586                 virtual_space_start = round_page(virtual_space_start);
  587                 virtual_space_end = trunc_page(virtual_space_end);
  588         }
  589 
  590         /*
  591          *      Allocate virtual memory for this request.
  592          */
  593 
  594         addr = virtual_space_start;
  595         virtual_space_start += size;
  596 
  597         /*
  598          *      Allocate and map physical pages to back new virtual pages.
  599          */
  600 
  601         for (vaddr = round_page(addr);
  602              vaddr < addr + size;
  603              vaddr += PAGE_SIZE) {
  604                 if (!pmap_next_page(&paddr))
  605                         panic("pmap_steal_memory");
  606 
  607                 /*
  608                  *      XXX Logically, these mappings should be wired,
  609                  *      but some pmap modules barf if they are.
  610                  */
  611 
  612                 pmap_enter(kernel_pmap, vaddr, paddr,
  613                            VM_PROT_READ|VM_PROT_WRITE, FALSE);
  614         }
  615 
  616         return addr;
  617 }
  618 
  619 void pmap_startup(
  620         vm_offset_t *startp,
  621         vm_offset_t *endp)
  622 {
  623         unsigned int i, npages, pages_initialized;
  624         vm_page_t pages;
  625         vm_offset_t paddr;
  626 
  627         /*
  628          *      We calculate how many page frames we will have
  629          *      and then allocate the page structures in one chunk.
  630          */
  631 
  632         npages = ((PAGE_SIZE * pmap_free_pages() +
  633                    (round_page(virtual_space_start) - virtual_space_start)) /
  634                   (PAGE_SIZE + sizeof *pages));
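        /*
         *      Each page frame we manage costs PAGE_SIZE bytes of memory
         *      plus one struct vm_page, hence the divisor above.  As a
         *      rough, purely illustrative figure: with 16 MB free, 4K pages
         *      and a 64-byte vm_page (the structure size varies by
         *      configuration), npages comes to about 16777216 / 4160,
         *      i.e. roughly 4000 page frames.
         */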
  635 
  636         pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages);
  637 
  638         /*
  639          *      Initialize the page frames.
  640          */
  641 
  642         for (i = 0, pages_initialized = 0; i < npages; i++) {
  643                 if (!pmap_next_page(&paddr))
  644                         break;
  645 
  646                 vm_page_init(&pages[i], paddr);
  647                 pages_initialized++;
  648         }
  649 
  650         /*
  651          * Release pages in reverse order so that physical pages
  652          * initially get allocated in ascending addresses. This keeps
  653          * the devices (which must address physical memory) happy if
  654          * they require several consecutive pages.
  655          */
  656 
  657         for (i = pages_initialized; i > 0; i--) {
  658                 vm_page_release(&pages[i - 1]);
  659         }
  660 
  661         /*
  662          *      We have to re-align virtual_space_start,
  663          *      because pmap_steal_memory has been using it.
  664          */
  665 
  666         virtual_space_start = round_page(virtual_space_start);
  667 
  668         *startp = virtual_space_start;
  669         *endp = virtual_space_end;
  670 }
  671 #endif  /* MACHINE_PAGES */
  672 
  673 /*
  674  *      Routine:        vm_page_module_init
  675  *      Purpose:
  676  *              Second initialization pass, to be done after
  677  *              the basic VM system is ready.
  678  */
  679 void            vm_page_module_init(void)
  680 {
  681         vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
  682                              VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
  683                              PAGE_SIZE,
  684                              FALSE, "vm pages");
  685 }
  686 
  687 /*
  688  *      Routine:        vm_page_create
  689  *      Purpose:
  690  *              After the VM system is up, machine-dependent code
  691  *              may stumble across more physical memory.  For example,
  692  *              memory that it was reserving for a frame buffer.
  693  *              vm_page_create turns this memory into available pages.
  694  */
  695 
  696 void vm_page_create(
  697         vm_offset_t     start,
  698         vm_offset_t     end)
  699 {
  700         vm_offset_t paddr;
  701         vm_page_t m;
  702 
  703         for (paddr = round_page(start);
  704              paddr < trunc_page(end);
  705              paddr += PAGE_SIZE) {
  706                 m = (vm_page_t) zalloc(vm_page_zone);
  707                 if (m == VM_PAGE_NULL)
  708                         panic("vm_page_create");
  709 
  710                 vm_page_init(m, paddr);
  711                 vm_page_release(m);
  712         }
  713 }
  714 
  715 /*
  716  *      vm_page_hash:
  717  *
  718  *      Distributes the object/offset key pair among hash buckets.
  719  *
  720  *      NOTE:   To get a good hash function, the bucket count should
  721  *              be a power of two.
  722  */
  723 #define vm_page_hash(object, offset) \
  724         (((unsigned int)(vm_offset_t)object + (unsigned int)atop(offset)) \
  725                 & vm_page_hash_mask)
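/*
 *      Worked example (all values illustrative): with 4K pages and
 *      vm_page_hash_mask == 0x3ff (1024 buckets), an object at 0xc1234000
 *      with offset 0x6000 hashes to
 *      (0xc1234000 + atop(0x6000)) & 0x3ff == (0xc1234000 + 6) & 0x3ff == 6.
 */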
  726 
  727 /*
  728  *      vm_page_insert:         [ internal use only ]
  729  *
   730  *      Inserts the given mem entry into the object/offset-page
   731  *      table and the object page list.
  732  *
  733  *      The object and page must be locked.
  734  */
  735 
  736 void vm_page_insert(
  737         register vm_page_t      mem,
  738         register vm_object_t    object,
  739         register vm_offset_t    offset)
  740 {
  741         register vm_page_bucket_t *bucket;
  742 
  743         VM_PAGE_CHECK(mem);
  744 
  745         if (mem->tabled)
  746                 panic("vm_page_insert");
  747 
  748         /*
  749          *      Record the object/offset pair in this page
  750          */
  751 
  752         mem->object = object;
  753         mem->offset = offset;
  754 
  755         /*
   756  *      Insert it into the object/offset hash table
  757          */
  758 
  759         bucket = &vm_page_buckets[vm_page_hash(object, offset)];
  760         simple_lock(&bucket->lock);
  761         mem->next = bucket->pages;
  762         bucket->pages = mem;
  763         simple_unlock(&bucket->lock);
  764 
  765         /*
  766          *      Now link into the object's list of backed pages.
  767          */
  768 
  769         queue_enter(&object->memq, mem, vm_page_t, listq);
  770         mem->tabled = TRUE;
  771 
  772         /*
  773          *      Show that the object has one more resident page.
  774          */
  775 
  776         object->resident_page_count++;
  777 
  778         /*
  779          *      Detect sequential access and inactivate previous page.
  780          *      We ignore busy pages.
  781          */
  782 
  783         if (vm_page_deactivate_behind &&
  784             (offset == object->last_alloc + PAGE_SIZE)) {
  785                 vm_page_t       last_mem;
  786 
  787                 last_mem = vm_page_lookup(object, object->last_alloc);
  788                 if ((last_mem != VM_PAGE_NULL) && !last_mem->busy)
  789                         vm_page_deactivate(last_mem);
  790         }
  791         object->last_alloc = offset;
  792 }
  793 
  794 /*
  795  *      vm_page_replace:
  796  *
  797  *      Exactly like vm_page_insert, except that we first
  798  *      remove any existing page at the given offset in object
  799  *      and we don't do deactivate-behind.
  800  *
  801  *      The object and page must be locked.
  802  */
  803 
  804 void vm_page_replace(
  805         register vm_page_t      mem,
  806         register vm_object_t    object,
  807         register vm_offset_t    offset)
  808 {
  809         register vm_page_bucket_t *bucket;
  810 
  811         VM_PAGE_CHECK(mem);
  812 
  813         if (mem->tabled)
  814                 panic("vm_page_replace");
  815 
  816         /*
  817          *      Record the object/offset pair in this page
  818          */
  819 
  820         mem->object = object;
  821         mem->offset = offset;
  822 
  823         /*
   824  *      Insert it into the object/offset hash table,
  825          *      replacing any page that might have been there.
  826          */
  827 
  828         bucket = &vm_page_buckets[vm_page_hash(object, offset)];
  829         simple_lock(&bucket->lock);
  830         if (bucket->pages) {
  831                 vm_page_t *mp = &bucket->pages;
  832                 register vm_page_t m = *mp;
  833                 do {
  834                         if (m->object == object && m->offset == offset) {
  835                                 /*
  836                                  * Remove page from bucket and from object,
  837                                  * and return it to the free list.
  838                                  */
  839                                 *mp = m->next;
  840                                 queue_remove(&object->memq, m, vm_page_t,
  841                                              listq);
  842                                 m->tabled = FALSE;
  843                                 object->resident_page_count--;
  844 
  845                                 /*
  846                                  * Return page to the free list.
  847                                  * Note the page is not tabled now, so this
  848                                  * won't self-deadlock on the bucket lock.
  849                                  */
  850 
  851                                 vm_page_free(m);
  852                                 break;
  853                         }
  854                         mp = &m->next;
  855                 } while ((m = *mp) != 0);
  856                 mem->next = bucket->pages;
  857         } else {
  858                 mem->next = VM_PAGE_NULL;
  859         }
  860         bucket->pages = mem;
  861         simple_unlock(&bucket->lock);
  862 
  863         /*
  864          *      Now link into the object's list of backed pages.
  865          */
  866 
  867         queue_enter(&object->memq, mem, vm_page_t, listq);
  868         mem->tabled = TRUE;
  869 
  870         /*
  871          *      And show that the object has one more resident
  872          *      page.
  873          */
  874 
  875         object->resident_page_count++;
  876 }
  877 
  878 /*
  879  *      vm_page_remove:         [ internal use only ]
  880  *
  881  *      Removes the given mem entry from the object/offset-page
  882  *      table and the object page list.
  883  *
  884  *      The object and page must be locked.
  885  */
  886 
  887 void vm_page_remove(
  888         register vm_page_t      mem)
  889 {
  890         register vm_page_bucket_t       *bucket;
  891         register vm_page_t      this;
  892 
  893         assert(mem->tabled);
  894         VM_PAGE_CHECK(mem);
  895 
  896         /*
   897  *      Remove from the object/offset hash table
  898          */
  899 
  900         bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
  901         simple_lock(&bucket->lock);
  902         if ((this = bucket->pages) == mem) {
  903                 /* optimize for common case */
  904 
  905                 bucket->pages = mem->next;
  906         } else {
  907                 register vm_page_t      *prev;
  908 
  909                 for (prev = &this->next;
  910                      (this = *prev) != mem;
  911                      prev = &this->next)
  912                         continue;
  913                 *prev = this->next;
  914         }
  915         simple_unlock(&bucket->lock);
  916 
  917         /*
  918          *      Now remove from the object's list of backed pages.
  919          */
  920 
  921         queue_remove(&mem->object->memq, mem, vm_page_t, listq);
  922 
  923         /*
  924          *      And show that the object has one fewer resident
  925          *      page.
  926          */
  927 
  928         mem->object->resident_page_count--;
  929 
  930         mem->tabled = FALSE;
  931 }
  932 
  933 /*
  934  *      vm_page_lookup:
  935  *
  936  *      Returns the page associated with the object/offset
  937  *      pair specified; if none is found, VM_PAGE_NULL is returned.
  938  *
  939  *      The object must be locked.  No side effects.
  940  */
  941 
  942 vm_page_t vm_page_lookup(
  943         register vm_object_t    object,
  944         register vm_offset_t    offset)
  945 {
  946         register vm_page_t      mem;
  947         register vm_page_bucket_t *bucket;
  948 
  949         /*
  950          *      Search the hash table for this object/offset pair
  951          */
  952 
  953         bucket = &vm_page_buckets[vm_page_hash(object, offset)];
  954 
  955         simple_lock(&bucket->lock);
  956         for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
  957                 VM_PAGE_CHECK(mem);
  958                 if ((mem->object == object) && (mem->offset == offset))
  959                         break;
  960         }
  961         simple_unlock(&bucket->lock);
  962         return mem;
  963 }
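/*
 *      A typical caller holds the object lock across the lookup and falls
 *      back to allocating a fresh page when none is resident.  A minimal
 *      sketch of that pattern (assuming the usual vm_object_lock/unlock
 *      interfaces; not a routine defined in this file):
 *
 *              vm_object_lock(object);
 *              m = vm_page_lookup(object, offset);
 *              if (m == VM_PAGE_NULL)
 *                      m = vm_page_alloc(object, offset);
 *              ...
 *              vm_object_unlock(object);
 */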
  964 
  965 /*
  966  *      vm_page_rename:
  967  *
  968  *      Move the given memory entry from its
  969  *      current object to the specified target object/offset.
  970  *
  971  *      The object must be locked.
  972  */
  973 void vm_page_rename(
  974         register vm_page_t      mem,
  975         register vm_object_t    new_object,
  976         vm_offset_t             new_offset)
  977 {
  978         /*
  979          *      Changes to mem->object require the page lock because
  980          *      the pageout daemon uses that lock to get the object.
  981          */
  982 
  983         vm_page_lock_queues();
  984         vm_page_remove(mem);
  985         vm_page_insert(mem, new_object, new_offset);
  986         vm_page_unlock_queues();
  987 }
  988 
  989 /*
  990  *      vm_page_init:
  991  *
  992  *      Initialize the fields in a new page.
  993  *      This takes a structure with random values and initializes it
  994  *      so that it can be given to vm_page_release or vm_page_insert.
  995  */
  996 void vm_page_init(
  997         vm_page_t       mem,
  998         vm_offset_t     phys_addr)
  999 {
 1000         *mem = vm_page_template;
 1001         mem->phys_addr = phys_addr;
 1002 }
 1003 
 1004 /*
 1005  *      vm_page_grab_fictitious:
 1006  *
 1007  *      Remove a fictitious page from the free list.
 1008  *      Returns VM_PAGE_NULL if there are no free pages.
 1009  */
 1010 
 1011 vm_page_t vm_page_grab_fictitious(void)
 1012 {
 1013         register vm_page_t m;
 1014 
 1015         simple_lock(&vm_page_queue_free_lock);
 1016         m = vm_page_queue_fictitious;
 1017         if (m != VM_PAGE_NULL) {
 1018                 vm_page_fictitious_count--;
 1019                 vm_page_queue_fictitious = (vm_page_t) m->pageq.next;
 1020                 m->free = FALSE;
 1021         }
 1022         simple_unlock(&vm_page_queue_free_lock);
 1023 
 1024         return m;
 1025 }
 1026 
 1027 /*
 1028  *      vm_page_release_fictitious:
 1029  *
 1030  *      Release a fictitious page to the free list.
 1031  */
 1032 
 1033 void vm_page_release_fictitious(
 1034         register vm_page_t m)
 1035 {
 1036         simple_lock(&vm_page_queue_free_lock);
 1037         if (m->free)
 1038                 panic("vm_page_release_fictitious");
 1039         m->free = TRUE;
 1040         m->pageq.next = (queue_entry_t) vm_page_queue_fictitious;
 1041         vm_page_queue_fictitious = m;
 1042         vm_page_fictitious_count++;
 1043         simple_unlock(&vm_page_queue_free_lock);
 1044 }
 1045 
 1046 /*
 1047  *      vm_page_more_fictitious:
 1048  *
 1049  *      Add more fictitious pages to the free list.
 1050  *      Allowed to block.
 1051  */
 1052 
 1053 int vm_page_fictitious_quantum = 5;
 1054 
 1055 void vm_page_more_fictitious(void)
 1056 {
 1057         register vm_page_t m;
 1058         int i;
 1059 
 1060         for (i = 0; i < vm_page_fictitious_quantum; i++) {
 1061                 m = (vm_page_t) zalloc(vm_page_zone);
 1062                 if (m == VM_PAGE_NULL)
 1063                         panic("vm_page_more_fictitious");
 1064 
 1065                 vm_page_init(m, vm_page_fictitious_addr);
 1066                 m->fictitious = TRUE;
 1067                 vm_page_release_fictitious(m);
 1068         }
 1069 }
 1070 
 1071 /*
 1072  *      vm_page_convert:
 1073  *
 1074  *      Attempt to convert a fictitious page into a real page.
 1075  */
 1076 
 1077 boolean_t vm_page_convert(
 1078         register vm_page_t m)
 1079 {
 1080         register vm_page_t real_m;
 1081 
 1082         real_m = vm_page_grab();
 1083         if (real_m == VM_PAGE_NULL)
 1084                 return FALSE;
 1085 
 1086         m->phys_addr = real_m->phys_addr;
 1087         m->fictitious = FALSE;
 1088 
 1089         real_m->phys_addr = vm_page_fictitious_addr;
 1090         real_m->fictitious = TRUE;
 1091 
 1092         vm_page_release_fictitious(real_m);
 1093         return TRUE;
 1094 }
 1095 
 1096 /*
 1097  *      vm_page_grab:
 1098  *
 1099  *      Remove a page from the free list.
 1100  *      Returns VM_PAGE_NULL if the free list is too small.
 1101  */
 1102 
 1103 vm_page_t vm_page_grab(void)
 1104 {
 1105         register vm_page_t      mem;
 1106 
 1107         simple_lock(&vm_page_queue_free_lock);
 1108 
 1109         /*
 1110          *      Only let privileged threads (involved in pageout)
 1111          *      dip into the reserved pool.
 1112          */
 1113 
 1114         if ((vm_page_free_count < vm_page_free_reserved) &&
 1115             !current_thread()->vm_privilege) {
 1116                 simple_unlock(&vm_page_queue_free_lock);
 1117                 return VM_PAGE_NULL;
 1118         }
 1119 
 1120         if (vm_page_queue_free == VM_PAGE_NULL)
 1121                 panic("vm_page_grab");
 1122 
 1123         if (--vm_page_free_count < vm_page_free_count_minimum)
 1124                 vm_page_free_count_minimum = vm_page_free_count;
 1125         mem = vm_page_queue_free;
 1126         vm_page_queue_free = (vm_page_t) mem->pageq.next;
 1127         mem->free = FALSE;
 1128         simple_unlock(&vm_page_queue_free_lock);
 1129 
 1130         /*
 1131          *      Decide if we should poke the pageout daemon.
 1132          *      We do this if the free count is less than the low
 1133          *      water mark, or if the free count is less than the high
 1134          *      water mark (but above the low water mark) and the inactive
 1135          *      count is less than its target.
 1136          *
 1137          *      We don't have the counts locked ... if they change a little,
 1138          *      it doesn't really matter.
 1139          */
 1140 
 1141         if ((vm_page_free_count < vm_page_free_min) ||
 1142             ((vm_page_free_count < vm_page_free_target) &&
 1143              (vm_page_inactive_count < vm_page_inactive_target)))
 1144                 thread_wakeup((event_t) &vm_page_free_wanted);
 1145 
 1146         return mem;
 1147 }
 1148 
 1149 vm_offset_t vm_page_grab_phys_addr(void)
 1150 {
 1151         vm_page_t p = vm_page_grab();
 1152         if (p == VM_PAGE_NULL)
 1153                 return -1;
 1154         else
 1155                 return p->phys_addr;
 1156 }
 1157 
 1158 /*
 1159  *      vm_page_grab_contiguous_pages:
 1160  *
  1161  *      Take N pages off the free list; the pages must
  1162  *      cover a contiguous range of physical addresses.
  1163  *      [Used by device drivers to cope with DMA limitations.]
  1164  *
  1165  *      Returns the page descriptors in ascending order, or
  1166  *      KERN_RESOURCE_SHORTAGE if it could not.
 1167  */
 1168 
 1169 /* Biggest phys page number for the pages we handle in VM */
 1170 
 1171 vm_size_t       vm_page_big_pagenum = 0;        /* Set this before call! */
 1172 
 1173 kern_return_t
 1174 vm_page_grab_contiguous_pages(
 1175         int             npages,
 1176         vm_page_t       pages[],
 1177         natural_t       *bits)
 1178 {
 1179         register int    first_set;
 1180         int             size, alloc_size;
 1181         kern_return_t   ret;
 1182         vm_page_t       mem, prevmem;
 1183 
 1184 #ifndef NBBY
  1185 #define NBBY    8       /* number of bits in a byte */
 1186 #endif
 1187 
 1188 #define NBPEL   (sizeof(natural_t)*NBBY)
 1189 
 1190         size = (vm_page_big_pagenum + NBPEL - 1)
 1191                 & ~(NBPEL - 1);                         /* in bits */
 1192 
 1193         size = size / NBBY;                             /* in bytes */
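        /*
         *      For example (illustrative numbers only): with
         *      vm_page_big_pagenum == 8192 and 32-bit naturals
         *      (NBPEL == 32), size rounds to 8192 bits and then to
         *      1024 bytes of bitmap -- one bit per physical page.
         */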
 1194 
 1195         /*
 1196          * If we are called before the VM system is fully functional
 1197          * the invoker must provide us with the work space. [one bit
 1198          * per page starting at phys 0 and up to vm_page_big_pagenum]
 1199          */
 1200         if (bits == 0) {
 1201                 alloc_size = round_page(size);
 1202                 if (kmem_alloc_wired(kernel_map,
 1203                                      (vm_offset_t *)&bits,
 1204                                      alloc_size)
 1205                         != KERN_SUCCESS)
 1206                     return KERN_RESOURCE_SHORTAGE;
 1207         } else
 1208                 alloc_size = 0;
 1209 
 1210         bzero(bits, size);
 1211 
 1212         /*
  1213          * A very coarse-grained operation, but it is rare, so that is ok
 1214          */
 1215         simple_lock(&vm_page_queue_free_lock);
 1216 
 1217         /*
 1218          *      Do not dip into the reserved pool.
 1219          */
 1220 
 1221         if (vm_page_free_count < vm_page_free_reserved) {
 1222                 simple_unlock(&vm_page_queue_free_lock);
 1223                 return KERN_RESOURCE_SHORTAGE;
 1224         }
 1225 
 1226         /*
  1227          *      First pass: build a big bit-array of
  1228          *      the pages that are free.  It is not going to
  1229          *      be too large anyway; in 4k bytes we can fit info
  1230          *      for 32k pages.
 1231          */
 1232         mem = vm_page_queue_free;
 1233         while (mem) {
 1234                 register int word_index, bit_index;
 1235 
 1236                 bit_index = (mem->phys_addr >> page_shift);
 1237                 word_index = bit_index / NBPEL;
 1238                 bit_index = bit_index - (word_index * NBPEL);
 1239                 bits[word_index] |= 1 << bit_index;
 1240 
 1241                 mem = (vm_page_t) mem->pageq.next;
 1242         }
 1243 
 1244         /*
 1245          *      Second loop. Scan the bit array for NPAGES
 1246          *      contiguous bits.  That gives us, if any,
 1247          *      the range of pages we will be grabbing off
 1248          *      the free list.
 1249          */
 1250         {
 1251             register int        bits_so_far = 0, i;
 1252 
 1253                 first_set = 0;
 1254 
 1255                 for (i = 0; i < size; i += sizeof(natural_t)) {
 1256 
 1257                     register natural_t  v = bits[i / sizeof(natural_t)];
 1258                     register int        bitpos;
 1259 
 1260                     /*
 1261                      * Bitscan this one word
 1262                      */
 1263                     if (v) {
 1264                         /*
 1265                          * keep counting them beans ?
 1266                          */
 1267                         bitpos = 0;
 1268 
 1269                         if (bits_so_far) {
 1270 count_ones:
 1271                             while (v & 1) {
 1272                                 bitpos++;
 1273                                 /*
 1274                                  * got enough beans ?
 1275                                  */
 1276                                 if (++bits_so_far == npages)
 1277                                     goto found_em;
 1278                                 v >>= 1;
 1279                             }
 1280                             /* if we are being lucky, roll again */
 1281                             if (bitpos == NBPEL)
 1282                                 continue;
 1283                         }
 1284 
 1285                         /*
 1286                          * search for beans here
 1287                          */
 1288                         bits_so_far = 0;
 1289 /*count_zeroes:*/
 1290                         while ((bitpos < NBPEL) && ((v & 1) == 0)) {
 1291                             bitpos++;
 1292                             v >>= 1;
 1293                         }
 1294                         if (v & 1) {
 1295                             first_set = (i * NBBY) + bitpos;
 1296                             goto count_ones;
 1297                         }
 1298                     }
 1299                     /*
 1300                      * No luck
 1301                      */
 1302                     bits_so_far = 0;
 1303                 }
 1304         }
 1305 
 1306         /*
 1307          *      We could not find enough contiguous pages.
 1308          */
 1309 /*not_found_em:*/
 1310         simple_unlock(&vm_page_queue_free_lock);
 1311 
 1312         ret = KERN_RESOURCE_SHORTAGE;
 1313         goto out;
 1314 
 1315         /*
 1316          *      Final pass. Now we know which pages we want.
 1317          *      Scan the list until we find them all, grab
 1318          *      pages as we go.  FIRST_SET tells us where
 1319          *      in the bit-array our pages start.
 1320          */
 1321 found_em:
 1322         vm_page_free_count -= npages;
 1323         if (vm_page_free_count < vm_page_free_count_minimum)
 1324                 vm_page_free_count_minimum = vm_page_free_count;
 1325 
 1326         {
 1327             register vm_offset_t        first_phys, last_phys;
 1328 
 1329             /* cache values for compare */
 1330             first_phys = first_set << page_shift;
 1331             last_phys = first_phys + (npages << page_shift);/* not included */
 1332 
 1333             /* running pointers */
 1334             mem = vm_page_queue_free;
 1335             prevmem = VM_PAGE_NULL;
 1336 
 1337             while (mem) {
 1338 
 1339                 register vm_offset_t    addr;
 1340 
 1341                 addr = mem->phys_addr;
 1342 
 1343                 if ((addr >= first_phys) &&
 1344                     (addr <  last_phys)) {
  1345                     if (prevmem)
  1346                         prevmem->pageq.next = mem->pageq.next;
                           else        /* mem is the head of the free list */
                               vm_page_queue_free = (vm_page_t) mem->pageq.next;
 1347                     pages[(addr - first_phys) >> page_shift] = mem;
 1348                     mem->free = FALSE;
 1349                     /*
 1350                      * Got them all ?
 1351                      */
 1352                     if (--npages == 0) break;
 1353                 } else
 1354                     prevmem = mem;
 1355 
 1356                 mem = (vm_page_t) mem->pageq.next;
 1357             }
 1358         }
 1359 
 1360         simple_unlock(&vm_page_queue_free_lock);
 1361 
 1362         /*
 1363          *      Decide if we should poke the pageout daemon.
 1364          *      We do this if the free count is less than the low
 1365          *      water mark, or if the free count is less than the high
 1366          *      water mark (but above the low water mark) and the inactive
 1367          *      count is less than its target.
 1368          *
 1369          *      We don't have the counts locked ... if they change a little,
 1370          *      it doesn't really matter.
 1371          */
 1372 
 1373         if ((vm_page_free_count < vm_page_free_min) ||
 1374             ((vm_page_free_count < vm_page_free_target) &&
 1375              (vm_page_inactive_count < vm_page_inactive_target)))
  1376                 thread_wakeup((event_t) &vm_page_free_wanted);
 1377 
 1378         ret = KERN_SUCCESS;
 1379 out:
 1380         if (alloc_size)
 1381                 kmem_free(kernel_map, (vm_offset_t) bits, alloc_size);
 1382 
 1383         return ret;
 1384 }
 1385 
 1386 /*
 1387  *      vm_page_release:
 1388  *
 1389  *      Return a page to the free list.
 1390  */
 1391 
 1392 void vm_page_release(
 1393         register vm_page_t      mem)
 1394 {
 1395         simple_lock(&vm_page_queue_free_lock);
 1396         if (mem->free)
 1397                 panic("vm_page_release");
 1398         mem->free = TRUE;
 1399         mem->pageq.next = (queue_entry_t) vm_page_queue_free;
 1400         vm_page_queue_free = mem;
 1401         vm_page_free_count++;
 1402 
 1403         /*
  1404          *      Check if we should wake up someone waiting for a page.
 1405          *      But don't bother waking them unless they can allocate.
 1406          *
  1407          *      We wake up only one thread, to prevent starvation.
  1408          *      Because the scheduling system handles wait queues FIFO,
  1409          *      if we wake up all waiting threads, one greedy thread
  1410          *      can starve multiple niceguy threads.  When the threads
  1411          *      all wake up, the greedy thread runs first, grabs the page,
  1412          *      and waits for another page.  It will be the first to run
  1413          *      when the next page is freed.
  1414          *
  1415          *      However, there is a slight danger here.
  1416          *      The thread we wake might not use the free page.
  1417          *      Then the other threads could wait indefinitely
  1418          *      while the page goes unused.  To forestall this,
  1419          *      the pageout daemon will keep making free pages
  1420          *      as long as vm_page_free_wanted is non-zero.
  1421          */
 1422 
 1423         if ((vm_page_free_wanted > 0) &&
 1424             (vm_page_free_count >= vm_page_free_reserved)) {
 1425                 vm_page_free_wanted--;
 1426                 thread_wakeup_one((event_t) &vm_page_free_count);
 1427         }
 1428 
 1429         simple_unlock(&vm_page_queue_free_lock);
 1430 }
 1431 
 1432 /*
 1433  *      vm_page_wait:
 1434  *
 1435  *      Wait for a page to become available.
 1436  *      If there are plenty of free pages, then we don't sleep.
 1437  */
 1438 
 1439 void vm_page_wait(
 1440         continuation_t continuation)
 1441 {
 1442         /*
 1443          *      We can't use vm_page_free_reserved to make this
 1444          *      determination.  Consider: some thread might
 1445          *      need to allocate two pages.  The first allocation
 1446          *      succeeds, the second fails.  After the first page is freed,
 1447          *      a call to vm_page_wait must really block.
 1448          */
 1449 
 1450         simple_lock(&vm_page_queue_free_lock);
 1451         if (vm_page_free_count < vm_page_free_target) {
 1452                 if (vm_page_free_wanted++ == 0)
 1453                         thread_wakeup((event_t)&vm_page_free_wanted);
 1454                 assert_wait((event_t)&vm_page_free_count, FALSE);
 1455                 simple_unlock(&vm_page_queue_free_lock);
 1456                 if (continuation != 0) {
 1457                         counter(c_vm_page_wait_block_user++);
 1458                         thread_block(continuation);
 1459                 } else {
 1460                         counter(c_vm_page_wait_block_kernel++);
 1461                         thread_block(CONTINUE_NULL);
 1462                 }
 1463         } else
 1464                 simple_unlock(&vm_page_queue_free_lock);
 1465 }
 1466 
 1467 /*
 1468  *      vm_page_alloc:
 1469  *
 1470  *      Allocate and return a memory cell associated
 1471  *      with this VM object/offset pair.
 1472  *
 1473  *      Object must be locked.
 1474  */
 1475 
 1476 vm_page_t vm_page_alloc(
 1477         vm_object_t     object,
 1478         vm_offset_t     offset)
 1479 {
 1480         register vm_page_t      mem;
 1481 
 1482         mem = vm_page_grab();
 1483         if (mem == VM_PAGE_NULL)
 1484                 return VM_PAGE_NULL;
 1485 
 1486         vm_page_lock_queues();
 1487         vm_page_insert(mem, object, offset);
 1488         vm_page_unlock_queues();
 1489 
 1490         return mem;
 1491 }
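/*
 *      Because vm_page_alloc can return VM_PAGE_NULL when the free list
 *      is low, callers that must make progress typically retry around
 *      vm_page_wait.  An illustrative sketch of that pattern (not a
 *      routine in this file):
 *
 *              while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
 *                      vm_object_unlock(object);
 *                      vm_page_wait(CONTINUE_NULL);
 *                      vm_object_lock(object);
 *              }
 */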
 1492 
 1493 /*
 1494  *      vm_page_free:
 1495  *
 1496  *      Returns the given page to the free list,
  1497  *      disassociating it from any VM object.
 1498  *
 1499  *      Object and page queues must be locked prior to entry.
 1500  */
 1501 void vm_page_free(
 1502         register vm_page_t      mem)
 1503 {
 1504         if (mem->free)
 1505                 panic("vm_page_free");
 1506 
 1507         if (mem->tabled)
 1508                 vm_page_remove(mem);
 1509         VM_PAGE_QUEUES_REMOVE(mem);
 1510 
 1511         if (mem->wire_count != 0) {
 1512                 if (!mem->private && !mem->fictitious)
 1513                         vm_page_wire_count--;
 1514                 mem->wire_count = 0;
 1515         }
 1516 
 1517         if (mem->laundry) {
 1518                 vm_page_laundry_count--;
 1519                 mem->laundry = FALSE;
 1520         }
 1521 
 1522         PAGE_WAKEUP_DONE(mem);
 1523 
 1524         if (mem->absent)
 1525                 vm_object_absent_release(mem->object);
 1526 
 1527         /*
 1528          *      XXX The calls to vm_page_init here are
 1529          *      really overkill.
 1530          */
 1531 
 1532         if (mem->private || mem->fictitious) {
 1533                 vm_page_init(mem, vm_page_fictitious_addr);
 1534                 mem->fictitious = TRUE;
 1535                 vm_page_release_fictitious(mem);
 1536         } else {
 1537                 vm_page_init(mem, mem->phys_addr);
 1538                 vm_page_release(mem);
 1539         }
 1540 }
 1541 
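/*
 *      [Editor's note: illustrative sketch, not part of the original
 *      file.]  vm_page_free requires both the owning object and the
 *      page queues to be locked by the caller.  vm_object_lock and
 *      vm_object_unlock are assumed from the object layer; the
 *      ordering (object lock, then page queues) follows the pattern
 *      used by vm_page_alloc above.
 */
static void
example_object_page_free(vm_page_t m)
{
        vm_object_t     object = m->object;     /* page assumed tabled in an object */

        vm_object_lock(object);
        vm_page_lock_queues();
        vm_page_free(m);                        /* back to the free list */
        vm_page_unlock_queues();
        vm_object_unlock(object);
}
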
 1542 /*
 1543  *      vm_page_wire:
 1544  *
 1545  *      Mark this page as wired down by yet
 1546  *      another map, removing it from paging queues
 1547  *      as necessary.
 1548  *
 1549  *      The page's object and the page queues must be locked.
 1550  */
 1551 void vm_page_wire(
 1552         register vm_page_t      mem)
 1553 {
 1554         VM_PAGE_CHECK(mem);
 1555 
 1556         if (mem->wire_count == 0) {
 1557                 VM_PAGE_QUEUES_REMOVE(mem);
 1558                 if (!mem->private && !mem->fictitious)
 1559                         vm_page_wire_count++;
 1560         }
 1561         mem->wire_count++;
 1562 }
 1563 
 1564 /*
 1565  *      vm_page_unwire:
 1566  *
 1567  *      Release one wiring of this page, potentially
 1568  *      enabling it to be paged again.
 1569  *
 1570  *      The page's object and the page queues must be locked.
 1571  */
 1572 void vm_page_unwire(
 1573         register vm_page_t      mem)
 1574 {
 1575         VM_PAGE_CHECK(mem);
 1576 
 1577         if (--mem->wire_count == 0) {
 1578                 queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
 1579                 vm_page_active_count++;
 1580                 mem->active = TRUE;
 1581                 if (!mem->private && !mem->fictitious)
 1582                         vm_page_wire_count--;
 1583         }
 1584 }
 1585 
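/*
 *      [Editor's note: illustrative sketch, not part of the original
 *      file.]  A caller that needs a page to stay resident, for
 *      example while it is the target of device I/O, wires it and
 *      unwires it when done.  Both calls also require the page's
 *      object to be locked; that lock is assumed to be held by the
 *      caller here, and example_do_io_on_page is hypothetical.
 */
extern void     example_do_io_on_page(vm_page_t m);     /* hypothetical I/O routine */

static void
example_wired_io(vm_page_t m)
{
        vm_page_lock_queues();
        vm_page_wire(m);                /* keep the page off the paging queues */
        vm_page_unlock_queues();

        example_do_io_on_page(m);       /* page cannot be paged out meanwhile */

        vm_page_lock_queues();
        vm_page_unwire(m);              /* eligible for pageout again */
        vm_page_unlock_queues();
}
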
 1586 /*
 1587  *      vm_page_deactivate:
 1588  *
 1589  *      Returns the given page to the inactive list,
 1590  *      indicating that no physical maps have access
 1591  *      to this page.  [Used by the physical mapping system.]
 1592  *
 1593  *      The page queues must be locked.
 1594  */
 1595 void vm_page_deactivate(
 1596         register vm_page_t      m)
 1597 {
 1598         VM_PAGE_CHECK(m);
 1599 
 1600         /*
 1601          *      This page is no longer very interesting.  If it was
 1602          *      interesting (active or inactive/referenced), then we
 1603          *      clear the reference bit and (re)enter it in the
 1604          *      inactive queue.  Note wired pages should not have
 1605          *      their reference bit cleared.
 1606          */
 1607 
 1608         if (m->active || (m->inactive && m->reference)) {
 1609                 if (!m->fictitious && !m->absent)
 1610                         pmap_clear_reference(m->phys_addr);
 1611                 m->reference = FALSE;
 1612                 VM_PAGE_QUEUES_REMOVE(m);
 1613         }
 1614         if (m->wire_count == 0 && !m->inactive) {
 1615                 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
 1616                 m->inactive = TRUE;
 1617                 vm_page_inactive_count++;
 1618         }
 1619 }
 1620 
 1621 /*
 1622  *      vm_page_activate:
 1623  *
 1624  *      Put the specified page on the active list (if appropriate).
 1625  *
 1626  *      The page queues must be locked.
 1627  */
 1628 
 1629 void vm_page_activate(
 1630         register vm_page_t      m)
 1631 {
 1632         VM_PAGE_CHECK(m);
 1633 
 1634         if (m->inactive) {
 1635                 queue_remove(&vm_page_queue_inactive, m, vm_page_t,
 1636                                                 pageq);
 1637                 vm_page_inactive_count--;
 1638                 m->inactive = FALSE;
 1639         }
 1640         if (m->wire_count == 0) {
 1641                 if (m->active)
 1642                         panic("vm_page_activate: already active");
 1643 
 1644                 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
 1645                 m->active = TRUE;
 1646                 vm_page_active_count++;
 1647         }
 1648 }
 1649 
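/*
 *      [Editor's note: illustrative sketch, not part of the original
 *      file.]  The pageout path typically moves pages between the
 *      active and inactive queues based on the hardware reference
 *      bit; pmap_is_referenced is assumed to be the standard Mach
 *      pmap query, and the policy shown is only an illustration.
 *      Note that vm_page_activate panics on an already-active page,
 *      so the sketch checks the queue flags first.
 */
static void
example_requeue_page(vm_page_t m)
{
        vm_page_lock_queues();
        if (m->inactive && pmap_is_referenced(m->phys_addr))
                vm_page_activate(m);    /* referenced while inactive: bring it back */
        else if (m->active && !pmap_is_referenced(m->phys_addr))
                vm_page_deactivate(m);  /* idle: make it a pageout candidate */
        vm_page_unlock_queues();
}
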
 1650 /*
 1651  *      vm_page_zero_fill:
 1652  *
 1653  *      Zero-fill the specified page.
 1654  */
 1655 void vm_page_zero_fill(
 1656         vm_page_t       m)
 1657 {
 1658         VM_PAGE_CHECK(m);
 1659 
 1660         pmap_zero_page(m->phys_addr);
 1661 }
 1662 
 1663 /*
 1664  *      vm_page_copy:
 1665  *
 1666  *      Copy one page to another
 1667  */
 1668 
 1669 void vm_page_copy(
 1670         vm_page_t       src_m,
 1671         vm_page_t       dest_m)
 1672 {
 1673         VM_PAGE_CHECK(src_m);
 1674         VM_PAGE_CHECK(dest_m);
 1675 
 1676         pmap_copy_page(src_m->phys_addr, dest_m->phys_addr);
 1677 }
 1678 
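/*
 *      [Editor's note: illustrative sketch, not part of the original
 *      file.]  vm_page_zero_fill and vm_page_copy are thin wrappers
 *      around the pmap layer.  A typical fill path zeroes a freshly
 *      allocated page when there is no backing copy and copies the
 *      source page otherwise; the "zero when no source" policy here
 *      is only an illustration.
 */
static void
example_fill_page(vm_page_t dst_m, vm_page_t src_m)
{
        if (src_m == VM_PAGE_NULL)
                vm_page_zero_fill(dst_m);       /* fresh page: supply zeros */
        else
                vm_page_copy(src_m, dst_m);     /* physical copy via pmap */
}
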
 1679 #if     MACH_VM_DEBUG
 1680 /*
 1681  *      Routine:        vm_page_info
 1682  *      Purpose:
 1683  *              Return information about the global VP table.
 1684  *              Fills the buffer with as much information as possible
 1685  *              and returns the desired size of the buffer.
 1686  *      Conditions:
 1687  *              Nothing locked.  The caller should provide
 1688  *              possibly-pageable memory.
 1689  */
 1690 
 1691 unsigned int
 1692 vm_page_info(
 1693         hash_info_bucket_t *info,
 1694         unsigned int    count)
 1695 {
 1696         int i;
 1697 
 1698         if (vm_page_bucket_count < count)
 1699                 count = vm_page_bucket_count;
 1700 
 1701         for (i = 0; i < count; i++) {
 1702                 vm_page_bucket_t *bucket = &vm_page_buckets[i];
 1703                 unsigned int bucket_count = 0;
 1704                 vm_page_t m;
 1705 
 1706                 simple_lock(&bucket->lock);
 1707                 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
 1708                         bucket_count++;
 1709                 simple_unlock(&bucket->lock);
 1710 
 1711                 /* don't touch pageable memory while holding locks */
 1712                 info[i].hib_count = bucket_count;
 1713         }
 1714 
 1715         return vm_page_bucket_count;
 1716 }
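
/*
 *      [Editor's note: illustrative sketch, not part of the original
 *      file.]  A MACH_VM_DEBUG caller supplies a buffer of
 *      hash_info_bucket_t entries; vm_page_info fills at most
 *      'count' of them and returns the total bucket count, so the
 *      caller can see whether the buffer was large enough.  The
 *      fixed-size buffer and the use of printf are assumptions for
 *      illustration.
 */
static void
example_dump_bucket_sizes(void)
{
        hash_info_bucket_t      info[128];      /* assumed large enough for a demo */
        unsigned int            total, filled, i;

        total = vm_page_info(info, 128);        /* fills min(128, total) entries */
        filled = (total < 128) ? total : 128;
        for (i = 0; i < filled; i++)
                printf("bucket %u: %u pages\n", i, info[i].hib_count);
}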
 1717 #endif  /* MACH_VM_DEBUG */
 1718 
 1719 #include <mach_kdb.h>
 1720 #if     MACH_KDB
 1721 #include <ddb/db_output.h>
 1722 
 1723 /*
 1724  *      Routine:        vm_page_print [exported]
 1725  */
 1726 void            vm_page_print(p)
 1727         vm_page_t       p;
 1728 {
 1729         db_iprintf("Page 0x%X: object 0x%X,",
 1730                    (vm_offset_t) p, (vm_offset_t) p->object);
 1731          db_printf(" offset 0x%X", (vm_offset_t) p->offset);
 1732          db_printf("wire_count %d,", p->wire_count);
 1733          db_printf(" %s",
 1734                 (p->active ? "active" : (p->inactive ? "inactive" : "loose")));
 1735          db_printf("%s",
 1736                 (p->free ? " free" : ""));
 1737          db_printf("%s ",
 1738                 (p->laundry ? " laundry" : ""));
 1739          db_printf("%s",
 1740                 (p->dirty ? "dirty" : "clean"));
 1741          db_printf("%s",
 1742                 (p->busy ? " busy" : ""));
 1743          db_printf("%s",
 1744                 (p->absent ? " absent" : ""));
 1745          db_printf("%s",
 1746                 (p->error ? " error" : ""));
 1747          db_printf("%s",
 1748                 (p->fictitious ? " fictitious" : ""));
 1749          db_printf("%s",
 1750                 (p->private ? " private" : ""));
 1751          db_printf("%s",
 1752                 (p->wanted ? " wanted" : ""));
 1753          db_printf("%s,",
 1754                 (p->tabled ? "" : "not_tabled"));
 1755          db_printf("phys_addr = 0x%X, lock = 0x%X, unlock_request = 0x%X\n",
 1756                 (vm_offset_t) p->phys_addr,
 1757                 (vm_offset_t) p->page_lock,
 1758                 (vm_offset_t) p->unlock_request);
 1759 }
 1760 #endif  /* MACH_KDB */
