FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_fault.c


    1 /* 
    2  * Mach Operating System
    3  * Copyright (c) 1993-1987 Carnegie Mellon University
    4  * All Rights Reserved.
    5  * 
    6  * Permission to use, copy, modify and distribute this software and its
    7  * documentation is hereby granted, provided that both the copyright
    8  * notice and this permission notice appear in all copies of the
    9  * software, derivative works or modified versions, and any portions
   10  * thereof, and that both notices appear in supporting documentation.
   11  * 
   12  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   13  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
   14  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   15  * 
   16  * Carnegie Mellon requests users of this software to return to
   17  * 
   18  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   19  *  School of Computer Science
   20  *  Carnegie Mellon University
   21  *  Pittsburgh PA 15213-3890
   22  * 
   23  * any improvements or extensions that they make and grant Carnegie Mellon
   24  * the rights to redistribute these changes.
   25  */
   26 /*
   27  * HISTORY
   28  * $Log:        vm_fault.c,v $
   29  * Revision 2.21  93/08/03  12:35:11  mrt
   30  *      [93/08/02  16:55:49  bershad]
   31  * 
   32  *      Added vm sample support.
   33  *      [93/07/30  10:27:00  bershad]
   34  * 
   35  * Revision 2.20  93/01/14  18:00:55  danner
   36  *      Removed unneeded cast from argument to thread_wakeup_with_result.
   37  *      [92/12/30            dbg]
   38  *      64bit cleanup.
   39  *      [92/12/01            af]
   40  * 
   41  *      Added temporary variable to ensure that cpu_number (hence
   42  *      current_thread) does not change around blocking call to zalloc.
   43  *      Fix from Grenoble.
   44  *      [92/10/23            dbg]
   45  * 
   46  * Revision 2.19  92/08/03  18:00:24  jfriedl
   47  *      removed silly prototypes
   48  *      [92/08/02            jfriedl]
   49  * 
   50  * Revision 2.18  92/05/21  17:25:45  jfriedl
   51  *      Cleanup to quiet gcc warnings.
   52  *      [92/05/16            jfriedl]
   53  * 
   54  * Revision 2.17  92/02/23  19:50:43  elf
   55  *      Check for both copy and shadow objects due to asymmetric
   56  *      copy on write for temporary objects.
   57  *      [92/02/19  14:13:33  dlb]
   58  * 
   59  *      No more sharing maps.  Eliminated single_use argument
   60  *      from vm_map_lookup() calls.
   61  *      [92/01/07  11:03:34  dlb]
   62  * 
   63  * Revision 2.16  92/01/14  16:47:44  rpd
   64  *      Fixed some locking/assertion bugs in vm_fault_copy.
   65  *      [92/01/06            rpd]
   66  * 
   67  * Revision 2.15  91/11/12  11:52:02  rvb
   68  *      Added simple_lock_pause.
   69  *      [91/11/12            rpd]
   70  * 
   71  * Revision 2.14  91/07/01  08:27:05  jsb
   72  *      Changed remaining pager_* references to memory_object_*.
   73  *      [91/06/29  16:26:22  jsb]
   74  * 
   75  * Revision 2.13  91/05/18  14:40:02  rpd
   76  *      Added proper locking for vm_page_insert.
   77  *      [91/04/21            rpd]
   78  *      Changed vm_fault_page to use fictitious pages.
   79  *      Increased vm_object_absent_max to 50.
   80  *      [91/03/29            rpd]
   81  * 
   82  * Revision 2.12  91/05/14  17:48:42  mrt
   83  *      Correcting copyright
   84  * 
   85  * Revision 2.11  91/03/16  15:04:49  rpd
   86  *      Fixed vm_fault_page to give vm_pageout_page busy pages.
   87  *      [91/03/11            rpd]
   88  *      Added vm_fault_init, vm_fault_state_t.
   89  *      [91/02/16            rpd]
   90  * 
   91  *      Added resume, continuation arguments to vm_fault, vm_fault_page.
   92  *      Added continuation argument to VM_PAGE_WAIT.
   93  *      Added vm_fault_continue.
   94  *      [91/02/05            rpd]
   95  * 
   96  * Revision 2.10  91/02/05  17:58:00  mrt
   97  *      Changed to new Mach copyright
   98  *      [91/02/01  16:31:44  mrt]
   99  * 
  100  * Revision 2.9  91/01/08  16:44:45  rpd
  101  *      Turned software_reference_bits on.
  102  *      [90/12/29            rpd]
  103  *      Added continuation argument to thread_block.
  104  *      [90/12/08            rpd]
  105  * 
  106  *      Changed VM_WAIT to VM_PAGE_WAIT.
  107  *      [90/11/13            rpd]
  108  * 
  109  * Revision 2.8  90/10/25  14:49:52  rwd
  110  *      Turn software_reference_bits off by default.
  111  *      [90/10/25            rwd]
  112  * 
  113  *      Extended software_reference_bits to vm_fault_page.
  114  *      [90/10/24            rpd]
  115  *      Fixed vm_fault_page to clear the modify bit on zero-filled pages.
  116  *      [90/10/23            rpd]
  117  *      Added watchpoint debugger support.
  118  *      [90/10/16            rpd]
  119  *      Added software_reference_bits runtime switch.
  120  *      [90/10/13            rpd]
  121  * 
  122  * Revision 2.7  90/10/12  13:05:21  rpd
  123  *      Added missing vm_map_verify_done calls to vm_fault and
  124  *      vm_fault_copy.  From OSF.
  125  *      [90/10/10            rpd]
  126  *      Modified vm_fault_page to leave its result page on the pageout queues.
  127  *      Only activate the pages returned by vm_fault_page if they aren't
  128  *      already on a pageout queue.  In vm_fault, turn on the software
  129  *      reference bit for the page.
  130  *      [90/10/08            rpd]
  131  * 
  132  * Revision 2.6  90/06/02  15:10:32  rpd
  133  *      Fixed vm_fault_copy to handle null source objects.
  134  *      [90/04/24            rpd]
  135  *      Converted to new IPC.
  136  *      [90/03/26  23:11:58  rpd]
  137  * 
  138  * Revision 2.5  90/05/29  18:38:39  rwd
  139  *      Picked up rfr debugging changes.
  140  *      [90/04/12  13:47:40  rwd]
  141  * 
  142  * Revision 2.4  90/05/03  15:58:29  dbg
  143  *      Pass 'flush' argument to vm_pageout_page.
  144  *      [90/03/28            dbg]
  145  * 
  146  * Revision 2.3  90/02/22  20:05:21  dbg
  147  *      Deactivate the copied-from page.
  148  *      [90/02/09            dbg]
  149  *      Add changes from mainline:
  150  *              Assert that page is not busy before marking it busy.
  151  *              [89/12/21            dlb]
  152  *              Check for absent as well as busy before freeing a page when
  153  *              pagein fails.
  154  *              [89/12/13            dlb]
  155  *              Change all occurrences of PAGE_WAKEUP to PAGE_WAKEUP_DONE to
  156  *              reflect the fact that they clear the busy flag.  See
  157  *              vm/vm_page.h.  Add PAGE_WAKEUP_DONE to vm_fault_unwire().
  158  *              [89/12/13            dlb]
  159  *              Break out of fault loop after zero filling in response to
  160  *              finding an absent page; the zero filled page was either absent
  161  *              or newly allocated and so can't be page locked.
  162  *              [89/12/12            dlb]
  163  *              Must recheck page to object relationship before freeing
  164  *              page if pagein fails.
  165  *              [89/12/11            dlb]
  166  * 
  167  *              Use vme_start, vme_end when accessing map entries.
  168  *              [89/08/31  21:10:05  rpd]
  169  * 
  170  *              Add vm_fault_copy(), for overwriting a permanent object.
  171  *              [89/07/28  16:14:27  mwyoung]
  172  * 
  173  * Revision 2.2  90/01/11  11:47:36  dbg
  174  *      Add vm_fault_cleanup to save space.
  175  *      [89/12/13            dbg]
  176  * 
  177  *      Pick up changes from mainline:
  178  * 
  179  *              Consider paging_offset when looking at external page state.
  180  *              [89/10/16  15:31:17  af]
  181  * 
  182  *              Only require read access for the original page once past the
  183  *              top-level object... it will only be copied to a new page.
  184  *              [89/05/19  17:45:05  mwyoung]
  185  * 
  186  *              Also remove "absent" page from pageout queues before zero-filling.
  187  *              [89/05/01            mwyoung]
  188  *              When transforming an "absent" page into a placeholder page,
  189  *              remove it from the page queues.
  190  *              [89/04/22            mwyoung]
  191  * 
  192  *              Fixed usage of must_be_resident in vm_fault_page when
  193  *              descending down shadow chain.  Fixed corresponding
  194  *              assertion in vm_fault.
  195  *              [89/10/02  16:17:20  rpd]
  196  *      Remove vm_fault_copy_entry and non-XP code.
  197  *      [89/04/28            dbg]
  198  * 
  199  * Revision 2.1  89/08/03  16:44:50  rwd
  200  * Created.
  201  * 
  202  * Revision 2.17  89/05/06  02:58:43  rpd
  203  *      Picked up fix from mwyoung for a COW-triggered page leak:
  204  *      when copying from a copy-on-write page, activate the page
  205  *      instead of deactivating it.  Also picked up two innocuous
  206  *      VM_PAGE_QUEUES_REMOVE() additions in the "unavailable page" code.
  207  *      [89/05/06            rpd]
  208  *      Fixed the call to vm_fault_wire_fast in vm_fault_copy_entry.
  209  *      [89/05/05            rpd]
  210  * 
  211  * Revision 2.16  89/04/18  21:25:12  mwyoung
  212  *      Recent history:
  213  *              Limit the number of outstanding page requests for
  214  *               non-internal objects.
  215  *              Use hint to determine whether a page of temporary memory may
  216  *               have been written to backing storage.
  217  *      History condensation:
  218  *              Separate fault handling into separate routine (mwyoung).
  219  *              Handle I/O errors (dbg, mwyoung).
  220  *              Use map_verify technology (mwyoung).
  221  *              Allow faults to be interrupted (mwyoung).
  222  *              Optimized wiring code (dlb).
  223  *              Initial external memory management (mwyoung, bolosky).
  224  *              Original version (avie, mwyoung, dbg).
  225  * 
  226  */
  227 /*
  228  *      File:   vm_fault.c
  229  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  230  *
  231  *      Page fault handling module.
  232  */
  233 #include <mach_pagemap.h>
  234 #include <mach_kdb.h>
  235 #include <mach_pcsample.h>
  236 
  237 
  238 #include <vm/vm_fault.h>
  239 #include <mach/kern_return.h>
  240 #include <mach/message.h>       /* for error codes */
  241 #include <kern/counters.h>
  242 #include <kern/thread.h>
  243 #include <kern/sched_prim.h>
  244 #include <vm/vm_map.h>
  245 #include <vm/vm_object.h>
  246 #include <vm/vm_page.h>
  247 #include <vm/pmap.h>
  248 #include <mach/vm_statistics.h>
  249 #include <vm/vm_pageout.h>
  250 #include <mach/vm_param.h>
  251 #include <mach/memory_object.h>
  252 #include <mach/memory_object_user.h>
  253                                 /* For memory_object_data_{request,unlock} */
  254 #include <kern/mach_param.h>
  255 #include <kern/macro_help.h>
  256 #include <kern/zalloc.h>
  257 
  258 
  259 
  260 /*
  261  *      State needed by vm_fault_continue.
  262  *      This is a little hefty to drop directly
  263  *      into the thread structure.
  264  */
  265 typedef struct vm_fault_state {
  266         struct vm_map *vmf_map;
  267         vm_offset_t vmf_vaddr;
  268         vm_prot_t vmf_fault_type;
  269         boolean_t vmf_change_wiring;
  270         void (*vmf_continuation)();
  271         vm_map_version_t vmf_version;
  272         boolean_t vmf_wired;
  273         struct vm_object *vmf_object;
  274         vm_offset_t vmf_offset;
  275         vm_prot_t vmf_prot;
  276 
  277         boolean_t vmfp_backoff;
  278         struct vm_object *vmfp_object;
  279         vm_offset_t vmfp_offset;
  280         struct vm_page *vmfp_first_m;
  281         vm_prot_t vmfp_access;
  282 } vm_fault_state_t;
  283 
  284 zone_t          vm_fault_state_zone = 0;
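
        /*
         * Illustrative sketch (not compiled): the intended life cycle of the
         * continuation state above.  The resume path in vm_fault_page() below
         * reads the state back through current_thread()->ith_other; the
         * allocate-and-save side shown here is an assumption distilled from
         * that path, not a copy of vm_fault() itself.
         */
        #if 0
                vm_fault_state_t *state;

                /* Allocate per-fault state from the zone and stash it on the thread. */
                state = (vm_fault_state_t *) zalloc(vm_fault_state_zone);
                current_thread()->ith_other = (void *) state;   /* field type assumed compatible */

                /* Save enough to restart, then let thread_block() discard the stack. */
                state->vmfp_backoff = FALSE;
                state->vmfp_object = object;
                state->vmfp_offset = offset;
                state->vmfp_first_m = first_m;
                state->vmfp_access = access_required;
                thread_block(continuation);

                /* The continuation later re-enters vm_fault_page() with
                 * resume == TRUE, which reloads the saved vmfp_* fields. */
        #endif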
  285 
  286 int             vm_object_absent_max = 50;
  287 
  288 int             vm_fault_debug = 0;
  289 
  290 boolean_t       vm_fault_dirty_handling = FALSE;
  291 boolean_t       vm_fault_interruptible = TRUE;
  292 
  293 boolean_t       software_reference_bits = TRUE;
  294 
  295 #if     MACH_KDB
  296 extern struct db_watchpoint *db_watchpoint_list;
   297 #endif  /* MACH_KDB */
  298 
  299 /*
  300  *      Routine:        vm_fault_init
  301  *      Purpose:
  302  *              Initialize our private data structures.
  303  */
  304 void vm_fault_init()
  305 {
  306         vm_fault_state_zone = zinit(sizeof(vm_fault_state_t),
  307                                     THREAD_MAX * sizeof(vm_fault_state_t),
  308                                     sizeof(vm_fault_state_t),
  309                                     FALSE,
  310                                     "vm fault state");
  311 }
  312 
  313 /*
  314  *      Routine:        vm_fault_cleanup
  315  *      Purpose:
  316  *              Clean up the result of vm_fault_page.
  317  *      Results:
  318  *              The paging reference for "object" is released.
  319  *              "object" is unlocked.
  320  *              If "top_page" is not null,  "top_page" is
  321  *              freed and the paging reference for the object
  322  *              containing it is released.
  323  *
  324  *      In/out conditions:
  325  *              "object" must be locked.
  326  */
  327 void
  328 vm_fault_cleanup(object, top_page)
  329         register vm_object_t    object;
  330         register vm_page_t      top_page;
  331 {
  332         vm_object_paging_end(object);
  333         vm_object_unlock(object);
  334 
  335         if (top_page != VM_PAGE_NULL) {
  336             object = top_page->object;
  337             vm_object_lock(object);
  338             VM_PAGE_FREE(top_page);
  339             vm_object_paging_end(object);
  340             vm_object_unlock(object);
  341         }
  342 }
  343 
  344 
  345 #if MACH_PCSAMPLE > 0
  346 void
  347 vm_stat_sample(flavor)
  348     sampled_pc_flavor_t flavor;
  349 {
  350     thread_t thread = current_thread();
  351 
  352     if (thread->pc_sample.buffer &&
  353         ( (thread->pc_sample.sampletypes & flavor) == flavor))  {
  354         take_pc_sample(thread, &thread->pc_sample, flavor);
  355     }
  356 
  357     if (thread->task->pc_sample.buffer &&
  358         ((thread->task->pc_sample.sampletypes & flavor) == flavor))  {
  359         take_pc_sample(thread, &thread->task->pc_sample, flavor);
  360     }
  361 }
  362 #else
  363 #define vm_stat_sample(x)
  364 #endif /* MACH_PCSAMPLE > 0 */
  365 
  366 
  367 
  368 /*
  369  *      Routine:        vm_fault_page
  370  *      Purpose:
  371  *              Find the resident page for the virtual memory
  372  *              specified by the given virtual memory object
  373  *              and offset.
  374  *      Additional arguments:
   375  *              The required permissions for the page are given
  376  *              in "fault_type".  Desired permissions are included
  377  *              in "protection".
  378  *
  379  *              If the desired page is known to be resident (for
  380  *              example, because it was previously wired down), asserting
   381  *              the "must_be_resident" parameter will speed the search.
  382  *
  383  *              If the operation can be interrupted (by thread_abort
  384  *              or thread_terminate), then the "interruptible"
  385  *              parameter should be asserted.
  386  *
  387  *      Results:
  388  *              The page containing the proper data is returned
  389  *              in "result_page".
  390  *
  391  *      In/out conditions:
  392  *              The source object must be locked and referenced,
  393  *              and must donate one paging reference.  The reference
  394  *              is not affected.  The paging reference and lock are
  395  *              consumed.
  396  *
  397  *              If the call succeeds, the object in which "result_page"
  398  *              resides is left locked and holding a paging reference.
  399  *              If this is not the original object, a busy page in the
  400  *              original object is returned in "top_page", to prevent other
  401  *              callers from pursuing this same data, along with a paging
  402  *              reference for the original object.  The "top_page" should
  403  *              be destroyed when this guarantee is no longer required.
  404  *              The "result_page" is also left busy.  It is not removed
  405  *              from the pageout queues.
  406  */
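
        /*
         * Caller-side sketch (illustrative only, not compiled): one way the
         * contract documented above can be honored, paired with
         * vm_fault_cleanup().  This is an assumption distilled from the
         * in/out conditions, not a copy of vm_fault(); the map lookup,
         * map re-verification and pmap_enter steps are elided.
         */
        #if 0
                vm_prot_t               prot = fault_type;      /* desired permissions */
                vm_page_t               result_page, top_page;
                vm_fault_return_t       kr;

                vm_object_lock(object);
                vm_object_paging_begin(object);         /* donate one paging reference */

                kr = vm_fault_page(object, offset, fault_type,
                                   FALSE,               /* must_be_resident */
                                   interruptible,
                                   &prot, &result_page, &top_page,
                                   FALSE,               /* resume */
                                   (void (*)()) 0);     /* no continuation */

                if (kr == VM_FAULT_SUCCESS) {
                        /*
                         * result_page is left busy in its (locked) object; map it,
                         * release it, then drop the paging references and the busy
                         * top_page via vm_fault_cleanup().
                         */
                        /* ... pmap_enter() and PAGE_WAKEUP_DONE(result_page) ... */
                        vm_fault_cleanup(result_page->object, top_page);
                }
                /* On the failure returns (retry, interrupted, errors, shortages)
                 * vm_fault_page() has already called vm_fault_cleanup() itself. */
        #endif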
  407 vm_fault_return_t vm_fault_page(first_object, first_offset,
  408                                 fault_type, must_be_resident, interruptible,
  409                                 protection,
  410                                 result_page, top_page,
  411                                 resume, continuation)
  412  /* Arguments: */
  413         vm_object_t     first_object;   /* Object to begin search */
  414         vm_offset_t     first_offset;   /* Offset into object */
  415         vm_prot_t       fault_type;     /* What access is requested */
  416         boolean_t       must_be_resident;/* Must page be resident? */
  417         boolean_t       interruptible;  /* May fault be interrupted? */
  418  /* Modifies in place: */
  419         vm_prot_t       *protection;    /* Protection for mapping */
  420  /* Returns: */
  421         vm_page_t       *result_page;   /* Page found, if successful */
  422         vm_page_t       *top_page;      /* Page in top object, if
  423                                          * not result_page.
  424                                          */
  425  /* More arguments: */
  426         boolean_t       resume;         /* We are restarting. */
  427         void            (*continuation)(); /* Continuation for blocking. */
  428 {
  429         register
  430         vm_page_t       m;
  431         register
  432         vm_object_t     object;
  433         register
  434         vm_offset_t     offset;
  435         vm_page_t       first_m;
  436         vm_object_t     next_object;
  437         vm_object_t     copy_object;
  438         boolean_t       look_for_page;
  439         vm_prot_t       access_required;
  440 
  441         if (resume) {
  442                 register vm_fault_state_t *state =
  443                         (vm_fault_state_t *) current_thread()->ith_other;
  444 
  445                 if (state->vmfp_backoff)
  446                         goto after_block_and_backoff;
  447 
  448                 object = state->vmfp_object;
  449                 offset = state->vmfp_offset;
  450                 first_m = state->vmfp_first_m;
  451                 access_required = state->vmfp_access;
  452                 goto after_thread_block;
  453         }
  454         
  455         vm_stat_sample(SAMPLED_PC_VM_FAULTS_ANY);
  456         vm_stat.faults++;               /* needs lock XXX */
  457 
  458 /*
  459  *      Recovery actions
  460  */
  461 #define RELEASE_PAGE(m)                                 \
  462         MACRO_BEGIN                                     \
  463         PAGE_WAKEUP_DONE(m);                            \
  464         vm_page_lock_queues();                          \
  465         if (!m->active && !m->inactive)                 \
  466                 vm_page_activate(m);                    \
  467         vm_page_unlock_queues();                        \
  468         MACRO_END
  469 
  470         if (vm_fault_dirty_handling
  471 #if     MACH_KDB
  472                 /*
  473                  *      If there are watchpoints set, then
  474                  *      we don't want to give away write permission
  475                  *      on a read fault.  Make the task write fault,
  476                  *      so that the watchpoint code notices the access.
  477                  */
  478             || db_watchpoint_list
   479 #endif  /* MACH_KDB */
  480             ) {
  481                 /*
  482                  *      If we aren't asking for write permission,
  483                  *      then don't give it away.  We're using write
  484                  *      faults to set the dirty bit.
  485                  */
  486                 if (!(fault_type & VM_PROT_WRITE))
  487                         *protection &= ~VM_PROT_WRITE;
  488         }
  489 
  490         if (!vm_fault_interruptible)
  491                 interruptible = FALSE;
  492 
  493         /*
  494          *      INVARIANTS (through entire routine):
  495          *
  496          *      1)      At all times, we must either have the object
  497          *              lock or a busy page in some object to prevent
  498          *              some other thread from trying to bring in
  499          *              the same page.
  500          *
  501          *              Note that we cannot hold any locks during the
  502          *              pager access or when waiting for memory, so
  503          *              we use a busy page then.
  504          *
  505          *              Note also that we aren't as concerned about more than
  506          *              one thread attempting to memory_object_data_unlock
  507          *              the same page at once, so we don't hold the page
  508          *              as busy then, but do record the highest unlock
  509          *              value so far.  [Unlock requests may also be delivered
  510          *              out of order.]
  511          *
  512          *      2)      To prevent another thread from racing us down the
  513          *              shadow chain and entering a new page in the top
  514          *              object before we do, we must keep a busy page in
  515          *              the top object while following the shadow chain.
  516          *
  517          *      3)      We must increment paging_in_progress on any object
  518          *              for which we have a busy page, to prevent
  519          *              vm_object_collapse from removing the busy page
  520          *              without our noticing.
  521          *
  522          *      4)      We leave busy pages on the pageout queues.
  523          *              If the pageout daemon comes across a busy page,
  524          *              it will remove the page from the pageout queues.
  525          */
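
        /*
         * Compact restatement of invariant (1), for orientation only (not
         * compiled): never sleep on a busy page while holding the object
         * lock.  The loop below implements this, plus the continuation
         * variant that lets thread_block() discard the kernel stack.
         */
        #if 0
                m = vm_page_lookup(object, offset);
                if (m != VM_PAGE_NULL && m->busy) {
                        PAGE_ASSERT_WAIT(m, interruptible);     /* queue on the page's event */
                        vm_object_unlock(object);               /* drop the lock before blocking */
                        thread_block((void (*)()) 0);
                        vm_object_lock(object);
                        /* Re-check everything: the page may have moved or been freed. */
                }
        #endif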
  526 
  527         /*
  528          *      Search for the page at object/offset.
  529          */
  530 
  531         object = first_object;
  532         offset = first_offset;
  533         first_m = VM_PAGE_NULL;
  534         access_required = fault_type;
  535 
  536         /*
  537          *      See whether this page is resident
  538          */
  539 
  540         while (TRUE) {
  541                 m = vm_page_lookup(object, offset);
  542                 if (m != VM_PAGE_NULL) {
  543                         /*
  544                          *      If the page is being brought in,
  545                          *      wait for it and then retry.
  546                          *
  547                          *      A possible optimization: if the page
  548                          *      is known to be resident, we can ignore
  549                          *      pages that are absent (regardless of
  550                          *      whether they're busy).
  551                          */
  552 
  553                         if (m->busy) {
  554                                 kern_return_t   wait_result;
  555 
  556                                 PAGE_ASSERT_WAIT(m, interruptible);
  557                                 vm_object_unlock(object);
  558                                 if (continuation != (void (*)()) 0) {
  559                                         register vm_fault_state_t *state =
  560                                                 (vm_fault_state_t *) current_thread()->ith_other;
  561 
  562                                         /*
  563                                          *      Save variables in case
  564                                          *      thread_block discards
  565                                          *      our kernel stack.
  566                                          */
  567 
  568                                         state->vmfp_backoff = FALSE;
  569                                         state->vmfp_object = object;
  570                                         state->vmfp_offset = offset;
  571                                         state->vmfp_first_m = first_m;
  572                                         state->vmfp_access =
  573                                                 access_required;
  574                                         state->vmf_prot = *protection;
  575 
  576                                         counter(c_vm_fault_page_block_busy_user++);
  577                                         thread_block(continuation);
  578                                 } else {
  579                                         counter(c_vm_fault_page_block_busy_kernel++);
  580                                         thread_block((void (*)()) 0);
  581                                 }
  582                             after_thread_block:
  583                                 wait_result = current_thread()->wait_result;
  584                                 vm_object_lock(object);
  585                                 if (wait_result != THREAD_AWAKENED) {
  586                                         vm_fault_cleanup(object, first_m);
  587                                         if (wait_result == THREAD_RESTART)
  588                                                 return(VM_FAULT_RETRY);
  589                                         else
  590                                                 return(VM_FAULT_INTERRUPTED);
  591                                 }
  592                                 continue;
  593                         }
  594 
  595                         /*
  596                          *      If the page is in error, give up now.
  597                          */
  598 
  599                         if (m->error) {
  600                                 VM_PAGE_FREE(m);
  601                                 vm_fault_cleanup(object, first_m);
  602                                 return(VM_FAULT_MEMORY_ERROR);
  603                         }
  604 
  605                         /*
  606                          *      If the page isn't busy, but is absent,
  607                          *      then it was deemed "unavailable".
  608                          */
  609 
  610                         if (m->absent) {
  611                                 /*
  612                                  * Remove the non-existent page (unless it's
  613                                  * in the top object) and move on down to the
  614                                  * next object (if there is one).
  615                                  */
  616 
  617                                 offset += object->shadow_offset;
  618                                 access_required = VM_PROT_READ;
  619                                 next_object = object->shadow;
  620                                 if (next_object == VM_OBJECT_NULL) {
  621                                         vm_page_t real_m;
  622 
  623                                         assert(!must_be_resident);
  624 
  625                                         /*
  626                                          * Absent page at bottom of shadow
  627                                          * chain; zero fill the page we left
  628                                          * busy in the first object, and flush
  629                                          * the absent page.  But first we
  630                                          * need to allocate a real page.
  631                                          */
  632 
  633                                         real_m = vm_page_grab();
  634                                         if (real_m == VM_PAGE_NULL) {
  635                                                 vm_fault_cleanup(object, first_m);
  636                                                 return(VM_FAULT_MEMORY_SHORTAGE);
  637                                         }
  638 
  639                                         if (object != first_object) {
  640                                                 VM_PAGE_FREE(m);
  641                                                 vm_object_paging_end(object);
  642                                                 vm_object_unlock(object);
  643                                                 object = first_object;
  644                                                 offset = first_offset;
  645                                                 m = first_m;
  646                                                 first_m = VM_PAGE_NULL;
  647                                                 vm_object_lock(object);
  648                                         }
  649 
  650                                         VM_PAGE_FREE(m);
  651                                         assert(real_m->busy);
  652                                         vm_page_lock_queues();
  653                                         vm_page_insert(real_m, object, offset);
  654                                         vm_page_unlock_queues();
  655                                         m = real_m;
  656 
  657                                         /*
  658                                          *  Drop the lock while zero filling
  659                                          *  page.  Then break because this
  660                                          *  is the page we wanted.  Checking
  661                                          *  the page lock is a waste of time;
  662                                          *  this page was either absent or
  663                                          *  newly allocated -- in both cases
  664                                          *  it can't be page locked by a pager.
  665                                          */
  666                                         vm_object_unlock(object);
  667 
  668                                         vm_page_zero_fill(m);
  669 
  670                                         vm_stat_sample(SAMPLED_PC_VM_ZFILL_FAULTS);
  671                                         
  672                                         vm_stat.zero_fill_count++;
  673                                         vm_object_lock(object);
  674                                         pmap_clear_modify(m->phys_addr);
  675                                         break;
  676                                 } else {
  677                                         if (must_be_resident) {
  678                                                 vm_object_paging_end(object);
  679                                         } else if (object != first_object) {
  680                                                 vm_object_paging_end(object);
  681                                                 VM_PAGE_FREE(m);
  682                                         } else {
  683                                                 first_m = m;
  684                                                 m->absent = FALSE;
  685                                                 vm_object_absent_release(object);
  686                                                 m->busy = TRUE;
  687 
  688                                                 vm_page_lock_queues();
  689                                                 VM_PAGE_QUEUES_REMOVE(m);
  690                                                 vm_page_unlock_queues();
  691                                         }
  692                                         vm_object_lock(next_object);
  693                                         vm_object_unlock(object);
  694                                         object = next_object;
  695                                         vm_object_paging_begin(object);
  696                                         continue;
  697                                 }
  698                         }
  699 
  700                         /*
  701                          *      If the desired access to this page has
  702                          *      been locked out, request that it be unlocked.
  703                          */
  704 
  705                         if (access_required & m->page_lock) {
  706                                 if ((access_required & m->unlock_request) != access_required) {
  707                                         vm_prot_t       new_unlock_request;
  708                                         kern_return_t   rc;
  709                                         
  710                                         if (!object->pager_ready) {
  711                                                 vm_object_assert_wait(object,
  712                                                         VM_OBJECT_EVENT_PAGER_READY,
  713                                                         interruptible);
  714                                                 goto block_and_backoff;
  715                                         }
  716 
  717                                         new_unlock_request = m->unlock_request =
  718                                                 (access_required | m->unlock_request);
  719                                         vm_object_unlock(object);
  720                                         if ((rc = memory_object_data_unlock(
  721                                                 object->pager,
  722                                                 object->pager_request,
  723                                                 offset + object->paging_offset,
  724                                                 PAGE_SIZE,
  725                                                 new_unlock_request))
  726                                              != KERN_SUCCESS) {
  727                                                 printf("vm_fault: memory_object_data_unlock failed\n");
  728                                                 vm_object_lock(object);
  729                                                 vm_fault_cleanup(object, first_m);
  730                                                 return((rc == MACH_SEND_INTERRUPTED) ?
  731                                                         VM_FAULT_INTERRUPTED :
  732                                                         VM_FAULT_MEMORY_ERROR);
  733                                         }
  734                                         vm_object_lock(object);
  735                                         continue;
  736                                 }
  737 
  738                                 PAGE_ASSERT_WAIT(m, interruptible);
  739                                 goto block_and_backoff;
  740                         }
  741 
  742                         /*
  743                          *      We mark the page busy and leave it on
  744                          *      the pageout queues.  If the pageout
   745  *      daemon comes across it, then it will
  746                          *      remove the page.
  747                          */
  748 
  749                         if (!software_reference_bits) {
  750                                 vm_page_lock_queues();
  751                                 if (m->inactive)  {
  752                                         vm_stat_sample(SAMPLED_PC_VM_REACTIVATION_FAULTS);
  753                                         vm_stat.reactivations++;
  754                                 }
  755 
  756                                 VM_PAGE_QUEUES_REMOVE(m);
  757                                 vm_page_unlock_queues();
  758                         }
  759 
  760                         assert(!m->busy);
  761                         m->busy = TRUE;
  762                         assert(!m->absent);
  763                         break;
  764                 }
  765 
  766                 look_for_page =
  767                         (object->pager_created)
  768 #if     MACH_PAGEMAP
  769                         && (vm_external_state_get(object->existence_info, offset + object->paging_offset) !=
  770                          VM_EXTERNAL_STATE_ABSENT)
   771 #endif  /* MACH_PAGEMAP */
  772                          ;
  773 
  774                 if ((look_for_page || (object == first_object))
  775                                  && !must_be_resident) {
  776                         /*
  777                          *      Allocate a new page for this object/offset
  778                          *      pair.
  779                          */
  780 
  781                         m = vm_page_grab_fictitious();
  782                         if (m == VM_PAGE_NULL) {
  783                                 vm_fault_cleanup(object, first_m);
  784                                 return(VM_FAULT_FICTITIOUS_SHORTAGE);
  785                         }
  786 
  787                         vm_page_lock_queues();
  788                         vm_page_insert(m, object, offset);
  789                         vm_page_unlock_queues();
  790                 }
  791 
  792                 if (look_for_page && !must_be_resident) {
  793                         kern_return_t   rc;
  794 
  795                         /*
  796                          *      If the memory manager is not ready, we
  797                          *      cannot make requests.
  798                          */
  799                         if (!object->pager_ready) {
  800                                 vm_object_assert_wait(object,
  801                                         VM_OBJECT_EVENT_PAGER_READY,
  802                                         interruptible);
  803                                 VM_PAGE_FREE(m);
  804                                 goto block_and_backoff;
  805                         }
  806 
  807                         if (object->internal) {
  808                                 /*
  809                                  *      Requests to the default pager
  810                                  *      must reserve a real page in advance,
   811                  *      because the pager's data-provided
   812                  *      path cannot block waiting for pages.
  813                                  */
  814 
  815                                 if (m->fictitious && !vm_page_convert(m)) {
  816                                         VM_PAGE_FREE(m);
  817                                         vm_fault_cleanup(object, first_m);
  818                                         return(VM_FAULT_MEMORY_SHORTAGE);
  819                                 }
  820                         } else if (object->absent_count >
  821                                                 vm_object_absent_max) {
  822                                 /*
  823                                  *      If there are too many outstanding page
  824                                  *      requests pending on this object, we
  825                                  *      wait for them to be resolved now.
  826                                  */
  827 
  828                                 vm_object_absent_assert_wait(object, interruptible);
  829                                 VM_PAGE_FREE(m);
  830                                 goto block_and_backoff;
  831                         }
  832 
  833                         /*
  834                          *      Indicate that the page is waiting for data
  835                          *      from the memory manager.
  836                          */
  837 
  838                         m->absent = TRUE;
  839                         object->absent_count++;
  840 
  841                         /*
  842                          *      We have a busy page, so we can
  843                          *      release the object lock.
  844                          */
  845                         vm_object_unlock(object);
  846 
  847                         /*
  848                          *      Call the memory manager to retrieve the data.
  849                          */
  850 
  851                         vm_stat.pageins++;
  852                         vm_stat_sample(SAMPLED_PC_VM_PAGEIN_FAULTS);
  853 
  854                         if ((rc = memory_object_data_request(object->pager, 
  855                                 object->pager_request,
  856                                 m->offset + object->paging_offset, 
  857                                 PAGE_SIZE, access_required)) != KERN_SUCCESS) {
  858                                 if (rc != MACH_SEND_INTERRUPTED)
  859                                         printf("%s(0x%x, 0x%x, 0x%x, 0x%x, 0x%x) failed, %d\n",
  860                                                 "memory_object_data_request",
  861                                                 object->pager,
  862                                                 object->pager_request,
  863                                                 m->offset + object->paging_offset, 
  864                                                 PAGE_SIZE, access_required, rc);
  865                                 /*
  866                                  *      Don't want to leave a busy page around,
  867                                  *      but the data request may have blocked,
  868                                  *      so check if it's still there and busy.
  869                                  */
  870                                 vm_object_lock(object);
  871                                 if (m == vm_page_lookup(object,offset) &&
  872                                     m->absent && m->busy)
  873                                         VM_PAGE_FREE(m);
  874                                 vm_fault_cleanup(object, first_m);
  875                                 return((rc == MACH_SEND_INTERRUPTED) ?
  876                                         VM_FAULT_INTERRUPTED :
  877                                         VM_FAULT_MEMORY_ERROR);
  878                         }
  879                         
  880                         /*
  881                          * Retry with same object/offset, since new data may
  882                          * be in a different page (i.e., m is meaningless at
  883                          * this point).
  884                          */
  885                         vm_object_lock(object);
  886                         continue;
  887                 }
  888 
  889                 /*
   890                  * For the XP system, the only case in which we get here is if the
   891                  * object has no pager (or we are unwiring).  If the pager doesn't
   892                  * have the page, that is handled in the m->absent case above
  893                  * (and if you change things here you should look above).
  894                  */
  895                 if (object == first_object)
  896                         first_m = m;
  897                 else
  898                 {
  899                         assert(m == VM_PAGE_NULL);
  900                 }
  901 
  902                 /*
  903                  *      Move on to the next object.  Lock the next
  904                  *      object before unlocking the current one.
  905                  */
  906                 access_required = VM_PROT_READ;
  907 
  908                 offset += object->shadow_offset;
  909                 next_object = object->shadow;
  910                 if (next_object == VM_OBJECT_NULL) {
  911                         assert(!must_be_resident);
  912 
  913                         /*
  914                          *      If there's no object left, fill the page
  915                          *      in the top object with zeros.  But first we
  916                          *      need to allocate a real page.
  917                          */
  918 
  919                         if (object != first_object) {
  920                                 vm_object_paging_end(object);
  921                                 vm_object_unlock(object);
  922 
  923                                 object = first_object;
  924                                 offset = first_offset;
  925                                 vm_object_lock(object);
  926                         }
  927 
  928                         m = first_m;
  929                         assert(m->object == object);
  930                         first_m = VM_PAGE_NULL;
  931 
  932                         if (m->fictitious && !vm_page_convert(m)) {
  933                                 VM_PAGE_FREE(m);
  934                                 vm_fault_cleanup(object, VM_PAGE_NULL);
  935                                 return(VM_FAULT_MEMORY_SHORTAGE);
  936                         }
  937 
  938                         vm_object_unlock(object);
  939                         vm_page_zero_fill(m);
  940                         vm_stat_sample(SAMPLED_PC_VM_ZFILL_FAULTS);
  941                         vm_stat.zero_fill_count++;
  942                         vm_object_lock(object);
  943                         pmap_clear_modify(m->phys_addr);
  944                         break;
  945                 }
  946                 else {
  947                         vm_object_lock(next_object);
  948                         if ((object != first_object) || must_be_resident)
  949                                 vm_object_paging_end(object);
  950                         vm_object_unlock(object);
  951                         object = next_object;
  952                         vm_object_paging_begin(object);
  953                 }
  954         }
  955 
  956         /*
  957          *      PAGE HAS BEEN FOUND.
  958          *
  959          *      This page (m) is:
  960          *              busy, so that we can play with it;
  961          *              not absent, so that nobody else will fill it;
  962          *              possibly eligible for pageout;
  963          *
  964          *      The top-level page (first_m) is:
  965          *              VM_PAGE_NULL if the page was found in the
  966          *               top-level object;
  967          *              busy, not absent, and ineligible for pageout.
  968          *
  969          *      The current object (object) is locked.  A paging
  970          *      reference is held for the current and top-level
  971          *      objects.
  972          */
  973 
  974 #if     EXTRA_ASSERTIONS
  975         assert(m->busy && !m->absent);
  976         assert((first_m == VM_PAGE_NULL) ||
  977                 (first_m->busy && !first_m->absent &&
  978                  !first_m->active && !first_m->inactive));
   979 #endif  /* EXTRA_ASSERTIONS */
  980 
  981         /*
  982          *      If the page is being written, but isn't
  983          *      already owned by the top-level object,
  984          *      we have to copy it into a new page owned
  985          *      by the top-level object.
  986          */
  987 
  988         if (object != first_object) {
  989                 /*
  990                  *      We only really need to copy if we
  991                  *      want to write it.
  992                  */
  993 
  994                 if (fault_type & VM_PROT_WRITE) {
  995                         vm_page_t copy_m;
  996 
  997                         assert(!must_be_resident);
  998 
  999                         /*
 1000                          *      If we try to collapse first_object at this
 1001                          *      point, we may deadlock when we try to get
 1002                          *      the lock on an intermediate object (since we
 1003                          *      have the bottom object locked).  We can't
 1004                          *      unlock the bottom object, because the page
 1005                          *      we found may move (by collapse) if we do.
 1006                          *
 1007                          *      Instead, we first copy the page.  Then, when
 1008                          *      we have no more use for the bottom object,
 1009                          *      we unlock it and try to collapse.
 1010                          *
 1011                          *      Note that we copy the page even if we didn't
 1012                          *      need to... that's the breaks.
 1013                          */
 1014 
 1015                         /*
 1016                          *      Allocate a page for the copy
 1017                          */
 1018                         copy_m = vm_page_grab();
 1019                         if (copy_m == VM_PAGE_NULL) {
 1020                                 RELEASE_PAGE(m);
 1021                                 vm_fault_cleanup(object, first_m);
 1022                                 return(VM_FAULT_MEMORY_SHORTAGE);
 1023                         }
 1024 
 1025                         vm_object_unlock(object);
 1026                         vm_page_copy(m, copy_m);
 1027                         vm_object_lock(object);
 1028 
 1029                         /*
 1030                          *      If another map is truly sharing this
 1031                          *      page with us, we have to flush all
 1032                          *      uses of the original page, since we
 1033                          *      can't distinguish those which want the
 1034                          *      original from those which need the
 1035                          *      new copy.
 1036                          *
 1037                          *      XXXO If we know that only one map has
 1038                          *      access to this page, then we could
 1039                          *      avoid the pmap_page_protect() call.
 1040                          */
 1041 
 1042                         vm_page_lock_queues();
 1043                         vm_page_deactivate(m);
 1044                         pmap_page_protect(m->phys_addr, VM_PROT_NONE);
 1045                         vm_page_unlock_queues();
 1046 
 1047                         /*
 1048                          *      We no longer need the old page or object.
 1049                          */
 1050 
 1051                         PAGE_WAKEUP_DONE(m);
 1052                         vm_object_paging_end(object);
 1053                         vm_object_unlock(object);
 1054 
 1055                         vm_stat.cow_faults++;
 1056                         vm_stat_sample(SAMPLED_PC_VM_COW_FAULTS);
 1057                         object = first_object;
 1058                         offset = first_offset;
 1059 
 1060                         vm_object_lock(object);
 1061                         VM_PAGE_FREE(first_m);
 1062                         first_m = VM_PAGE_NULL;
 1063                         assert(copy_m->busy);
 1064                         vm_page_lock_queues();
 1065                         vm_page_insert(copy_m, object, offset);
 1066                         vm_page_unlock_queues();
 1067                         m = copy_m;
 1068 
 1069                         /*
 1070                          *      Now that we've gotten the copy out of the
 1071                          *      way, let's try to collapse the top object.
 1072                          *      But we have to play ugly games with
 1073                          *      paging_in_progress to do that...
 1074                          */
 1075 
 1076                         vm_object_paging_end(object);
 1077                         vm_object_collapse(object);
 1078                         vm_object_paging_begin(object);
 1079                 }
 1080                 else {
 1081                         *protection &= (~VM_PROT_WRITE);
 1082                 }
 1083         }
 1084 
 1085         /*
 1086          *      Now check whether the page needs to be pushed into the
 1087          *      copy object.  The use of asymmetric copy on write for
 1088          *      shared temporary objects means that we may do two copies to
 1089          *      satisfy the fault; one above to get the page from a
 1090          *      shadowed object, and one here to push it into the copy.
 1091          */
 1092 
 1093         while ((copy_object = first_object->copy) != VM_OBJECT_NULL) {
 1094                 vm_offset_t     copy_offset;
 1095                 vm_page_t       copy_m;
 1096 
 1097                 /*
 1098                  *      If the page is being written, but hasn't been
 1099                  *      copied to the copy-object, we have to copy it there.
 1100                  */
 1101 
 1102                 if ((fault_type & VM_PROT_WRITE) == 0) {
 1103                         *protection &= ~VM_PROT_WRITE;
 1104                         break;
 1105                 }
 1106 
 1107                 /*
 1108                  *      If the page was guaranteed to be resident,
 1109                  *      we must have already performed the copy.
 1110                  */
 1111 
 1112                 if (must_be_resident)
 1113                         break;
 1114 
 1115                 /*
 1116                  *      Try to get the lock on the copy_object.
 1117                  */
 1118                 if (!vm_object_lock_try(copy_object)) {
 1119                         vm_object_unlock(object);
 1120 
 1121                         simple_lock_pause();    /* wait a bit */
 1122 
 1123                         vm_object_lock(object);
 1124                         continue;
 1125                 }
 1126 
 1127                 /*
 1128                  *      Make another reference to the copy-object,
 1129                  *      to keep it from disappearing during the
 1130                  *      copy.
 1131                  */
 1132                 assert(copy_object->ref_count > 0);
 1133                 copy_object->ref_count++;
 1134 
 1135                 /*
 1136                  *      Does the page exist in the copy?
 1137                  */
 1138                 copy_offset = first_offset - copy_object->shadow_offset;
 1139                 copy_m = vm_page_lookup(copy_object, copy_offset);
 1140                 if (copy_m != VM_PAGE_NULL) {
 1141                         if (copy_m->busy) {
 1142                                 /*
 1143                                  *      If the page is being brought
 1144                                  *      in, wait for it and then retry.
 1145                                  */
 1146                                 PAGE_ASSERT_WAIT(copy_m, interruptible);
 1147                                 RELEASE_PAGE(m);
 1148                                 copy_object->ref_count--;
 1149                                 assert(copy_object->ref_count > 0);
 1150                                 vm_object_unlock(copy_object);
 1151                                 goto block_and_backoff;
 1152                         }
 1153                 }
 1154                 else {
 1155                         /*
 1156                          *      Allocate a page for the copy
 1157                          */
 1158                         copy_m = vm_page_alloc(copy_object, copy_offset);
 1159                         if (copy_m == VM_PAGE_NULL) {
 1160                                 RELEASE_PAGE(m);
 1161                                 copy_object->ref_count--;
 1162                                 assert(copy_object->ref_count > 0);
 1163                                 vm_object_unlock(copy_object);
 1164                                 vm_fault_cleanup(object, first_m);
 1165                                 return(VM_FAULT_MEMORY_SHORTAGE);
 1166                         }
 1167 
 1168                         /*
 1169                          *      Must copy page into copy-object.
 1170                          */
 1171 
 1172                         vm_page_copy(m, copy_m);
 1173                         
 1174                         /*
 1175                          *      If the old page was in use by any users
 1176                          *      of the copy-object, it must be removed
 1177                          *      from all pmaps.  (We can't know which
 1178                          *      pmaps use it.)
 1179                          */
 1180 
 1181                         vm_page_lock_queues();
 1182                         pmap_page_protect(m->phys_addr, VM_PROT_NONE);
 1183                         copy_m->dirty = TRUE;
 1184                         vm_page_unlock_queues();
 1185 
 1186                         /*
 1187                          *      If the copy-object already has a pager, then
 1188                          *      immediately page this page out to it, using the
 1189                          *      "initialize" option.  Else, just keep the copy resident.
 1190                          */
 1191 
 1192                         if (!copy_object->pager_created) {
 1193                                 vm_page_lock_queues();
 1194                                 vm_page_activate(copy_m);
 1195                                 vm_page_unlock_queues();
 1196                                 PAGE_WAKEUP_DONE(copy_m);
 1197                         } else {
 1198                                 /*
 1199                                  *      The page is already ready for pageout:
 1200                                  *      not on pageout queues and busy.
 1201                                  *      Unlock everything except the
 1202                                  *      copy_object itself.
 1203                                  */
 1204 
 1205                                 vm_object_unlock(object);
 1206 
 1207                                 /*
 1208                                  *      Write the page to the copy-object,
 1209                                  *      flushing it from the kernel.
 1210                                  */
 1211 
 1212                                 vm_pageout_page(copy_m, TRUE, TRUE);
 1213 
 1214                                 /*
 1215                                  *      Since the pageout may have
 1216                                  *      temporarily dropped the
 1217                                  *      copy_object's lock, we
 1218                                  *      check whether we'll have
 1219                                  *      to deallocate the hard way.
 1220                                  */
 1221 
 1222                                 if ((copy_object->shadow != object) ||
 1223                                     (copy_object->ref_count == 1)) {
 1224                                         vm_object_unlock(copy_object);
 1225                                         vm_object_deallocate(copy_object);
 1226                                         vm_object_lock(object);
 1227                                         continue;
 1228                                 }
 1229 
 1230                                 /*
 1231                                  *      Pick back up the old object's
 1232                                  *      lock.  [It is safe to do so,
 1233                                  *      since it must be deeper in the
 1234                                  *      object tree.]
 1235                                  */
 1236 
 1237                                 vm_object_lock(object);
 1238                         }
 1239 
 1240                         /*
 1241                          *      Because we're pushing a page upward
 1242                          *      in the object tree, we must restart
 1243                          *      any faults that are waiting here.
 1244                          *      [Note that this is an expansion of
 1245                          *      PAGE_WAKEUP that uses the THREAD_RESTART
 1246                          *      wait result].  Can't turn off the page's
 1247                          *      busy bit because we're not done with it.
 1248                          */
 1249                          
 1250                         if (m->wanted) {
 1251                                 m->wanted = FALSE;
 1252                                 thread_wakeup_with_result((event_t) m,
 1253                                         THREAD_RESTART);
 1254                         }
 1255                 }
 1256 
 1257                 /*
 1258                  *      The reference count on copy_object must be
 1259                  *      at least 2: one for our extra reference,
 1260                  *      and at least one from the outside world
 1261                  *      (we checked that when we last locked
 1262                  *      copy_object).
 1263                  */
 1264                 copy_object->ref_count--;
 1265                 assert(copy_object->ref_count > 0);
 1266                 vm_object_unlock(copy_object);
 1267 
 1268                 break;
 1269         }
 1270 
 1271         *result_page = m;
 1272         *top_page = first_m;
 1273 
 1274         /*
 1275          *      If the page can be written, assume that it will be.
 1276          *      [Earlier, we restricted the permission to allow write
 1277          *      access only if the fault so required, so we don't
 1278          *      mark read-only data as dirty.]
 1279          */
 1280 
 1281         if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
 1282                 m->dirty = TRUE;
 1283 
 1284         return(VM_FAULT_SUCCESS);
 1285 
 1286     block_and_backoff:
 1287         vm_fault_cleanup(object, first_m);
 1288 
 1289         if (continuation != (void (*)()) 0) {
 1290                 register vm_fault_state_t *state =
 1291                         (vm_fault_state_t *) current_thread()->ith_other;
 1292 
 1293                 /*
 1294                  *      Save variables in case we must restart.
 1295                  */
 1296 
 1297                 state->vmfp_backoff = TRUE;
 1298                 state->vmf_prot = *protection;
 1299 
 1300                 counter(c_vm_fault_page_block_backoff_user++);
 1301                 thread_block(continuation);
 1302         } else {
 1303                 counter(c_vm_fault_page_block_backoff_kernel++);
 1304                 thread_block((void (*)()) 0);
 1305         }
 1306     after_block_and_backoff:
 1307         if (current_thread()->wait_result == THREAD_AWAKENED)
 1308                 return VM_FAULT_RETRY;
 1309         else
 1310                 return VM_FAULT_INTERRUPTED;
 1311 
 1312 #undef  RELEASE_PAGE
 1313 }
 1314 
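#ifdef  notdef
/*
 *      Illustrative sketch only (not part of the original source):
 *      the calling protocol for vm_fault_page(), distilled from its
 *      uses later in this file (vm_fault, vm_fault_unwire,
 *      vm_fault_copy).  The caller is assumed to hold a reference to
 *      the object; it locks the object and starts a paging operation
 *      before each call.  On VM_FAULT_SUCCESS the result page comes
 *      back busy with its object locked, and must eventually be
 *      released with PAGE_WAKEUP_DONE() and vm_fault_cleanup().
 *      The helper name below is hypothetical.
 */
vm_page_t example_get_page(object, offset)
        vm_object_t     object;
        vm_offset_t     offset;
{
        vm_prot_t               prot;
        vm_page_t               result_page;
        vm_page_t               top_page;
        vm_fault_return_t       result;

        do {
                prot = VM_PROT_READ;

                vm_object_lock(object);
                vm_object_paging_begin(object);
                result = vm_fault_page(object, offset, VM_PROT_READ,
                                FALSE,                  /* need not be resident */
                                FALSE,                  /* not interruptible */
                                &prot, &result_page, &top_page,
                                FALSE, (void (*)()) 0);

                /*
                 *      On anything but success, vm_fault_page has
                 *      already cleaned up; recover resources and retry.
                 */
                if (result == VM_FAULT_MEMORY_SHORTAGE)
                        VM_PAGE_WAIT((void (*)()) 0);
                else if (result == VM_FAULT_FICTITIOUS_SHORTAGE)
                        vm_page_more_fictitious();
                else if (result == VM_FAULT_MEMORY_ERROR)
                        return(VM_PAGE_NULL);           /* permanent error */
        } while (result != VM_FAULT_SUCCESS);

        /*
         *      result_page is busy and its object is locked.  When
         *      the caller is finished with the page:
         *
         *              PAGE_WAKEUP_DONE(result_page);
         *              vm_fault_cleanup(result_page->object, top_page);
         */
        return(result_page);
}
#endif  /* notdef */
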
 1315 /*
 1316  *      Routine:        vm_fault
 1317  *      Purpose:
 1318  *              Handle page faults, including pseudo-faults
 1319  *              used to change the wiring status of pages.
 1320  *      Returns:
 1321  *              If an explicit (expression) continuation is supplied,
 1322  *              then we call the continuation instead of returning.
 1323  *      Implementation:
 1324  *              Explicit continuations make this a little icky,
 1325  *              because it hasn't been rewritten to embrace CPS.
 1326  *              Instead, we have resume arguments for vm_fault and
 1327  *              vm_fault_page, to let us continue the fault computation.
 1328  *
 1329  *              vm_fault and vm_fault_page save mucho state
 1330  *              in the moral equivalent of a closure.  The state
 1331  *              structure is allocated when first entering vm_fault
 1332  *              and deallocated when leaving vm_fault.
 1333  */
 1334 
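#ifdef  notdef
/*
 *      Illustrative sketch only: the approximate shape of the saved
 *      fault state referred to above, inferred from the vmf_ and
 *      vmfp_ fields used in this file.  The authoritative definition
 *      of vm_fault_state_t lives in the VM fault header and may
 *      contain additional vm_fault_page-private fields that are not
 *      shown here.
 */
typedef struct example_vm_fault_state {
        /* vm_fault() arguments, saved in case of a restart */
        vm_map_t                vmf_map;
        vm_offset_t             vmf_vaddr;
        vm_prot_t               vmf_fault_type;
        boolean_t               vmf_change_wiring;
        void                    (*vmf_continuation)();

        /* results of vm_map_lookup(), saved for vm_fault_page() */
        vm_map_version_t        vmf_version;
        boolean_t               vmf_wired;
        vm_object_t             vmf_object;
        vm_offset_t             vmf_offset;
        vm_prot_t               vmf_prot;

        /* set when vm_fault_page() blocked and must back off */
        boolean_t               vmfp_backoff;
} example_vm_fault_state_t;
#endif  /* notdef */
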
 1335 void
 1336 vm_fault_continue()
 1337 {
 1338         register vm_fault_state_t *state =
 1339                 (vm_fault_state_t *) current_thread()->ith_other;
 1340 
 1341         (void) vm_fault(state->vmf_map,
 1342                         state->vmf_vaddr,
 1343                         state->vmf_fault_type,
 1344                         state->vmf_change_wiring,
 1345                         TRUE, state->vmf_continuation);
 1346         /*NOTREACHED*/
 1347 }
 1348 
 1349 kern_return_t vm_fault(map, vaddr, fault_type, change_wiring,
 1350                        resume, continuation)
 1351         vm_map_t        map;
 1352         vm_offset_t     vaddr;
 1353         vm_prot_t       fault_type;
 1354         boolean_t       change_wiring;
 1355         boolean_t       resume;
 1356         void            (*continuation)();
 1357 {
 1358         vm_map_version_t        version;        /* Map version for verification */
 1359         boolean_t               wired;          /* Should mapping be wired down? */
 1360         vm_object_t             object;         /* Top-level object */
 1361         vm_offset_t             offset;         /* Top-level offset */
 1362         vm_prot_t               prot;           /* Protection for mapping */
 1363         vm_object_t             old_copy_object; /* Saved copy object */
 1364         vm_page_t               result_page;    /* Result of vm_fault_page */
 1365         vm_page_t               top_page;       /* Placeholder page */
 1366         kern_return_t           kr;
 1367 
 1368         register
 1369         vm_page_t               m;      /* Fast access to result_page */
 1370 
 1371         if (resume) {
 1372                 register vm_fault_state_t *state =
 1373                         (vm_fault_state_t *) current_thread()->ith_other;
 1374 
 1375                 /*
 1376                  *      Retrieve cached variables and
 1377                  *      continue vm_fault_page.
 1378                  */
 1379 
 1380                 object = state->vmf_object;
 1381                 if (object == VM_OBJECT_NULL)
 1382                         goto RetryFault;
 1383                 version = state->vmf_version;
 1384                 wired = state->vmf_wired;
 1385                 offset = state->vmf_offset;
 1386                 prot = state->vmf_prot;
 1387 
 1388                 kr = vm_fault_page(object, offset, fault_type,
 1389                                 (change_wiring && !wired), !change_wiring,
 1390                                 &prot, &result_page, &top_page,
 1391                                 TRUE, vm_fault_continue);
 1392                 goto after_vm_fault_page;
 1393         }
 1394 
 1395         if (continuation != (void (*)()) 0) {
 1396                 /*
 1397                  *      We will probably need to save state.
 1398                  */
 1399 
 1400                 char *  state;
 1401 
 1402                 /*
 1403                  * If this assignment statement were written as
 1404                  * 'active_threads[cpu_number()] = zalloc()',
 1405                  * cpu_number could be evaluated before zalloc;
 1406                  * if zalloc blocked, cpu_number would be wrong.
 1407                  */
 1408 
 1409                 state = (char *) zalloc(vm_fault_state_zone);
 1410                 current_thread()->ith_other = state;
 1411 
 1412         }
 1413 
 1414     RetryFault: ;
 1415 
 1416         /*
 1417          *      Find the backing store object and offset into
 1418          *      it to begin the search.
 1419          */
 1420 
 1421         if ((kr = vm_map_lookup(&map, vaddr, fault_type, &version,
 1422                                 &object, &offset,
 1423                                 &prot, &wired)) != KERN_SUCCESS) {
 1424                 goto done;
 1425         }
 1426 
 1427         /*
 1428          *      If the page is wired, we must fault for the current protection
 1429          *      value, to avoid further faults.
 1430          */
 1431 
 1432         if (wired)
 1433                 fault_type = prot;
 1434 
 1435         /*
 1436          *      Make a reference to this object to
 1437          *      prevent its disposal while we are messing with
 1438          *      it.  Once we have the reference, the map is free
 1439          *      to be diddled.  Since objects reference their
 1440          *      shadows (and copies), they will stay around as well.
 1441          */
 1442 
 1443         assert(object->ref_count > 0);
 1444         object->ref_count++;
 1445         vm_object_paging_begin(object);
 1446 
 1447         if (continuation != (void (*)()) 0) {
 1448                 register vm_fault_state_t *state =
 1449                         (vm_fault_state_t *) current_thread()->ith_other;
 1450 
 1451                 /*
 1452                  *      Save variables, in case vm_fault_page discards
 1453                  *      our kernel stack and we have to restart.
 1454                  */
 1455 
 1456                 state->vmf_map = map;
 1457                 state->vmf_vaddr = vaddr;
 1458                 state->vmf_fault_type = fault_type;
 1459                 state->vmf_change_wiring = change_wiring;
 1460                 state->vmf_continuation = continuation;
 1461 
 1462                 state->vmf_version = version;
 1463                 state->vmf_wired = wired;
 1464                 state->vmf_object = object;
 1465                 state->vmf_offset = offset;
 1466                 state->vmf_prot = prot;
 1467 
 1468                 kr = vm_fault_page(object, offset, fault_type,
 1469                                    (change_wiring && !wired), !change_wiring,
 1470                                    &prot, &result_page, &top_page,
 1471                                    FALSE, vm_fault_continue);
 1472         } else {
 1473                 kr = vm_fault_page(object, offset, fault_type,
 1474                                    (change_wiring && !wired), !change_wiring,
 1475                                    &prot, &result_page, &top_page,
 1476                                    FALSE, (void (*)()) 0);
 1477         }
 1478     after_vm_fault_page:
 1479 
 1480         /*
 1481          *      If we didn't succeed, lose the object reference immediately.
 1482          */
 1483 
 1484         if (kr != VM_FAULT_SUCCESS)
 1485                 vm_object_deallocate(object);
 1486 
 1487         /*
 1488          *      See why we failed, and take corrective action.
 1489          */
 1490 
 1491         switch (kr) {
 1492                 case VM_FAULT_SUCCESS:
 1493                         break;
 1494                 case VM_FAULT_RETRY:
 1495                         goto RetryFault;
 1496                 case VM_FAULT_INTERRUPTED:
 1497                         kr = KERN_SUCCESS;
 1498                         goto done;
 1499                 case VM_FAULT_MEMORY_SHORTAGE:
 1500                         if (continuation != (void (*)()) 0) {
 1501                                 register vm_fault_state_t *state =
 1502                                         (vm_fault_state_t *) current_thread()->ith_other;
 1503 
 1504                                 /*
 1505                                  *      Save variables in case VM_PAGE_WAIT
 1506                                  *      discards our kernel stack.
 1507                                  */
 1508 
 1509                                 state->vmf_map = map;
 1510                                 state->vmf_vaddr = vaddr;
 1511                                 state->vmf_fault_type = fault_type;
 1512                                 state->vmf_change_wiring = change_wiring;
 1513                                 state->vmf_continuation = continuation;
 1514                                 state->vmf_object = VM_OBJECT_NULL;
 1515 
 1516                                 VM_PAGE_WAIT(vm_fault_continue);
 1517                         } else
 1518                                 VM_PAGE_WAIT((void (*)()) 0);
 1519                         goto RetryFault;
 1520                 case VM_FAULT_FICTITIOUS_SHORTAGE:
 1521                         vm_page_more_fictitious();
 1522                         goto RetryFault;
 1523                 case VM_FAULT_MEMORY_ERROR:
 1524                         kr = KERN_MEMORY_ERROR;
 1525                         goto done;
 1526         }
 1527 
 1528         m = result_page;
 1529 
 1530         assert((change_wiring && !wired) ?
 1531                (top_page == VM_PAGE_NULL) :
 1532                ((top_page == VM_PAGE_NULL) == (m->object == object)));
 1533 
 1534         /*
 1535          *      How to clean up the result of vm_fault_page.  This
 1536          *      happens whether the mapping is entered or not.
 1537          */
 1538 
 1539 #define UNLOCK_AND_DEALLOCATE                           \
 1540         MACRO_BEGIN                                     \
 1541         vm_fault_cleanup(m->object, top_page);          \
 1542         vm_object_deallocate(object);                   \
 1543         MACRO_END
 1544 
 1545         /*
 1546          *      What to do with the resulting page from vm_fault_page
 1547          *      if it doesn't get entered into the physical map:
 1548          */
 1549 
 1550 #define RELEASE_PAGE(m)                                 \
 1551         MACRO_BEGIN                                     \
 1552         PAGE_WAKEUP_DONE(m);                            \
 1553         vm_page_lock_queues();                          \
 1554         if (!m->active && !m->inactive)                 \
 1555                 vm_page_activate(m);                    \
 1556         vm_page_unlock_queues();                        \
 1557         MACRO_END
 1558 
 1559         /*
 1560          *      We must verify that the maps have not changed
 1561          *      since our last lookup.
 1562          */
 1563 
 1564         old_copy_object = m->object->copy;
 1565 
 1566         vm_object_unlock(m->object);
 1567         while (!vm_map_verify(map, &version)) {
 1568                 vm_object_t     retry_object;
 1569                 vm_offset_t     retry_offset;
 1570                 vm_prot_t       retry_prot;
 1571 
 1572                 /*
 1573                  *      To avoid trying to write_lock the map while another
 1574                  *      thread has it read_locked (in vm_map_pageable), we
 1575                  *      do not try for write permission.  If the page is
 1576                  *      still writable, we will get write permission.  If it
 1577                  *      is not, or has been marked needs_copy, we enter the
 1578                  *      mapping without write permission, and will merely
 1579                  *      take another fault.
 1580                  */
 1581                 kr = vm_map_lookup(&map, vaddr,
 1582                                    fault_type & ~VM_PROT_WRITE, &version,
 1583                                    &retry_object, &retry_offset, &retry_prot,
 1584                                    &wired);
 1585 
 1586                 if (kr != KERN_SUCCESS) {
 1587                         vm_object_lock(m->object);
 1588                         RELEASE_PAGE(m);
 1589                         UNLOCK_AND_DEALLOCATE;
 1590                         goto done;
 1591                 }
 1592 
 1593                 vm_object_unlock(retry_object);
 1594                 vm_object_lock(m->object);
 1595 
 1596                 if ((retry_object != object) ||
 1597                     (retry_offset != offset)) {
 1598                         RELEASE_PAGE(m);
 1599                         UNLOCK_AND_DEALLOCATE;
 1600                         goto RetryFault;
 1601                 }
 1602 
 1603                 /*
 1604                  *      Check whether the protection has changed or the object
 1605                  *      has been copied while we left the map unlocked.
 1606                  */
 1607                 prot &= retry_prot;
 1608                 vm_object_unlock(m->object);
 1609         }
 1610         vm_object_lock(m->object);
 1611 
 1612         /*
 1613          *      If the copy object changed while the top-level object
 1614          *      was unlocked, then we must take away write permission.
 1615          */
 1616 
 1617         if (m->object->copy != old_copy_object)
 1618                 prot &= ~VM_PROT_WRITE;
 1619 
 1620         /*
 1621          *      If we want to wire down this page, but no longer have
 1622          *      adequate permissions, we must start all over.
 1623          */
 1624 
 1625         if (wired && (prot != fault_type)) {
 1626                 vm_map_verify_done(map, &version);
 1627                 RELEASE_PAGE(m);
 1628                 UNLOCK_AND_DEALLOCATE;
 1629                 goto RetryFault;
 1630         }
 1631 
 1632         /*
 1633          *      It's critically important that a wired-down page be faulted
 1634          *      only once in each map for which it is wired.
 1635          */
 1636 
 1637         vm_object_unlock(m->object);
 1638 
 1639         /*
 1640          *      Put this page into the physical map.
 1641          *      We had to do the unlock above because pmap_enter
 1642          *      may cause other faults.  The page may be on
 1643          *      the pageout queues.  If the pageout daemon comes
 1644          *      across the page, it will remove it from the queues.
 1645          */
 1646 
 1647         PMAP_ENTER(map->pmap, vaddr, m, prot, wired);
 1648 
 1649         /*
 1650          *      If the page is not wired down and isn't already
 1651          *      on a pageout queue, then put it where the
 1652          *      pageout daemon can find it.
 1653          */
 1654         vm_object_lock(m->object);
 1655         vm_page_lock_queues();
 1656         if (change_wiring) {
 1657                 if (wired)
 1658                         vm_page_wire(m);
 1659                 else
 1660                         vm_page_unwire(m);
 1661         } else if (software_reference_bits) {
 1662                 if (!m->active && !m->inactive)
 1663                         vm_page_activate(m);
 1664                 m->reference = TRUE;
 1665         } else {
 1666                 vm_page_activate(m);
 1667         }
 1668         vm_page_unlock_queues();
 1669 
 1670         /*
 1671          *      Unlock everything, and return
 1672          */
 1673 
 1674         vm_map_verify_done(map, &version);
 1675         PAGE_WAKEUP_DONE(m);
 1676         kr = KERN_SUCCESS;
 1677         UNLOCK_AND_DEALLOCATE;
 1678 
 1679 #undef  UNLOCK_AND_DEALLOCATE
 1680 #undef  RELEASE_PAGE
 1681 
 1682     done:
 1683         if (continuation != (void (*)()) 0) {
 1684                 register vm_fault_state_t *state =
 1685                         (vm_fault_state_t *) current_thread()->ith_other;
 1686 
 1687                 zfree(vm_fault_state_zone, (vm_offset_t) state);
 1688                 (*continuation)(kr);
 1689                 /*NOTREACHED*/
 1690         }
 1691 
 1692         return(kr);
 1693 }
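
#ifdef  notdef
/*
 *      Illustrative sketch only: roughly how a machine-dependent
 *      trap handler resolves an ordinary (non-wiring) user page
 *      fault with vm_fault().  The function name and the way the
 *      faulting map and access type are obtained are hypothetical;
 *      no continuation is supplied, so vm_fault() simply returns a
 *      kern_return_t to the trap code.
 */
kern_return_t example_user_page_fault(map, fault_addr, is_write)
        vm_map_t        map;
        vm_offset_t     fault_addr;
        boolean_t       is_write;
{
        vm_prot_t       fault_type;

        fault_type = is_write ? (VM_PROT_READ|VM_PROT_WRITE)
                              : VM_PROT_READ;

        return(vm_fault(map, trunc_page(fault_addr), fault_type,
                        FALSE,                  /* not a wiring change */
                        FALSE,                  /* not a resume */
                        (void (*)()) 0));       /* no continuation */
}
#endif  /* notdef */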
 1694 
 1695 kern_return_t   vm_fault_wire_fast();
 1696 
 1697 /*
 1698  *      vm_fault_wire:
 1699  *
 1700  *      Wire down a range of virtual addresses in a map.
 1701  */
 1702 void vm_fault_wire(map, entry)
 1703         vm_map_t        map;
 1704         vm_map_entry_t  entry;
 1705 {
 1706 
 1707         register vm_offset_t    va;
 1708         register pmap_t         pmap;
 1709         register vm_offset_t    end_addr = entry->vme_end;
 1710 
 1711         pmap = vm_map_pmap(map);
 1712 
 1713         /*
 1714          *      Inform the physical mapping system that the
 1715          *      range of addresses may not fault, so that
 1716          *      page tables and such can be locked down as well.
 1717          */
 1718 
 1719         pmap_pageable(pmap, entry->vme_start, end_addr, FALSE);
 1720 
 1721         /*
 1722          *      We simulate a fault to get the page and enter it
 1723          *      in the physical map.
 1724          */
 1725 
 1726         for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
 1727                 if (vm_fault_wire_fast(map, va, entry) != KERN_SUCCESS)
 1728                         (void) vm_fault(map, va, VM_PROT_NONE, TRUE,
 1729                                         FALSE, (void (*)()) 0);
 1730         }
 1731 }
 1732 
 1733 /*
 1734  *      vm_fault_unwire:
 1735  *
 1736  *      Unwire a range of virtual addresses in a map.
 1737  */
 1738 void vm_fault_unwire(map, entry)
 1739         vm_map_t        map;
 1740         vm_map_entry_t  entry;
 1741 {
 1742         register vm_offset_t    va;
 1743         register pmap_t         pmap;
 1744         register vm_offset_t    end_addr = entry->vme_end;
 1745         vm_object_t             object;
 1746 
 1747         pmap = vm_map_pmap(map);
 1748 
 1749         object = (entry->is_sub_map)
 1750                         ? VM_OBJECT_NULL : entry->object.vm_object;
 1751 
 1752         /*
 1753          *      Since the pages are wired down, we must be able to
 1754          *      get their mappings from the physical map system.
 1755          */
 1756 
 1757         for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
 1758                 pmap_change_wiring(pmap, va, FALSE);
 1759 
 1760                 if (object == VM_OBJECT_NULL) {
 1761                         vm_map_lock_set_recursive(map);
 1762                         (void) vm_fault(map, va, VM_PROT_NONE, TRUE,
 1763                                         FALSE, (void (*)()) 0);
 1764                         vm_map_lock_clear_recursive(map);
 1765                 } else {
 1766                         vm_prot_t       prot;
 1767                         vm_page_t       result_page;
 1768                         vm_page_t       top_page;
 1769                         vm_fault_return_t result;
 1770 
 1771                         do {
 1772                                 prot = VM_PROT_NONE;
 1773 
 1774                                 vm_object_lock(object);
 1775                                 vm_object_paging_begin(object);
 1776                                 result = vm_fault_page(object,
 1777                                                 entry->offset +
 1778                                                   (va - entry->vme_start),
 1779                                                 VM_PROT_NONE, TRUE,
 1780                                                 FALSE, &prot,
 1781                                                 &result_page,
 1782                                                 &top_page,
 1783                                                 FALSE, (void (*)()) 0);
 1784                         } while (result == VM_FAULT_RETRY);
 1785 
 1786                         if (result != VM_FAULT_SUCCESS)
 1787                                 panic("vm_fault_unwire: failure");
 1788 
 1789                         vm_page_lock_queues();
 1790                         vm_page_unwire(result_page);
 1791                         vm_page_unlock_queues();
 1792                         PAGE_WAKEUP_DONE(result_page);
 1793 
 1794                         vm_fault_cleanup(result_page->object, top_page);
 1795                 }
 1796         }
 1797 
 1798         /*
 1799          *      Inform the physical mapping system that the range
 1800          *      of addresses may fault, so that page tables and
 1801          *      such may be unwired themselves.
 1802          */
 1803 
 1804         pmap_pageable(pmap, entry->vme_start, end_addr, TRUE);
 1805 
 1806 }
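
#ifdef  notdef
/*
 *      Illustrative sketch only: the per-entry interface that the
 *      map-level wiring code is expected to use.  Clipping the entry
 *      to the requested range and maintaining its wired count are
 *      the caller's responsibility; the fault-level work is just
 *      these calls.  The helper name is hypothetical.
 */
void example_set_entry_wiring(map, entry, wire)
        vm_map_t        map;
        vm_map_entry_t  entry;
        boolean_t       wire;
{
        if (wire)
                vm_fault_wire(map, entry);      /* fault pages in and wire them */
        else
                vm_fault_unwire(map, entry);    /* unwire; pages become pageable again */
}
#endif  /* notdef */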
 1807 
 1808 /*
 1809  *      vm_fault_wire_fast:
 1810  *
 1811  *      Handle common case of a wire down page fault at the given address.
 1812  *      If successful, the page is inserted into the associated physical map.
 1813  *      The map entry is passed in to avoid the overhead of a map lookup.
 1814  *
 1815  *      NOTE: the given address should be truncated to the
 1816  *      proper page address.
 1817  *
 1818  *      KERN_SUCCESS is returned if the page fault is handled; otherwise,
 1819  *      a standard error specifying why the fault is fatal is returned.
 1820  *
 1821  *      The map in question must be referenced, and remains so.
 1822  *      Caller has a read lock on the map.
 1823  *
 1824  *      This is a stripped version of vm_fault() for wiring pages.  Anything
 1825  *      other than the common case will return KERN_FAILURE, and the caller
 1826  *      is expected to call vm_fault().
 1827  */
 1828 kern_return_t vm_fault_wire_fast(map, va, entry)
 1829         vm_map_t        map;
 1830         vm_offset_t     va;
 1831         vm_map_entry_t  entry;
 1832 {
 1833         vm_object_t             object;
 1834         vm_offset_t             offset;
 1835         register vm_page_t      m;
 1836         vm_prot_t               prot;
 1837 
 1838         vm_stat.faults++;               /* needs lock XXX */
 1839 /*
 1840  *      Recovery actions
 1841  */
 1842 
 1843 #undef  RELEASE_PAGE
 1844 #define RELEASE_PAGE(m) {                               \
 1845         PAGE_WAKEUP_DONE(m);                            \
 1846         vm_page_lock_queues();                          \
 1847         vm_page_unwire(m);                              \
 1848         vm_page_unlock_queues();                        \
 1849 }
 1850 
 1851 
 1852 #undef  UNLOCK_THINGS
 1853 #define UNLOCK_THINGS   {                               \
 1854         object->paging_in_progress--;                   \
 1855         vm_object_unlock(object);                       \
 1856 }
 1857 
 1858 #undef  UNLOCK_AND_DEALLOCATE
 1859 #define UNLOCK_AND_DEALLOCATE   {                       \
 1860         UNLOCK_THINGS;                                  \
 1861         vm_object_deallocate(object);                   \
 1862 }
 1863 /*
 1864  *      Give up and have caller do things the hard way.
 1865  */
 1866 
 1867 #define GIVE_UP {                                       \
 1868         UNLOCK_AND_DEALLOCATE;                          \
 1869         return(KERN_FAILURE);                           \
 1870 }
 1871 
 1872 
 1873         /*
 1874          *      If this entry is not directly to a vm_object, bail out.
 1875          */
 1876         if (entry->is_sub_map)
 1877                 return(KERN_FAILURE);
 1878 
 1879         /*
 1880          *      Find the backing store object and offset into it.
 1881          */
 1882 
 1883         object = entry->object.vm_object;
 1884         offset = (va - entry->vme_start) + entry->offset;
 1885         prot = entry->protection;
 1886 
 1887         /*
 1888          *      Make a reference to this object to prevent its
 1889          *      disposal while we are messing with it.
 1890          */
 1891 
 1892         vm_object_lock(object);
 1893         assert(object->ref_count > 0);
 1894         object->ref_count++;
 1895         object->paging_in_progress++;
 1896 
 1897         /*
 1898          *      INVARIANTS (through entire routine):
 1899          *
 1900          *      1)      At all times, we must either have the object
 1901          *              lock or a busy page in some object to prevent
 1902          *              some other thread from trying to bring in
 1903          *              the same page.
 1904          *
 1905          *      2)      Once we have a busy page, we must remove it from
 1906          *              the pageout queues, so that the pageout daemon
 1907          *              will not grab it away.
 1908          *
 1909          */
 1910 
 1911         /*
 1912          *      Look for page in top-level object.  If it's not there or
 1913          *      there's something going on, give up.
 1914          */
 1915         m = vm_page_lookup(object, offset);
 1916         if ((m == VM_PAGE_NULL) || (m->error) ||
 1917             (m->busy) || (m->absent) || (prot & m->page_lock)) {
 1918                 GIVE_UP;
 1919         }
 1920 
 1921         /*
 1922          *      Wire the page down now.  All bail outs beyond this
 1923          *      point must unwire the page.  
 1924          */
 1925 
 1926         vm_page_lock_queues();
 1927         vm_page_wire(m);
 1928         vm_page_unlock_queues();
 1929 
 1930         /*
 1931          *      Mark page busy for other threads.
 1932          */
 1933         assert(!m->busy);
 1934         m->busy = TRUE;
 1935         assert(!m->absent);
 1936 
 1937         /*
 1938          *      Give up if the page is being written and there's a copy object
 1939          */
 1940         if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
 1941                 RELEASE_PAGE(m);
 1942                 GIVE_UP;
 1943         }
 1944 
 1945         /*
 1946          *      Put this page into the physical map.
 1947          *      We have to unlock the object because pmap_enter
 1948          *      may cause other faults.   
 1949          */
 1950         vm_object_unlock(object);
 1951 
 1952         PMAP_ENTER(map->pmap, va, m, prot, TRUE);
 1953 
 1954         /*
 1955          *      Must relock object so that paging_in_progress can be cleared.
 1956          */
 1957         vm_object_lock(object);
 1958 
 1959         /*
 1960          *      Unlock everything, and return
 1961          */
 1962 
 1963         PAGE_WAKEUP_DONE(m);
 1964         UNLOCK_AND_DEALLOCATE;
 1965 
 1966         return(KERN_SUCCESS);
 1967 
 1968 }
 1969 
 1970 /*
 1971  *      Routine:        vm_fault_copy_cleanup
 1972  *      Purpose:
 1973  *              Release a page used by vm_fault_copy.
 1974  */
 1975 
 1976 void    vm_fault_copy_cleanup(page, top_page)
 1977         vm_page_t       page;
 1978         vm_page_t       top_page;
 1979 {
 1980         vm_object_t     object = page->object;
 1981 
 1982         vm_object_lock(object);
 1983         PAGE_WAKEUP_DONE(page);
 1984         vm_page_lock_queues();
 1985         if (!page->active && !page->inactive)
 1986                 vm_page_activate(page);
 1987         vm_page_unlock_queues();
 1988         vm_fault_cleanup(object, top_page);
 1989 }
 1990 
 1991 /*
 1992  *      Routine:        vm_fault_copy
 1993  *
 1994  *      Purpose:
 1995  *              Copy pages from one virtual memory object to another --
 1996  *              neither the source nor destination pages need be resident.
 1997  *
 1998  *              Before actually copying a page, the version associated with
 1999  *              the destination address map will be verified.
 2000  *
 2001  *      In/out conditions:
 2002  *              The caller must hold a reference, but not a lock, to
 2003  *              each of the source and destination objects and to the
 2004  *              destination map.
 2005  *
 2006  *      Results:
 2007  *              Returns KERN_SUCCESS if no errors were encountered in
 2008  *              reading or writing the data.  Returns KERN_INTERRUPTED if
 2009  *              the operation was interrupted (only possible if the
 2010  *              "interruptible" argument is asserted).  Other return values
 2011  *              indicate a permanent error in copying the data.
 2012  *
 2013  *              The actual amount of data copied will be returned in the
 2014  *              "copy_size" argument.  In the event that the destination map
 2015  *              verification failed, this amount may be less than the amount
 2016  *              requested.
 2017  */
 2018 kern_return_t   vm_fault_copy(
 2019                         src_object,
 2020                         src_offset,
 2021                         src_size,
 2022                         dst_object,
 2023                         dst_offset,
 2024                         dst_map,
 2025                         dst_version,
 2026                         interruptible
 2027                         )
 2028         vm_object_t     src_object;
 2029         vm_offset_t     src_offset;
 2030         vm_size_t       *src_size;              /* INOUT */
 2031         vm_object_t     dst_object;
 2032         vm_offset_t     dst_offset;
 2033         vm_map_t        dst_map;
 2034         vm_map_version_t *dst_version;
 2035         boolean_t       interruptible;
 2036 {
 2037         vm_page_t               result_page;
 2038         vm_prot_t               prot;
 2039         
 2040         vm_page_t               src_page;
 2041         vm_page_t               src_top_page;
 2042 
 2043         vm_page_t               dst_page;
 2044         vm_page_t               dst_top_page;
 2045 
 2046         vm_size_t               amount_done;
 2047         vm_object_t             old_copy_object;
 2048 
 2049 #define RETURN(x)                                       \
 2050         MACRO_BEGIN                                     \
 2051         *src_size = amount_done;                        \
 2052         MACRO_RETURN(x);                                \
 2053         MACRO_END
 2054 
 2055         amount_done = 0;
 2056         do { /* while (amount_done != *src_size) */
 2057 
 2058             RetrySourceFault: ;
 2059 
 2060                 if (src_object == VM_OBJECT_NULL) {
 2061                         /*
 2062                          *      No source object.  We will just
 2063                          *      zero-fill the page in dst_object.
 2064                          */
 2065 
 2066                         src_page = VM_PAGE_NULL;
 2067                 } else {
 2068                         prot = VM_PROT_READ;
 2069 
 2070                         vm_object_lock(src_object);
 2071                         vm_object_paging_begin(src_object);
 2072 
 2073                         switch (vm_fault_page(src_object, src_offset,
 2074                                         VM_PROT_READ, FALSE, interruptible,
 2075                                         &prot, &result_page, &src_top_page,
 2076                                         FALSE, (void (*)()) 0)) {
 2077 
 2078                                 case VM_FAULT_SUCCESS:
 2079                                         break;
 2080                                 case VM_FAULT_RETRY:
 2081                                         goto RetrySourceFault;
 2082                                 case VM_FAULT_INTERRUPTED:
 2083                                         RETURN(MACH_SEND_INTERRUPTED);
 2084                                 case VM_FAULT_MEMORY_SHORTAGE:
 2085                                         VM_PAGE_WAIT((void (*)()) 0);
 2086                                         goto RetrySourceFault;
 2087                                 case VM_FAULT_FICTITIOUS_SHORTAGE:
 2088                                         vm_page_more_fictitious();
 2089                                         goto RetrySourceFault;
 2090                                 case VM_FAULT_MEMORY_ERROR:
 2091                                         return(KERN_MEMORY_ERROR);
 2092                         }
 2093 
 2094                         src_page = result_page;
 2095 
 2096                         assert((src_top_page == VM_PAGE_NULL) ==
 2097                                         (src_page->object == src_object));
 2098 
 2099                         assert ((prot & VM_PROT_READ) != VM_PROT_NONE);
 2100 
 2101                         vm_object_unlock(src_page->object);
 2102                 }
 2103 
 2104             RetryDestinationFault: ;
 2105 
 2106                 prot = VM_PROT_WRITE;
 2107 
 2108                 vm_object_lock(dst_object);
 2109                 vm_object_paging_begin(dst_object);
 2110 
 2111                 switch (vm_fault_page(dst_object, dst_offset, VM_PROT_WRITE,
 2112                                 FALSE, FALSE /* interruptible */,
 2113                                 &prot, &result_page, &dst_top_page,
 2114                                 FALSE, (void (*)()) 0)) {
 2115 
 2116                         case VM_FAULT_SUCCESS:
 2117                                 break;
 2118                         case VM_FAULT_RETRY:
 2119                                 goto RetryDestinationFault;
 2120                         case VM_FAULT_INTERRUPTED:
 2121                                 if (src_page != VM_PAGE_NULL)
 2122                                         vm_fault_copy_cleanup(src_page,
 2123                                                               src_top_page);
 2124                                 RETURN(MACH_SEND_INTERRUPTED);
 2125                         case VM_FAULT_MEMORY_SHORTAGE:
 2126                                 VM_PAGE_WAIT((void (*)()) 0);
 2127                                 goto RetryDestinationFault;
 2128                         case VM_FAULT_FICTITIOUS_SHORTAGE:
 2129                                 vm_page_more_fictitious();
 2130                                 goto RetryDestinationFault;
 2131                         case VM_FAULT_MEMORY_ERROR:
 2132                                 if (src_page != VM_PAGE_NULL)
 2133                                         vm_fault_copy_cleanup(src_page,
 2134                                                               src_top_page);
 2135                                 return(KERN_MEMORY_ERROR);
 2136                 }
 2137                 assert ((prot & VM_PROT_WRITE) != VM_PROT_NONE);
 2138 
 2139                 dst_page = result_page;
 2140 
 2141                 old_copy_object = dst_page->object->copy;
 2142 
 2143                 vm_object_unlock(dst_page->object);
 2144 
 2145                 if (!vm_map_verify(dst_map, dst_version)) {
 2146 
 2147                  BailOut: ;
 2148 
 2149                         if (src_page != VM_PAGE_NULL)
 2150                                 vm_fault_copy_cleanup(src_page, src_top_page);
 2151                         vm_fault_copy_cleanup(dst_page, dst_top_page);
 2152                         break;
 2153                 }
 2154 
 2155 
 2156                 vm_object_lock(dst_page->object);
 2157                 if (dst_page->object->copy != old_copy_object) {
 2158                         vm_object_unlock(dst_page->object);
 2159                         vm_map_verify_done(dst_map, dst_version);
 2160                         goto BailOut;
 2161                 }
 2162                 vm_object_unlock(dst_page->object);
 2163 
 2164                 /*
 2165                  *      Copy the page, and note that it is dirty
 2166                  *      immediately.
 2167                  */
 2168 
 2169                 if (src_page == VM_PAGE_NULL)
 2170                         vm_page_zero_fill(dst_page);
 2171                 else
 2172                         vm_page_copy(src_page, dst_page);
 2173                 dst_page->dirty = TRUE;
 2174 
 2175                 /*
 2176                  *      Unlock everything, and return
 2177                  */
 2178 
 2179                 vm_map_verify_done(dst_map, dst_version);
 2180 
 2181                 if (src_page != VM_PAGE_NULL)
 2182                         vm_fault_copy_cleanup(src_page, src_top_page);
 2183                 vm_fault_copy_cleanup(dst_page, dst_top_page);
 2184 
 2185                 amount_done += PAGE_SIZE;
 2186                 src_offset += PAGE_SIZE;
 2187                 dst_offset += PAGE_SIZE;
 2188 
 2189         } while (amount_done != *src_size);
 2190 
 2191         RETURN(KERN_SUCCESS);
 2192 #undef  RETURN
 2193 
 2194         /*NOTREACHED*/  
 2195 }
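
#ifdef  notdef
/*
 *      Illustrative sketch only: the expected calling pattern for
 *      vm_fault_copy().  The size argument is in/out; if less than
 *      the requested amount comes back copied, the destination map
 *      version could not be verified, and a real caller would redo
 *      its vm_map_lookup() to refresh dst_object, dst_offset, and
 *      dst_version before copying the remainder.  The helper name
 *      and the page-aligned "len" are assumptions of this sketch.
 */
kern_return_t example_fault_copy(src_object, src_offset, dst_object,
                                 dst_offset, dst_map, dst_version, len)
        vm_object_t             src_object;
        vm_offset_t             src_offset;
        vm_object_t             dst_object;
        vm_offset_t             dst_offset;
        vm_map_t                dst_map;
        vm_map_version_t        *dst_version;
        vm_size_t               len;
{
        vm_size_t       copy_size;
        kern_return_t   kr;

        copy_size = len;
        kr = vm_fault_copy(src_object, src_offset, &copy_size,
                           dst_object, dst_offset,
                           dst_map, dst_version, FALSE);

        if ((kr == KERN_SUCCESS) && (copy_size != len)) {
                /*
                 *      Partial copy: the destination map changed.
                 *      Refresh the lookup and copy the remaining
                 *      (len - copy_size) bytes (omitted here).
                 */
        }
        return(kr);
}
#endif  /* notdef */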
 2196 
 2197 
 2198 
 2199 
 2200 
 2201 #ifdef  notdef
 2202 
 2203 /*
 2204  *      Routine:        vm_fault_page_overwrite
 2205  *
 2206  *      Description:
 2207  *              A form of vm_fault_page that assumes that the
 2208  *              resulting page will be overwritten in its entirety,
 2209  *              making it unnecessary to obtain the correct *contents*
 2210  *              of the page.
 2211  *
 2212  *      Implementation:
 2213  *              XXX Untested.  Also unused.  Eventually, this technology
 2214  *              could be used in vm_fault_copy() to advantage.
 2215  */
 2216 vm_fault_return_t vm_fault_page_overwrite(dst_object, dst_offset, result_page)
 2217         register
 2218         vm_object_t     dst_object;
 2219         vm_offset_t     dst_offset;
 2220         vm_page_t       *result_page;   /* OUT */
 2221 {
 2222         register
 2223         vm_page_t       dst_page;
 2224 
 2225 #define interruptible   FALSE   /* XXX */
 2226 
 2227         while (TRUE) {
 2228                 /*
 2229                  *      Look for a page at this offset
 2230                  */
 2231 
 2232                 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
 2233                                  == VM_PAGE_NULL) {
 2234                         /*
 2235                          *      No page, no problem... just allocate one.
 2236                          */
 2237 
 2238                         dst_page = vm_page_alloc(dst_object, dst_offset);
 2239                         if (dst_page == VM_PAGE_NULL) {
 2240                                 vm_object_unlock(dst_object);
 2241                                 VM_PAGE_WAIT((void (*)()) 0);
 2242                                 vm_object_lock(dst_object);
 2243                                 continue;
 2244                         }
 2245 
 2246                         /*
 2247                          *      Pretend that the memory manager
 2248                          *      write-protected the page.
 2249                          *
 2250                          *      Note that we will be asking for write
 2251                          *      permission without asking for the data
 2252                          *      first.
 2253                          */
 2254 
 2255                         dst_page->overwriting = TRUE;
 2256                         dst_page->page_lock = VM_PROT_WRITE;
 2257                         dst_page->absent = TRUE;
 2258                         dst_object->absent_count++;
 2259 
 2260                         break;
 2261 
 2262                         /*
 2263                          *      When we bail out, we might have to throw
 2264                          *      away the page created here.
 2265                          */
 2266 
 2267 #define DISCARD_PAGE                                            \
 2268         MACRO_BEGIN                                             \
 2269         vm_object_lock(dst_object);                             \
 2270         dst_page = vm_page_lookup(dst_object, dst_offset);      \
 2271         if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
 2272                 VM_PAGE_FREE(dst_page);                         \
 2273         vm_object_unlock(dst_object);                           \
 2274         MACRO_END
 2275                 }
 2276 
 2277                 /*
 2278                  *      If the page is write-protected...
 2279                  */
 2280 
 2281                 if (dst_page->page_lock & VM_PROT_WRITE) {
 2282                         /*
 2283                          *      ... and an unlock request hasn't been sent
 2284                          */
 2285 
 2286                         if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
 2287                                 vm_prot_t       u;
 2288                                 kern_return_t   rc;
 2289 
 2290                                 /*
 2291                                  *      ... then send one now.
 2292                                  */
 2293 
 2294                                 if (!dst_object->pager_ready) {
 2295                                         vm_object_assert_wait(dst_object,
 2296                                                 VM_OBJECT_EVENT_PAGER_READY,
 2297                                                 interruptible);
 2298                                         vm_object_unlock(dst_object);
 2299                                         thread_block((void (*)()) 0);
 2300                                         if (current_thread()->wait_result !=
 2301                                             THREAD_AWAKENED) {
 2302                                                 DISCARD_PAGE;
 2303                                                 return(VM_FAULT_INTERRUPTED);
 2304                                         }
 2305                                         continue;
 2306                                 }
 2307 
 2308                                 u = dst_page->unlock_request |= VM_PROT_WRITE;
 2309                                 vm_object_unlock(dst_object);
 2310 
 2311                                 if ((rc = memory_object_data_unlock(
 2312                                                 dst_object->pager,
 2313                                                 dst_object->pager_request,
 2314                                                 dst_offset + dst_object->paging_offset,
 2315                                                 PAGE_SIZE,
 2316                                                 u)) != KERN_SUCCESS) {
 2317                                         printf("vm_object_overwrite: memory_object_data_unlock failed\n");
 2318                                         DISCARD_PAGE;
 2319                                         return((rc == MACH_SEND_INTERRUPTED) ?
 2320                                                 VM_FAULT_INTERRUPTED :
 2321                                                 VM_FAULT_MEMORY_ERROR);
 2322                                 }
 2323                                 vm_object_lock(dst_object);
 2324                                 continue;
 2325                         }
 2326 
 2327                         /* ... fall through to wait below */
 2328                 } else {
 2329                         /*
 2330                          *      If the page isn't being used for other
 2331                          *      purposes, then we're done.
 2332                          */
 2333                         if ( ! (dst_page->busy || dst_page->absent || dst_page->error) )
 2334                                 break;
 2335                 }
 2336 
 2337                 PAGE_ASSERT_WAIT(dst_page, interruptible);
 2338                 vm_object_unlock(dst_object);
 2339                 thread_block((void (*)()) 0);
 2340                 if (current_thread()->wait_result != THREAD_AWAKENED) {
 2341                         DISCARD_PAGE;
 2342                         return(VM_FAULT_INTERRUPTED);
 2343                 }
 2344         }
 2345 
 2346         *result_page = dst_page;
 2347         return(VM_FAULT_SUCCESS);
 2348 
 2349 #undef  interruptible
 2350 #undef  DISCARD_PAGE
 2351 }
 2352 
 2353 #endif  /* notdef */
