sys/vm/vm_fault.c
1 /*
2 * Mach Operating System
3 * Copyright (c) 1987-1993 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
19 * School of Computer Science
20 * Carnegie Mellon University
21 * Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie Mellon
24 * the rights to redistribute these changes.
25 */
26 /*
27 * HISTORY
28 * $Log: vm_fault.c,v $
29 * Revision 2.22 93/11/17 18:53:39 dbg
30 * Added ANSI function prototypes.
31 * [93/01/28 dbg]
32 *
33 * Revision 2.21 93/08/03 12:35:11 mrt
34 * [93/08/02 16:55:49 bershad]
35 *
36 * Added vm sample support.
37 * [93/07/30 10:27:00 bershad]
38 *
39 * Revision 2.20 93/01/14 18:00:55 danner
40 * Removed unneeded cast from argument to thread_wakeup_with_result.
41 * [92/12/30 dbg]
42 * 64bit cleanup.
43 * [92/12/01 af]
44 *
45 * Added temporary variable to ensure that cpu_number (hence
46 * current_thread) does not change around blocking call to zalloc.
47 * Fix from Grenoble.
48 * [92/10/23 dbg]
49 *
50 * Revision 2.19 92/08/03 18:00:24 jfriedl
51 * removed silly prototypes
52 * [92/08/02 jfriedl]
53 *
54 * Revision 2.18 92/05/21 17:25:45 jfriedl
55 * Cleanup to quiet gcc warnings.
56 * [92/05/16 jfriedl]
57 *
58 * Revision 2.17 92/02/23 19:50:43 elf
59 * Check for both copy and shadow objects due to asymmetric
60 * copy on write for temporary objects.
61 * [92/02/19 14:13:33 dlb]
62 *
63 * No more sharing maps. Eliminated single_use argument
64 * from vm_map_lookup() calls.
65 * [92/01/07 11:03:34 dlb]
66 *
67 * Revision 2.16 92/01/14 16:47:44 rpd
68 * Fixed some locking/assertion bugs in vm_fault_copy.
69 * [92/01/06 rpd]
70 *
71 * Revision 2.15 91/11/12 11:52:02 rvb
72 * Added simple_lock_pause.
73 * [91/11/12 rpd]
74 *
75 * Revision 2.14 91/07/01 08:27:05 jsb
76 * Changed remaining pager_* references to memory_object_*.
77 * [91/06/29 16:26:22 jsb]
78 *
79 * Revision 2.13 91/05/18 14:40:02 rpd
80 * Added proper locking for vm_page_insert.
81 * [91/04/21 rpd]
82 * Changed vm_fault_page to use fictitious pages.
83 * Increased vm_object_absent_max to 50.
84 * [91/03/29 rpd]
85 *
86 * Revision 2.12 91/05/14 17:48:42 mrt
87 * Correcting copyright
88 *
89 * Revision 2.11 91/03/16 15:04:49 rpd
90 * Fixed vm_fault_page to give vm_pageout_page busy pages.
91 * [91/03/11 rpd]
92 * Added vm_fault_init, vm_fault_state_t.
93 * [91/02/16 rpd]
94 *
95 * Added resume, continuation arguments to vm_fault, vm_fault_page.
96 * Added continuation argument to VM_PAGE_WAIT.
97 * Added vm_fault_continue.
98 * [91/02/05 rpd]
99 *
100 * Revision 2.10 91/02/05 17:58:00 mrt
101 * Changed to new Mach copyright
102 * [91/02/01 16:31:44 mrt]
103 *
104 * Revision 2.9 91/01/08 16:44:45 rpd
105 * Turned software_reference_bits on.
106 * [90/12/29 rpd]
107 * Added continuation argument to thread_block.
108 * [90/12/08 rpd]
109 *
110 * Changed VM_WAIT to VM_PAGE_WAIT.
111 * [90/11/13 rpd]
112 *
113 * Revision 2.8 90/10/25 14:49:52 rwd
114 * Turn software_reference_bits off by default.
115 * [90/10/25 rwd]
116 *
117 * Extended software_reference_bits to vm_fault_page.
118 * [90/10/24 rpd]
119 * Fixed vm_fault_page to clear the modify bit on zero-filled pages.
120 * [90/10/23 rpd]
121 * Added watchpoint debugger support.
122 * [90/10/16 rpd]
123 * Added software_reference_bits runtime switch.
124 * [90/10/13 rpd]
125 *
126 * Revision 2.7 90/10/12 13:05:21 rpd
127 * Added missing vm_map_verify_done calls to vm_fault and
128 * vm_fault_copy. From OSF.
129 * [90/10/10 rpd]
130 * Modified vm_fault_page to leave its result page on the pageout queues.
131 * Only activate the pages returned by vm_fault_page if they aren't
132 * already on a pageout queue. In vm_fault, turn on the software
133 * reference bit for the page.
134 * [90/10/08 rpd]
135 *
136 * Revision 2.6 90/06/02 15:10:32 rpd
137 * Fixed vm_fault_copy to handle null source objects.
138 * [90/04/24 rpd]
139 * Converted to new IPC.
140 * [90/03/26 23:11:58 rpd]
141 *
142 * Revision 2.5 90/05/29 18:38:39 rwd
143 * Picked up rfr debugging changes.
144 * [90/04/12 13:47:40 rwd]
145 *
146 * Revision 2.4 90/05/03 15:58:29 dbg
147 * Pass 'flush' argument to vm_pageout_page.
148 * [90/03/28 dbg]
149 *
150 * Revision 2.3 90/02/22 20:05:21 dbg
151 * Deactivate the copied-from page.
152 * [90/02/09 dbg]
153 * Add changes from mainline:
154 * Assert that page is not busy before marking it busy.
155 * [89/12/21 dlb]
156 * Check for absent as well as busy before freeing a page when
157 * pagein fails.
158 * [89/12/13 dlb]
159 * Change all occurrences of PAGE_WAKEUP to PAGE_WAKEUP_DONE to
160 * reflect the fact that they clear the busy flag. See
161 * vm/vm_page.h. Add PAGE_WAKEUP_DONE to vm_fault_unwire().
162 * [89/12/13 dlb]
163 * Break out of fault loop after zero filling in response to
164 * finding an absent page; the zero filled page was either absent
165 * or newly allocated and so can't be page locked.
166 * [89/12/12 dlb]
167 * Must recheck page to object relationship before freeing
168 * page if pagein fails.
169 * [89/12/11 dlb]
170 *
171 * Use vme_start, vme_end when accessing map entries.
172 * [89/08/31 21:10:05 rpd]
173 *
174 * Add vm_fault_copy(), for overwriting a permanent object.
175 * [89/07/28 16:14:27 mwyoung]
176 *
177 * Revision 2.2 90/01/11 11:47:36 dbg
178 * Add vm_fault_cleanup to save space.
179 * [89/12/13 dbg]
180 *
181 * Pick up changes from mainline:
182 *
183 * Consider paging_offset when looking at external page state.
184 * [89/10/16 15:31:17 af]
185 *
186 * Only require read access for the original page once past the
187 * top-level object... it will only be copied to a new page.
188 * [89/05/19 17:45:05 mwyoung]
189 *
190 * Also remove "absent" page from pageout queues before zero-filling.
191 * [89/05/01 mwyoung]
192 * When transforming an "absent" page into a placeholder page,
193 * remove it from the page queues.
194 * [89/04/22 mwyoung]
195 *
196 * Fixed usage of must_be_resident in vm_fault_page when
197 * descending down shadow chain. Fixed corresponding
198 * assertion in vm_fault.
199 * [89/10/02 16:17:20 rpd]
200 * Remove vm_fault_copy_entry and non-XP code.
201 * [89/04/28 dbg]
202 *
203 * Revision 2.1 89/08/03 16:44:50 rwd
204 * Created.
205 *
206 * Revision 2.17 89/05/06 02:58:43 rpd
207 * Picked up fix from mwyoung for a COW-triggered page leak:
208 * when copying from a copy-on-write page, activate the page
209 * instead of deactivating it. Also picked up two innocuous
210 * VM_PAGE_QUEUES_REMOVE() additions in the "unavailable page" code.
211 * [89/05/06 rpd]
212 * Fixed the call to vm_fault_wire_fast in vm_fault_copy_entry.
213 * [89/05/05 rpd]
214 *
215 * Revision 2.16 89/04/18 21:25:12 mwyoung
216 * Recent history:
217 * Limit the number of outstanding page requests for
218 * non-internal objects.
219 * Use hint to determine whether a page of temporary memory may
220 * have been written to backing storage.
221 * History condensation:
222 * Separate fault handling into separate routine (mwyoung).
223 * Handle I/O errors (dbg, mwyoung).
224 * Use map_verify technology (mwyoung).
225 * Allow faults to be interrupted (mwyoung).
226 * Optimized wiring code (dlb).
227 * Initial external memory management (mwyoung, bolosky).
228 * Original version (avie, mwyoung, dbg).
229 *
230 */
231 /*
232 * File: vm_fault.c
233 * Author: Avadis Tevanian, Jr., Michael Wayne Young
234 *
235 * Page fault handling module.
236 */
237 #include <mach_kdb.h>
238 #include <mach_pagemap.h>
239 #include <mach_pcsample.h>
240
241 #include <vm/vm_fault.h>
242 #include <mach/kern_return.h>
243 #include <mach/message.h> /* for error codes */
244 #include <kern/counters.h>
245 #include <kern/kern_io.h>
246 #include <kern/thread.h>
247 #include <kern/sched_prim.h>
248 #include <vm/vm_map.h>
249 #include <vm/vm_object.h>
250 #include <vm/vm_page.h>
251 #include <vm/pmap.h>
252 #include <mach/vm_statistics.h>
253 #include <vm/vm_pageout.h>
254 #include <mach/vm_param.h>
255 #include <mach/memory_object.h>
256 #include <mach/memory_object_user.h>
257 /* For memory_object_data_{request,unlock} */
258 #include <kern/mach_param.h>
259 #include <kern/macro_help.h>
260 #include <kern/zalloc.h>
261
262 #if MACH_PCSAMPLE
263 #include <kern/pc_sample.h>
264 #endif
265
266
267 /*
268 * State needed by vm_fault_continue.
269 * This is a little hefty to drop directly
270 * into the thread structure.
271 */
272 typedef struct vm_fault_state {
273 /* state saved for vm_fault: */
274 vm_map_t vmf_map; /* map */
275 vm_offset_t vmf_vaddr; /* faulting address */
276 vm_prot_t vmf_fault_type; /* access type */
277 boolean_t vmf_change_wiring; /* wiring/unwiring? */
278 no_return (*vmf_continuation)(kern_return_t);
279 /* routine to run instead
280 of returning */
281 vm_map_version_t vmf_version; /* map version */
282 boolean_t vmf_wired; /* wire pages */
283 vm_object_t vmf_object; /* object, */
284 vm_offset_t vmf_offset; /* offset for page found */
285 vm_prot_t vmf_prot; /* actual protection on
286 map entry */
287
288 /* state saved for vm_fault_page: */
289 boolean_t vmfp_backoff; /* backoff and retry after
290 page_wait? */
291 vm_object_t vmfp_object; /* current object and */
292 vm_offset_t vmfp_offset; /* offset being searched */
293 vm_page_t vmfp_first_m; /* placeholder page in
294 top-level object */
295 vm_prot_t vmfp_access; /* access needed */
296 } vm_fault_state_t;
297
298 zone_t vm_fault_state_zone = 0;
299
300 int vm_object_absent_max = 50;
301
302 int vm_fault_debug = 0;
303
304 boolean_t vm_fault_dirty_handling = FALSE;
305 boolean_t vm_fault_interruptible = TRUE;
306
307 boolean_t software_reference_bits = TRUE;
308
309 #if MACH_KDB
310 extern struct db_watchpoint *db_watchpoint_list;
311 #endif /* MACH_KDB */
312
313 /*
314 * Routine: vm_fault_init
315 * Purpose:
316 * Initialize our private data structures.
317 */
318 void vm_fault_init(void)
319 {
320 vm_fault_state_zone = zinit(sizeof(vm_fault_state_t),
321 THREAD_MAX * sizeof(vm_fault_state_t),
322 sizeof(vm_fault_state_t),
323 FALSE,
324 "vm fault state");
325 }
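/*
 * Illustrative sketch (not part of the original file): how the state
 * block and zone above are expected to be used.  A caller of vm_fault
 * that supplies a continuation allocates a vm_fault_state_t from
 * vm_fault_state_zone and hangs it off the current thread; vm_fault
 * and vm_fault_page then save their locals in it before any
 * thread_block that may discard the kernel stack, and
 * vm_fault_continue finds the block again on resume.  The wrapper
 * name below is hypothetical.
 */
#if 0	/* sketch only */
static void example_prepare_fault_state(void)
{
	vm_fault_state_t *state;

	state = (vm_fault_state_t *) zalloc(vm_fault_state_zone);
	current_thread()->ith_other = state;	/* located again on resume */
}
#endif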
326
327 /*
328 * Routine: vm_fault_cleanup
329 * Purpose:
330 * Clean up the result of vm_fault_page.
331 * Results:
332 * The paging reference for "object" is released.
333 * "object" is unlocked.
334 * If "top_page" is not null, "top_page" is
335 * freed and the paging reference for the object
336 * containing it is released.
337 *
338 * In/out conditions:
339 * "object" must be locked.
340 */
341 void
342 vm_fault_cleanup(
343 register vm_object_t object,
344 register vm_page_t top_page)
345 {
346 vm_object_paging_end(object);
347 vm_object_unlock(object);
348
349 if (top_page != VM_PAGE_NULL) {
350 object = top_page->object;
351 vm_object_lock(object);
352 VM_PAGE_FREE(top_page);
353 vm_object_paging_end(object);
354 vm_object_unlock(object);
355 }
356 }
357
358
359 #if MACH_PCSAMPLE
360
361 /*
362 * Do PC sampling on current thread, assuming
363 * that it is the thread taking this page fault.
364 *
365 * Must check for THREAD_NULL, since faults
366 * can occur before threads are running.
367 */
368
369 #define vm_stat_sample(flavor) \
370 MACRO_BEGIN \
371 thread_t _thread_ = current_thread(); \
372 \
373 if (_thread_ != THREAD_NULL) \
374 take_pc_sample_macro(_thread_, (flavor)); \
375 MACRO_END
376
377 #else
378 #define vm_stat_sample(x)
379 #endif /* MACH_PCSAMPLE */
380
381
382 /*
383 * Routine: vm_fault_page
384 * Purpose:
385 * Find the resident page for the virtual memory
386 * specified by the given virtual memory object
387 * and offset.
388 * Additional arguments:
389 * The required permissions for the page are given
390 * in "fault_type". Desired permissions are included
391 * in "protection".
392 *
393 * If the desired page is known to be resident (for
394 * example, because it was previously wired down), asserting
395 * the "must_be_resident" parameter will speed the search.
396 *
397 * If the operation can be interrupted (by thread_abort
398 * or thread_terminate), then the "interruptible"
399 * parameter should be asserted.
400 *
401 * Results:
402 * The page containing the proper data is returned
403 * in "result_page".
404 *
405 * In/out conditions:
406 * The source object must be locked and referenced,
407 * and must donate one paging reference. The reference
408 * is not affected. The paging reference and lock are
409 * consumed.
410 *
411 * If the call succeeds, the object in which "result_page"
412 * resides is left locked and holding a paging reference.
413 * If this is not the original object, a busy page in the
414 * original object is returned in "top_page", to prevent other
415 * callers from pursuing this same data, along with a paging
416 * reference for the original object. The "top_page" should
417 * be destroyed when this guarantee is no longer required.
418 * The "result_page" is also left busy. It is not removed
419 * from the pageout queues.
420 */
421 vm_fault_return_t vm_fault_page(
422 /* Inputs: */
423 vm_object_t first_object, /* Object to begin search */
424 vm_offset_t first_offset, /* Offset into object */
425 vm_prot_t fault_type, /* What access is requested */
426 boolean_t must_be_resident,/* Must page be resident? */
427 boolean_t interruptible, /* May fault be interrupted? */
428 /* Modifies in place: */
429 vm_prot_t *protection, /* Protection for mapping */
430 /* Returns: */
431 vm_page_t *result_page, /* Page found, if successful */
432 vm_page_t *top_page, /* Page in top object, if
433 * not result_page.
434 */
435 /* More inputs: */
436 boolean_t resume, /* We are restarting. */
437 continuation_t continuation) /* Continuation for blocking. */
438 {
439 register
440 vm_page_t m;
441 register
442 vm_object_t object;
443 register
444 vm_offset_t offset;
445 vm_page_t first_m;
446 vm_object_t next_object;
447 vm_object_t copy_object;
448 boolean_t look_for_page;
449 vm_prot_t access_required;
450
451 if (resume) {
452 register vm_fault_state_t *state =
453 (vm_fault_state_t *) current_thread()->ith_other;
454
455 if (state->vmfp_backoff)
456 goto after_block_and_backoff;
457
458 object = state->vmfp_object;
459 offset = state->vmfp_offset;
460 first_m = state->vmfp_first_m;
461 access_required = state->vmfp_access;
462 goto after_thread_block;
463 }
464
465 vm_stat_sample(SAMPLED_PC_VM_FAULTS_ANY);
466 vm_stat.faults++; /* needs lock XXX */
467
468 /*
469 * Recovery actions
470 */
471 #define RELEASE_PAGE(m) \
472 MACRO_BEGIN \
473 PAGE_WAKEUP_DONE(m); \
474 vm_page_lock_queues(); \
475 if (!m->active && !m->inactive) \
476 vm_page_activate(m); \
477 vm_page_unlock_queues(); \
478 MACRO_END
479
480 if (vm_fault_dirty_handling
481 #if MACH_KDB
482 /*
483 * If there are watchpoints set, then
484 * we don't want to give away write permission
485 * on a read fault. Make the task write fault,
486 * so that the watchpoint code notices the access.
487 */
488 || db_watchpoint_list
489 #endif /* MACH_KDB */
490 ) {
491 /*
492 * If we aren't asking for write permission,
493 * then don't give it away. We're using write
494 * faults to set the dirty bit.
495 */
496 if (!(fault_type & VM_PROT_WRITE))
497 *protection &= ~VM_PROT_WRITE;
498 }
499
500 if (!vm_fault_interruptible)
501 interruptible = FALSE;
502
503 /*
504 * INVARIANTS (through entire routine):
505 *
506 * 1) At all times, we must either have the object
507 * lock or a busy page in some object to prevent
508 * some other thread from trying to bring in
509 * the same page.
510 *
511 * Note that we cannot hold any locks during the
512 * pager access or when waiting for memory, so
513 * we use a busy page then.
514 *
515 * Note also that we aren't as concerned about more than
516 * one thread attempting to memory_object_data_unlock
517 * the same page at once, so we don't hold the page
518 * as busy then, but do record the highest unlock
519 * value so far. [Unlock requests may also be delivered
520 * out of order.]
521 *
522 * 2) To prevent another thread from racing us down the
523 * shadow chain and entering a new page in the top
524 * object before we do, we must keep a busy page in
525 * the top object while following the shadow chain.
526 *
527 * 3) We must increment paging_in_progress on any object
528 * for which we have a busy page, to prevent
529 * vm_object_collapse from removing the busy page
530 * without our noticing.
531 *
532 * 4) We leave busy pages on the pageout queues.
533 * If the pageout daemon comes across a busy page,
534 * it will remove the page from the pageout queues.
535 */
536
537 /*
538 * Search for the page at object/offset.
539 */
540
541 object = first_object;
542 offset = first_offset;
543 first_m = VM_PAGE_NULL;
544 access_required = fault_type;
545
546 /*
547 * See whether this page is resident
548 */
549
550 while (TRUE) {
551 m = vm_page_lookup(object, offset);
552 if (m != VM_PAGE_NULL) {
553 /*
554 * If the page is being brought in,
555 * wait for it and then retry.
556 *
557 * A possible optimization: if the page
558 * is known to be resident, we can ignore
559 * pages that are absent (regardless of
560 * whether they're busy).
561 */
562
563 if (m->busy) {
564 kern_return_t wait_result;
565
566 PAGE_ASSERT_WAIT(m, interruptible);
567 vm_object_unlock(object);
568 if (continuation != (void (*)(void)) 0) {
569 register vm_fault_state_t *state =
570 (vm_fault_state_t *) current_thread()->ith_other;
571
572 /*
573 * Save variables in case
574 * thread_block discards
575 * our kernel stack.
576 */
577
578 state->vmfp_backoff = FALSE;
579 state->vmfp_object = object;
580 state->vmfp_offset = offset;
581 state->vmfp_first_m = first_m;
582 state->vmfp_access =
583 access_required;
584 state->vmf_prot = *protection;
585
586 counter(c_vm_fault_page_block_busy_user++);
587 thread_block(continuation);
588 } else {
589 counter(c_vm_fault_page_block_busy_kernel++);
590 thread_block(CONTINUE_NULL);
591 }
592 after_thread_block:
593 wait_result = current_thread()->wait_result;
594 vm_object_lock(object);
595 if (wait_result != THREAD_AWAKENED) {
596 vm_fault_cleanup(object, first_m);
597 if (wait_result == THREAD_RESTART)
598 return VM_FAULT_RETRY;
599 else
600 return VM_FAULT_INTERRUPTED;
601 }
602 continue;
603 }
604
605 /*
606 * If the page is in error, give up now.
607 */
608
609 if (m->error) {
610 VM_PAGE_FREE(m);
611 vm_fault_cleanup(object, first_m);
612 return VM_FAULT_MEMORY_ERROR;
613 }
614
615 /*
616 * If the page isn't busy, but is absent,
617 * then it was deemed "unavailable".
618 */
619
620 if (m->absent) {
621 /*
622 * Remove the non-existent page (unless it's
623 * in the top object) and move on down to the
624 * next object (if there is one).
625 */
626
627 offset += object->shadow_offset;
628 access_required = VM_PROT_READ;
629 next_object = object->shadow;
630 if (next_object == VM_OBJECT_NULL) {
631 vm_page_t real_m;
632
633 assert(!must_be_resident);
634
635 /*
636 * Absent page at bottom of shadow
637 * chain; zero fill the page we left
638 * busy in the first object, and flush
639 * the absent page. But first we
640 * need to allocate a real page.
641 */
642
643 real_m = vm_page_grab();
644 if (real_m == VM_PAGE_NULL) {
645 vm_fault_cleanup(object, first_m);
646 return VM_FAULT_MEMORY_SHORTAGE;
647 }
648
649 if (object != first_object) {
650 VM_PAGE_FREE(m);
651 vm_object_paging_end(object);
652 vm_object_unlock(object);
653 object = first_object;
654 offset = first_offset;
655 m = first_m;
656 first_m = VM_PAGE_NULL;
657 vm_object_lock(object);
658 }
659
660 VM_PAGE_FREE(m);
661 assert(real_m->busy);
662 vm_page_lock_queues();
663 vm_page_insert(real_m, object, offset);
664 vm_page_unlock_queues();
665 m = real_m;
666
667 /*
668 * Drop the lock while zero filling
669 * page. Then break because this
670 * is the page we wanted. Checking
671 * the page lock is a waste of time;
672 * this page was either absent or
673 * newly allocated -- in both cases
674 * it can't be page locked by a pager.
675 */
676 vm_object_unlock(object);
677
678 vm_page_zero_fill(m);
679
680 vm_stat_sample(SAMPLED_PC_VM_ZFILL_FAULTS);
681
682 vm_stat.zero_fill_count++;
683 vm_object_lock(object);
684 pmap_clear_modify(m->phys_addr);
685 break;
686 } else {
687 if (must_be_resident) {
688 vm_object_paging_end(object);
689 } else if (object != first_object) {
690 vm_object_paging_end(object);
691 VM_PAGE_FREE(m);
692 } else {
693 first_m = m;
694 m->absent = FALSE;
695 vm_object_absent_release(object);
696 m->busy = TRUE;
697
698 vm_page_lock_queues();
699 VM_PAGE_QUEUES_REMOVE(m);
700 vm_page_unlock_queues();
701 }
702 vm_object_lock(next_object);
703 vm_object_unlock(object);
704 object = next_object;
705 vm_object_paging_begin(object);
706 continue;
707 }
708 }
709
710 /*
711 * If the desired access to this page has
712 * been locked out, request that it be unlocked.
713 */
714
715 if (access_required & m->page_lock) {
716 if ((access_required & m->unlock_request) != access_required) {
717 vm_prot_t new_unlock_request;
718 kern_return_t rc;
719
720 if (!object->pager_ready) {
721 vm_object_assert_wait(object,
722 VM_OBJECT_EVENT_PAGER_READY,
723 interruptible);
724 goto block_and_backoff;
725 }
726
727 new_unlock_request = m->unlock_request =
728 (access_required | m->unlock_request);
729 vm_object_unlock(object);
730 if ((rc = memory_object_data_unlock(
731 object->pager,
732 object->pager_request,
733 offset + object->paging_offset,
734 PAGE_SIZE,
735 new_unlock_request))
736 != KERN_SUCCESS) {
737 printf("vm_fault: memory_object_data_unlock failed\n");
738 vm_object_lock(object);
739 vm_fault_cleanup(object, first_m);
740 return (rc == MACH_SEND_INTERRUPTED) ?
741 VM_FAULT_INTERRUPTED :
742 VM_FAULT_MEMORY_ERROR;
743 }
744 vm_object_lock(object);
745 continue;
746 }
747
748 PAGE_ASSERT_WAIT(m, interruptible);
749 goto block_and_backoff;
750 }
751
752 /*
753 * We mark the page busy and leave it on
754 * the pageout queues. If the pageout
755 * daemon comes across it, then it will
756 * remove the page.
757 */
758
759 if (!software_reference_bits) {
760 vm_page_lock_queues();
761 if (m->inactive) {
762 vm_stat_sample(SAMPLED_PC_VM_REACTIVATION_FAULTS);
763 vm_stat.reactivations++;
764 }
765
766 VM_PAGE_QUEUES_REMOVE(m);
767 vm_page_unlock_queues();
768 }
769
770 assert(!m->busy);
771 m->busy = TRUE;
772 assert(!m->absent);
773 break;
774 }
775
776 look_for_page =
777 (object->pager_created)
778 #if MACH_PAGEMAP
779 && (vm_external_state_get(object->existence_info, offset + object->paging_offset) !=
780 VM_EXTERNAL_STATE_ABSENT)
781 #endif /* MACH_PAGEMAP */
782 ;
783
784 if ((look_for_page || (object == first_object))
785 && !must_be_resident) {
786 /*
787 * Allocate a new page for this object/offset
788 * pair.
789 */
790
791 m = vm_page_grab_fictitious();
792 if (m == VM_PAGE_NULL) {
793 vm_fault_cleanup(object, first_m);
794 return VM_FAULT_FICTITIOUS_SHORTAGE;
795 }
796
797 vm_page_lock_queues();
798 vm_page_insert(m, object, offset);
799 vm_page_unlock_queues();
800 }
801
802 if (look_for_page && !must_be_resident) {
803 kern_return_t rc;
804
805 /*
806 * If the memory manager is not ready, we
807 * cannot make requests.
808 */
809 if (!object->pager_ready) {
810 vm_object_assert_wait(object,
811 VM_OBJECT_EVENT_PAGER_READY,
812 interruptible);
813 VM_PAGE_FREE(m);
814 goto block_and_backoff;
815 }
816
817 if (object->internal) {
818 /*
819 * Requests to the default pager
820 * must reserve a real page in advance,
821 * because the pager's data_provided path
822 * won't block waiting for free pages.
823 */
824
825 if (m->fictitious && !vm_page_convert(m)) {
826 VM_PAGE_FREE(m);
827 vm_fault_cleanup(object, first_m);
828 return VM_FAULT_MEMORY_SHORTAGE;
829 }
830 } else if (object->absent_count >
831 vm_object_absent_max) {
832 /*
833 * If there are too many outstanding page
834 * requests pending on this object, we
835 * wait for them to be resolved now.
836 */
837
838 vm_object_absent_assert_wait(object, interruptible);
839 VM_PAGE_FREE(m);
840 goto block_and_backoff;
841 }
842
843 /*
844 * Indicate that the page is waiting for data
845 * from the memory manager.
846 */
847
848 m->absent = TRUE;
849 object->absent_count++;
850
851 /*
852 * We have a busy page, so we can
853 * release the object lock.
854 */
855 vm_object_unlock(object);
856
857 /*
858 * Call the memory manager to retrieve the data.
859 */
860
861 vm_stat.pageins++;
862 vm_stat_sample(SAMPLED_PC_VM_PAGEIN_FAULTS);
863
864 if ((rc = memory_object_data_request(object->pager,
865 object->pager_request,
866 m->offset + object->paging_offset,
867 PAGE_SIZE, access_required)) != KERN_SUCCESS) {
868 if (rc != MACH_SEND_INTERRUPTED)
869 printf("%s(%#x, %#x, %#x, %#x, %#x) failed, %d\n",
870 "memory_object_data_request",
871 (vm_offset_t) object->pager,
872 (vm_offset_t) object->pager_request,
873 m->offset + object->paging_offset,
874 PAGE_SIZE, access_required, rc);
875 /*
876 * Don't want to leave a busy page around,
877 * but the data request may have blocked,
878 * so check if it's still there and busy.
879 */
880 vm_object_lock(object);
881 if (m == vm_page_lookup(object,offset) &&
882 m->absent && m->busy)
883 VM_PAGE_FREE(m);
884 vm_fault_cleanup(object, first_m);
885 return (rc == MACH_SEND_INTERRUPTED) ?
886 VM_FAULT_INTERRUPTED :
887 VM_FAULT_MEMORY_ERROR;
888 }
889
890 /*
891 * Retry with same object/offset, since new data may
892 * be in a different page (i.e., m is meaningless at
893 * this point).
894 */
895 vm_object_lock(object);
896 continue;
897 }
898
899 /*
900 * For the XP system, the only case in which we get here is if
901 * the object has no pager (or we are unwiring). If the pager doesn't
902 * have the page, this is handled in the m->absent case above
903 * (and if you change things here you should look above).
904 */
905 if (object == first_object)
906 first_m = m;
907 else
908 {
909 assert(m == VM_PAGE_NULL);
910 }
911
912 /*
913 * Move on to the next object. Lock the next
914 * object before unlocking the current one.
915 */
916 access_required = VM_PROT_READ;
917
918 offset += object->shadow_offset;
919 next_object = object->shadow;
920 if (next_object == VM_OBJECT_NULL) {
921 assert(!must_be_resident);
922
923 /*
924 * If there's no object left, fill the page
925 * in the top object with zeros. But first we
926 * need to allocate a real page.
927 */
928
929 if (object != first_object) {
930 vm_object_paging_end(object);
931 vm_object_unlock(object);
932
933 object = first_object;
934 offset = first_offset;
935 vm_object_lock(object);
936 }
937
938 m = first_m;
939 assert(m->object == object);
940 first_m = VM_PAGE_NULL;
941
942 if (m->fictitious && !vm_page_convert(m)) {
943 VM_PAGE_FREE(m);
944 vm_fault_cleanup(object, VM_PAGE_NULL);
945 return VM_FAULT_MEMORY_SHORTAGE;
946 }
947
948 vm_object_unlock(object);
949 vm_page_zero_fill(m);
950 vm_stat_sample(SAMPLED_PC_VM_ZFILL_FAULTS);
951 vm_stat.zero_fill_count++;
952 vm_object_lock(object);
953 pmap_clear_modify(m->phys_addr);
954 break;
955 }
956 else {
957 vm_object_lock(next_object);
958 if ((object != first_object) || must_be_resident)
959 vm_object_paging_end(object);
960 vm_object_unlock(object);
961 object = next_object;
962 vm_object_paging_begin(object);
963 }
964 }
965
966 /*
967 * PAGE HAS BEEN FOUND.
968 *
969 * This page (m) is:
970 * busy, so that we can play with it;
971 * not absent, so that nobody else will fill it;
972 * possibly eligible for pageout;
973 *
974 * The top-level page (first_m) is:
975 * VM_PAGE_NULL if the page was found in the
976 * top-level object;
977 * busy, not absent, and ineligible for pageout.
978 *
979 * The current object (object) is locked. A paging
980 * reference is held for the current and top-level
981 * objects.
982 */
983
984 #if EXTRA_ASSERTIONS
985 assert(m->busy && !m->absent);
986 assert((first_m == VM_PAGE_NULL) ||
987 (first_m->busy && !first_m->absent &&
988 !first_m->active && !first_m->inactive));
989 #endif /* EXTRA_ASSERTIONS */
990
991 /*
992 * If the page is being written, but isn't
993 * already owned by the top-level object,
994 * we have to copy it into a new page owned
995 * by the top-level object.
996 */
997
998 if (object != first_object) {
999 /*
1000 * We only really need to copy if we
1001 * want to write it.
1002 */
1003
1004 if (fault_type & VM_PROT_WRITE) {
1005 vm_page_t copy_m;
1006
1007 assert(!must_be_resident);
1008
1009 /*
1010 * If we try to collapse first_object at this
1011 * point, we may deadlock when we try to get
1012 * the lock on an intermediate object (since we
1013 * have the bottom object locked). We can't
1014 * unlock the bottom object, because the page
1015 * we found may move (by collapse) if we do.
1016 *
1017 * Instead, we first copy the page. Then, when
1018 * we have no more use for the bottom object,
1019 * we unlock it and try to collapse.
1020 *
1021 * Note that we copy the page even if we didn't
1022 * need to... that's the breaks.
1023 */
1024
1025 /*
1026 * Allocate a page for the copy
1027 */
1028 copy_m = vm_page_grab();
1029 if (copy_m == VM_PAGE_NULL) {
1030 RELEASE_PAGE(m);
1031 vm_fault_cleanup(object, first_m);
1032 return VM_FAULT_MEMORY_SHORTAGE;
1033 }
1034
1035 vm_object_unlock(object);
1036 vm_page_copy(m, copy_m);
1037 vm_object_lock(object);
1038
1039 /*
1040 * If another map is truly sharing this
1041 * page with us, we have to flush all
1042 * uses of the original page, since we
1043 * can't distinguish those which want the
1044 * original from those which need the
1045 * new copy.
1046 *
1047 * XXXO If we know that only one map has
1048 * access to this page, then we could
1049 * avoid the pmap_page_protect() call.
1050 */
1051
1052 vm_page_lock_queues();
1053 vm_page_deactivate(m);
1054 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1055 vm_page_unlock_queues();
1056
1057 /*
1058 * We no longer need the old page or object.
1059 */
1060
1061 PAGE_WAKEUP_DONE(m);
1062 vm_object_paging_end(object);
1063 vm_object_unlock(object);
1064
1065 vm_stat.cow_faults++;
1066 vm_stat_sample(SAMPLED_PC_VM_COW_FAULTS);
1067 object = first_object;
1068 offset = first_offset;
1069
1070 vm_object_lock(object);
1071 VM_PAGE_FREE(first_m);
1072 first_m = VM_PAGE_NULL;
1073 assert(copy_m->busy);
1074 vm_page_lock_queues();
1075 vm_page_insert(copy_m, object, offset);
1076 vm_page_unlock_queues();
1077 m = copy_m;
1078
1079 /*
1080 * Now that we've gotten the copy out of the
1081 * way, let's try to collapse the top object.
1082 * But we have to play ugly games with
1083 * paging_in_progress to do that...
1084 */
1085
1086 vm_object_paging_end(object);
1087 vm_object_collapse(object);
1088 vm_object_paging_begin(object);
1089 }
1090 else {
1091 *protection &= (~VM_PROT_WRITE);
1092 }
1093 }
1094
1095 /*
1096 * Now check whether the page needs to be pushed into the
1097 * copy object. The use of asymmetric copy on write for
1098 * shared temporary objects means that we may do two copies to
1099 * satisfy the fault; one above to get the page from a
1100 * shadowed object, and one here to push it into the copy.
1101 */
1102
1103 while ((copy_object = first_object->copy) != VM_OBJECT_NULL) {
1104 vm_offset_t copy_offset;
1105 vm_page_t copy_m;
1106
1107 /*
1108 * If the page is being written, but hasn't been
1109 * copied to the copy-object, we have to copy it there.
1110 */
1111
1112 if ((fault_type & VM_PROT_WRITE) == 0) {
1113 *protection &= ~VM_PROT_WRITE;
1114 break;
1115 }
1116
1117 /*
1118 * If the page was guaranteed to be resident,
1119 * we must have already performed the copy.
1120 */
1121
1122 if (must_be_resident)
1123 break;
1124
1125 /*
1126 * Try to get the lock on the copy_object.
1127 */
1128 if (!vm_object_lock_try(copy_object)) {
1129 vm_object_unlock(object);
1130
1131 simple_lock_pause(); /* wait a bit */
1132
1133 vm_object_lock(object);
1134 continue;
1135 }
1136
1137 /*
1138 * Make another reference to the copy-object,
1139 * to keep it from disappearing during the
1140 * copy.
1141 */
1142 assert(copy_object->ref_count > 0);
1143 copy_object->ref_count++;
1144
1145 /*
1146 * Does the page exist in the copy?
1147 */
1148 copy_offset = first_offset - copy_object->shadow_offset;
1149 copy_m = vm_page_lookup(copy_object, copy_offset);
1150 if (copy_m != VM_PAGE_NULL) {
1151 if (copy_m->busy) {
1152 /*
1153 * If the page is being brought
1154 * in, wait for it and then retry.
1155 */
1156 PAGE_ASSERT_WAIT(copy_m, interruptible);
1157 RELEASE_PAGE(m);
1158 copy_object->ref_count--;
1159 assert(copy_object->ref_count > 0);
1160 vm_object_unlock(copy_object);
1161 goto block_and_backoff;
1162 }
1163 }
1164 else {
1165 /*
1166 * Allocate a page for the copy
1167 */
1168 copy_m = vm_page_alloc(copy_object, copy_offset);
1169 if (copy_m == VM_PAGE_NULL) {
1170 RELEASE_PAGE(m);
1171 copy_object->ref_count--;
1172 assert(copy_object->ref_count > 0);
1173 vm_object_unlock(copy_object);
1174 vm_fault_cleanup(object, first_m);
1175 return VM_FAULT_MEMORY_SHORTAGE;
1176 }
1177
1178 /*
1179 * Must copy page into copy-object.
1180 */
1181
1182 vm_page_copy(m, copy_m);
1183
1184 /*
1185 * If the old page was in use by any users
1186 * of the copy-object, it must be removed
1187 * from all pmaps. (We can't know which
1188 * pmaps use it.)
1189 */
1190
1191 vm_page_lock_queues();
1192 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
1193 copy_m->dirty = TRUE;
1194 vm_page_unlock_queues();
1195
1196 /*
1197 * If there's a pager, then immediately
1198 * page out this page, using the "initialize"
1199 * option. Else, we use the copy.
1200 */
1201
1202 if (!copy_object->pager_created) {
1203 vm_page_lock_queues();
1204 vm_page_activate(copy_m);
1205 vm_page_unlock_queues();
1206 PAGE_WAKEUP_DONE(copy_m);
1207 } else {
1208 /*
1209 * The page is already ready for pageout:
1210 * not on pageout queues and busy.
1211 * Unlock everything except the
1212 * copy_object itself.
1213 */
1214
1215 vm_object_unlock(object);
1216
1217 /*
1218 * Write the page to the copy-object,
1219 * flushing it from the kernel.
1220 */
1221
1222 vm_pageout_page(copy_m, TRUE, TRUE);
1223
1224 /*
1225 * Since the pageout may have
1226 * temporarily dropped the
1227 * copy_object's lock, we
1228 * check whether we'll have
1229 * to deallocate the hard way.
1230 */
1231
1232 if ((copy_object->shadow != object) ||
1233 (copy_object->ref_count == 1)) {
1234 vm_object_unlock(copy_object);
1235 vm_object_deallocate(copy_object);
1236 vm_object_lock(object);
1237 continue;
1238 }
1239
1240 /*
1241 * Pick back up the old object's
1242 * lock. [It is safe to do so,
1243 * since it must be deeper in the
1244 * object tree.]
1245 */
1246
1247 vm_object_lock(object);
1248 }
1249
1250 /*
1251 * Because we're pushing a page upward
1252 * in the object tree, we must restart
1253 * any faults that are waiting here.
1254 * [Note that this is an expansion of
1255 * PAGE_WAKEUP that uses the THREAD_RESTART
1256 * wait result]. Can't turn off the page's
1257 * busy bit because we're not done with it.
1258 */
1259
1260 if (m->wanted) {
1261 m->wanted = FALSE;
1262 thread_wakeup_with_result((event_t) m,
1263 THREAD_RESTART);
1264 }
1265 }
1266
1267 /*
1268 * The reference count on copy_object must be
1269 * at least 2: one for our extra reference,
1270 * and at least one from the outside world
1271 * (we checked that when we last locked
1272 * copy_object).
1273 */
1274 copy_object->ref_count--;
1275 assert(copy_object->ref_count > 0);
1276 vm_object_unlock(copy_object);
1277
1278 break;
1279 }
1280
1281 *result_page = m;
1282 *top_page = first_m;
1283
1284 /*
1285 * If the page can be written, assume that it will be.
1286 * [Earlier, we restricted the permission to allow write
1287 * access only if the fault required it, so we don't
1288 * mark read-only data as dirty.]
1289 */
1290
1291 if (vm_fault_dirty_handling && (*protection & VM_PROT_WRITE))
1292 m->dirty = TRUE;
1293
1294 return VM_FAULT_SUCCESS;
1295
1296 block_and_backoff:
1297 vm_fault_cleanup(object, first_m);
1298
1299 if (continuation != (void (*)(void)) 0) {
1300 register vm_fault_state_t *state =
1301 (vm_fault_state_t *) current_thread()->ith_other;
1302
1303 /*
1304 * Save variables in case we must restart.
1305 */
1306
1307 state->vmfp_backoff = TRUE;
1308 state->vmf_prot = *protection;
1309
1310 counter(c_vm_fault_page_block_backoff_user++);
1311 thread_block(continuation);
1312 } else {
1313 counter(c_vm_fault_page_block_backoff_kernel++);
1314 thread_block(CONTINUE_NULL);
1315 }
1316 after_block_and_backoff:
1317 if (current_thread()->wait_result == THREAD_AWAKENED)
1318 return VM_FAULT_RETRY;
1319 else
1320 return VM_FAULT_INTERRUPTED;
1321
1322 #undef RELEASE_PAGE
1323 }
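/*
 * Illustrative sketch (not part of the original file): the minimal
 * synchronous calling protocol for vm_fault_page, without the
 * continuation machinery.  The caller donates a locked object with a
 * paging reference, retries the transient failures, and pairs a
 * successful return with PAGE_WAKEUP_DONE and vm_fault_cleanup, as
 * the real callers below (vm_fault, vm_fault_unwire, vm_fault_copy)
 * do.  The wrapper name is hypothetical.
 */
#if 0	/* sketch only */
static kern_return_t example_touch_page(
	vm_object_t	object,		/* caller holds a reference */
	vm_offset_t	offset)
{
	vm_prot_t	prot;
	vm_page_t	m, top_page;

	for (;;) {
		prot = VM_PROT_READ;
		vm_object_lock(object);
		vm_object_paging_begin(object);	/* donated paging reference */

		switch (vm_fault_page(object, offset, VM_PROT_READ,
				FALSE, FALSE,	/* must_be_resident, interruptible */
				&prot, &m, &top_page,
				FALSE, 0)) {	/* resume, continuation */
		case VM_FAULT_SUCCESS:
			/* m is busy and m->object is locked; use it here. */
			PAGE_WAKEUP_DONE(m);
			vm_page_lock_queues();
			if (!m->active && !m->inactive)
				vm_page_activate(m);
			vm_page_unlock_queues();
			vm_fault_cleanup(m->object, top_page);
			return KERN_SUCCESS;
		case VM_FAULT_RETRY:
			continue;	/* vm_fault_page already cleaned up */
		case VM_FAULT_MEMORY_SHORTAGE:
			VM_PAGE_WAIT(CONTINUE_NULL);
			continue;
		case VM_FAULT_FICTITIOUS_SHORTAGE:
			vm_page_more_fictitious();
			continue;
		default:		/* INTERRUPTED or MEMORY_ERROR */
			return KERN_MEMORY_ERROR;
		}
	}
}
#endif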
1324
1325 /*
1326 * Routine: vm_fault
1327 * Purpose:
1328 * Handle page faults, including pseudo-faults
1329 * used to change the wiring status of pages.
1330 * Returns:
1331 * If an explicit (expression) continuation is supplied,
1332 * then we call the continuation instead of returning.
1333 * Implementation:
1334 * Explicit continuations make this a little icky,
1335 * because it hasn't been rewritten to embrace CPS.
1336 * Instead, we have resume arguments for vm_fault and
1337 * vm_fault_page, to let us continue the fault computation.
1338 *
1339 * vm_fault and vm_fault_page save much state
1340 * in the equivalent of a closure. The state
1341 * structure is allocated when first entering vm_fault
1342 * and deallocated when leaving vm_fault.
1343 */
1344
1345 no_return
1346 vm_fault_continue(void)
1347 {
1348 register vm_fault_state_t *state =
1349 (vm_fault_state_t *) current_thread()->ith_other;
1350
1351 vm_fault_noreturn(state->vmf_map,
1352 state->vmf_vaddr,
1353 state->vmf_fault_type,
1354 state->vmf_change_wiring,
1355 TRUE, state->vmf_continuation);
1356 /*NOTREACHED*/
1357 }
1358
1359 kern_return_t vm_fault(
1360 vm_map_t map,
1361 vm_offset_t vaddr,
1362 vm_prot_t fault_type,
1363 boolean_t change_wiring,
1364 boolean_t resume,
1365 no_return (*continuation)(kern_return_t))
1366 {
1367 vm_map_version_t version; /* Map version for verification */
1368 boolean_t wired; /* Should mapping be wired down? */
1369 vm_object_t object; /* Top-level object */
1370 vm_offset_t offset; /* Top-level offset */
1371 vm_prot_t prot; /* Protection for mapping */
1372 vm_object_t old_copy_object; /* Saved copy object */
1373 vm_page_t result_page; /* Result of vm_fault_page */
1374 vm_page_t top_page; /* Placeholder page */
1375 kern_return_t kr;
1376
1377 register
1378 vm_page_t m; /* Fast access to result_page */
1379
1380 if (resume) {
1381 register vm_fault_state_t *state =
1382 (vm_fault_state_t *) current_thread()->ith_other;
1383
1384 /*
1385 * Retrieve cached variables and
1386 * continue vm_fault_page.
1387 */
1388
1389 object = state->vmf_object;
1390 if (object == VM_OBJECT_NULL)
1391 goto RetryFault;
1392 version = state->vmf_version;
1393 wired = state->vmf_wired;
1394 offset = state->vmf_offset;
1395 prot = state->vmf_prot;
1396
1397 kr = vm_fault_page(object, offset, fault_type,
1398 (change_wiring && !wired), !change_wiring,
1399 &prot, &result_page, &top_page,
1400 TRUE, vm_fault_continue);
1401 goto after_vm_fault_page;
1402 }
1403
1404 if (continuation != 0) {
1405 /*
1406 * We will probably need to save state.
1407 */
1408
1409 vm_fault_state_t *state;
1410
1411 /*
1412 * if this assignment stmt is written as
1413 * 'active_threads[cpu_number()] = zalloc()',
1414 * cpu_number may be evaluated before zalloc;
1415 * if zalloc blocks, cpu_number will be wrong
1416 */
1417
1418 state = (vm_fault_state_t *) zalloc(vm_fault_state_zone);
1419 current_thread()->ith_other = state;
1420
1421 }
1422
1423 RetryFault: ;
1424
1425 /*
1426 * Find the backing store object and offset into
1427 * it to begin the search.
1428 */
1429
1430 if ((kr = vm_map_lookup(&map, vaddr, fault_type, &version,
1431 &object, &offset,
1432 &prot, &wired)) != KERN_SUCCESS) {
1433 goto done;
1434 }
1435
1436 /*
1437 * If the page is wired, we must fault for the current protection
1438 * value, to avoid further faults.
1439 */
1440
1441 if (wired)
1442 fault_type = prot;
1443
1444 /*
1445 * Make a reference to this object to
1446 * prevent its disposal while we are messing with
1447 * it. Once we have the reference, the map is free
1448 * to be diddled. Since objects reference their
1449 * shadows (and copies), they will stay around as well.
1450 */
1451
1452 assert(object->ref_count > 0);
1453 object->ref_count++;
1454 vm_object_paging_begin(object);
1455
1456 if (continuation != (void (*)(kern_return_t)) 0) {
1457 register vm_fault_state_t *state =
1458 (vm_fault_state_t *) current_thread()->ith_other;
1459
1460 /*
1461 * Save variables, in case vm_fault_page discards
1462 * our kernel stack and we have to restart.
1463 */
1464
1465 state->vmf_map = map;
1466 state->vmf_vaddr = vaddr;
1467 state->vmf_fault_type = fault_type;
1468 state->vmf_change_wiring = change_wiring;
1469 state->vmf_continuation = continuation;
1470
1471 state->vmf_version = version;
1472 state->vmf_wired = wired;
1473 state->vmf_object = object;
1474 state->vmf_offset = offset;
1475 state->vmf_prot = prot;
1476
1477 kr = vm_fault_page(object, offset, fault_type,
1478 (change_wiring && !wired), !change_wiring,
1479 &prot, &result_page, &top_page,
1480 FALSE, vm_fault_continue);
1481 } else {
1482 kr = vm_fault_page(object, offset, fault_type,
1483 (change_wiring && !wired), !change_wiring,
1484 &prot, &result_page, &top_page,
1485 FALSE, 0);
1486 }
1487 after_vm_fault_page:
1488
1489 /*
1490 * If we didn't succeed, lose the object reference immediately.
1491 */
1492
1493 if (kr != VM_FAULT_SUCCESS)
1494 vm_object_deallocate(object);
1495
1496 /*
1497 * See why we failed, and take corrective action.
1498 */
1499
1500 switch (kr) {
1501 case VM_FAULT_SUCCESS:
1502 break;
1503 case VM_FAULT_RETRY:
1504 goto RetryFault;
1505 case VM_FAULT_INTERRUPTED:
1506 kr = KERN_SUCCESS;
1507 goto done;
1508 case VM_FAULT_MEMORY_SHORTAGE:
1509 if (continuation != (void (*)(kern_return_t)) 0) {
1510 register vm_fault_state_t *state =
1511 (vm_fault_state_t *) current_thread()->ith_other;
1512
1513 /*
1514 * Save variables in case VM_PAGE_WAIT
1515 * discards our kernel stack.
1516 */
1517
1518 state->vmf_map = map;
1519 state->vmf_vaddr = vaddr;
1520 state->vmf_fault_type = fault_type;
1521 state->vmf_change_wiring = change_wiring;
1522 state->vmf_continuation = continuation;
1523 state->vmf_object = VM_OBJECT_NULL;
1524
1525 VM_PAGE_WAIT(vm_fault_continue);
1526 } else
1527 VM_PAGE_WAIT(CONTINUE_NULL);
1528 goto RetryFault;
1529 case VM_FAULT_FICTITIOUS_SHORTAGE:
1530 vm_page_more_fictitious();
1531 goto RetryFault;
1532 case VM_FAULT_MEMORY_ERROR:
1533 kr = KERN_MEMORY_ERROR;
1534 goto done;
1535 }
1536
1537 m = result_page;
1538
1539 assert((change_wiring && !wired) ?
1540 (top_page == VM_PAGE_NULL) :
1541 ((top_page == VM_PAGE_NULL) == (m->object == object)));
1542
1543 /*
1544 * How to clean up the result of vm_fault_page. This
1545 * happens whether the mapping is entered or not.
1546 */
1547
1548 #define UNLOCK_AND_DEALLOCATE \
1549 MACRO_BEGIN \
1550 vm_fault_cleanup(m->object, top_page); \
1551 vm_object_deallocate(object); \
1552 MACRO_END
1553
1554 /*
1555 * What to do with the resulting page from vm_fault_page
1556 * if it doesn't get entered into the physical map:
1557 */
1558
1559 #define RELEASE_PAGE(m) \
1560 MACRO_BEGIN \
1561 PAGE_WAKEUP_DONE(m); \
1562 vm_page_lock_queues(); \
1563 if (!m->active && !m->inactive) \
1564 vm_page_activate(m); \
1565 vm_page_unlock_queues(); \
1566 MACRO_END
1567
1568 /*
1569 * We must verify that the maps have not changed
1570 * since our last lookup.
1571 */
1572
1573 old_copy_object = m->object->copy;
1574
1575 vm_object_unlock(m->object);
1576 while (!vm_map_verify(map, &version)) {
1577 vm_object_t retry_object;
1578 vm_offset_t retry_offset;
1579 vm_prot_t retry_prot;
1580
1581 /*
1582 * To avoid trying to write_lock the map while another
1583 * thread has it read_locked (in vm_map_pageable), we
1584 * do not try for write permission. If the page is
1585 * still writable, we will get write permission. If it
1586 * is not, or has been marked needs_copy, we enter the
1587 * mapping without write permission, and will merely
1588 * take another fault.
1589 */
1590 kr = vm_map_lookup(&map, vaddr,
1591 fault_type & ~VM_PROT_WRITE, &version,
1592 &retry_object, &retry_offset, &retry_prot,
1593 &wired);
1594
1595 if (kr != KERN_SUCCESS) {
1596 vm_object_lock(m->object);
1597 RELEASE_PAGE(m);
1598 UNLOCK_AND_DEALLOCATE;
1599 goto done;
1600 }
1601
1602 vm_object_unlock(retry_object);
1603 vm_object_lock(m->object);
1604
1605 if ((retry_object != object) ||
1606 (retry_offset != offset)) {
1607 RELEASE_PAGE(m);
1608 UNLOCK_AND_DEALLOCATE;
1609 goto RetryFault;
1610 }
1611
1612 /*
1613 * Check whether the protection has changed or the object
1614 * has been copied while we left the map unlocked.
1615 */
1616 prot &= retry_prot;
1617 vm_object_unlock(m->object);
1618 }
1619 vm_object_lock(m->object);
1620
1621 /*
1622 * If the copy object changed while the top-level object
1623 * was unlocked, then we must take away write permission.
1624 */
1625
1626 if (m->object->copy != old_copy_object)
1627 prot &= ~VM_PROT_WRITE;
1628
1629 /*
1630 * If we want to wire down this page, but no longer have
1631 * adequate permissions, we must start all over.
1632 */
1633
1634 if (wired && (prot != fault_type)) {
1635 vm_map_verify_done(map, &version);
1636 RELEASE_PAGE(m);
1637 UNLOCK_AND_DEALLOCATE;
1638 goto RetryFault;
1639 }
1640
1641 /*
1642 * It's critically important that a wired-down page be faulted
1643 * only once in each map for which it is wired.
1644 */
1645
1646 vm_object_unlock(m->object);
1647
1648 /*
1649 * Put this page into the physical map.
1650 * We had to do the unlock above because pmap_enter
1651 * may cause other faults. The page may be on
1652 * the pageout queues. If the pageout daemon comes
1653 * across the page, it will remove it from the queues.
1654 */
1655
1656 PMAP_ENTER(map->pmap, vaddr, m, prot, wired);
1657
1658 /*
1659 * If the page is not wired down and isn't already
1660 * on a pageout queue, then put it where the
1661 * pageout daemon can find it.
1662 */
1663 vm_object_lock(m->object);
1664 vm_page_lock_queues();
1665 if (change_wiring) {
1666 if (wired)
1667 vm_page_wire(m);
1668 else
1669 vm_page_unwire(m);
1670 } else if (software_reference_bits) {
1671 if (!m->active && !m->inactive)
1672 vm_page_activate(m);
1673 m->reference = TRUE;
1674 } else {
1675 vm_page_activate(m);
1676 }
1677 vm_page_unlock_queues();
1678
1679 /*
1680 * Unlock everything, and return
1681 */
1682
1683 vm_map_verify_done(map, &version);
1684 PAGE_WAKEUP_DONE(m);
1685 kr = KERN_SUCCESS;
1686 UNLOCK_AND_DEALLOCATE;
1687
1688 #undef UNLOCK_AND_DEALLOCATE
1689 #undef RELEASE_PAGE
1690
1691 done:
1692 if (continuation != (void (*)(kern_return_t)) 0) {
1693 register vm_fault_state_t *state =
1694 (vm_fault_state_t *) current_thread()->ith_other;
1695
1696 zfree(vm_fault_state_zone, (vm_offset_t) state);
1697 (*continuation)(kr);
1698 /*NOTREACHED*/
1699 }
1700
1701 return kr;
1702 }
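/*
 * Illustrative sketch (not part of the original file): roughly how a
 * machine-dependent trap handler is expected to call vm_fault.  A
 * kernel-mode fault is resolved synchronously (no continuation); a
 * user-mode fault may supply a continuation so that the kernel stack
 * can be discarded while the fault blocks.  The handler shape, names,
 * and the panic on kernel failure are assumptions, not taken from any
 * particular trap handler.
 */
#if 0	/* sketch only */
static void example_handle_page_fault(
	vm_map_t	map,		/* kernel_map or the task's map */
	vm_offset_t	vaddr,		/* faulting address */
	boolean_t	write_fault,
	no_return	(*user_done)(kern_return_t))
					/* non-zero for user faults */
{
	vm_prot_t	fault_type;
	kern_return_t	kr;

	fault_type = write_fault ? (VM_PROT_READ | VM_PROT_WRITE)
				 : VM_PROT_READ;

	if (user_done != 0) {
		/* Never returns: vm_fault calls (*user_done)(kr) instead. */
		(void) vm_fault(map, trunc_page(vaddr), fault_type,
				FALSE,		/* change_wiring */
				FALSE,		/* resume */
				user_done);
		/*NOTREACHED*/
	} else {
		kr = vm_fault(map, trunc_page(vaddr), fault_type,
			      FALSE, FALSE, 0);
		if (kr != KERN_SUCCESS)
			panic("example_handle_page_fault");
	}
}
#endif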
1703
1704 kern_return_t vm_fault_wire_fast(
1705 vm_map_t map,
1706 vm_offset_t va,
1707 vm_map_entry_t entry); /* forward */
1708
1709 /*
1710 * vm_fault_wire:
1711 *
1712 * Wire down a range of virtual addresses in a map.
1713 */
1714 void vm_fault_wire(
1715 vm_map_t map,
1716 vm_map_entry_t entry)
1717 {
1718
1719 register vm_offset_t va;
1720 register pmap_t pmap;
1721 register vm_offset_t end_addr = entry->vme_end;
1722
1723 pmap = vm_map_pmap(map);
1724
1725 /*
1726 * Inform the physical mapping system that the
1727 * range of addresses may not fault, so that
1728 * page tables and such can be locked down as well.
1729 */
1730
1731 pmap_pageable(pmap, entry->vme_start, end_addr, FALSE);
1732
1733 /*
1734 * We simulate a fault to get the page and enter it
1735 * in the physical map.
1736 */
1737
1738 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
1739 if (vm_fault_wire_fast(map, va, entry) != KERN_SUCCESS)
1740 (void) vm_fault(map, va, VM_PROT_NONE, TRUE,
1741 FALSE, 0);
1742 }
1743 }
1744
1745 /*
1746 * vm_fault_unwire:
1747 *
1748 * Unwire a range of virtual addresses in a map.
1749 */
1750 void vm_fault_unwire(
1751 vm_map_t map,
1752 vm_map_entry_t entry)
1753 {
1754 register vm_offset_t va;
1755 register pmap_t pmap;
1756 register vm_offset_t end_addr = entry->vme_end;
1757 vm_object_t object;
1758
1759 pmap = vm_map_pmap(map);
1760
1761 object = (entry->is_sub_map)
1762 ? VM_OBJECT_NULL : entry->object.vm_object;
1763
1764 /*
1765 * Since the pages are wired down, we must be able to
1766 * get their mappings from the physical map system.
1767 */
1768
1769 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
1770 pmap_change_wiring(pmap, va, FALSE);
1771
1772 if (object == VM_OBJECT_NULL) {
1773 vm_map_lock_set_recursive(map);
1774 (void) vm_fault(map, va, VM_PROT_NONE, TRUE,
1775 FALSE, 0);
1776 vm_map_lock_clear_recursive(map);
1777 } else {
1778 vm_prot_t prot;
1779 vm_page_t result_page;
1780 vm_page_t top_page;
1781 vm_fault_return_t result;
1782
1783 do {
1784 prot = VM_PROT_NONE;
1785
1786 vm_object_lock(object);
1787 vm_object_paging_begin(object);
1788 result = vm_fault_page(object,
1789 entry->offset +
1790 (va - entry->vme_start),
1791 VM_PROT_NONE, TRUE,
1792 FALSE, &prot,
1793 &result_page,
1794 &top_page,
1795 FALSE, 0);
1796 } while (result == VM_FAULT_RETRY);
1797
1798 if (result != VM_FAULT_SUCCESS)
1799 panic("vm_fault_unwire: failure");
1800
1801 vm_page_lock_queues();
1802 vm_page_unwire(result_page);
1803 vm_page_unlock_queues();
1804 PAGE_WAKEUP_DONE(result_page);
1805
1806 vm_fault_cleanup(result_page->object, top_page);
1807 }
1808 }
1809
1810 /*
1811 * Inform the physical mapping system that the range
1812 * of addresses may fault, so that page tables and
1813 * such may be unwired themselves.
1814 */
1815
1816 pmap_pageable(pmap, entry->vme_start, end_addr, TRUE);
1817
1818 }
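/*
 * Illustrative sketch (not part of the original file): vm_fault_wire
 * and vm_fault_unwire are driven one map entry at a time by a routine
 * such as vm_map_pageable (mentioned in the comments in vm_fault
 * above), with the map locked by the caller.  The loop below only
 * shows the shape of that relationship; the entry clipping and lock
 * juggling the real routine must do are omitted, and the wrapper name
 * is hypothetical.
 */
#if 0	/* sketch only */
static void example_set_range_wiring(
	vm_map_t	map,
	vm_map_entry_t	first_entry,	/* first entry covering the range */
	vm_offset_t	end,		/* end of the range */
	boolean_t	wire)
{
	vm_map_entry_t	entry;

	for (entry = first_entry;
	     (entry != vm_map_to_entry(map)) && (entry->vme_start < end);
	     entry = entry->vme_next) {
		if (wire)
			vm_fault_wire(map, entry);	/* faults each page in */
		else
			vm_fault_unwire(map, entry);	/* drops each wiring */
	}
}
#endif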
1819
1820 /*
1821 * vm_fault_wire_fast:
1822 *
1823 * Handle common case of a wire down page fault at the given address.
1824 * If successful, the page is inserted into the associated physical map.
1825 * The map entry is passed in to avoid the overhead of a map lookup.
1826 *
1827 * NOTE: the given address should be truncated to the
1828 * proper page address.
1829 *
1830 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
1831 * a standard error specifying why the fault is fatal is returned.
1832 *
1833 * The map in question must be referenced, and remains so.
1834 * Caller has a read lock on the map.
1835 *
1836 * This is a stripped version of vm_fault() for wiring pages. Anything
1837 * other than the common case will return KERN_FAILURE, and the caller
1838 * is expected to call vm_fault().
1839 */
1840 kern_return_t vm_fault_wire_fast(
1841 vm_map_t map,
1842 vm_offset_t va,
1843 vm_map_entry_t entry)
1844 {
1845 vm_object_t object;
1846 vm_offset_t offset;
1847 register vm_page_t m;
1848 vm_prot_t prot;
1849
1850 vm_stat.faults++; /* needs lock XXX */
1851 /*
1852 * Recovery actions
1853 */
1854
1855 #undef RELEASE_PAGE
1856 #define RELEASE_PAGE(m) { \
1857 PAGE_WAKEUP_DONE(m); \
1858 vm_page_lock_queues(); \
1859 vm_page_unwire(m); \
1860 vm_page_unlock_queues(); \
1861 }
1862
1863
1864 #undef UNLOCK_THINGS
1865 #define UNLOCK_THINGS { \
1866 object->paging_in_progress--; \
1867 vm_object_unlock(object); \
1868 }
1869
1870 #undef UNLOCK_AND_DEALLOCATE
1871 #define UNLOCK_AND_DEALLOCATE { \
1872 UNLOCK_THINGS; \
1873 vm_object_deallocate(object); \
1874 }
1875 /*
1876 * Give up and have caller do things the hard way.
1877 */
1878
1879 #define GIVE_UP { \
1880 UNLOCK_AND_DEALLOCATE; \
1881 return KERN_FAILURE; \
1882 }
1883
1884
1885 /*
1886 * If this entry is not directly to a vm_object, bail out.
1887 */
1888 if (entry->is_sub_map)
1889 return KERN_FAILURE;
1890
1891 /*
1892 * Find the backing store object and offset into it.
1893 */
1894
1895 object = entry->object.vm_object;
1896 offset = (va - entry->vme_start) + entry->offset;
1897 prot = entry->protection;
1898
1899 /*
1900 * Make a reference to this object to prevent its
1901 * disposal while we are messing with it.
1902 */
1903
1904 vm_object_lock(object);
1905 assert(object->ref_count > 0);
1906 object->ref_count++;
1907 object->paging_in_progress++;
1908
1909 /*
1910 * INVARIANTS (through entire routine):
1911 *
1912 * 1) At all times, we must either have the object
1913 * lock or a busy page in some object to prevent
1914 * some other thread from trying to bring in
1915 * the same page.
1916 *
1917 * 2) Once we have a busy page, we must remove it from
1918 * the pageout queues, so that the pageout daemon
1919 * will not grab it away.
1920 *
1921 */
1922
1923 /*
1924 * Look for page in top-level object. If it's not there or
1925 * there's something going on, give up.
1926 */
1927 m = vm_page_lookup(object, offset);
1928 if ((m == VM_PAGE_NULL) || (m->error) ||
1929 (m->busy) || (m->absent) || (prot & m->page_lock)) {
1930 GIVE_UP;
1931 }
1932
1933 /*
1934 * Wire the page down now. All bail outs beyond this
1935 * point must unwire the page.
1936 */
1937
1938 vm_page_lock_queues();
1939 vm_page_wire(m);
1940 vm_page_unlock_queues();
1941
1942 /*
1943 * Mark page busy for other threads.
1944 */
1945 assert(!m->busy);
1946 m->busy = TRUE;
1947 assert(!m->absent);
1948
1949 /*
1950 * Give up if the page is being written and there's a copy object
1951 */
1952 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
1953 RELEASE_PAGE(m);
1954 GIVE_UP;
1955 }
1956
1957 /*
1958 * Put this page into the physical map.
1959 * We have to unlock the object because pmap_enter
1960 * may cause other faults.
1961 */
1962 vm_object_unlock(object);
1963
1964 PMAP_ENTER(map->pmap, va, m, prot, TRUE);
1965
1966 /*
1967 * Must relock object so that paging_in_progress can be cleared.
1968 */
1969 vm_object_lock(object);
1970
1971 /*
1972 * Unlock everything, and return
1973 */
1974
1975 PAGE_WAKEUP_DONE(m);
1976 UNLOCK_AND_DEALLOCATE;
1977
1978 return KERN_SUCCESS;
1979
1980 }
1981
1982 /*
1983 * Routine: vm_fault_copy_cleanup
1984 * Purpose:
1985 * Release a page used by vm_fault_copy.
1986 */
1987
1988 void vm_fault_copy_cleanup(
1989 vm_page_t page,
1990 vm_page_t top_page)
1991 {
1992 vm_object_t object = page->object;
1993
1994 vm_object_lock(object);
1995 PAGE_WAKEUP_DONE(page);
1996 vm_page_lock_queues();
1997 if (!page->active && !page->inactive)
1998 vm_page_activate(page);
1999 vm_page_unlock_queues();
2000 vm_fault_cleanup(object, top_page);
2001 }
2002
2003 /*
2004 * Routine: vm_fault_copy
2005 *
2006 * Purpose:
2007 * Copy pages from one virtual memory object to another --
2008 * neither the source nor destination pages need be resident.
2009 *
2010 * Before actually copying a page, the version associated with
2011 * the destination address map will be verified.
2012 *
2013 * In/out conditions:
2014 * The caller must hold a reference, but not a lock, to
2015 * each of the source and destination objects and to the
2016 * destination map.
2017 *
2018 * Results:
2019 * Returns KERN_SUCCESS if no errors were encountered in
2020 * reading or writing the data. Returns MACH_SEND_INTERRUPTED if
2021 * the operation was interrupted (only possible if the
2022 * "interruptible" argument is asserted). Other return values
2023 * indicate a permanent error in copying the data.
2024 *
2025 * The actual amount of data copied will be returned in the
2026 * "copy_size" argument. In the event that the destination map
2027 * verification failed, this amount may be less than the amount
2028 * requested.
2029 */
2030 kern_return_t vm_fault_copy(
2031 vm_object_t src_object,
2032 vm_offset_t src_offset,
2033 vm_size_t *src_size, /* INOUT */
2034 vm_object_t dst_object,
2035 vm_offset_t dst_offset,
2036 vm_map_t dst_map,
2037 vm_map_version_t *dst_version,
2038 boolean_t interruptible)
2039 {
2040 vm_page_t result_page;
2041 vm_prot_t prot;
2042
2043 vm_page_t src_page;
2044 vm_page_t src_top_page;
2045
2046 vm_page_t dst_page;
2047 vm_page_t dst_top_page;
2048
2049 vm_size_t amount_done;
2050 vm_object_t old_copy_object;
2051
2052 #define RETURN(x) \
2053 MACRO_BEGIN \
2054 *src_size = amount_done; \
2055 MACRO_RETURN(x); \
2056 MACRO_END
2057
2058 amount_done = 0;
2059 do { /* while (amount_done != *src_size) */
2060
2061 RetrySourceFault: ;
2062
2063 if (src_object == VM_OBJECT_NULL) {
2064 /*
2065 * No source object. We will just
2066 * zero-fill the page in dst_object.
2067 */
2068
2069 src_page = VM_PAGE_NULL;
2070 } else {
2071 prot = VM_PROT_READ;
2072
2073 vm_object_lock(src_object);
2074 vm_object_paging_begin(src_object);
2075
2076 switch (vm_fault_page(src_object, src_offset,
2077 VM_PROT_READ, FALSE, interruptible,
2078 &prot, &result_page, &src_top_page,
2079 FALSE, 0)) {
2080
2081 case VM_FAULT_SUCCESS:
2082 break;
2083 case VM_FAULT_RETRY:
2084 goto RetrySourceFault;
2085 case VM_FAULT_INTERRUPTED:
2086 RETURN(MACH_SEND_INTERRUPTED);
2087 case VM_FAULT_MEMORY_SHORTAGE:
2088 VM_PAGE_WAIT(CONTINUE_NULL);
2089 goto RetrySourceFault;
2090 case VM_FAULT_FICTITIOUS_SHORTAGE:
2091 vm_page_more_fictitious();
2092 goto RetrySourceFault;
2093 case VM_FAULT_MEMORY_ERROR:
2094 return KERN_MEMORY_ERROR;
2095 }
2096
2097 src_page = result_page;
2098
2099 assert((src_top_page == VM_PAGE_NULL) ==
2100 (src_page->object == src_object));
2101
2102 assert ((prot & VM_PROT_READ) != VM_PROT_NONE);
2103
2104 vm_object_unlock(src_page->object);
2105 }
2106
2107 RetryDestinationFault: ;
2108
2109 prot = VM_PROT_WRITE;
2110
2111 vm_object_lock(dst_object);
2112 vm_object_paging_begin(dst_object);
2113
2114 switch (vm_fault_page(dst_object, dst_offset, VM_PROT_WRITE,
2115 FALSE, FALSE /* interruptible */,
2116 &prot, &result_page, &dst_top_page,
2117 FALSE, 0)) {
2118
2119 case VM_FAULT_SUCCESS:
2120 break;
2121 case VM_FAULT_RETRY:
2122 goto RetryDestinationFault;
2123 case VM_FAULT_INTERRUPTED:
2124 if (src_page != VM_PAGE_NULL)
2125 vm_fault_copy_cleanup(src_page,
2126 src_top_page);
2127 RETURN(MACH_SEND_INTERRUPTED);
2128 case VM_FAULT_MEMORY_SHORTAGE:
2129 VM_PAGE_WAIT(CONTINUE_NULL);
2130 goto RetryDestinationFault;
2131 case VM_FAULT_FICTITIOUS_SHORTAGE:
2132 vm_page_more_fictitious();
2133 goto RetryDestinationFault;
2134 case VM_FAULT_MEMORY_ERROR:
2135 if (src_page != VM_PAGE_NULL)
2136 vm_fault_copy_cleanup(src_page,
2137 src_top_page);
2138 return KERN_MEMORY_ERROR;
2139 }
2140 assert ((prot & VM_PROT_WRITE) != VM_PROT_NONE);
2141
2142 dst_page = result_page;
2143
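		/*
		 * Descriptive note (added comment): the destination
		 * page's copy object is remembered here so that, after
		 * the object lock is dropped for the map verification
		 * below, any change of copy object (for example, a
		 * copy-on-write copy appearing) can be detected; in
		 * that case the copy is abandoned and the amount done
		 * so far is returned.
		 */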
2144 old_copy_object = dst_page->object->copy;
2145
2146 vm_object_unlock(dst_page->object);
2147
2148 if (!vm_map_verify(dst_map, dst_version)) {
2149
2150 BailOut: ;
2151
2152 if (src_page != VM_PAGE_NULL)
2153 vm_fault_copy_cleanup(src_page, src_top_page);
2154 vm_fault_copy_cleanup(dst_page, dst_top_page);
2155 break;
2156 }
2157
2158
2159 vm_object_lock(dst_page->object);
2160 if (dst_page->object->copy != old_copy_object) {
2161 vm_object_unlock(dst_page->object);
2162 vm_map_verify_done(dst_map, dst_version);
2163 goto BailOut;
2164 }
2165 vm_object_unlock(dst_page->object);
2166
2167 /*
2168 * Copy the page, and note that it is dirty
2169 * immediately.
2170 */
2171
2172 if (src_page == VM_PAGE_NULL)
2173 vm_page_zero_fill(dst_page);
2174 else
2175 vm_page_copy(src_page, dst_page);
2176 dst_page->dirty = TRUE;
2177
2178 /*
2179 * Unlock everything, and return
2180 */
2181
2182 vm_map_verify_done(dst_map, dst_version);
2183
2184 if (src_page != VM_PAGE_NULL)
2185 vm_fault_copy_cleanup(src_page, src_top_page);
2186 vm_fault_copy_cleanup(dst_page, dst_top_page);
2187
2188 amount_done += PAGE_SIZE;
2189 src_offset += PAGE_SIZE;
2190 dst_offset += PAGE_SIZE;
2191
2192 } while (amount_done != *src_size);
2193
2194 RETURN(KERN_SUCCESS);
2195 #undef RETURN
2196
2197 /*NOTREACHED*/
2198 }
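
/*
 * Illustrative caller sketch (not part of the original source).
 * It shows one way a caller -- already holding references to both
 * objects and to dst_map, and holding a map version from an earlier
 * lookup -- might drive vm_fault_copy over a multi-page region.
 * The helper name, its argument layout, and the revalidation
 * strategy are assumptions made for the example only; "size" is
 * assumed to be a multiple of PAGE_SIZE, as vm_fault_copy copies
 * in page units.
 */
#if 0
kern_return_t
example_copy_region(
	vm_object_t		src_object,
	vm_offset_t		src_offset,
	vm_object_t		dst_object,
	vm_offset_t		dst_offset,
	vm_map_t		dst_map,
	vm_map_version_t	*dst_version,	/* from a prior lookup */
	vm_size_t		size)		/* multiple of PAGE_SIZE */
{
	vm_size_t	chunk;
	kern_return_t	kr;

	while (size > 0) {
		chunk = size;
		kr = vm_fault_copy(src_object, src_offset,
				   &chunk,	/* INOUT: amount actually copied */
				   dst_object, dst_offset,
				   dst_map, dst_version,
				   FALSE);	/* not interruptible */
		if (kr != KERN_SUCCESS)
			return kr;
		if (chunk == 0) {
			/*
			 * Map verification failed before any progress
			 * was made; a real caller would re-look-up
			 * dst_map for a fresh version before retrying.
			 */
			return KERN_FAILURE;
		}
		src_offset += chunk;
		dst_offset += chunk;
		size -= chunk;
	}
	return KERN_SUCCESS;
}
#endif	/* 0 -- illustrative sketch only */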
2199
2200
2201
2202
2203
2204 #ifdef notdef
2205
2206 /*
2207 * Routine: vm_fault_page_overwrite
2208 *
2209 * Description:
2210 * A form of vm_fault_page that assumes that the
2211 * resulting page will be overwritten in its entirety,
2212 * making it unnecessary to obtain the correct *contents*
2213 * of the page.
2214 *
2215 * Implementation:
2216 * XXX Untested. Also unused. Eventually, this technology
2217 * could be used in vm_fault_copy() to advantage.
2218 */
2219 vm_fault_return_t vm_fault_page_overwrite(
2220 register
2221 vm_object_t dst_object,
2222 vm_offset_t dst_offset,
2223 vm_page_t *result_page) /* OUT */
2224 {
2225 register
2226 vm_page_t dst_page;
2227
2228 #define interruptible FALSE /* XXX */
2229
2230 while (TRUE) {
2231 /*
2232 * Look for a page at this offset
2233 */
2234
2235 while ((dst_page = vm_page_lookup(dst_object, dst_offset))
2236 == VM_PAGE_NULL) {
2237 /*
2238 * No page, no problem... just allocate one.
2239 */
2240
2241 dst_page = vm_page_alloc(dst_object, dst_offset);
2242 if (dst_page == VM_PAGE_NULL) {
2243 vm_object_unlock(dst_object);
2244 VM_PAGE_WAIT((void (*)()) 0);
2245 vm_object_lock(dst_object);
2246 continue;
2247 }
2248
2249 /*
2250 * Pretend that the memory manager
2251 * write-protected the page.
2252 *
2253 * Note that we will be asking for write
2254 * permission without asking for the data
2255 * first.
2256 */
2257
2258 dst_page->overwriting = TRUE;
2259 dst_page->page_lock = VM_PROT_WRITE;
2260 dst_page->absent = TRUE;
2261 dst_object->absent_count++;
2262
2263 break;
2264
2265 /*
2266 * When we bail out, we might have to throw
2267 * away the page created here.
2268 */
2269
2270 #define DISCARD_PAGE \
2271 MACRO_BEGIN \
2272 vm_object_lock(dst_object); \
2273 dst_page = vm_page_lookup(dst_object, dst_offset); \
2274 if ((dst_page != VM_PAGE_NULL) && dst_page->overwriting) \
2275 VM_PAGE_FREE(dst_page); \
2276 vm_object_unlock(dst_object); \
2277 MACRO_END
2278 }
2279
2280 /*
2281 * If the page is write-protected...
2282 */
2283
2284 if (dst_page->page_lock & VM_PROT_WRITE) {
2285 /*
2286 * ... and an unlock request hasn't been sent
2287 */
2288
2289 if ( ! (dst_page->unlock_request & VM_PROT_WRITE)) {
2290 vm_prot_t u;
2291 kern_return_t rc;
2292
2293 /*
2294 * ... then send one now.
2295 */
2296
2297 if (!dst_object->pager_ready) {
2298 vm_object_assert_wait(dst_object,
2299 VM_OBJECT_EVENT_PAGER_READY,
2300 interruptible);
2301 vm_object_unlock(dst_object);
2302 thread_block(CONTINUE_NULL);
2303 if (current_thread()->wait_result !=
2304 THREAD_AWAKENED) {
2305 DISCARD_PAGE;
2306 return VM_FAULT_INTERRUPTED;
2307 }
2308 continue;
2309 }
2310
2311 u = dst_page->unlock_request |= VM_PROT_WRITE;
2312 vm_object_unlock(dst_object);
2313
2314 if ((rc = memory_object_data_unlock(
2315 dst_object->pager,
2316 dst_object->pager_request,
2317 dst_offset + dst_object->paging_offset,
2318 PAGE_SIZE,
2319 u)) != KERN_SUCCESS) {
2320 printf("vm_object_overwrite: memory_object_data_unlock failed\n");
2321 DISCARD_PAGE;
2322 return (rc == MACH_SEND_INTERRUPTED) ?
2323 VM_FAULT_INTERRUPTED :
2324 VM_FAULT_MEMORY_ERROR;
2325 }
2326 vm_object_lock(dst_object);
2327 continue;
2328 }
2329
2330 /* ... fall through to wait below */
2331 } else {
2332 /*
2333 * If the page isn't being used for other
2334 * purposes, then we're done.
2335 */
2336 if ( ! (dst_page->busy || dst_page->absent || dst_page->error) )
2337 break;
2338 }
2339
2340 PAGE_ASSERT_WAIT(dst_page, interruptible);
2341 vm_object_unlock(dst_object);
2342 thread_block(CONTINUE_NULL);
2343 if (current_thread()->wait_result != THREAD_AWAKENED) {
2344 DISCARD_PAGE;
2345 return VM_FAULT_INTERRUPTED;
2346 }
2347 }
2348
2349 *result_page = dst_page;
2350 return VM_FAULT_SUCCESS;
2351
2352 #undef interruptible
2353 #undef DISCARD_PAGE
2354 }
2355
2356 #endif /* notdef */