FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_fault.c


    1 /*-
    2  * Copyright (c) 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * Copyright (c) 1994 John S. Dyson
    5  * All rights reserved.
    6  * Copyright (c) 1994 David Greenman
    7  * All rights reserved.
    8  *
    9  *
   10  * This code is derived from software contributed to Berkeley by
   11  * The Mach Operating System project at Carnegie-Mellon University.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. All advertising materials mentioning features or use of this software
   22  *    must display the following acknowledgement:
   23  *      This product includes software developed by the University of
   24  *      California, Berkeley and its contributors.
   25  * 4. Neither the name of the University nor the names of its contributors
   26  *    may be used to endorse or promote products derived from this software
   27  *    without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39  * SUCH DAMAGE.
   40  *
   41  *      from: @(#)vm_fault.c    8.4 (Berkeley) 1/12/94
   42  *
   43  *
   44  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   45  * All rights reserved.
   46  *
   47  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   48  *
   49  * Permission to use, copy, modify and distribute this software and
   50  * its documentation is hereby granted, provided that both the copyright
   51  * notice and this permission notice appear in all copies of the
   52  * software, derivative works or modified versions, and any portions
   53  * thereof, and that both notices appear in supporting documentation.
   54  *
   55  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   56  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   57  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   58  *
   59  * Carnegie Mellon requests users of this software to return to
   60  *
   61  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   62  *  School of Computer Science
   63  *  Carnegie Mellon University
   64  *  Pittsburgh PA 15213-3890
   65  *
   66  * any improvements or extensions that they make and grant Carnegie the
   67  * rights to redistribute these changes.
   68  */
   69 
   70 /*
   71  *      Page fault handling module.
   72  */
   73 
   74 #include <sys/cdefs.h>
   75 __FBSDID("$FreeBSD: releng/9.0/sys/vm/vm_fault.c 226319 2011-10-12 20:08:25Z kib $");
   76 
   77 #include "opt_vm.h"
   78 
   79 #include <sys/param.h>
   80 #include <sys/systm.h>
   81 #include <sys/kernel.h>
   82 #include <sys/lock.h>
   83 #include <sys/mutex.h>
   84 #include <sys/proc.h>
   85 #include <sys/resourcevar.h>
   86 #include <sys/sysctl.h>
   87 #include <sys/vmmeter.h>
   88 #include <sys/vnode.h>
   89 
   90 #include <vm/vm.h>
   91 #include <vm/vm_param.h>
   92 #include <vm/pmap.h>
   93 #include <vm/vm_map.h>
   94 #include <vm/vm_object.h>
   95 #include <vm/vm_page.h>
   96 #include <vm/vm_pageout.h>
   97 #include <vm/vm_kern.h>
   98 #include <vm/vm_pager.h>
   99 #include <vm/vm_extern.h>
  100 
  101 #include <sys/mount.h>  /* XXX Temporary for VFS_LOCK_GIANT() */
  102 
  103 #define PFBAK 4
  104 #define PFFOR 4
  105 #define PAGEORDER_SIZE (PFBAK+PFFOR)
  106 
  107 static int prefault_pageorder[] = {
  108         -1 * PAGE_SIZE, 1 * PAGE_SIZE,
  109         -2 * PAGE_SIZE, 2 * PAGE_SIZE,
  110         -3 * PAGE_SIZE, 3 * PAGE_SIZE,
  111         -4 * PAGE_SIZE, 4 * PAGE_SIZE
  112 };
  113 
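/*
 * vm_fault_prefault() below walks prefault_pageorder[] in order, so
 * candidate addresses are tried nearest-first, alternating behind and
 * ahead of the faulting address (-1, +1, -2, +2, ... pages), out to
 * PFBAK pages back and PFFOR pages forward.  With 4 KB pages, for
 * example, that is a window of up to 16 KB on either side of the fault.
 */
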
  114 static int vm_fault_additional_pages(vm_page_t, int, int, vm_page_t *, int *);
  115 static void vm_fault_prefault(pmap_t, vm_offset_t, vm_map_entry_t);
  116 
  117 #define VM_FAULT_READ_AHEAD 8
  118 #define VM_FAULT_READ_BEHIND 7
  119 #define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1)
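
/*
 * The read cluster is thus VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1
 * = 7 + 8 + 1 = 16 pages around the faulting page; with 4 KB pages, for
 * example, a single hard fault can pull in up to 64 KB from the pager.
 */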
  120 
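/*
 * Per-fault state carried across the retry loop in vm_fault_hold(): the
 * page and object/pindex currently being examined, the page and
 * object/pindex at the top of the shadow chain, the state of the map
 * lookup, and any vnode (plus Giant, tracked by vfslocked) held on
 * behalf of a vnode-backed object.
 */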
  121 struct faultstate {
  122         vm_page_t m;
  123         vm_object_t object;
  124         vm_pindex_t pindex;
  125         vm_page_t first_m;
  126         vm_object_t     first_object;
  127         vm_pindex_t first_pindex;
  128         vm_map_t map;
  129         vm_map_entry_t entry;
  130         int lookup_still_valid;
  131         struct vnode *vp;
  132         int vfslocked;
  133 };
  134 
  135 static inline void
  136 release_page(struct faultstate *fs)
  137 {
  138 
  139         vm_page_wakeup(fs->m);
  140         vm_page_lock(fs->m);
  141         vm_page_deactivate(fs->m);
  142         vm_page_unlock(fs->m);
  143         fs->m = NULL;
  144 }
  145 
  146 static inline void
  147 unlock_map(struct faultstate *fs)
  148 {
  149 
  150         if (fs->lookup_still_valid) {
  151                 vm_map_lookup_done(fs->map, fs->entry);
  152                 fs->lookup_still_valid = FALSE;
  153         }
  154 }
  155 
  156 static void
  157 unlock_and_deallocate(struct faultstate *fs)
  158 {
  159 
  160         vm_object_pip_wakeup(fs->object);
  161         VM_OBJECT_UNLOCK(fs->object);
  162         if (fs->object != fs->first_object) {
  163                 VM_OBJECT_LOCK(fs->first_object);
  164                 vm_page_lock(fs->first_m);
  165                 vm_page_free(fs->first_m);
  166                 vm_page_unlock(fs->first_m);
  167                 vm_object_pip_wakeup(fs->first_object);
  168                 VM_OBJECT_UNLOCK(fs->first_object);
  169                 fs->first_m = NULL;
  170         }
  171         vm_object_deallocate(fs->first_object);
  172         unlock_map(fs); 
  173         if (fs->vp != NULL) { 
  174                 vput(fs->vp);
  175                 fs->vp = NULL;
  176         }
  177         VFS_UNLOCK_GIANT(fs->vfslocked);
  178         fs->vfslocked = 0;
  179 }
  180 
  181 /*
  182  * TRYPAGER - used by vm_fault to calculate whether the pager for the
  183  *            current object *might* contain the page.
  184  *
  185  *            Default objects are zero-fill; there is no real pager.
  186  */
  187 #define TRYPAGER        (fs.object->type != OBJT_DEFAULT && \
  188                         ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 || wired))
  189 
  190 /*
  191  *      vm_fault:
  192  *
  193  *      Handle a page fault occurring at the given address,
  194  *      requiring the given permissions, in the map specified.
  195  *      If successful, the page is inserted into the
  196  *      associated physical map.
  197  *
  198  *      NOTE: the given address should be truncated to the
  199  *      proper page address.
  200  *
  201  *      KERN_SUCCESS is returned if the page fault is handled; otherwise,
  202  *      a standard error specifying why the fault is fatal is returned.
  203  *
  204  *      The map in question must be referenced, and remains so.
  205  *      Caller may hold no locks.
  206  */
  207 int
  208 vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
  209     int fault_flags)
  210 {
  211 
  212         if ((curthread->td_pflags & TDP_NOFAULTING) != 0)
  213                 return (KERN_PROTECTION_FAILURE);
  214         return (vm_fault_hold(map, vaddr, fault_type, fault_flags, NULL));
  215 }
  216 
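
/*
 * Illustrative sketch only (not part of the original file): roughly how a
 * machine-dependent trap handler might hand a user-mode page fault to
 * vm_fault().  The function and parameter names here are hypothetical,
 * and real trap code does more work (pcb_onfault handling, converting
 * the Mach error into a signal, and so on).
 */
#ifdef VM_FAULT_USAGE_EXAMPLE
static int
example_user_fault(struct thread *td, vm_offset_t eva, boolean_t is_write)
{
        vm_map_t map;
        vm_prot_t ftype;

        /* User faults are resolved against the faulting process's map. */
        map = &td->td_proc->p_vmspace->vm_map;
        ftype = is_write ? VM_PROT_WRITE : VM_PROT_READ;

        /* vm_fault() expects a page-aligned address and no locks held. */
        return (vm_fault(map, trunc_page(eva), ftype, VM_FAULT_NORMAL));
}
#endif
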
  217 int
  218 vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
  219     int fault_flags, vm_page_t *m_hold)
  220 {
  221         vm_prot_t prot;
  222         int is_first_object_locked, result;
  223         boolean_t growstack, wired;
  224         int map_generation;
  225         vm_object_t next_object;
  226         vm_page_t marray[VM_FAULT_READ], mt, mt_prev;
  227         int hardfault;
  228         int faultcount, ahead, behind, alloc_req;
  229         struct faultstate fs;
  230         struct vnode *vp;
  231         int locked, error;
  232 
  233         hardfault = 0;
  234         growstack = TRUE;
  235         PCPU_INC(cnt.v_vm_faults);
  236         fs.vp = NULL;
  237         fs.vfslocked = 0;
  238         faultcount = behind = 0;
  239 
  240 RetryFault:;
  241 
  242         /*
  243          * Find the backing store object and offset into it to begin the
  244          * search.
  245          */
  246         fs.map = map;
  247         result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry,
  248             &fs.first_object, &fs.first_pindex, &prot, &wired);
  249         if (result != KERN_SUCCESS) {
  250                 if (growstack && result == KERN_INVALID_ADDRESS &&
  251                     map != kernel_map) {
  252                         result = vm_map_growstack(curproc, vaddr);
  253                         if (result != KERN_SUCCESS)
  254                                 return (KERN_FAILURE);
  255                         growstack = FALSE;
  256                         goto RetryFault;
  257                 }
  258                 return (result);
  259         }
  260 
  261         map_generation = fs.map->timestamp;
  262 
  263         if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
  264                 panic("vm_fault: fault on nofault entry, addr: %lx",
  265                     (u_long)vaddr);
  266         }
  267 
  268         /*
  269          * Make a reference to this object to prevent its disposal while we
  270          * are messing with it.  Once we have the reference, the map is free
  271          * to be diddled.  Since objects reference their shadows (and copies),
  272          * they will stay around as well.
  273          *
  274          * Bump the paging-in-progress count to prevent size changes (e.g. 
  275          * truncation operations) during I/O.  This must be done after
  276          * obtaining the vnode lock in order to avoid possible deadlocks.
  277          */
  278         VM_OBJECT_LOCK(fs.first_object);
  279         vm_object_reference_locked(fs.first_object);
  280         vm_object_pip_add(fs.first_object, 1);
  281 
  282         fs.lookup_still_valid = TRUE;
  283 
  284         if (wired)
  285                 fault_type = prot | (fault_type & VM_PROT_COPY);
  286 
  287         fs.first_m = NULL;
  288 
  289         /*
  290          * Search for the page at object/offset.
  291          */
  292         fs.object = fs.first_object;
  293         fs.pindex = fs.first_pindex;
  294         while (TRUE) {
  295                 /*
  296                  * If the object is dead, we stop here
  297                  */
  298                 if (fs.object->flags & OBJ_DEAD) {
  299                         unlock_and_deallocate(&fs);
  300                         return (KERN_PROTECTION_FAILURE);
  301                 }
  302 
  303                 /*
  304                  * See if page is resident
  305                  */
  306                 fs.m = vm_page_lookup(fs.object, fs.pindex);
  307                 if (fs.m != NULL) {
  308                         /* 
  309                          * check for page-based copy on write.
  310                          * We check fs.object == fs.first_object so
  311                          * as to ensure the legacy COW mechanism is
  312                          * used when the page in question is part of
  313                          * a shadow object.  Otherwise, vm_page_cowfault()
  314                          * removes the page from the backing object, 
  315                          * which is not what we want.
  316                          */
  317                         vm_page_lock(fs.m);
  318                         if ((fs.m->cow) && 
  319                             (fault_type & VM_PROT_WRITE) &&
  320                             (fs.object == fs.first_object)) {
  321                                 vm_page_cowfault(fs.m);
  322                                 unlock_and_deallocate(&fs);
  323                                 goto RetryFault;
  324                         }
  325 
  326                         /*
  327                          * Wait/Retry if the page is busy.  We have to do this
  328                          * if the page is busy via either VPO_BUSY or 
  329                          * vm_page_t->busy because the vm_pager may be using
  330                          * vm_page_t->busy for pageouts ( and even pageins if
  331                          * it is the vnode pager ), and we could end up trying
  332                          * to pagein and pageout the same page simultaneously.
  333                          *
  334                          * We can theoretically allow the busy case on a read
  335                          * fault if the page is marked valid, but since such
  336                          * pages are typically already pmap'd, putting that
   337                          * special case in might be more effort than it is
  338                          * worth.  We cannot under any circumstances mess
  339                          * around with a vm_page_t->busy page except, perhaps,
  340                          * to pmap it.
  341                          */
  342                         if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) {
  343                                 /*
  344                                  * Reference the page before unlocking and
  345                                  * sleeping so that the page daemon is less
  346                                  * likely to reclaim it. 
  347                                  */
  348                                 vm_page_aflag_set(fs.m, PGA_REFERENCED);
  349                                 vm_page_unlock(fs.m);
  350                                 if (fs.object != fs.first_object) {
  351                                         if (!VM_OBJECT_TRYLOCK(
  352                                             fs.first_object)) {
  353                                                 VM_OBJECT_UNLOCK(fs.object);
  354                                                 VM_OBJECT_LOCK(fs.first_object);
  355                                                 VM_OBJECT_LOCK(fs.object);
  356                                         }
  357                                         vm_page_lock(fs.first_m);
  358                                         vm_page_free(fs.first_m);
  359                                         vm_page_unlock(fs.first_m);
  360                                         vm_object_pip_wakeup(fs.first_object);
  361                                         VM_OBJECT_UNLOCK(fs.first_object);
  362                                         fs.first_m = NULL;
  363                                 }
  364                                 unlock_map(&fs);
  365                                 if (fs.m == vm_page_lookup(fs.object,
  366                                     fs.pindex)) {
  367                                         vm_page_sleep_if_busy(fs.m, TRUE,
  368                                             "vmpfw");
  369                                 }
  370                                 vm_object_pip_wakeup(fs.object);
  371                                 VM_OBJECT_UNLOCK(fs.object);
  372                                 PCPU_INC(cnt.v_intrans);
  373                                 vm_object_deallocate(fs.first_object);
  374                                 goto RetryFault;
  375                         }
  376                         vm_pageq_remove(fs.m);
  377                         vm_page_unlock(fs.m);
  378 
  379                         /*
  380                          * Mark page busy for other processes, and the 
  381                          * pagedaemon.  If it still isn't completely valid
  382                          * (readable), jump to readrest, else break-out ( we
  383                          * found the page ).
  384                          */
  385                         vm_page_busy(fs.m);
  386                         if (fs.m->valid != VM_PAGE_BITS_ALL)
  387                                 goto readrest;
  388                         break;
  389                 }
  390 
  391                 /*
   392                  * Page is not resident.  If this is the search termination
   393                  * point or the pager might contain the page, allocate a new page.
  394                  */
  395                 if (TRYPAGER || fs.object == fs.first_object) {
  396                         if (fs.pindex >= fs.object->size) {
  397                                 unlock_and_deallocate(&fs);
  398                                 return (KERN_PROTECTION_FAILURE);
  399                         }
  400 
  401                         /*
  402                          * Allocate a new page for this object/offset pair.
  403                          *
   404                          * An unlocked read of the p_flag is harmless.  At
   405                          * worst, the P_KILLED flag might not be observed
   406                          * there, and the allocation can fail, causing a
   407                          * restart and a new read of the p_flag.
  408                          */
  409                         fs.m = NULL;
  410                         if (!vm_page_count_severe() || P_KILLED(curproc)) {
  411 #if VM_NRESERVLEVEL > 0
  412                                 if ((fs.object->flags & OBJ_COLORED) == 0) {
  413                                         fs.object->flags |= OBJ_COLORED;
  414                                         fs.object->pg_color = atop(vaddr) -
  415                                             fs.pindex;
  416                                 }
  417 #endif
  418                                 alloc_req = P_KILLED(curproc) ?
  419                                     VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL;
  420                                 if (fs.object->type != OBJT_VNODE &&
  421                                     fs.object->backing_object == NULL)
  422                                         alloc_req |= VM_ALLOC_ZERO;
  423                                 fs.m = vm_page_alloc(fs.object, fs.pindex,
  424                                     alloc_req);
  425                         }
  426                         if (fs.m == NULL) {
  427                                 unlock_and_deallocate(&fs);
  428                                 VM_WAITPFAULT;
  429                                 goto RetryFault;
  430                         } else if (fs.m->valid == VM_PAGE_BITS_ALL)
  431                                 break;
  432                 }
  433 
  434 readrest:
  435                 /*
   436                  * We have either found an existing page or allocated a new
   437                  * one.  The page may therefore be invalid or only partially
   438                  * valid.
  439                  *
  440                  * Attempt to fault-in the page if there is a chance that the
  441                  * pager has it, and potentially fault in additional pages
  442                  * at the same time.
  443                  */
  444                 if (TRYPAGER) {
  445                         int rv;
  446                         int reqpage = 0;
  447                         u_char behavior = vm_map_entry_behavior(fs.entry);
  448 
  449                         if (behavior == MAP_ENTRY_BEHAV_RANDOM ||
  450                             P_KILLED(curproc)) {
  451                                 ahead = 0;
  452                                 behind = 0;
  453                         } else {
  454                                 behind = (vaddr - fs.entry->start) >> PAGE_SHIFT;
  455                                 if (behind > VM_FAULT_READ_BEHIND)
  456                                         behind = VM_FAULT_READ_BEHIND;
  457 
  458                                 ahead = ((fs.entry->end - vaddr) >> PAGE_SHIFT) - 1;
  459                                 if (ahead > VM_FAULT_READ_AHEAD)
  460                                         ahead = VM_FAULT_READ_AHEAD;
  461                         }
  462                         is_first_object_locked = FALSE;
  463                         if ((behavior == MAP_ENTRY_BEHAV_SEQUENTIAL ||
  464                              (behavior != MAP_ENTRY_BEHAV_RANDOM &&
  465                               fs.pindex >= fs.entry->lastr &&
  466                               fs.pindex < fs.entry->lastr + VM_FAULT_READ)) &&
  467                             (fs.first_object == fs.object ||
  468                              (is_first_object_locked = VM_OBJECT_TRYLOCK(fs.first_object))) &&
  469                             fs.first_object->type != OBJT_DEVICE &&
  470                             fs.first_object->type != OBJT_PHYS &&
  471                             fs.first_object->type != OBJT_SG) {
  472                                 vm_pindex_t firstpindex;
  473 
  474                                 if (fs.first_pindex < 2 * VM_FAULT_READ)
  475                                         firstpindex = 0;
  476                                 else
  477                                         firstpindex = fs.first_pindex - 2 * VM_FAULT_READ;
  478                                 mt = fs.first_object != fs.object ?
  479                                     fs.first_m : fs.m;
  480                                 KASSERT(mt != NULL, ("vm_fault: missing mt"));
  481                                 KASSERT((mt->oflags & VPO_BUSY) != 0,
  482                                     ("vm_fault: mt %p not busy", mt));
  483                                 mt_prev = vm_page_prev(mt);
  484 
  485                                 /*
  486                                  * note: partially valid pages cannot be 
  487                                  * included in the lookahead - NFS piecemeal
  488                                  * writes will barf on it badly.
  489                                  */
  490                                 while ((mt = mt_prev) != NULL &&
  491                                     mt->pindex >= firstpindex &&
  492                                     mt->valid == VM_PAGE_BITS_ALL) {
  493                                         mt_prev = vm_page_prev(mt);
  494                                         if (mt->busy ||
  495                                             (mt->oflags & VPO_BUSY))
  496                                                 continue;
  497                                         vm_page_lock(mt);
  498                                         if (mt->hold_count ||
  499                                             mt->wire_count) {
  500                                                 vm_page_unlock(mt);
  501                                                 continue;
  502                                         }
  503                                         pmap_remove_all(mt);
  504                                         if (mt->dirty != 0)
  505                                                 vm_page_deactivate(mt);
  506                                         else
  507                                                 vm_page_cache(mt);
  508                                         vm_page_unlock(mt);
  509                                 }
  510                                 ahead += behind;
  511                                 behind = 0;
  512                         }
  513                         if (is_first_object_locked)
  514                                 VM_OBJECT_UNLOCK(fs.first_object);
  515 
  516                         /*
  517                          * Call the pager to retrieve the data, if any, after
  518                          * releasing the lock on the map.  We hold a ref on
  519                          * fs.object and the pages are VPO_BUSY'd.
  520                          */
  521                         unlock_map(&fs);
  522 
  523 vnode_lock:
  524                         if (fs.object->type == OBJT_VNODE) {
  525                                 vp = fs.object->handle;
  526                                 if (vp == fs.vp)
  527                                         goto vnode_locked;
  528                                 else if (fs.vp != NULL) {
  529                                         vput(fs.vp);
  530                                         fs.vp = NULL;
  531                                 }
  532                                 locked = VOP_ISLOCKED(vp);
  533 
  534                                 if (VFS_NEEDSGIANT(vp->v_mount) && !fs.vfslocked) {
  535                                         fs.vfslocked = 1;
  536                                         if (!mtx_trylock(&Giant)) {
  537                                                 VM_OBJECT_UNLOCK(fs.object);
  538                                                 mtx_lock(&Giant);
  539                                                 VM_OBJECT_LOCK(fs.object);
  540                                                 goto vnode_lock;
  541                                         }
  542                                 }
  543                                 if (locked != LK_EXCLUSIVE)
  544                                         locked = LK_SHARED;
  545                                 /* Do not sleep for vnode lock while fs.m is busy */
  546                                 error = vget(vp, locked | LK_CANRECURSE |
  547                                     LK_NOWAIT, curthread);
  548                                 if (error != 0) {
  549                                         int vfslocked;
  550 
  551                                         vfslocked = fs.vfslocked;
  552                                         fs.vfslocked = 0; /* Keep Giant */
  553                                         vhold(vp);
  554                                         release_page(&fs);
  555                                         unlock_and_deallocate(&fs);
  556                                         error = vget(vp, locked | LK_RETRY |
  557                                             LK_CANRECURSE, curthread);
  558                                         vdrop(vp);
  559                                         fs.vp = vp;
  560                                         fs.vfslocked = vfslocked;
  561                                         KASSERT(error == 0,
  562                                             ("vm_fault: vget failed"));
  563                                         goto RetryFault;
  564                                 }
  565                                 fs.vp = vp;
  566                         }
  567 vnode_locked:
  568                         KASSERT(fs.vp == NULL || !fs.map->system_map,
  569                             ("vm_fault: vnode-backed object mapped by system map"));
  570 
  571                         /*
   572                          * Now we find out if any other pages should be paged
   573                          * in at this time.  This routine checks to see if the
   574                          * pages surrounding this fault reside in the same
   575                          * object as the page for this fault.  If they do,
   576                          * they are faulted into that object as well.  The
   577                          * returned array "marray" contains the resulting
   578                          * vm_page_t structs, one of which is the
   579                          * vm_page_t passed to the routine.  The reqpage
   580                          * return value is the index into marray of the
   581                          * vm_page_t passed to the routine.
  582                          *
  583                          * fs.m plus the additional pages are VPO_BUSY'd.
  584                          */
  585                         faultcount = vm_fault_additional_pages(
  586                             fs.m, behind, ahead, marray, &reqpage);
  587 
  588                         rv = faultcount ?
  589                             vm_pager_get_pages(fs.object, marray, faultcount,
  590                                 reqpage) : VM_PAGER_FAIL;
  591 
  592                         if (rv == VM_PAGER_OK) {
  593                                 /*
  594                                  * Found the page. Leave it busy while we play
  595                                  * with it.
  596                                  */
  597 
  598                                 /*
  599                                  * Relookup in case pager changed page. Pager
  600                                  * is responsible for disposition of old page
  601                                  * if moved.
  602                                  */
  603                                 fs.m = vm_page_lookup(fs.object, fs.pindex);
  604                                 if (!fs.m) {
  605                                         unlock_and_deallocate(&fs);
  606                                         goto RetryFault;
  607                                 }
  608 
  609                                 hardfault++;
  610                                 break; /* break to PAGE HAS BEEN FOUND */
  611                         }
  612                         /*
  613                          * Remove the bogus page (which does not exist at this
  614                          * object/offset); before doing so, we must get back
  615                          * our object lock to preserve our invariant.
  616                          *
  617                          * Also wake up any other process that may want to bring
  618                          * in this page.
  619                          *
  620                          * If this is the top-level object, we must leave the
  621                          * busy page to prevent another process from rushing
  622                          * past us, and inserting the page in that object at
  623                          * the same time that we are.
  624                          */
  625                         if (rv == VM_PAGER_ERROR)
  626                                 printf("vm_fault: pager read error, pid %d (%s)\n",
  627                                     curproc->p_pid, curproc->p_comm);
  628                         /*
  629                          * Data outside the range of the pager or an I/O error
  630                          */
  631                         /*
  632                          * XXX - the check for kernel_map is a kludge to work
  633                          * around having the machine panic on a kernel space
  634                          * fault w/ I/O error.
  635                          */
  636                         if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) ||
  637                                 (rv == VM_PAGER_BAD)) {
  638                                 vm_page_lock(fs.m);
  639                                 vm_page_free(fs.m);
  640                                 vm_page_unlock(fs.m);
  641                                 fs.m = NULL;
  642                                 unlock_and_deallocate(&fs);
  643                                 return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE);
  644                         }
  645                         if (fs.object != fs.first_object) {
  646                                 vm_page_lock(fs.m);
  647                                 vm_page_free(fs.m);
  648                                 vm_page_unlock(fs.m);
  649                                 fs.m = NULL;
  650                                 /*
  651                                  * XXX - we cannot just fall out at this
  652                                  * point, m has been freed and is invalid!
  653                                  */
  654                         }
  655                 }
  656 
  657                 /*
   658                  * We get here if the object has a default pager (or we are
   659                  * unwiring) or the pager doesn't have the page.
  660                  */
  661                 if (fs.object == fs.first_object)
  662                         fs.first_m = fs.m;
  663 
  664                 /*
  665                  * Move on to the next object.  Lock the next object before
  666                  * unlocking the current one.
  667                  */
  668                 fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset);
  669                 next_object = fs.object->backing_object;
  670                 if (next_object == NULL) {
  671                         /*
  672                          * If there's no object left, fill the page in the top
  673                          * object with zeros.
  674                          */
  675                         if (fs.object != fs.first_object) {
  676                                 vm_object_pip_wakeup(fs.object);
  677                                 VM_OBJECT_UNLOCK(fs.object);
  678 
  679                                 fs.object = fs.first_object;
  680                                 fs.pindex = fs.first_pindex;
  681                                 fs.m = fs.first_m;
  682                                 VM_OBJECT_LOCK(fs.object);
  683                         }
  684                         fs.first_m = NULL;
  685 
  686                         /*
  687                          * Zero the page if necessary and mark it valid.
  688                          */
  689                         if ((fs.m->flags & PG_ZERO) == 0) {
  690                                 pmap_zero_page(fs.m);
  691                         } else {
  692                                 PCPU_INC(cnt.v_ozfod);
  693                         }
  694                         PCPU_INC(cnt.v_zfod);
  695                         fs.m->valid = VM_PAGE_BITS_ALL;
  696                         break;  /* break to PAGE HAS BEEN FOUND */
  697                 } else {
  698                         KASSERT(fs.object != next_object,
  699                             ("object loop %p", next_object));
  700                         VM_OBJECT_LOCK(next_object);
  701                         vm_object_pip_add(next_object, 1);
  702                         if (fs.object != fs.first_object)
  703                                 vm_object_pip_wakeup(fs.object);
  704                         VM_OBJECT_UNLOCK(fs.object);
  705                         fs.object = next_object;
  706                 }
  707         }
  708 
  709         KASSERT((fs.m->oflags & VPO_BUSY) != 0,
  710             ("vm_fault: not busy after main loop"));
  711 
  712         /*
  713          * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
  714          * is held.]
  715          */
  716 
  717         /*
  718          * If the page is being written, but isn't already owned by the
  719          * top-level object, we have to copy it into a new page owned by the
  720          * top-level object.
  721          */
  722         if (fs.object != fs.first_object) {
  723                 /*
  724                  * We only really need to copy if we want to write it.
  725                  */
  726                 if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
  727                         /*
  728                          * This allows pages to be virtually copied from a 
  729                          * backing_object into the first_object, where the 
  730                          * backing object has no other refs to it, and cannot
  731                          * gain any more refs.  Instead of a bcopy, we just 
  732                          * move the page from the backing object to the 
  733                          * first object.  Note that we must mark the page 
  734                          * dirty in the first object so that it will go out 
  735                          * to swap when needed.
  736                          */
  737                         is_first_object_locked = FALSE;
  738                         if (
  739                                 /*
  740                                  * Only one shadow object
  741                                  */
  742                                 (fs.object->shadow_count == 1) &&
  743                                 /*
  744                                  * No COW refs, except us
  745                                  */
  746                                 (fs.object->ref_count == 1) &&
  747                                 /*
  748                                  * No one else can look this object up
  749                                  */
  750                                 (fs.object->handle == NULL) &&
  751                                 /*
  752                                  * No other ways to look the object up
  753                                  */
  754                                 ((fs.object->type == OBJT_DEFAULT) ||
  755                                  (fs.object->type == OBJT_SWAP)) &&
  756                             (is_first_object_locked = VM_OBJECT_TRYLOCK(fs.first_object)) &&
  757                                 /*
  758                                  * We don't chase down the shadow chain
  759                                  */
  760                             fs.object == fs.first_object->backing_object) {
  761                                 /*
  762                                  * get rid of the unnecessary page
  763                                  */
  764                                 vm_page_lock(fs.first_m);
  765                                 vm_page_free(fs.first_m);
  766                                 vm_page_unlock(fs.first_m);
  767                                 /*
  768                                  * grab the page and put it into the 
   769                                  * process's object.  The page is 
  770                                  * automatically made dirty.
  771                                  */
  772                                 vm_page_lock(fs.m);
  773                                 vm_page_rename(fs.m, fs.first_object, fs.first_pindex);
  774                                 vm_page_unlock(fs.m);
  775                                 vm_page_busy(fs.m);
  776                                 fs.first_m = fs.m;
  777                                 fs.m = NULL;
  778                                 PCPU_INC(cnt.v_cow_optim);
  779                         } else {
  780                                 /*
   781                                  * Oh, well, let's copy it.
  782                                  */
  783                                 pmap_copy_page(fs.m, fs.first_m);
  784                                 fs.first_m->valid = VM_PAGE_BITS_ALL;
  785                                 if (wired && (fault_flags &
  786                                     VM_FAULT_CHANGE_WIRING) == 0) {
  787                                         vm_page_lock(fs.first_m);
  788                                         vm_page_wire(fs.first_m);
  789                                         vm_page_unlock(fs.first_m);
  790                                         
  791                                         vm_page_lock(fs.m);
  792                                         vm_page_unwire(fs.m, FALSE);
  793                                         vm_page_unlock(fs.m);
  794                                 }
  795                                 /*
  796                                  * We no longer need the old page or object.
  797                                  */
  798                                 release_page(&fs);
  799                         }
  800                         /*
  801                          * fs.object != fs.first_object due to above 
  802                          * conditional
  803                          */
  804                         vm_object_pip_wakeup(fs.object);
  805                         VM_OBJECT_UNLOCK(fs.object);
  806                         /*
  807                          * Only use the new page below...
  808                          */
  809                         fs.object = fs.first_object;
  810                         fs.pindex = fs.first_pindex;
  811                         fs.m = fs.first_m;
  812                         if (!is_first_object_locked)
  813                                 VM_OBJECT_LOCK(fs.object);
  814                         PCPU_INC(cnt.v_cow_faults);
  815                 } else {
  816                         prot &= ~VM_PROT_WRITE;
  817                 }
  818         }
  819 
  820         /*
  821          * We must verify that the maps have not changed since our last
  822          * lookup.
  823          */
  824         if (!fs.lookup_still_valid) {
  825                 vm_object_t retry_object;
  826                 vm_pindex_t retry_pindex;
  827                 vm_prot_t retry_prot;
  828 
  829                 if (!vm_map_trylock_read(fs.map)) {
  830                         release_page(&fs);
  831                         unlock_and_deallocate(&fs);
  832                         goto RetryFault;
  833                 }
  834                 fs.lookup_still_valid = TRUE;
  835                 if (fs.map->timestamp != map_generation) {
  836                         result = vm_map_lookup_locked(&fs.map, vaddr, fault_type,
  837                             &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired);
  838 
  839                         /*
  840                          * If we don't need the page any longer, put it on the inactive
  841                          * list (the easiest thing to do here).  If no one needs it,
  842                          * pageout will grab it eventually.
  843                          */
  844                         if (result != KERN_SUCCESS) {
  845                                 release_page(&fs);
  846                                 unlock_and_deallocate(&fs);
  847 
  848                                 /*
  849                                  * If retry of map lookup would have blocked then
  850                                  * retry fault from start.
  851                                  */
  852                                 if (result == KERN_FAILURE)
  853                                         goto RetryFault;
  854                                 return (result);
  855                         }
  856                         if ((retry_object != fs.first_object) ||
  857                             (retry_pindex != fs.first_pindex)) {
  858                                 release_page(&fs);
  859                                 unlock_and_deallocate(&fs);
  860                                 goto RetryFault;
  861                         }
  862 
  863                         /*
  864                          * Check whether the protection has changed or the object has
  865                          * been copied while we left the map unlocked. Changing from
  866                          * read to write permission is OK - we leave the page
  867                          * write-protected, and catch the write fault. Changing from
  868                          * write to read permission means that we can't mark the page
  869                          * write-enabled after all.
  870                          */
  871                         prot &= retry_prot;
  872                 }
  873         }
  874         /*
  875          * If the page was filled by a pager, update the map entry's
  876          * last read offset.  Since the pager does not return the
  877          * actual set of pages that it read, this update is based on
  878          * the requested set.  Typically, the requested and actual
  879          * sets are the same.
  880          *
  881          * XXX The following assignment modifies the map
  882          * without holding a write lock on it.
  883          */
  884         if (hardfault)
  885                 fs.entry->lastr = fs.pindex + faultcount - behind;
  886 
  887         if ((prot & VM_PROT_WRITE) != 0 ||
  888             (fault_flags & VM_FAULT_DIRTY) != 0) {
  889                 vm_object_set_writeable_dirty(fs.object);
  890 
  891                 /*
   892                  * If this is a NOSYNC mmap, we do not set VPO_NOSYNC when
   893                  * the page is already dirty, so that data written with the
   894                  * expectation of being synced does not end up unsynced.
   895                  * Likewise, if this entry does not request NOSYNC, make
   896                  * sure the page isn't marked NOSYNC.  Applications sharing
   897                  * data should use the same flags to avoid ping ponging.
  898                  */
  899                 if (fs.entry->eflags & MAP_ENTRY_NOSYNC) {
  900                         if (fs.m->dirty == 0)
  901                                 fs.m->oflags |= VPO_NOSYNC;
  902                 } else {
  903                         fs.m->oflags &= ~VPO_NOSYNC;
  904                 }
  905 
  906                 /*
  907                  * If the fault is a write, we know that this page is being
  908                  * written NOW so dirty it explicitly to save on 
  909                  * pmap_is_modified() calls later.
  910                  *
  911                  * Also tell the backing pager, if any, that it should remove
  912                  * any swap backing since the page is now dirty.
  913                  */
  914                 if (((fault_type & VM_PROT_WRITE) != 0 &&
  915                     (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
  916                     (fault_flags & VM_FAULT_DIRTY) != 0) {
  917                         vm_page_dirty(fs.m);
  918                         vm_pager_page_unswapped(fs.m);
  919                 }
  920         }
  921 
  922         /*
  923          * Page had better still be busy
  924          */
  925         KASSERT(fs.m->oflags & VPO_BUSY,
  926                 ("vm_fault: page %p not busy!", fs.m));
  927         /*
  928          * Page must be completely valid or it is not fit to
  929          * map into user space.  vm_pager_get_pages() ensures this.
  930          */
  931         KASSERT(fs.m->valid == VM_PAGE_BITS_ALL,
  932             ("vm_fault: page %p partially invalid", fs.m));
  933         VM_OBJECT_UNLOCK(fs.object);
  934 
  935         /*
  936          * Put this page into the physical map.  We had to do the unlock above
  937          * because pmap_enter() may sleep.  We don't put the page
  938          * back on the active queue until later so that the pageout daemon
  939          * won't find it (yet).
  940          */
  941         pmap_enter(fs.map->pmap, vaddr, fault_type, fs.m, prot, wired);
  942         if ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 && wired == 0)
  943                 vm_fault_prefault(fs.map->pmap, vaddr, fs.entry);
  944         VM_OBJECT_LOCK(fs.object);
  945         vm_page_lock(fs.m);
  946 
  947         /*
  948          * If the page is not wired down, then put it where the pageout daemon
  949          * can find it.
  950          */
  951         if (fault_flags & VM_FAULT_CHANGE_WIRING) {
  952                 if (wired)
  953                         vm_page_wire(fs.m);
  954                 else
  955                         vm_page_unwire(fs.m, 1);
  956         } else
  957                 vm_page_activate(fs.m);
  958         if (m_hold != NULL) {
  959                 *m_hold = fs.m;
  960                 vm_page_hold(fs.m);
  961         }
  962         vm_page_unlock(fs.m);
  963         vm_page_wakeup(fs.m);
  964 
  965         /*
  966          * Unlock everything, and return
  967          */
  968         unlock_and_deallocate(&fs);
  969         if (hardfault)
  970                 curthread->td_ru.ru_majflt++;
  971         else
  972                 curthread->td_ru.ru_minflt++;
  973 
  974         return (KERN_SUCCESS);
  975 }
  976 
  977 /*
  978  * vm_fault_prefault provides a quick way of clustering
   979  * pagefaults into a process's address space.  It is a "cousin"
  980  * of vm_map_pmap_enter, except it runs at page fault time instead
  981  * of mmap time.
  982  */
  983 static void
  984 vm_fault_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
  985 {
  986         int i;
  987         vm_offset_t addr, starta;
  988         vm_pindex_t pindex;
  989         vm_page_t m;
  990         vm_object_t object;
  991 
  992         if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))
  993                 return;
  994 
  995         object = entry->object.vm_object;
  996 
  997         starta = addra - PFBAK * PAGE_SIZE;
  998         if (starta < entry->start) {
  999                 starta = entry->start;
 1000         } else if (starta > addra) {
 1001                 starta = 0;
 1002         }
 1003 
 1004         for (i = 0; i < PAGEORDER_SIZE; i++) {
 1005                 vm_object_t backing_object, lobject;
 1006 
 1007                 addr = addra + prefault_pageorder[i];
 1008                 if (addr > addra + (PFFOR * PAGE_SIZE))
 1009                         addr = 0;
 1010 
 1011                 if (addr < starta || addr >= entry->end)
 1012                         continue;
 1013 
 1014                 if (!pmap_is_prefaultable(pmap, addr))
 1015                         continue;
 1016 
 1017                 pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
 1018                 lobject = object;
 1019                 VM_OBJECT_LOCK(lobject);
 1020                 while ((m = vm_page_lookup(lobject, pindex)) == NULL &&
 1021                     lobject->type == OBJT_DEFAULT &&
 1022                     (backing_object = lobject->backing_object) != NULL) {
 1023                         KASSERT((lobject->backing_object_offset & PAGE_MASK) ==
 1024                             0, ("vm_fault_prefault: unaligned object offset"));
 1025                         pindex += lobject->backing_object_offset >> PAGE_SHIFT;
 1026                         VM_OBJECT_LOCK(backing_object);
 1027                         VM_OBJECT_UNLOCK(lobject);
 1028                         lobject = backing_object;
 1029                 }
 1030                 /*
  1031                  * Give up when a page is not in memory.
 1032                  */
 1033                 if (m == NULL) {
 1034                         VM_OBJECT_UNLOCK(lobject);
 1035                         break;
 1036                 }
 1037                 if (m->valid == VM_PAGE_BITS_ALL &&
 1038                     (m->flags & PG_FICTITIOUS) == 0)
 1039                         pmap_enter_quick(pmap, addr, m, entry->protection);
 1040                 VM_OBJECT_UNLOCK(lobject);
 1041         }
 1042 }
 1043 
 1044 /*
 1045  * Hold each of the physical pages that are mapped by the specified range of
 1046  * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid
 1047  * and allow the specified types of access, "prot".  If all of the implied
 1048  * pages are successfully held, then the number of held pages is returned
 1049  * together with pointers to those pages in the array "ma".  However, if any
 1050  * of the pages cannot be held, -1 is returned.
 1051  */
 1052 int
 1053 vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
 1054     vm_prot_t prot, vm_page_t *ma, int max_count)
 1055 {
 1056         vm_offset_t end, va;
 1057         vm_page_t *mp;
 1058         int count;
 1059         boolean_t pmap_failed;
 1060 
 1061         if (len == 0)
 1062                 return (0);
 1063         end = round_page(addr + len);   
 1064         addr = trunc_page(addr);
 1065 
 1066         /*
 1067          * Check for illegal addresses.
 1068          */
 1069         if (addr < vm_map_min(map) || addr > end || end > vm_map_max(map))
 1070                 return (-1);
 1071 
 1072         count = howmany(end - addr, PAGE_SIZE);
 1073         if (count > max_count)
 1074                 panic("vm_fault_quick_hold_pages: count > max_count");
 1075 
 1076         /*
 1077          * Most likely, the physical pages are resident in the pmap, so it is
 1078          * faster to try pmap_extract_and_hold() first.
 1079          */
 1080         pmap_failed = FALSE;
 1081         for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) {
 1082                 *mp = pmap_extract_and_hold(map->pmap, va, prot);
 1083                 if (*mp == NULL)
 1084                         pmap_failed = TRUE;
 1085                 else if ((prot & VM_PROT_WRITE) != 0 &&
 1086                     (*mp)->dirty != VM_PAGE_BITS_ALL) {
 1087                         /*
 1088                          * Explicitly dirty the physical page.  Otherwise, the
 1089                          * caller's changes may go unnoticed because they are
 1090                          * performed through an unmanaged mapping or by a DMA
 1091                          * operation.
 1092                          *
 1093                          * The object lock is not held here.
 1094                          * See vm_page_clear_dirty_mask().
 1095                          */
 1096                         vm_page_dirty(*mp);
 1097                 }
 1098         }
 1099         if (pmap_failed) {
 1100                 /*
 1101                  * One or more pages could not be held by the pmap.  Either no
 1102                  * page was mapped at the specified virtual address or that
 1103                  * mapping had insufficient permissions.  Attempt to fault in
 1104                  * and hold these pages.
 1105                  */
 1106                 for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
 1107                         if (*mp == NULL && vm_fault_hold(map, va, prot,
 1108                             VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
 1109                                 goto error;
 1110         }
 1111         return (count);
 1112 error:  
 1113         for (mp = ma; mp < ma + count; mp++)
 1114                 if (*mp != NULL) {
 1115                         vm_page_lock(*mp);
 1116                         vm_page_unhold(*mp);
 1117                         vm_page_unlock(*mp);
 1118                 }
 1119         return (-1);
 1120 }
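
/*
 * Illustrative sketch only (not part of the original file): a hypothetical
 * caller that uses vm_fault_quick_hold_pages() to fault in and hold the
 * pages backing a small user buffer before operating on them, and then
 * drops the holds.  The function name and the "access the pages" step are
 * placeholders; the caller must size the page array so that the range
 * never spans more than "max_count" pages.
 */
#ifdef VM_FAULT_USAGE_EXAMPLE
static int
example_hold_user_buffer(vm_map_t map, vm_offset_t uaddr, vm_size_t len)
{
        vm_page_t ma[16];
        int count, i;

        count = vm_fault_quick_hold_pages(map, uaddr, len, VM_PROT_WRITE,
            ma, 16);
        if (count == -1)
                return (KERN_FAILURE);

        /* ... access the held pages (e.g. set up DMA) here ... */

        /* Release the holds once the pages are no longer needed. */
        for (i = 0; i < count; i++) {
                vm_page_lock(ma[i]);
                vm_page_unhold(ma[i]);
                vm_page_unlock(ma[i]);
        }
        return (KERN_SUCCESS);
}
#endif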
 1121 
 1122 /*
 1123  *      vm_fault_wire:
 1124  *
 1125  *      Wire down a range of virtual addresses in a map.
 1126  */
 1127 int
 1128 vm_fault_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1129     boolean_t fictitious)
 1130 {
 1131         vm_offset_t va;
 1132         int rv;
 1133 
 1134         /*
 1135          * We simulate a fault to get the page and enter it in the physical
 1136          * map.  For user wiring, we only ask for read access on currently
 1137          * read-only sections.
 1138          */
 1139         for (va = start; va < end; va += PAGE_SIZE) {
 1140                 rv = vm_fault(map, va, VM_PROT_NONE, VM_FAULT_CHANGE_WIRING);
 1141                 if (rv) {
 1142                         if (va != start)
 1143                                 vm_fault_unwire(map, start, va, fictitious);
 1144                         return (rv);
 1145                 }
 1146         }
 1147         return (KERN_SUCCESS);
 1148 }
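
/*
 * Editor's illustrative sketch -- not part of vm_fault.c.  A caller such as
 * vm_map_wire() invokes vm_fault_wire() once per map entry; device-backed
 * entries are flagged as fictitious so the unwire path skips the vm_page
 * bookkeeping.  The helper name is hypothetical, and the OBJT_DEVICE/OBJT_SG
 * test is shown only as an assumption about how 'fictitious' is derived.
 */
static int
example_wire_entry(vm_map_t map, vm_map_entry_t entry)
{
        vm_object_t object;
        boolean_t fictitious;

        object = entry->object.vm_object;
        fictitious = object != NULL &&
            (object->type == OBJT_DEVICE || object->type == OBJT_SG);
        if (vm_fault_wire(map, entry->start, entry->end, fictitious) !=
            KERN_SUCCESS)
                return (EFAULT);
        return (0);
}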
 1149 
 1150 /*
 1151  *      vm_fault_unwire:
 1152  *
 1153  *      Unwire a range of virtual addresses in a map.
 1154  */
 1155 void
 1156 vm_fault_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1157     boolean_t fictitious)
 1158 {
 1159         vm_paddr_t pa;
 1160         vm_offset_t va;
 1161         vm_page_t m;
 1162         pmap_t pmap;
 1163 
 1164         pmap = vm_map_pmap(map);
 1165 
 1166         /*
 1167          * Since the pages are wired down, we must be able to get their
 1168          * mappings from the physical map system.
 1169          */
 1170         for (va = start; va < end; va += PAGE_SIZE) {
 1171                 pa = pmap_extract(pmap, va);
 1172                 if (pa != 0) {
 1173                         pmap_change_wiring(pmap, va, FALSE);
 1174                         if (!fictitious) {
 1175                                 m = PHYS_TO_VM_PAGE(pa);
 1176                                 vm_page_lock(m);
 1177                                 vm_page_unwire(m, TRUE);
 1178                                 vm_page_unlock(m);
 1179                         }
 1180                 }
 1181         }
 1182 }
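
/*
 * Editor's illustrative sketch -- not part of vm_fault.c.  vm_fault_wire()
 * and vm_fault_unwire() are used as a bracket: every range that was wired
 * successfully must later be unwired with the same 'fictitious' value.
 * The helper name is hypothetical.
 */
static void
example_wire_bracket(vm_map_t map, vm_offset_t start, vm_offset_t end)
{

        if (vm_fault_wire(map, start, end, FALSE) != KERN_SUCCESS)
                return;
        /* ... the range [start, end) is now resident and wired ... */
        vm_fault_unwire(map, start, end, FALSE);
}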
 1183 
 1184 /*
 1185  *      Routine:
 1186  *              vm_fault_copy_entry
 1187  *      Function:
 1188  *              Create a new shadow object backing dst_entry with a private
 1189  *              copy of all underlying pages.  When src_entry is equal to
 1190  *              dst_entry, the function implements COW for a wired-down map
 1191  *              entry.  Otherwise, it forks the wired entry into dst_map.
 1192  *
 1193  *      In/out conditions:
 1194  *              The source and destination maps must be locked for write.
 1195  *              The source map entry must be wired down (or be a sharing map
 1196  *              entry corresponding to a main map entry that is wired down).
 1197  */
 1198 void
 1199 vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map,
 1200     vm_map_entry_t dst_entry, vm_map_entry_t src_entry,
 1201     vm_ooffset_t *fork_charge)
 1202 {
 1203         vm_object_t backing_object, dst_object, object, src_object;
 1204         vm_pindex_t dst_pindex, pindex, src_pindex;
 1205         vm_prot_t access, prot;
 1206         vm_offset_t vaddr;
 1207         vm_page_t dst_m;
 1208         vm_page_t src_m;
 1209         boolean_t src_readonly, upgrade;
 1210 
 1211 #ifdef  lint
 1212         src_map++;
 1213 #endif  /* lint */
 1214 
 1215         upgrade = src_entry == dst_entry;
 1216 
 1217         src_object = src_entry->object.vm_object;
 1218         src_pindex = OFF_TO_IDX(src_entry->offset);
 1219         src_readonly = (src_entry->protection & VM_PROT_WRITE) == 0;
 1220 
 1221         /*
 1222          * Create the top-level object for the destination entry. (Doesn't
 1223          * actually shadow anything - we copy the pages directly.)
 1224          */
 1225         dst_object = vm_object_allocate(OBJT_DEFAULT,
 1226             OFF_TO_IDX(dst_entry->end - dst_entry->start));
 1227 #if VM_NRESERVLEVEL > 0
 1228         dst_object->flags |= OBJ_COLORED;
 1229         dst_object->pg_color = atop(dst_entry->start);
 1230 #endif
 1231 
 1232         VM_OBJECT_LOCK(dst_object);
 1233         KASSERT(upgrade || dst_entry->object.vm_object == NULL,
 1234             ("vm_fault_copy_entry: vm_object not NULL"));
 1235         dst_entry->object.vm_object = dst_object;
 1236         dst_entry->offset = 0;
 1237         dst_object->charge = dst_entry->end - dst_entry->start;
 1238         if (fork_charge != NULL) {
 1239                 KASSERT(dst_entry->cred == NULL,
 1240                     ("vm_fault_copy_entry: leaked swp charge"));
 1241                 dst_object->cred = curthread->td_ucred;
 1242                 crhold(dst_object->cred);
 1243                 *fork_charge += dst_object->charge;
 1244         } else {
 1245                 dst_object->cred = dst_entry->cred;
 1246                 dst_entry->cred = NULL;
 1247         }
 1248         access = prot = dst_entry->protection;
 1249         /*
 1250          * If not an upgrade, then enter the mappings in the pmap as
 1251          * read and/or execute accesses.  Otherwise, enter them as
 1252          * write accesses.
 1253          *
 1254          * A writeable large page mapping is only created if all of
 1255          * the constituent small page mappings are modified. Marking
 1256          * PTEs as modified on inception allows promotion to happen
 1257          * without taking a potentially large number of soft faults.
 1258          */
 1259         if (!upgrade)
 1260                 access &= ~VM_PROT_WRITE;
 1261 
 1262         /*
 1263          * Loop through all of the pages in the entry's range, copying each
 1264          * one from the source object (it should be there) to the destination
 1265          * object.
 1266          */
 1267         for (vaddr = dst_entry->start, dst_pindex = 0;
 1268             vaddr < dst_entry->end;
 1269             vaddr += PAGE_SIZE, dst_pindex++) {
 1270 
 1271                 /*
 1272                  * Allocate a page in the destination object.
 1273                  */
 1274                 do {
 1275                         dst_m = vm_page_alloc(dst_object, dst_pindex,
 1276                             VM_ALLOC_NORMAL);
 1277                         if (dst_m == NULL) {
 1278                                 VM_OBJECT_UNLOCK(dst_object);
 1279                                 VM_WAIT;
 1280                                 VM_OBJECT_LOCK(dst_object);
 1281                         }
 1282                 } while (dst_m == NULL);
 1283 
 1284                 /*
 1285                  * Find the page in the source object, and copy it in.
 1286                  * (Because the source is wired down, the page will be in
 1287                  * memory.)
 1288                  */
 1289                 VM_OBJECT_LOCK(src_object);
 1290                 object = src_object;
 1291                 pindex = src_pindex + dst_pindex;
 1292                 while ((src_m = vm_page_lookup(object, pindex)) == NULL &&
 1293                     src_readonly &&
 1294                     (backing_object = object->backing_object) != NULL) {
 1295                         /*
 1296                          * Allow fallback to backing objects if we are reading.
 1297                          */
 1298                         VM_OBJECT_LOCK(backing_object);
 1299                         pindex += OFF_TO_IDX(object->backing_object_offset);
 1300                         VM_OBJECT_UNLOCK(object);
 1301                         object = backing_object;
 1302                 }
 1303                 if (src_m == NULL)
 1304                         panic("vm_fault_copy_entry: page missing");
 1305                 pmap_copy_page(src_m, dst_m);
 1306                 VM_OBJECT_UNLOCK(object);
 1307                 dst_m->valid = VM_PAGE_BITS_ALL;
 1308                 VM_OBJECT_UNLOCK(dst_object);
 1309 
 1310                 /*
 1311                  * Enter it in the pmap. If a wired, copy-on-write
 1312                  * mapping is being replaced by a write-enabled
 1313                  * mapping, then wire that new mapping.
 1314                  */
 1315                 pmap_enter(dst_map->pmap, vaddr, access, dst_m, prot, upgrade);
 1316 
 1317                 /*
 1318                  * Mark it no longer busy, and put it on the active list.
 1319                  */
 1320                 VM_OBJECT_LOCK(dst_object);
 1321                 
 1322                 if (upgrade) {
 1323                         vm_page_lock(src_m);
 1324                         vm_page_unwire(src_m, 0);
 1325                         vm_page_unlock(src_m);
 1326 
 1327                         vm_page_lock(dst_m);
 1328                         vm_page_wire(dst_m);
 1329                         vm_page_unlock(dst_m);
 1330                 } else {
 1331                         vm_page_lock(dst_m);
 1332                         vm_page_activate(dst_m);
 1333                         vm_page_unlock(dst_m);
 1334                 }
 1335                 vm_page_wakeup(dst_m);
 1336         }
 1337         VM_OBJECT_UNLOCK(dst_object);
 1338         if (upgrade) {
 1339                 dst_entry->eflags &= ~(MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY);
 1340                 vm_object_deallocate(src_object);
 1341         }
 1342 }
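
/*
 * Editor's illustrative sketch -- not part of vm_fault.c.  It contrasts the
 * two calling conventions described in the header comment above: passing
 * the same entry twice performs the in-place COW upgrade of a wired entry
 * (with no fork charge), while passing distinct entries copies a wired
 * entry from a parent map into a child map and accumulates the charge.
 * The wrapper name is hypothetical; both maps must be write-locked and the
 * source entry wired, exactly as vm_fault_copy_entry() itself requires.
 */
static void
example_copy_wired_entry(vm_map_t dst_map, vm_map_t src_map,
    vm_map_entry_t dst_entry, vm_map_entry_t src_entry,
    vm_ooffset_t *fork_charge)
{

        if (src_entry == dst_entry)
                /* Upgrade: give the wired entry private, writeable pages. */
                vm_fault_copy_entry(dst_map, dst_map, dst_entry, dst_entry,
                    NULL);
        else
                /* Fork: copy the parent's wired pages into the child. */
                vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry,
                    fork_charge);
}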
 1343 
 1344 
 1345 /*
 1346  * This routine checks around the requested page for other pages that
 1347  * can be faulted in at the same time, bracketing the requested page
 1348  * with the largest viable run of pages to be paged in.
 1349  *
 1350  * Inputs:
 1351  *      m, rbehind, rahead
 1352  *
 1353  * Outputs:
 1354  *  marray (array of vm_page_t), reqpage (index of requested page)
 1355  *
 1356  * Return value:
 1357  *  number of pages in marray
 1358  */
 1359 static int
 1360 vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
 1361         vm_page_t m;
 1362         int rbehind;
 1363         int rahead;
 1364         vm_page_t *marray;
 1365         int *reqpage;
 1366 {
 1367         int i,j;
 1368         vm_object_t object;
 1369         vm_pindex_t pindex, startpindex, endpindex, tpindex;
 1370         vm_page_t rtm;
 1371         int cbehind, cahead;
 1372 
 1373         VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
 1374 
 1375         object = m->object;
 1376         pindex = m->pindex;
 1377         cbehind = cahead = 0;
 1378 
 1379         /*
 1380          * if the requested page is not available, then give up now
 1381          */
 1382         if (!vm_pager_has_page(object, pindex, &cbehind, &cahead)) {
 1383                 return (0);
 1384         }
 1385 
 1386         if ((cbehind == 0) && (cahead == 0)) {
 1387                 *reqpage = 0;
 1388                 marray[0] = m;
 1389                 return (1);
 1390         }
 1391 
 1392         if (rahead > cahead) {
 1393                 rahead = cahead;
 1394         }
 1395 
 1396         if (rbehind > cbehind) {
 1397                 rbehind = cbehind;
 1398         }
 1399 
 1400         /*
 1401          * scan backward for the read behind pages -- in memory 
 1402          */
 1403         if (pindex > 0) {
 1404                 if (rbehind > pindex) {
 1405                         rbehind = pindex;
 1406                         startpindex = 0;
 1407                 } else {
 1408                         startpindex = pindex - rbehind;
 1409                 }
 1410 
 1411                 if ((rtm = TAILQ_PREV(m, pglist, listq)) != NULL &&
 1412                     rtm->pindex >= startpindex)
 1413                         startpindex = rtm->pindex + 1;
 1414 
 1415                 /* tpindex is unsigned; beware of numeric underflow. */
 1416                 for (i = 0, tpindex = pindex - 1; tpindex >= startpindex &&
 1417                     tpindex < pindex; i++, tpindex--) {
 1418 
 1419                         rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL |
 1420                             VM_ALLOC_IFNOTCACHED);
 1421                         if (rtm == NULL) {
 1422                                 /*
 1423                                  * Shift the allocated pages to the
 1424                                  * beginning of the array.
 1425                                  */
 1426                                 for (j = 0; j < i; j++) {
 1427                                         marray[j] = marray[j + tpindex + 1 -
 1428                                             startpindex];
 1429                                 }
 1430                                 break;
 1431                         }
 1432 
 1433                         marray[tpindex - startpindex] = rtm;
 1434                 }
 1435         } else {
 1436                 startpindex = 0;
 1437                 i = 0;
 1438         }
 1439 
 1440         marray[i] = m;
 1441         /* index of the requested page within marray */
 1442         *reqpage = i;
 1443 
 1444         tpindex = pindex + 1;
 1445         i++;
 1446 
 1447         /*
 1448          * scan forward for the read ahead pages
 1449          */
 1450         endpindex = tpindex + rahead;
 1451         if ((rtm = TAILQ_NEXT(m, listq)) != NULL && rtm->pindex < endpindex)
 1452                 endpindex = rtm->pindex;
 1453         if (endpindex > object->size)
 1454                 endpindex = object->size;
 1455 
 1456         for (; tpindex < endpindex; i++, tpindex++) {
 1457 
 1458                 rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL |
 1459                     VM_ALLOC_IFNOTCACHED);
 1460                 if (rtm == NULL) {
 1461                         break;
 1462                 }
 1463 
 1464                 marray[i] = rtm;
 1465         }
 1466 
 1467         /* return number of pages */
 1468         return (i);
 1469 }
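
/*
 * Editor's illustrative sketch -- not part of vm_fault.c.  It shows how a
 * fault handler consumes vm_fault_additional_pages(): the cluster built in
 * 'marray' is handed to the pager as one request, and 'reqpage' identifies
 * the element the fault actually needs.  The caller is assumed to hold the
 * object lock with the requested page busied; the function name, the array
 * size, and the read-behind/read-ahead counts (8/8) are hypothetical --
 * vm_fault() derives the real values from its own constants.
 */
static int
example_cluster_pagein(vm_page_t m)
{
        vm_page_t marray[1 + 8 + 8];
        int faultcount, reqpage;

        faultcount = vm_fault_additional_pages(m, 8, 8, marray, &reqpage);
        if (faultcount == 0)
                return (VM_PAGER_FAIL);

        /* marray[reqpage] == m; the pager fills the whole cluster. */
        return (vm_pager_get_pages(m->object, marray, faultcount, reqpage));
}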
 1470 
 1471 int
 1472 vm_fault_disable_pagefaults(void)
 1473 {
 1474 
 1475         return (curthread_pflags_set(TDP_NOFAULTING));
 1476 }
 1477 
 1478 void
 1479 vm_fault_enable_pagefaults(int save)
 1480 {
 1481 
 1482         curthread_pflags_restore(save);
 1483 }
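
/*
 * Editor's illustrative sketch -- not part of vm_fault.c.  The two helpers
 * above are meant to bracket code that must not recurse into the fault
 * handler, for example while holding locks that a page fault might need.
 * The saved value from vm_fault_disable_pagefaults() must be passed back
 * unmodified.  The function name is hypothetical.
 */
static void
example_nofaulting_region(void)
{
        int save;

        save = vm_fault_disable_pagefaults();
        /* ... touch only memory that is guaranteed to be resident ... */
        vm_fault_enable_pagefaults(save);
}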
