FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_fault.c


    1 /*
    2  * Copyright (c) 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * Copyright (c) 1994 John S. Dyson
    5  * All rights reserved.
    6  * Copyright (c) 1994 David Greenman
    7  * All rights reserved.
    8  *
    9  *
   10  * This code is derived from software contributed to Berkeley by
   11  * The Mach Operating System project at Carnegie-Mellon University.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. All advertising materials mentioning features or use of this software
   22  *    must display the following acknowledgement:
   23  *      This product includes software developed by the University of
   24  *      California, Berkeley and its contributors.
   25  * 4. Neither the name of the University nor the names of its contributors
   26  *    may be used to endorse or promote products derived from this software
   27  *    without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39  * SUCH DAMAGE.
   40  *
   41  *      from: @(#)vm_fault.c    8.4 (Berkeley) 1/12/94
   42  *
   43  *
   44  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   45  * All rights reserved.
   46  *
   47  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   48  *
   49  * Permission to use, copy, modify and distribute this software and
   50  * its documentation is hereby granted, provided that both the copyright
   51  * notice and this permission notice appear in all copies of the
   52  * software, derivative works or modified versions, and any portions
   53  * thereof, and that both notices appear in supporting documentation.
   54  *
   55  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   56  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   57  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   58  *
   59  * Carnegie Mellon requests users of this software to return to
   60  *
   61  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   62  *  School of Computer Science
   63  *  Carnegie Mellon University
   64  *  Pittsburgh PA 15213-3890
   65  *
   66  * any improvements or extensions that they make and grant Carnegie the
   67  * rights to redistribute these changes.
   68  *
   69  * $FreeBSD: src/sys/vm/vm_fault.c,v 1.57.2.6 1999/09/05 08:24:22 peter Exp $
   70  */
   71 
   72 /*
   73  *      Page fault handling module.
   74  */
   75 
   76 #include <sys/param.h>
   77 #include <sys/systm.h>
   78 #include <sys/proc.h>
   79 #include <sys/vnode.h>
   80 #include <sys/resource.h>
   81 #include <sys/signalvar.h>
   82 #include <sys/resourcevar.h>
   83 #include <sys/vmmeter.h>
   84 #include <sys/buf.h>
   85 
   86 #include <vm/vm.h>
   87 #include <vm/vm_param.h>
   88 #include <vm/vm_prot.h>
   89 #include <vm/lock.h>
   90 #include <vm/pmap.h>
   91 #include <vm/vm_map.h>
   92 #include <vm/vm_object.h>
   93 #include <vm/vm_page.h>
   94 #include <vm/vm_pageout.h>
   95 #include <vm/vm_kern.h>
   96 #include <vm/vm_pager.h>
   97 #include <vm/vnode_pager.h>
   98 #include <vm/swap_pager.h>
   99 #include <vm/vm_extern.h>
  100 
  101 int vm_fault_additional_pages __P((vm_page_t, int, int, vm_page_t *, int *));
  102 
  103 #define VM_FAULT_READ_AHEAD 4
  104 #define VM_FAULT_READ_BEHIND 3
  105 #define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1)
  106 
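/*
 * Editor's note: with the defaults above, a clustered fault covers at most
 * VM_FAULT_READ = 3 + 4 + 1 = 8 pages (the faulting page plus its
 * read-behind and read-ahead neighbours); the marray[] array used by
 * vm_fault() below is sized to match.
 */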
  107 /*
  108  *      vm_fault:
  109  *
   110  *      Handle a page fault occurring at the given address,
  111  *      requiring the given permissions, in the map specified.
  112  *      If successful, the page is inserted into the
  113  *      associated physical map.
  114  *
  115  *      NOTE: the given address should be truncated to the
  116  *      proper page address.
  117  *
  118  *      KERN_SUCCESS is returned if the page fault is handled; otherwise,
  119  *      a standard error specifying why the fault is fatal is returned.
  120  *
  121  *
  122  *      The map in question must be referenced, and remains so.
  123  *      Caller may hold no locks.
  124  */
  125 int
  126 vm_fault(map, vaddr, fault_type, fault_flags)
  127         vm_map_t map;
  128         vm_offset_t vaddr;
  129         vm_prot_t fault_type;
  130         int fault_flags;
  131 {
  132         vm_object_t first_object;
  133         vm_pindex_t first_pindex;
  134         vm_map_entry_t entry;
  135         register vm_object_t object;
  136         register vm_pindex_t pindex;
  137         vm_page_t m;
  138         vm_page_t first_m;
  139         vm_prot_t prot;
  140         int result;
  141         boolean_t wired;
  142         boolean_t su;
  143         boolean_t lookup_still_valid;
  144         vm_page_t old_m;
  145         vm_object_t next_object;
  146         vm_page_t marray[VM_FAULT_READ];
  147         int hardfault = 0;
  148         struct vnode *vp = NULL;
  149 
  150         cnt.v_vm_faults++;      /* needs lock XXX */
  151 /*
  152  *      Recovery actions
  153  */
  154 #define FREE_PAGE(m)    {                               \
  155         PAGE_WAKEUP(m);                                 \
  156         vm_page_free(m);                                \
  157 }
  158 
  159 #define RELEASE_PAGE(m) {                               \
  160         PAGE_WAKEUP(m);                                 \
  161         if (m->queue != PQ_ACTIVE) vm_page_activate(m);         \
  162 }
  163 
  164 #define UNLOCK_MAP      {                               \
  165         if (lookup_still_valid) {                       \
  166                 vm_map_lookup_done(map, entry);         \
  167                 lookup_still_valid = FALSE;             \
  168         }                                               \
  169 }
  170 
  171 #define UNLOCK_THINGS   {                               \
  172         vm_object_pip_wakeup(object); \
  173         if (object != first_object) {                   \
  174                 FREE_PAGE(first_m);                     \
  175                 vm_object_pip_wakeup(first_object); \
  176         }                                               \
  177         UNLOCK_MAP;                                     \
  178         if (vp != NULL) VOP_UNLOCK(vp);                 \
  179 }
  180 
  181 #define UNLOCK_AND_DEALLOCATE   {                       \
  182         UNLOCK_THINGS;                                  \
  183         vm_object_deallocate(first_object);             \
  184 }
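
/*
 * Editor's note: the recovery macros above unwind progressively more state.
 * FREE_PAGE() wakes waiters and frees a busy page; RELEASE_PAGE() wakes
 * waiters and puts the page back on the active queue; UNLOCK_MAP drops the
 * map lookup; UNLOCK_THINGS additionally drops the paging-in-progress
 * counts, frees first_m when a backing object is being examined, and
 * releases the vnode lock; UNLOCK_AND_DEALLOCATE also drops the reference
 * taken on first_object below.
 */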
  185 
  186 
  187 RetryFault:;
  188 
  189         /*
  190          * Find the backing store object and offset into it to begin the
  191          * search.
  192          */
  193 
  194         if ((result = vm_map_lookup(&map, vaddr,
  195                 fault_type, &entry, &first_object,
  196                 &first_pindex, &prot, &wired, &su)) != KERN_SUCCESS) {
  197                 return (result);
  198         }
  199 
  200         if (entry->eflags & MAP_ENTRY_NOFAULT) {
  201                 panic("vm_fault: fault on nofault entry, addr: %lx",
  202                         vaddr);
  203         }
  204 
  205         /*
  206          * If we are user-wiring a r/w segment, and it is COW, then
   207          * we need to do the COW operation.  Note that we don't
   208          * currently COW read-only sections, because it is NOT desirable
  209          * to COW .text.  We simply keep .text from ever being COW'ed
  210          * and take the heat that one cannot debug wired .text sections.
  211          */
  212         if (((fault_flags & VM_FAULT_WIRE_MASK) == VM_FAULT_USER_WIRE) && (entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
  213                 if(entry->protection & VM_PROT_WRITE) {
  214                         int tresult;
  215                         vm_map_lookup_done(map, entry);
  216 
  217                         tresult = vm_map_lookup(&map, vaddr, VM_PROT_READ|VM_PROT_WRITE,
  218                                 &entry, &first_object, &first_pindex, &prot, &wired, &su);
  219                         if (tresult != KERN_SUCCESS)
  220                                 return tresult;
  221                 } else {
  222                         /*
  223                          * If we don't COW now, on a user wire, the user will never
  224                          * be able to write to the mapping.  If we don't make this
  225                          * restriction, the bookkeeping would be nearly impossible.
  226                          */
  227                         entry->max_protection &= ~VM_PROT_WRITE;
  228                 }
  229         }
  230 
  231         vp = vnode_pager_lock(first_object);
  232 
  233         lookup_still_valid = TRUE;
  234 
  235         if (wired)
  236                 fault_type = prot;
  237 
  238         first_m = NULL;
  239 
  240         /*
  241          * Make a reference to this object to prevent its disposal while we
  242          * are messing with it.  Once we have the reference, the map is free
  243          * to be diddled.  Since objects reference their shadows (and copies),
  244          * they will stay around as well.
  245          */
  246 
  247         first_object->ref_count++;
  248         first_object->paging_in_progress++;
  249 
  250         /*
  251          * INVARIANTS (through entire routine):
  252          *
  253          * 1)   At all times, we must either have the object lock or a busy
  254          * page in some object to prevent some other process from trying to
  255          * bring in the same page.
  256          *
  257          * Note that we cannot hold any locks during the pager access or when
  258          * waiting for memory, so we use a busy page then.
  259          *
   260          * Note also that we aren't as concerned about more than one thread
  261          * attempting to pager_data_unlock the same page at once, so we don't
  262          * hold the page as busy then, but do record the highest unlock value
  263          * so far.  [Unlock requests may also be delivered out of order.]
  264          *
  265          * 2)   Once we have a busy page, we must remove it from the pageout
  266          * queues, so that the pageout daemon will not grab it away.
  267          *
  268          * 3)   To prevent another process from racing us down the shadow chain
  269          * and entering a new page in the top object before we do, we must
  270          * keep a busy page in the top object while following the shadow
  271          * chain.
  272          *
  273          * 4)   We must increment paging_in_progress on any object for which
  274          * we have a busy page, to prevent vm_object_collapse from removing
  275          * the busy page without our noticing.
  276          */
  277 
  278         /*
  279          * Search for the page at object/offset.
  280          */
  281 
  282         object = first_object;
  283         pindex = first_pindex;
  284 
  285         /*
  286          * See whether this page is resident
  287          */
  288 
  289         while (TRUE) {
  290                 m = vm_page_lookup(object, pindex);
  291                 if (m != NULL) {
  292                         int queue, s;
  293                         /*
  294                          * If the page is being brought in, wait for it and
  295                          * then retry.
  296                          */
  297                         if ((m->flags & PG_BUSY) || m->busy) {
  298                                 UNLOCK_THINGS;
  299                                 s = splvm();
  300                                 if (((m->flags & PG_BUSY) || m->busy)) {
  301                                         m->flags |= PG_WANTED | PG_REFERENCED;
  302                                         cnt.v_intrans++;
  303                                         tsleep(m, PSWP, "vmpfw", 0);
  304                                 }
  305                                 splx(s);
  306                                 vm_object_deallocate(first_object);
  307                                 goto RetryFault;
  308                         }
  309 
  310                         queue = m->queue;
  311                         s = splvm();
  312                         vm_page_unqueue_nowakeup(m);
  313                         splx(s);
  314 
  315                         /*
  316                          * Mark page busy for other processes, and the pagedaemon.
  317                          */
  318                         if (((queue - m->pc) == PQ_CACHE) &&
  319                             (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
  320                                 vm_page_activate(m);
  321                                 UNLOCK_AND_DEALLOCATE;
  322                                 VM_WAIT;
  323                                 goto RetryFault;
  324                         }
  325 
  326                         m->flags |= PG_BUSY;
  327 
  328                         if (/*m->valid && */
  329                                 ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) &&
  330                                 m->object != kernel_object && m->object != kmem_object) {
  331                                 goto readrest;
  332                         }
  333                         break;
  334                 }
  335                 if (((object->type != OBJT_DEFAULT) && (((fault_flags & VM_FAULT_WIRE_MASK) == 0) || wired))
  336                     || (object == first_object)) {
  337 
  338                         if (pindex >= object->size) {
  339                                 UNLOCK_AND_DEALLOCATE;
  340                                 return (KERN_PROTECTION_FAILURE);
  341                         }
  342 
  343                         /*
  344                          * Allocate a new page for this object/offset pair.
  345                          */
  346                         m = vm_page_alloc(object, pindex,
  347                                 (vp || object->backing_object)?VM_ALLOC_NORMAL:VM_ALLOC_ZERO);
  348 
  349                         if (m == NULL) {
  350                                 UNLOCK_AND_DEALLOCATE;
  351                                 VM_WAIT;
  352                                 goto RetryFault;
  353                         }
  354                 }
  355 readrest:
  356                 if (object->type != OBJT_DEFAULT && (((fault_flags & VM_FAULT_WIRE_MASK) == 0) || wired)) {
  357                         int rv;
  358                         int faultcount;
  359                         int reqpage;
  360                         int ahead, behind;
  361 
  362                         ahead = VM_FAULT_READ_AHEAD;
  363                         behind = VM_FAULT_READ_BEHIND;
  364                         if (first_object->behavior == OBJ_RANDOM) {
  365                                 ahead = 0;
  366                                 behind = 0;
  367                         }
  368 
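                        /*
                         * Editor's note: for objects marked OBJ_SEQUENTIAL,
                         * the block below recycles the run of fully valid
                         * resident pages immediately behind the faulting
                         * index (clean pages are moved to the cache queue,
                         * dirty ones are deactivated) and folds the
                         * read-behind window into read-ahead, since a
                         * sequential reader rarely revisits pages behind it.
                         */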
  369                         if ((first_object->type != OBJT_DEVICE) &&
  370                                 (first_object->behavior == OBJ_SEQUENTIAL)) {
  371                                 vm_pindex_t firstpindex, tmppindex;
  372                                 if (first_pindex <
  373                                         2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1))
  374                                         firstpindex = 0;
  375                                 else
  376                                         firstpindex = first_pindex -
  377                                                 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1);
  378 
   379                                 for (tmppindex = first_pindex - 1;
   380                                         tmppindex >= firstpindex;
  381                                         --tmppindex) {
  382                                         vm_page_t mt;
  383                                         mt = vm_page_lookup( first_object, tmppindex);
  384                                         if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL))
  385                                                 break;
  386                                         if (mt->busy ||
  387                                                 (mt->flags & (PG_BUSY|PG_FICTITIOUS)) ||
  388                                                 mt->hold_count ||
  389                                                 mt->wire_count) 
  390                                                 continue;
  391                                         if (mt->dirty == 0)
  392                                                 vm_page_test_dirty(mt);
  393                                         if (mt->dirty) {
  394                                                 vm_page_protect(mt, VM_PROT_NONE);
  395                                                 vm_page_deactivate(mt);
  396                                         } else {
  397                                                 vm_page_cache(mt);
  398                                         }
  399                                 }
  400 
  401                                 ahead += behind;
  402                                 behind = 0;
  403                         }
  404 
  405                         /*
   406                          * Now we find out if any other pages should be paged
   407                          * in at this time.  This routine checks to see if the
   408                          * pages surrounding this fault reside in the same
   409                          * object as the page for this fault.  If they do,
   410                          * then they are faulted in as well.  The returned
   411                          * array "marray" contains the vm_page_t structs,
   412                          * one of which is the vm_page_t passed to the
   413                          * routine.  The reqpage return value is the index
   414                          * into marray of the vm_page_t passed to the
   415                          * routine.
  416                          */
  417                         faultcount = vm_fault_additional_pages(
  418                             m, behind, ahead, marray, &reqpage);
  419 
  420                         /*
  421                          * Call the pager to retrieve the data, if any, after
  422                          * releasing the lock on the map.
  423                          */
  424                         UNLOCK_MAP;
  425 
  426                         rv = faultcount ?
  427                             vm_pager_get_pages(object, marray, faultcount,
  428                                 reqpage) : VM_PAGER_FAIL;
  429 
  430                         if (rv == VM_PAGER_OK) {
  431                                 /*
  432                                  * Found the page. Leave it busy while we play
  433                                  * with it.
  434                                  */
  435 
  436                                 /*
  437                                  * Relookup in case pager changed page. Pager
  438                                  * is responsible for disposition of old page
  439                                  * if moved.
  440                                  */
  441                                 m = vm_page_lookup(object, pindex);
  442                                 if( !m) {
  443                                         UNLOCK_AND_DEALLOCATE;
  444                                         goto RetryFault;
  445                                 }
  446 
  447                                 hardfault++;
  448                                 break;
  449                         }
  450                         /*
  451                          * Remove the bogus page (which does not exist at this
  452                          * object/offset); before doing so, we must get back
  453                          * our object lock to preserve our invariant.
  454                          *
  455                          * Also wake up any other process that may want to bring
  456                          * in this page.
  457                          *
  458                          * If this is the top-level object, we must leave the
  459                          * busy page to prevent another process from rushing
  460                          * past us, and inserting the page in that object at
  461                          * the same time that we are.
  462                          */
  463 
  464                         if (rv == VM_PAGER_ERROR)
  465                                 printf("vm_fault: pager input (probably hardware) error, PID %d failure\n",
  466                                     curproc->p_pid);
  467                         /*
  468                          * Data outside the range of the pager or an I/O error
  469                          */
  470                         /*
  471                          * XXX - the check for kernel_map is a kludge to work
  472                          * around having the machine panic on a kernel space
  473                          * fault w/ I/O error.
  474                          */
  475                         if (((map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) {
  476                                 FREE_PAGE(m);
  477                                 UNLOCK_AND_DEALLOCATE;
  478                                 return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE);
  479                         }
  480                         if (object != first_object) {
  481                                 FREE_PAGE(m);
  482                                 /*
  483                                  * XXX - we cannot just fall out at this
  484                                  * point, m has been freed and is invalid!
  485                                  */
  486                         }
  487                 }
  488                 /*
   489          * We get here if the object has a default pager (or we are unwiring)
   490          * or the pager doesn't have the page.
  491                  */
  492                 if (object == first_object)
  493                         first_m = m;
  494 
  495                 /*
  496                  * Move on to the next object.  Lock the next object before
  497                  * unlocking the current one.
  498                  */
  499 
  500                 pindex += OFF_TO_IDX(object->backing_object_offset);
  501                 next_object = object->backing_object;
  502                 if (next_object == NULL) {
  503                         /*
  504                          * If there's no object left, fill the page in the top
  505                          * object with zeros.
  506                          */
  507                         if (object != first_object) {
  508                                 vm_object_pip_wakeup(object);
  509 
  510                                 object = first_object;
  511                                 pindex = first_pindex;
  512                                 m = first_m;
  513                         }
  514                         first_m = NULL;
  515 
  516                         if ((m->flags & PG_ZERO) == 0)
  517                                 vm_page_zero_fill(m);
  518                         cnt.v_zfod++;
  519                         break;
  520                 } else {
  521                         if (object != first_object) {
  522                                 vm_object_pip_wakeup(object);
  523                         }
  524                         object = next_object;
  525                         object->paging_in_progress++;
  526                 }
  527         }
  528 
  529         if ((m->flags & PG_BUSY) == 0)
  530                 panic("vm_fault: not busy after main loop");
  531 
  532         /*
  533          * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
  534          * is held.]
  535          */
  536 
  537         old_m = m;      /* save page that would be copied */
  538 
  539         /*
  540          * If the page is being written, but isn't already owned by the
  541          * top-level object, we have to copy it into a new page owned by the
  542          * top-level object.
  543          */
  544 
  545         if (object != first_object) {
  546                 /*
  547                  * We only really need to copy if we want to write it.
  548                  */
  549 
  550                 if (fault_type & VM_PROT_WRITE) {
  551 
  552                         /*
  553                          * This allows pages to be virtually copied from a backing_object
  554                          * into the first_object, where the backing object has no other
  555                          * refs to it, and cannot gain any more refs.  Instead of a
  556                          * bcopy, we just move the page from the backing object to the
  557                          * first object.  Note that we must mark the page dirty in the
  558                          * first object so that it will go out to swap when needed.
  559                          */
  560                         if (lookup_still_valid &&
  561                                 /*
  562                                  * Only one shadow object
  563                                  */
  564                                 (object->shadow_count == 1) &&
  565                                 /*
  566                                  * No COW refs, except us
  567                                  */
  568                                 (object->ref_count == 1) &&
  569                                 /*
   570                                  * No one else can look this object up
  571                                  */
  572                                 (object->handle == NULL) &&
  573                                 /*
  574                                  * No other ways to look the object up
  575                                  */
  576                                 ((object->type == OBJT_DEFAULT) ||
  577                                  (object->type == OBJT_SWAP)) &&
  578                                 /*
  579                                  * We don't chase down the shadow chain
  580                                  */
  581                                 (object == first_object->backing_object)) {
  582 
  583                                 /*
  584                                  * get rid of the unnecessary page
  585                                  */
  586                                 vm_page_protect(first_m, VM_PROT_NONE);
  587                                 PAGE_WAKEUP(first_m);
  588                                 vm_page_free(first_m);
  589                                 /*
   590                                  * grab the page and put it into the process's object
  591                                  */
  592                                 vm_page_rename(m, first_object, first_pindex);
  593                                 first_m = m;
  594                                 m->dirty = VM_PAGE_BITS_ALL;
  595                                 m = NULL;
  596                         } else {
  597                                 /*
   598                                  * Oh, well, let's copy it.
  599                                  */
  600                                 vm_page_copy(m, first_m);
  601                         }
  602 
  603                         /*
  604                          * This code handles the case where there are two references to the
  605                          * backing object, and one reference is getting a copy of the
  606                          * page.  If the other reference is the only other object that
  607                          * points to the backing object, then perform a virtual copy
  608                          * from the backing object to the other object after the
  609                          * page is copied to the current first_object.  If the other
  610                          * object already has the page, we destroy it in the backing object
  611                          * performing an optimized collapse-type operation.  We don't
  612                          * bother removing the page from the backing object's swap space.
  613                          */
  614                         if (lookup_still_valid &&
  615                                 /*
  616                                  * make sure that we have two shadow objs
  617                                  */
  618                                 (object->shadow_count == 2) &&
  619                                 /*
  620                                  * And no COW refs -- note that there are sometimes
  621                                  * temp refs to objs, but ignore that case -- we just
  622                                  * punt.
  623                                  */
  624                                 (object->ref_count == 2) &&
  625                                 /*
   626                                  * No one else can look us up
  627                                  */
  628                                 (object->handle == NULL) &&
  629                                 /*
  630                                  * Not something that can be referenced elsewhere
  631                                  */
  632                                 ((object->type == OBJT_DEFAULT) ||
  633                                  (object->type == OBJT_SWAP)) &&
  634                                 /*
  635                                  * We don't bother chasing down object chain
  636                                  */
  637                                 (object == first_object->backing_object)) {
  638 
  639                                 vm_object_t other_object;
  640                                 vm_pindex_t other_pindex, other_pindex_offset;
  641                                 vm_page_t tm;
  642                                 
  643                                 other_object = TAILQ_FIRST(&object->shadow_head);
  644                                 if (other_object == first_object)
  645                                         other_object = TAILQ_NEXT(other_object, shadow_list);
  646                                 if (!other_object)
  647                                         panic("vm_fault: other object missing");
  648                                 if (other_object &&
  649                                         (other_object->type == OBJT_DEFAULT) &&
  650                                         (other_object->paging_in_progress == 0)) {
  651                                         other_pindex_offset =
  652                                                 OFF_TO_IDX(other_object->backing_object_offset);
  653                                         if (pindex >= other_pindex_offset) {
  654                                                 other_pindex = pindex - other_pindex_offset;
  655                                                 /*
  656                                                  * If the other object has the page, just free it.
  657                                                  */
  658                                                 if ((tm = vm_page_lookup(other_object, other_pindex))) {
  659                                                         if ((tm->flags & PG_BUSY) == 0 &&
  660                                                                 tm->busy == 0 &&
  661                                                                 tm->valid == VM_PAGE_BITS_ALL) {
  662                                                                 /*
  663                                                                  * get rid of the unnecessary page
  664                                                                  */
  665                                                                 vm_page_protect(m, VM_PROT_NONE);
  666                                                                 PAGE_WAKEUP(m);
  667                                                                 vm_page_free(m);
  668                                                                 m = NULL;
  669                                                                 tm->dirty = VM_PAGE_BITS_ALL;
  670                                                                 first_m->dirty = VM_PAGE_BITS_ALL;
  671                                                         }
  672                                                 } else {
  673                                                         /*
  674                                                          * If the other object doesn't have the page,
  675                                                          * then we move it there.
  676                                                          */
  677                                                         vm_page_rename(m, other_object, other_pindex);
  678                                                         m->dirty = VM_PAGE_BITS_ALL;
  679                                                         m->valid = VM_PAGE_BITS_ALL;
  680                                                 }
  681                                         }
  682                                 }
  683                         }
  684 
  685                         if (m) {
  686                                 if (m->queue != PQ_ACTIVE)
  687                                         vm_page_activate(m);
   688                                 /*
   689                                  * We no longer need the old page or object.
   690                                  */
  691                                 PAGE_WAKEUP(m);
  692                         }
  693 
  694                         vm_object_pip_wakeup(object);
  695                         /*
  696                          * Only use the new page below...
  697                          */
  698 
  699                         cnt.v_cow_faults++;
  700                         m = first_m;
  701                         object = first_object;
  702                         pindex = first_pindex;
  703 
  704                         /*
  705                          * Now that we've gotten the copy out of the way,
  706                          * let's try to collapse the top object.
  707                          *
  708                          * But we have to play ugly games with
  709                          * paging_in_progress to do that...
  710                          */
  711                         vm_object_pip_wakeup(object);
  712                         vm_object_collapse(object);
  713                         object->paging_in_progress++;
  714                 } else {
  715                         prot &= ~VM_PROT_WRITE;
  716                 }
  717         }
  718 
  719         /*
  720          * We must verify that the maps have not changed since our last
  721          * lookup.
  722          */
  723 
  724         if (!lookup_still_valid) {
  725                 vm_object_t retry_object;
  726                 vm_pindex_t retry_pindex;
  727                 vm_prot_t retry_prot;
  728 
  729                 /*
  730                  * Since map entries may be pageable, make sure we can take a
  731                  * page fault on them.
  732                  */
  733 
  734                 /*
  735                  * To avoid trying to write_lock the map while another process
  736                  * has it read_locked (in vm_map_pageable), we do not try for
  737                  * write permission.  If the page is still writable, we will
  738                  * get write permission.  If it is not, or has been marked
  739                  * needs_copy, we enter the mapping without write permission,
  740                  * and will merely take another fault.
  741                  */
  742                 result = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE,
  743                     &entry, &retry_object, &retry_pindex, &retry_prot, &wired, &su);
  744 
  745                 /*
  746                  * If we don't need the page any longer, put it on the active
  747                  * list (the easiest thing to do here).  If no one needs it,
  748                  * pageout will grab it eventually.
  749                  */
  750 
  751                 if (result != KERN_SUCCESS) {
  752                         RELEASE_PAGE(m);
  753                         UNLOCK_AND_DEALLOCATE;
  754                         return (result);
  755                 }
  756                 lookup_still_valid = TRUE;
  757 
  758                 if ((retry_object != first_object) ||
  759                     (retry_pindex != first_pindex)) {
  760                         RELEASE_PAGE(m);
  761                         UNLOCK_AND_DEALLOCATE;
  762                         goto RetryFault;
  763                 }
  764                 /*
  765                  * Check whether the protection has changed or the object has
  766                  * been copied while we left the map unlocked. Changing from
  767                  * read to write permission is OK - we leave the page
  768                  * write-protected, and catch the write fault. Changing from
  769                  * write to read permission means that we can't mark the page
  770                  * write-enabled after all.
  771                  */
  772                 prot &= retry_prot;
  773         }
  774 
  775         /*
  776          * Put this page into the physical map. We had to do the unlock above
  777          * because pmap_enter may cause other faults.   We don't put the page
  778          * back on the active queue until later so that the page-out daemon
  779          * won't find us (yet).
  780          */
  781 
  782         if (prot & VM_PROT_WRITE) {
  783                 m->flags |= PG_WRITEABLE;
  784                 m->object->flags |= OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY;
  785                 /*
  786                  * If the fault is a write, we know that this page is being
  787                  * written NOW. This will save on the pmap_ts_modified() calls
  788                  * later.
  789                  */
  790                 if (fault_flags & VM_FAULT_DIRTY) {
  791                         m->dirty = VM_PAGE_BITS_ALL;
  792                 }
  793         }
  794 
  795         UNLOCK_THINGS;
  796         m->valid = VM_PAGE_BITS_ALL;
  797         m->flags &= ~PG_ZERO;
  798 
  799         pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired);
  800         if (((fault_flags & VM_FAULT_WIRE_MASK) == 0) && (wired == 0))
  801                 pmap_prefault(map->pmap, vaddr, entry, first_object);
  802 
  803         m->flags |= PG_MAPPED|PG_REFERENCED;
  804         if (fault_flags & VM_FAULT_HOLD)
  805                 vm_page_hold(m);
  806 
  807         /*
  808          * If the page is not wired down, then put it where the pageout daemon
  809          * can find it.
  810          */
  811         if (fault_flags & VM_FAULT_WIRE_MASK) {
  812                 if (wired)
  813                         vm_page_wire(m);
  814                 else
  815                         vm_page_unwire(m);
  816         } else {
  817                 if (m->queue != PQ_ACTIVE)
  818                         vm_page_activate(m);
  819         }
  820 
  821         if (curproc && (curproc->p_flag & P_INMEM) && curproc->p_stats) {
  822                 if (hardfault) {
  823                         curproc->p_stats->p_ru.ru_majflt++;
  824                 } else {
  825                         curproc->p_stats->p_ru.ru_minflt++;
  826                 }
  827         }
  828 
  829         /*
  830          * Unlock everything, and return
  831          */
  832 
  833         PAGE_WAKEUP(m);
  834         vm_object_deallocate(first_object);
  835 
  836         return (KERN_SUCCESS);
  837 
  838 }
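
/*
 * Editor's note (usage sketch, not part of the original file): the
 * machine-dependent trap handler is the usual caller of vm_fault().
 * Assuming the flag names of this era (VM_FAULT_NORMAL, VM_FAULT_DIRTY),
 * a fault on a user address would be resolved roughly as:
 *
 *      va = trunc_page(fault_address_reported_by_mmu);
 *      rv = vm_fault(&p->p_vmspace->vm_map, va, ftype,
 *              (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : VM_FAULT_NORMAL);
 *      if (rv != KERN_SUCCESS)
 *              ... deliver SIGSEGV/SIGBUS, or panic for a kernel fault ...
 */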
  839 
  840 /*
  841  *      vm_fault_wire:
  842  *
  843  *      Wire down a range of virtual addresses in a map.
  844  */
  845 int
  846 vm_fault_wire(map, start, end)
  847         vm_map_t map;
  848         vm_offset_t start, end;
  849 {
  850 
  851         register vm_offset_t va;
  852         register pmap_t pmap;
  853         int rv;
  854 
  855         pmap = vm_map_pmap(map);
  856 
  857         /*
  858          * Inform the physical mapping system that the range of addresses may
  859          * not fault, so that page tables and such can be locked down as well.
  860          */
  861 
  862         pmap_pageable(pmap, start, end, FALSE);
  863 
  864         /*
  865          * We simulate a fault to get the page and enter it in the physical
  866          * map.
  867          */
  868 
  869         for (va = start; va < end; va += PAGE_SIZE) {
  870                 rv = vm_fault(map, va, VM_PROT_READ|VM_PROT_WRITE,
  871                         VM_FAULT_CHANGE_WIRING);
  872                 if (rv) {
  873                         if (va != start)
  874                                 vm_fault_unwire(map, start, va);
  875                         return (rv);
  876                 }
  877         }
  878         return (KERN_SUCCESS);
  879 }
  880 
  881 /*
  882  *      vm_fault_user_wire:
  883  *
  884  *      Wire down a range of virtual addresses in a map.  This
  885  *      is for user mode though, so we only ask for read access
  886  *      on currently read only sections.
  887  */
  888 int
  889 vm_fault_user_wire(map, start, end)
  890         vm_map_t map;
  891         vm_offset_t start, end;
  892 {
  893 
  894         register vm_offset_t va;
  895         register pmap_t pmap;
  896         int rv;
  897 
  898         pmap = vm_map_pmap(map);
  899 
  900         /*
  901          * Inform the physical mapping system that the range of addresses may
  902          * not fault, so that page tables and such can be locked down as well.
  903          */
  904         pmap_pageable(pmap, start, end, FALSE);
  905 
  906         /*
  907          * We simulate a fault to get the page and enter it in the physical
  908          * map.
  909          */
  910         for (va = start; va < end; va += PAGE_SIZE) {
  911                 rv = vm_fault(map, va, VM_PROT_READ, VM_FAULT_USER_WIRE);
  912                 if (rv) {
  913                         if (va != start)
  914                                 vm_fault_unwire(map, start, va);
  915                         return (rv);
  916                 }
  917         }
  918         return (KERN_SUCCESS);
  919 }
  920 
  921 
  922 /*
  923  *      vm_fault_unwire:
  924  *
  925  *      Unwire a range of virtual addresses in a map.
  926  */
  927 void
  928 vm_fault_unwire(map, start, end)
  929         vm_map_t map;
  930         vm_offset_t start, end;
  931 {
  932 
  933         register vm_offset_t va, pa;
  934         register pmap_t pmap;
  935 
  936         pmap = vm_map_pmap(map);
  937 
  938         /*
  939          * Since the pages are wired down, we must be able to get their
  940          * mappings from the physical map system.
  941          */
  942 
  943         for (va = start; va < end; va += PAGE_SIZE) {
  944                 pa = pmap_extract(pmap, va);
  945                 if (pa != (vm_offset_t) 0) {
  946                         pmap_change_wiring(pmap, va, FALSE);
  947                         vm_page_unwire(PHYS_TO_VM_PAGE(pa));
  948                 }
  949         }
  950 
  951         /*
  952          * Inform the physical mapping system that the range of addresses may
  953          * fault, so that page tables and such may be unwired themselves.
  954          */
  955 
  956         pmap_pageable(pmap, start, end, TRUE);
  957 
  958 }
  959 
  960 /*
  961  *      Routine:
  962  *              vm_fault_copy_entry
  963  *      Function:
  964  *              Copy all of the pages from a wired-down map entry to another.
  965  *
  966  *      In/out conditions:
  967  *              The source and destination maps must be locked for write.
  968  *              The source map entry must be wired down (or be a sharing map
  969  *              entry corresponding to a main map entry that is wired down).
  970  */
  971 
  972 void
  973 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
  974         vm_map_t dst_map;
  975         vm_map_t src_map;
  976         vm_map_entry_t dst_entry;
  977         vm_map_entry_t src_entry;
  978 {
  979         vm_object_t dst_object;
  980         vm_object_t src_object;
  981         vm_ooffset_t dst_offset;
  982         vm_ooffset_t src_offset;
  983         vm_prot_t prot;
  984         vm_offset_t vaddr;
  985         vm_page_t dst_m;
  986         vm_page_t src_m;
  987 
  988 #ifdef  lint
  989         src_map++;
  990 #endif  /* lint */
  991 
  992         src_object = src_entry->object.vm_object;
  993         src_offset = src_entry->offset;
  994 
  995         /*
  996          * Create the top-level object for the destination entry. (Doesn't
  997          * actually shadow anything - we copy the pages directly.)
  998          */
  999         dst_object = vm_object_allocate(OBJT_DEFAULT,
 1000             (vm_size_t) OFF_TO_IDX(dst_entry->end - dst_entry->start));
 1001 
 1002         dst_entry->object.vm_object = dst_object;
 1003         dst_entry->offset = 0;
 1004 
 1005         prot = dst_entry->max_protection;
 1006 
 1007         /*
 1008          * Loop through all of the pages in the entry's range, copying each
 1009          * one from the source object (it should be there) to the destination
 1010          * object.
 1011          */
 1012         for (vaddr = dst_entry->start, dst_offset = 0;
 1013             vaddr < dst_entry->end;
 1014             vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) {
 1015 
 1016                 /*
 1017                  * Allocate a page in the destination object
 1018                  */
 1019                 do {
 1020                         dst_m = vm_page_alloc(dst_object,
 1021                                 OFF_TO_IDX(dst_offset), VM_ALLOC_NORMAL);
 1022                         if (dst_m == NULL) {
 1023                                 VM_WAIT;
 1024                         }
 1025                 } while (dst_m == NULL);
 1026 
 1027                 /*
 1028                  * Find the page in the source object, and copy it in.
 1029                  * (Because the source is wired down, the page will be in
 1030                  * memory.)
 1031                  */
 1032                 src_m = vm_page_lookup(src_object,
 1033                         OFF_TO_IDX(dst_offset + src_offset));
 1034                 if (src_m == NULL)
 1035                         panic("vm_fault_copy_wired: page missing");
 1036 
 1037                 vm_page_copy(src_m, dst_m);
 1038 
 1039                 /*
 1040                  * Enter it in the pmap...
 1041                  */
 1042 
 1043                 dst_m->flags &= ~PG_ZERO;
 1044                 pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m),
 1045                     prot, FALSE);
 1046                 dst_m->flags |= PG_WRITEABLE|PG_MAPPED;
 1047 
 1048                 /*
 1049                  * Mark it no longer busy, and put it on the active list.
 1050                  */
 1051                 vm_page_activate(dst_m);
 1052                 PAGE_WAKEUP(dst_m);
 1053         }
 1054 }
 1055 
 1056 
 1057 /*
 1058  * This routine checks around the requested page for other pages that
  1059  * might be faulted in.  It brackets the range of viable pages
  1060  * to be paged in.
 1061  *
 1062  * Inputs:
 1063  *      m, rbehind, rahead
 1064  *
 1065  * Outputs:
 1066  *  marray (array of vm_page_t), reqpage (index of requested page)
 1067  *
 1068  * Return value:
 1069  *  number of pages in marray
 1070  */
 1071 int
 1072 vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
 1073         vm_page_t m;
 1074         int rbehind;
 1075         int rahead;
 1076         vm_page_t *marray;
 1077         int *reqpage;
 1078 {
 1079         int i;
 1080         vm_object_t object;
 1081         vm_pindex_t pindex, startpindex, endpindex, tpindex;
 1082         vm_offset_t size;
 1083         vm_page_t rtm;
 1084         int treqpage;
 1085         int cbehind, cahead;
 1086 
 1087         object = m->object;
 1088         pindex = m->pindex;
 1089 
 1090         /*
 1091          * we don't fault-ahead for device pager
 1092          */
 1093         if (object->type == OBJT_DEVICE) {
 1094                 *reqpage = 0;
 1095                 marray[0] = m;
 1096                 return 1;
 1097         }
 1098 
 1099         /*
 1100          * if the requested page is not available, then give up now
 1101          */
 1102 
 1103         if (!vm_pager_has_page(object,
 1104                 OFF_TO_IDX(object->paging_offset) + pindex, &cbehind, &cahead))
 1105                 return 0;
 1106 
 1107         if ((cbehind == 0) && (cahead == 0)) {
 1108                 *reqpage = 0;
 1109                 marray[0] = m;
 1110                 return 1;
 1111         }
 1112 
 1113         if (rahead > cahead) {
 1114                 rahead = cahead;
 1115         }
 1116 
 1117         if (rbehind > cbehind) {
 1118                 rbehind = cbehind;
 1119         }
 1120 
 1121         /*
 1122          * try to do any readahead that we might have free pages for.
 1123          */
 1124         if ((rahead + rbehind) >
 1125                 ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved)) {
 1126                 pagedaemon_wakeup();
 1127                 *reqpage = 0;
 1128                 marray[0] = m;
 1129                 return 1;
 1130         }
 1131 
 1132         /*
 1133          * scan backward for the read behind pages -- in memory or on disk not
 1134          * in same object
 1135          */
 1136         tpindex = pindex - 1;
 1137         if (tpindex < pindex) {
 1138                 if (rbehind > pindex)
 1139                         rbehind = pindex;
 1140                 startpindex = pindex - rbehind;
 1141                 while (tpindex >= startpindex) {
 1142                         if (vm_page_lookup( object, tpindex)) {
 1143                                 startpindex = tpindex + 1;
 1144                                 break;
 1145                         }
 1146                         if (tpindex == 0)
 1147                                 break;
 1148                         tpindex -= 1;
 1149                 }
 1150         } else {
 1151                 startpindex = pindex;
 1152         }
 1153 
 1154         /*
 1155          * scan forward for the read ahead pages -- in memory or on disk not
 1156          * in same object
 1157          */
 1158         tpindex = pindex + 1;
 1159         endpindex = pindex + (rahead + 1);
 1160         if (endpindex > object->size)
 1161                 endpindex = object->size;
 1162         while (tpindex <  endpindex) {
 1163                 if ( vm_page_lookup(object, tpindex)) {
 1164                         break;
 1165                 }       
 1166                 tpindex += 1;
 1167         }
 1168         endpindex = tpindex;
 1169 
  1170         /* calculate the number of pages to bring in */
 1171         size = endpindex - startpindex;
 1172 
  1173         /* calculate the index of the requested page within the cluster */
 1174         treqpage = pindex - startpindex;
 1175 
 1176         /* see if we have space (again) */
 1177         if ((cnt.v_free_count + cnt.v_cache_count) >
 1178                 (cnt.v_free_reserved + size)) {
 1179                 /*
 1180                  * get our pages and don't block for them
 1181                  */
 1182                 for (i = 0; i < size; i++) {
 1183                         if (i != treqpage) {
 1184                                 rtm = vm_page_alloc(object,
 1185                                         startpindex + i,
 1186                                         VM_ALLOC_NORMAL);
 1187                                 if (rtm == NULL) {
 1188                                         if (i < treqpage) {
 1189                                                 int j;
 1190                                                 for (j = 0; j < i; j++) {
 1191                                                         FREE_PAGE(marray[j]);
 1192                                                 }
 1193                                                 *reqpage = 0;
 1194                                                 marray[0] = m;
 1195                                                 return 1;
 1196                                         } else {
 1197                                                 size = i;
 1198                                                 *reqpage = treqpage;
 1199                                                 return size;
 1200                                         }
 1201                                 }
 1202                                 marray[i] = rtm;
 1203                         } else {
 1204                                 marray[i] = m;
 1205                         }
 1206                 }
 1207 
 1208                 *reqpage = treqpage;
 1209                 return size;
 1210         }
 1211         *reqpage = 0;
 1212         marray[0] = m;
 1213         return 1;
 1214 }
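
/*
 * Editor's note: vm_fault() above consumes this routine's outputs as
 *
 *      faultcount = vm_fault_additional_pages(m, behind, ahead,
 *          marray, &reqpage);
 *      rv = faultcount ?
 *          vm_pager_get_pages(object, marray, faultcount, reqpage) :
 *          VM_PAGER_FAIL;
 *
 * i.e. marray[reqpage] is the originally requested page and the rest of
 * the array is opportunistic read-behind/read-ahead.
 */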

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.