FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_object.c
1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94
33 *
34 *
35 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
36 * All rights reserved.
37 *
38 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
39 *
40 * Permission to use, copy, modify and distribute this software and
41 * its documentation is hereby granted, provided that both the copyright
42 * notice and this permission notice appear in all copies of the
43 * software, derivative works or modified versions, and any portions
44 * thereof, and that both notices appear in supporting documentation.
45 *
46 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
47 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
48 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
49 *
50 * Carnegie Mellon requests users of this software to return to
51 *
52 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
53 * School of Computer Science
54 * Carnegie Mellon University
55 * Pittsburgh PA 15213-3890
56 *
57 * any improvements or extensions that they make and grant Carnegie the
58 * rights to redistribute these changes.
59 */
60
61 /*
62 * Virtual memory object module.
63 */
64
65 #include <sys/cdefs.h>
66 __FBSDID("$FreeBSD$");
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/lock.h>
71 #include <sys/mman.h>
72 #include <sys/mount.h>
73 #include <sys/kernel.h>
74 #include <sys/sysctl.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h> /* for curproc, pageproc */
77 #include <sys/socket.h>
78 #include <sys/vnode.h>
79 #include <sys/vmmeter.h>
80 #include <sys/sx.h>
81
82 #include <vm/vm.h>
83 #include <vm/vm_param.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_page.h>
88 #include <vm/vm_pageout.h>
89 #include <vm/vm_pager.h>
90 #include <vm/swap_pager.h>
91 #include <vm/vm_kern.h>
92 #include <vm/vm_extern.h>
93 #include <vm/uma.h>
94
95 #define EASY_SCAN_FACTOR 8
96
97 #define MSYNC_FLUSH_HARDSEQ 0x01
98 #define MSYNC_FLUSH_SOFTSEQ 0x02
99
100 /*
101 * msync / VM object flushing optimizations
102 */
103 static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ | MSYNC_FLUSH_SOFTSEQ;
104 SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags,
105 CTLFLAG_RW, &msync_flush_flags, 0, "");
106
107 static int old_msync;
108 SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
109 "Use old (insecure) msync behavior");
110
111 static void vm_object_qcollapse(vm_object_t object);
112 static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags);
113
114 /*
115 * Virtual memory objects maintain the actual data
116 * associated with allocated virtual memory. A given
117 * page of memory exists within exactly one object.
118 *
119 * An object is only deallocated when all "references"
120 * are given up. Only one "reference" to a given
121 * region of an object should be writeable.
122 *
123 * Associated with each object is a list of all resident
124 * memory pages belonging to that object; this list is
125 * maintained by the "vm_page" module, and locked by the object's
126 * lock.
127 *
128 * Each object also records a "pager" routine which is
129 * used to retrieve (and store) pages to the proper backing
130 * storage. In addition, objects may be backed by other
131 * objects from which they were virtual-copied.
132 *
133 * The only items within the object structure which are
134 * modified after time of creation are:
135 * reference count locked by object's lock
136 * pager routine locked by object's lock
137 *
138 */
139
140 struct object_q vm_object_list;
141 struct mtx vm_object_list_mtx; /* lock for object list and count */
142
143 struct vm_object kernel_object_store;
144 struct vm_object kmem_object_store;
145
146 static long object_collapses;
147 static long object_bypasses;
148
149 /*
150 * next_index determines the page color that is assigned to the next
151 * allocated object. Accesses to next_index are not synchronized
152 * because the effects of two or more object allocations using
153 * next_index simultaneously are inconsequential. At any given time,
154 * numerous objects have the same page color.
155 */
156 static int next_index;
157
158 static uma_zone_t obj_zone;
159 #define VM_OBJECTS_INIT 256
160
161 static int vm_object_zinit(void *mem, int size, int flags);
162
163 #ifdef INVARIANTS
164 static void vm_object_zdtor(void *mem, int size, void *arg);
165
166 static void
167 vm_object_zdtor(void *mem, int size, void *arg)
168 {
169 vm_object_t object;
170
171 object = (vm_object_t)mem;
172 KASSERT(TAILQ_EMPTY(&object->memq),
173 ("object %p has resident pages",
174 object));
175 KASSERT(object->paging_in_progress == 0,
176 ("object %p paging_in_progress = %d",
177 object, object->paging_in_progress));
178 KASSERT(object->resident_page_count == 0,
179 ("object %p resident_page_count = %d",
180 object, object->resident_page_count));
181 KASSERT(object->shadow_count == 0,
182 ("object %p shadow_count = %d",
183 object, object->shadow_count));
184 }
185 #endif
186
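/*
 * vm_object_zinit:
 *
 *	UMA zone initializer for vm objects.  Sets up the per-object lock
 *	and clears the counters that must be zero for a freed object.
 */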
187 static int
188 vm_object_zinit(void *mem, int size, int flags)
189 {
190 vm_object_t object;
191
192 object = (vm_object_t)mem;
193 bzero(&object->mtx, sizeof(object->mtx));
194 VM_OBJECT_LOCK_INIT(object, "standard object");
195
196 /* These are true for any object that has been freed */
197 object->paging_in_progress = 0;
198 object->resident_page_count = 0;
199 object->shadow_count = 0;
200 return (0);
201 }
202
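/*
 * _vm_object_allocate:
 *
 *	Initialize a caller-supplied object structure with the given type
 *	and size, assign it a page color, and link it onto the global
 *	object list.
 */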
203 void
204 _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
205 {
206 int incr;
207
208 TAILQ_INIT(&object->memq);
209 LIST_INIT(&object->shadow_head);
210
211 object->root = NULL;
212 object->type = type;
213 object->size = size;
214 object->generation = 1;
215 object->ref_count = 1;
216 object->flags = 0;
217 if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
218 object->flags = OBJ_ONEMAPPING;
219 if (size > (PQ_L2_SIZE / 3 + PQ_PRIME1))
220 incr = PQ_L2_SIZE / 3 + PQ_PRIME1;
221 else
222 incr = size;
223 object->pg_color = next_index;
224 next_index = (object->pg_color + incr) & PQ_L2_MASK;
225 object->handle = NULL;
226 object->backing_object = NULL;
227 object->backing_object_offset = (vm_ooffset_t) 0;
228
229 mtx_lock(&vm_object_list_mtx);
230 TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
231 mtx_unlock(&vm_object_list_mtx);
232 }
233
234 /*
235 * vm_object_init:
236 *
237 * Initialize the VM objects module.
238 */
239 void
240 vm_object_init(void)
241 {
242 TAILQ_INIT(&vm_object_list);
243 mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);
244
245 VM_OBJECT_LOCK_INIT(&kernel_object_store, "kernel object");
246 _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
247 kernel_object);
248
249 VM_OBJECT_LOCK_INIT(&kmem_object_store, "kmem object");
250 _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
251 kmem_object);
252
253 /*
254 * The lock portion of struct vm_object must be type stable due
255 * to vm_pageout_fallback_object_lock locking a vm object
256 * without holding any references to it.
257 */
258 obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
259 #ifdef INVARIANTS
260 vm_object_zdtor,
261 #else
262 NULL,
263 #endif
264 vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
265 uma_prealloc(obj_zone, VM_OBJECTS_INIT);
266 }
267
268 void
269 vm_object_clear_flag(vm_object_t object, u_short bits)
270 {
271
272 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
273 object->flags &= ~bits;
274 }
275
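/*
 * The paging_in_progress ("pip") counter tracks in-flight paging
 * operations on an object.  The routines below adjust the counter and
 * let waiters sleep until it drains; all of them require the object
 * lock to be held.
 */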
276 void
277 vm_object_pip_add(vm_object_t object, short i)
278 {
279
280 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
281 object->paging_in_progress += i;
282 }
283
284 void
285 vm_object_pip_subtract(vm_object_t object, short i)
286 {
287
288 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
289 object->paging_in_progress -= i;
290 }
291
292 void
293 vm_object_pip_wakeup(vm_object_t object)
294 {
295
296 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
297 object->paging_in_progress--;
298 if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
299 vm_object_clear_flag(object, OBJ_PIPWNT);
300 wakeup(object);
301 }
302 }
303
304 void
305 vm_object_pip_wakeupn(vm_object_t object, short i)
306 {
307
308 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
309 if (i)
310 object->paging_in_progress -= i;
311 if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
312 vm_object_clear_flag(object, OBJ_PIPWNT);
313 wakeup(object);
314 }
315 }
316
317 void
318 vm_object_pip_wait(vm_object_t object, char *waitid)
319 {
320
321 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
322 while (object->paging_in_progress) {
323 object->flags |= OBJ_PIPWNT;
324 msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0);
325 }
326 }
327
328 /*
329 * vm_object_allocate:
330 *
331 * Returns a new object with the given size.
332 */
333 vm_object_t
334 vm_object_allocate(objtype_t type, vm_pindex_t size)
335 {
336 vm_object_t object;
337
338 object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
339 _vm_object_allocate(type, size, object);
340 return (object);
341 }
342
343
344 /*
345 * vm_object_reference:
346 *
347 * Gets another reference to the given object. Note: OBJ_DEAD
348 * objects can be referenced during final cleaning.
349 */
350 void
351 vm_object_reference(vm_object_t object)
352 {
353 struct vnode *vp;
354
355 if (object == NULL)
356 return;
357 VM_OBJECT_LOCK(object);
358 object->ref_count++;
359 if (object->type == OBJT_VNODE) {
360 int vfslocked;
361
362 vp = object->handle;
363 VM_OBJECT_UNLOCK(object);
364 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
365 vget(vp, LK_RETRY, curthread);
366 VFS_UNLOCK_GIANT(vfslocked);
367 } else
368 VM_OBJECT_UNLOCK(object);
369 }
370
371 /*
372 * vm_object_reference_locked:
373 *
374 * Gets another reference to the given object.
375 *
376 * The object must be locked.
377 */
378 void
379 vm_object_reference_locked(vm_object_t object)
380 {
381 struct vnode *vp;
382
383 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
384 KASSERT((object->flags & OBJ_DEAD) == 0,
385 ("vm_object_reference_locked: dead object referenced"));
386 object->ref_count++;
387 if (object->type == OBJT_VNODE) {
388 vp = object->handle;
389 vref(vp);
390 }
391 }
392
393 /*
394 * Handle deallocating an object of type OBJT_VNODE.
395 */
396 void
397 vm_object_vndeallocate(vm_object_t object)
398 {
399 struct vnode *vp = (struct vnode *) object->handle;
400
401 VFS_ASSERT_GIANT(vp->v_mount);
402 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
403 KASSERT(object->type == OBJT_VNODE,
404 ("vm_object_vndeallocate: not a vnode object"));
405 KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
406 #ifdef INVARIANTS
407 if (object->ref_count == 0) {
408 vprint("vm_object_vndeallocate", vp);
409 panic("vm_object_vndeallocate: bad object reference count");
410 }
411 #endif
412
413 object->ref_count--;
414 if (object->ref_count == 0) {
415 mp_fixme("Unlocked vflag access.");
416 vp->v_vflag &= ~VV_TEXT;
417 }
418 VM_OBJECT_UNLOCK(object);
419 /*
420 * vrele may need a vop lock
421 */
422 vrele(vp);
423 }
424
425 /*
426 * vm_object_deallocate:
427 *
428 * Release a reference to the specified object,
429 * gained either through a vm_object_allocate
430 * or a vm_object_reference call. When all references
431 * are gone, storage associated with this object
432 * may be relinquished.
433 *
434 * No object may be locked.
435 */
436 void
437 vm_object_deallocate(vm_object_t object)
438 {
439 vm_object_t temp;
440
441 while (object != NULL) {
442 int vfslocked;
443
444 vfslocked = 0;
445 restart:
446 VM_OBJECT_LOCK(object);
447 if (object->type == OBJT_VNODE) {
448 struct vnode *vp = (struct vnode *) object->handle;
449
450 /*
451 * Conditionally acquire Giant for a vnode-backed
452 * object. We have to be careful since the type of
453 * a vnode object can change while the object is
454 * unlocked.
455 */
456 if (VFS_NEEDSGIANT(vp->v_mount) && !vfslocked) {
457 vfslocked = 1;
458 if (!mtx_trylock(&Giant)) {
459 VM_OBJECT_UNLOCK(object);
460 mtx_lock(&Giant);
461 goto restart;
462 }
463 }
464 vm_object_vndeallocate(object);
465 VFS_UNLOCK_GIANT(vfslocked);
466 return;
467 } else
468 /*
469 * This is to handle the case that the object
470 * changed type while we dropped its lock to
471 * obtain Giant.
472 */
473 VFS_UNLOCK_GIANT(vfslocked);
474
475 KASSERT(object->ref_count != 0,
476 ("vm_object_deallocate: object deallocated too many times: %d", object->type));
477
478 /*
479 * If the reference count goes to 0 we start calling
480 * vm_object_terminate() on the object chain.
481 * A ref count of 1 may be a special case depending on the
482 * shadow count being 0 or 1.
483 */
484 object->ref_count--;
485 if (object->ref_count > 1) {
486 VM_OBJECT_UNLOCK(object);
487 return;
488 } else if (object->ref_count == 1) {
489 if (object->shadow_count == 0 &&
490 object->handle == NULL &&
491 (object->type == OBJT_DEFAULT ||
492 object->type == OBJT_SWAP)) {
493 vm_object_set_flag(object, OBJ_ONEMAPPING);
494 } else if ((object->shadow_count == 1) &&
495 (object->handle == NULL) &&
496 (object->type == OBJT_DEFAULT ||
497 object->type == OBJT_SWAP)) {
498 vm_object_t robject;
499
500 robject = LIST_FIRST(&object->shadow_head);
501 KASSERT(robject != NULL,
502 ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
503 object->ref_count,
504 object->shadow_count));
505 if (!VM_OBJECT_TRYLOCK(robject)) {
506 /*
507 * Avoid a potential deadlock.
508 */
509 object->ref_count++;
510 VM_OBJECT_UNLOCK(object);
511 /*
512 * More likely than not the thread
513 * holding robject's lock has lower
514 * priority than the current thread.
515 * Let the lower priority thread run.
516 */
517 tsleep(&proc0, PVM, "vmo_de", 1);
518 continue;
519 }
520 /*
521 * Collapse object into its shadow unless its
522 * shadow is dead. In that case, object will
523 * be deallocated by the thread that is
524 * deallocating its shadow.
525 */
526 if ((robject->flags & OBJ_DEAD) == 0 &&
527 (robject->handle == NULL) &&
528 (robject->type == OBJT_DEFAULT ||
529 robject->type == OBJT_SWAP)) {
530
531 robject->ref_count++;
532 retry:
533 if (robject->paging_in_progress) {
534 VM_OBJECT_UNLOCK(object);
535 vm_object_pip_wait(robject,
536 "objde1");
537 temp = robject->backing_object;
538 if (object == temp) {
539 VM_OBJECT_LOCK(object);
540 goto retry;
541 }
542 } else if (object->paging_in_progress) {
543 VM_OBJECT_UNLOCK(robject);
544 object->flags |= OBJ_PIPWNT;
545 msleep(object,
546 VM_OBJECT_MTX(object),
547 PDROP | PVM, "objde2", 0);
548 VM_OBJECT_LOCK(robject);
549 temp = robject->backing_object;
550 if (object == temp) {
551 VM_OBJECT_LOCK(object);
552 goto retry;
553 }
554 } else
555 VM_OBJECT_UNLOCK(object);
556
557 if (robject->ref_count == 1) {
558 robject->ref_count--;
559 object = robject;
560 goto doterm;
561 }
562 object = robject;
563 vm_object_collapse(object);
564 VM_OBJECT_UNLOCK(object);
565 continue;
566 }
567 VM_OBJECT_UNLOCK(robject);
568 }
569 VM_OBJECT_UNLOCK(object);
570 return;
571 }
572 doterm:
573 temp = object->backing_object;
574 if (temp != NULL) {
575 VM_OBJECT_LOCK(temp);
576 LIST_REMOVE(object, shadow_list);
577 temp->shadow_count--;
578 temp->generation++;
579 VM_OBJECT_UNLOCK(temp);
580 object->backing_object = NULL;
581 }
582 /*
583 * Don't double-terminate, we could be in a termination
584 * recursion due to the terminate having to sync data
585 * to disk.
586 */
587 if ((object->flags & OBJ_DEAD) == 0)
588 vm_object_terminate(object);
589 else
590 VM_OBJECT_UNLOCK(object);
591 object = temp;
592 }
593 }
594
595 /*
596 * vm_object_destroy removes the object from the global object list
597 * and frees the space for the object.
598 */
599 void
600 vm_object_destroy(vm_object_t object)
601 {
602
603 /*
604 * Remove the object from the global object list.
605 */
606 mtx_lock(&vm_object_list_mtx);
607 TAILQ_REMOVE(&vm_object_list, object, object_list);
608 mtx_unlock(&vm_object_list_mtx);
609
610 /*
611 * Free the space for the object.
612 */
613 uma_zfree(obj_zone, object);
614 }
615
616 /*
617 * vm_object_terminate actually destroys the specified object, freeing
618 * up all previously used resources.
619 *
620 * The object must be locked.
621 * This routine may block.
622 */
623 void
624 vm_object_terminate(vm_object_t object)
625 {
626 vm_page_t p;
627
628 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
629
630 /*
631 * Make sure no one uses us.
632 */
633 vm_object_set_flag(object, OBJ_DEAD);
634
635 /*
636 * wait for the pageout daemon to be done with the object
637 */
638 vm_object_pip_wait(object, "objtrm");
639
640 KASSERT(!object->paging_in_progress,
641 ("vm_object_terminate: pageout in progress"));
642
643 /*
644 * Clean and free the pages, as appropriate. All references to the
645 * object are gone, so we don't need to lock it.
646 */
647 if (object->type == OBJT_VNODE) {
648 struct vnode *vp = (struct vnode *)object->handle;
649
650 /*
651 * Clean pages and flush buffers.
652 */
653 vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
654 VM_OBJECT_UNLOCK(object);
655
656 vinvalbuf(vp, V_SAVE, NULL, 0, 0);
657
658 VM_OBJECT_LOCK(object);
659 }
660
661 KASSERT(object->ref_count == 0,
662 ("vm_object_terminate: object with references, ref_count=%d",
663 object->ref_count));
664
665 /*
666 * Now free any remaining pages. For internal objects, this also
667 * removes them from paging queues. Don't free wired pages, just
668 * remove them from the object.
669 */
670 vm_page_lock_queues();
671 while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
672 KASSERT(!p->busy && (p->flags & PG_BUSY) == 0,
673 ("vm_object_terminate: freeing busy page %p "
674 "p->busy = %d, p->flags %x\n", p, p->busy, p->flags));
675 if (p->wire_count == 0) {
676 vm_page_free(p);
677 cnt.v_pfree++;
678 } else {
679 vm_page_remove(p);
680 }
681 }
682 vm_page_unlock_queues();
683
684 /*
685 * Let the pager know object is dead.
686 */
687 vm_pager_deallocate(object);
688 VM_OBJECT_UNLOCK(object);
689
690 vm_object_destroy(object);
691 }
692
693 /*
694 * vm_object_page_clean
695 *
696 * Clean all dirty pages in the specified range of object. Leaves page
697 * on whatever queue it is currently on. If NOSYNC is set then do not
698 * write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
699 * leaving the object dirty.
700 *
701 * When stuffing pages asynchronously, allow clustering. XXX we need a
702 * synchronous clustering mode implementation.
703 *
704  *	Odd semantics: if "end" is zero, we clean from "start" to the end of the object.
705 *
706 * The object must be locked.
707 */
708 void
709 vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int flags)
710 {
711 vm_page_t p, np;
712 vm_pindex_t tstart, tend;
713 vm_pindex_t pi;
714 int clearobjflags;
715 int pagerflags;
716 int curgeneration;
717
718 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
719 if (object->type != OBJT_VNODE ||
720 (object->flags & OBJ_MIGHTBEDIRTY) == 0)
721 return;
722
723 pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
724 pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;
725
726 vm_object_set_flag(object, OBJ_CLEANING);
727
728 tstart = start;
729 if (end == 0) {
730 tend = object->size;
731 } else {
732 tend = end;
733 }
734
735 vm_page_lock_queues();
736 /*
737 * If the caller is smart and only msync()s a range he knows is
738 * dirty, we may be able to avoid an object scan. This results in
739  *	a phenomenal improvement in performance. We cannot do this
740 * as a matter of course because the object may be huge - e.g.
741  *	the size might be in the gigabytes or terabytes.
742 */
743 if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) {
744 vm_pindex_t tscan;
745 int scanlimit;
746 int scanreset;
747
748 scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
749 if (scanreset < 16)
750 scanreset = 16;
751 pagerflags |= VM_PAGER_IGNORE_CLEANCHK;
752
753 scanlimit = scanreset;
754 tscan = tstart;
755 while (tscan < tend) {
756 curgeneration = object->generation;
757 p = vm_page_lookup(object, tscan);
758 if (p == NULL || p->valid == 0 ||
759 (p->queue - p->pc) == PQ_CACHE) {
760 if (--scanlimit == 0)
761 break;
762 ++tscan;
763 continue;
764 }
765 vm_page_test_dirty(p);
766 if ((p->dirty & p->valid) == 0) {
767 if (--scanlimit == 0)
768 break;
769 ++tscan;
770 continue;
771 }
772 /*
773 * If we have been asked to skip nosync pages and
774 * this is a nosync page, we can't continue.
775 */
776 if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
777 if (--scanlimit == 0)
778 break;
779 ++tscan;
780 continue;
781 }
782 scanlimit = scanreset;
783
784 /*
785 * This returns 0 if it was unable to busy the first
786 * page (i.e. had to sleep).
787 */
788 tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags);
789 }
790
791 /*
792 * If everything was dirty and we flushed it successfully,
793 * and the requested range is not the entire object, we
794 * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can
795 * return immediately.
796 */
797 if (tscan >= tend && (tstart || tend < object->size)) {
798 vm_page_unlock_queues();
799 vm_object_clear_flag(object, OBJ_CLEANING);
800 return;
801 }
802 pagerflags &= ~VM_PAGER_IGNORE_CLEANCHK;
803 }
804
805 /*
806 * Generally set CLEANCHK interlock and make the page read-only so
807 * we can then clear the object flags.
808 *
809 * However, if this is a nosync mmap then the object is likely to
810 * stay dirty so do not mess with the page and do not clear the
811 * object flags.
812 */
813 clearobjflags = 1;
814 TAILQ_FOREACH(p, &object->memq, listq) {
815 vm_page_flag_set(p, PG_CLEANCHK);
816 if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
817 clearobjflags = 0;
818 else
819 pmap_page_protect(p, VM_PROT_READ);
820 }
821
822 if (clearobjflags && (tstart == 0) && (tend == object->size)) {
823 struct vnode *vp;
824
825 vm_object_clear_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
826 if (object->type == OBJT_VNODE &&
827 (vp = (struct vnode *)object->handle) != NULL) {
828 VI_LOCK(vp);
829 if (vp->v_iflag & VI_OBJDIRTY)
830 vp->v_iflag &= ~VI_OBJDIRTY;
831 VI_UNLOCK(vp);
832 }
833 }
834
835 rescan:
836 curgeneration = object->generation;
837
838 for (p = TAILQ_FIRST(&object->memq); p; p = np) {
839 int n;
840
841 np = TAILQ_NEXT(p, listq);
842
843 again:
844 pi = p->pindex;
845 if (((p->flags & PG_CLEANCHK) == 0) ||
846 (pi < tstart) || (pi >= tend) ||
847 (p->valid == 0) ||
848 ((p->queue - p->pc) == PQ_CACHE)) {
849 vm_page_flag_clear(p, PG_CLEANCHK);
850 continue;
851 }
852
853 vm_page_test_dirty(p);
854 if ((p->dirty & p->valid) == 0) {
855 vm_page_flag_clear(p, PG_CLEANCHK);
856 continue;
857 }
858
859 /*
860 * If we have been asked to skip nosync pages and this is a
861 * nosync page, skip it. Note that the object flags were
862 * not cleared in this case so we do not have to set them.
863 */
864 if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
865 vm_page_flag_clear(p, PG_CLEANCHK);
866 continue;
867 }
868
869 n = vm_object_page_collect_flush(object, p,
870 curgeneration, pagerflags);
871 if (n == 0)
872 goto rescan;
873
874 if (object->generation != curgeneration)
875 goto rescan;
876
877 /*
878 * Try to optimize the next page. If we can't we pick up
879 * our (random) scan where we left off.
880 */
881 if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) {
882 if ((p = vm_page_lookup(object, pi + n)) != NULL)
883 goto again;
884 }
885 }
886 vm_page_unlock_queues();
887 #if 0
888 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
889 #endif
890
891 vm_object_clear_flag(object, OBJ_CLEANING);
892 return;
893 }
894
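/*
 * vm_object_page_collect_flush:
 *
 *	Gather a run of contiguous dirty pages around "p", looking both
 *	ahead and behind it, and hand the run to the pager in one write.
 *	Returns 0 if the scan must be restarted (the page was busy and the
 *	object generation changed while sleeping); otherwise returns the
 *	number of pages, counting "p" and the look-ahead pages, that the
 *	caller may skip.
 */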
895 static int
896 vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags)
897 {
898 int runlen;
899 int maxf;
900 int chkb;
901 int maxb;
902 int i;
903 vm_pindex_t pi;
904 vm_page_t maf[vm_pageout_page_count];
905 vm_page_t mab[vm_pageout_page_count];
906 vm_page_t ma[vm_pageout_page_count];
907
908 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
909 pi = p->pindex;
910 while (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) {
911 vm_page_lock_queues();
912 if (object->generation != curgeneration) {
913 return(0);
914 }
915 }
916 maxf = 0;
917 for(i = 1; i < vm_pageout_page_count; i++) {
918 vm_page_t tp;
919
920 if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
921 if ((tp->flags & PG_BUSY) ||
922 ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
923 (tp->flags & PG_CLEANCHK) == 0) ||
924 (tp->busy != 0))
925 break;
926 if((tp->queue - tp->pc) == PQ_CACHE) {
927 vm_page_flag_clear(tp, PG_CLEANCHK);
928 break;
929 }
930 vm_page_test_dirty(tp);
931 if ((tp->dirty & tp->valid) == 0) {
932 vm_page_flag_clear(tp, PG_CLEANCHK);
933 break;
934 }
935 maf[ i - 1 ] = tp;
936 maxf++;
937 continue;
938 }
939 break;
940 }
941
942 maxb = 0;
943 chkb = vm_pageout_page_count - maxf;
944 if (chkb) {
945 for(i = 1; i < chkb;i++) {
946 vm_page_t tp;
947
948 if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
949 if ((tp->flags & PG_BUSY) ||
950 ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
951 (tp->flags & PG_CLEANCHK) == 0) ||
952 (tp->busy != 0))
953 break;
954 if ((tp->queue - tp->pc) == PQ_CACHE) {
955 vm_page_flag_clear(tp, PG_CLEANCHK);
956 break;
957 }
958 vm_page_test_dirty(tp);
959 if ((tp->dirty & tp->valid) == 0) {
960 vm_page_flag_clear(tp, PG_CLEANCHK);
961 break;
962 }
963 mab[ i - 1 ] = tp;
964 maxb++;
965 continue;
966 }
967 break;
968 }
969 }
970
971 for(i = 0; i < maxb; i++) {
972 int index = (maxb - i) - 1;
973 ma[index] = mab[i];
974 vm_page_flag_clear(ma[index], PG_CLEANCHK);
975 }
976 vm_page_flag_clear(p, PG_CLEANCHK);
977 ma[maxb] = p;
978 for(i = 0; i < maxf; i++) {
979 int index = (maxb + i) + 1;
980 ma[index] = maf[i];
981 vm_page_flag_clear(ma[index], PG_CLEANCHK);
982 }
983 runlen = maxb + maxf + 1;
984
985 vm_pageout_flush(ma, runlen, pagerflags);
986 for (i = 0; i < runlen; i++) {
987 if (ma[i]->valid & ma[i]->dirty) {
988 pmap_page_protect(ma[i], VM_PROT_READ);
989 vm_page_flag_set(ma[i], PG_CLEANCHK);
990
991 /*
992 * maxf will end up being the actual number of pages
993 * we wrote out contiguously, non-inclusive of the
994 * first page. We do not count look-behind pages.
995 */
996 if (i >= maxb + 1 && (maxf > i - maxb - 1))
997 maxf = i - maxb - 1;
998 }
999 }
1000 return(maxf + 1);
1001 }
1002
1003 /*
1004 * Note that there is absolutely no sense in writing out
1005 * anonymous objects, so we track down the vnode object
1006 * to write out.
1007 * We invalidate (remove) all pages from the address space
1008 * for semantic correctness.
1009 *
1010 * Note: certain anonymous maps, such as MAP_NOSYNC maps,
1011 * may start out with a NULL object.
1012 */
1013 void
1014 vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
1015 boolean_t syncio, boolean_t invalidate)
1016 {
1017 vm_object_t backing_object;
1018 struct vnode *vp;
1019 struct mount *mp;
1020 int flags;
1021
1022 if (object == NULL)
1023 return;
1024 VM_OBJECT_LOCK(object);
1025 while ((backing_object = object->backing_object) != NULL) {
1026 VM_OBJECT_LOCK(backing_object);
1027 offset += object->backing_object_offset;
1028 VM_OBJECT_UNLOCK(object);
1029 object = backing_object;
1030 if (object->size < OFF_TO_IDX(offset + size))
1031 size = IDX_TO_OFF(object->size) - offset;
1032 }
1033 /*
1034 * Flush pages if writing is allowed, invalidate them
1035 * if invalidation requested. Pages undergoing I/O
1036 * will be ignored by vm_object_page_remove().
1037 *
1038 * We cannot lock the vnode and then wait for paging
1039 * to complete without deadlocking against vm_fault.
1040 * Instead we simply call vm_object_page_remove() and
1041 * allow it to block internally on a page-by-page
1042 * basis when it encounters pages undergoing async
1043 * I/O.
1044 */
1045 if (object->type == OBJT_VNODE &&
1046 (object->flags & OBJ_MIGHTBEDIRTY) != 0) {
1047 int vfslocked;
1048 vp = object->handle;
1049 VM_OBJECT_UNLOCK(object);
1050 (void) vn_start_write(vp, &mp, V_WAIT);
1051 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1052 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
1053 flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1054 flags |= invalidate ? OBJPC_INVAL : 0;
1055 VM_OBJECT_LOCK(object);
1056 vm_object_page_clean(object,
1057 OFF_TO_IDX(offset),
1058 OFF_TO_IDX(offset + size + PAGE_MASK),
1059 flags);
1060 VM_OBJECT_UNLOCK(object);
1061 VOP_UNLOCK(vp, 0, curthread);
1062 VFS_UNLOCK_GIANT(vfslocked);
1063 vn_finished_write(mp);
1064 VM_OBJECT_LOCK(object);
1065 }
1066 if ((object->type == OBJT_VNODE ||
1067 object->type == OBJT_DEVICE) && invalidate) {
1068 boolean_t purge;
1069 purge = old_msync || (object->type == OBJT_DEVICE);
1070 vm_object_page_remove(object,
1071 OFF_TO_IDX(offset),
1072 OFF_TO_IDX(offset + size + PAGE_MASK),
1073 purge ? FALSE : TRUE);
1074 }
1075 VM_OBJECT_UNLOCK(object);
1076 }
1077
1078 /*
1079 * vm_object_madvise:
1080 *
1081 * Implements the madvise function at the object/page level.
1082 *
1083 * MADV_WILLNEED (any object)
1084 *
1085 * Activate the specified pages if they are resident.
1086 *
1087 * MADV_DONTNEED (any object)
1088 *
1089 * Deactivate the specified pages if they are resident.
1090 *
1091 * MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects,
1092 * OBJ_ONEMAPPING only)
1093 *
1094 * Deactivate and clean the specified pages if they are
1095 * resident. This permits the process to reuse the pages
1096 * without faulting or the kernel to reclaim the pages
1097 * without I/O.
1098 */
1099 void
1100 vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
1101 {
1102 vm_pindex_t end, tpindex;
1103 vm_object_t backing_object, tobject;
1104 vm_page_t m;
1105
1106 if (object == NULL)
1107 return;
1108 VM_OBJECT_LOCK(object);
1109 end = pindex + count;
1110 /*
1111 * Locate and adjust resident pages
1112 */
1113 for (; pindex < end; pindex += 1) {
1114 relookup:
1115 tobject = object;
1116 tpindex = pindex;
1117 shadowlookup:
1118 /*
1119 * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
1120 * and those pages must be OBJ_ONEMAPPING.
1121 */
1122 if (advise == MADV_FREE) {
1123 if ((tobject->type != OBJT_DEFAULT &&
1124 tobject->type != OBJT_SWAP) ||
1125 (tobject->flags & OBJ_ONEMAPPING) == 0) {
1126 goto unlock_tobject;
1127 }
1128 }
1129 m = vm_page_lookup(tobject, tpindex);
1130 if (m == NULL) {
1131 /*
1132 * There may be swap even if there is no backing page
1133 */
1134 if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1135 swap_pager_freespace(tobject, tpindex, 1);
1136 /*
1137 * next object
1138 */
1139 backing_object = tobject->backing_object;
1140 if (backing_object == NULL)
1141 goto unlock_tobject;
1142 VM_OBJECT_LOCK(backing_object);
1143 tpindex += OFF_TO_IDX(tobject->backing_object_offset);
1144 if (tobject != object)
1145 VM_OBJECT_UNLOCK(tobject);
1146 tobject = backing_object;
1147 goto shadowlookup;
1148 }
1149 /*
1150 * If the page is busy or not in a normal active state,
1151 * we skip it. If the page is not managed there are no
1152 * page queues to mess with. Things can break if we mess
1153 * with pages in any of the below states.
1154 */
1155 vm_page_lock_queues();
1156 if (m->hold_count ||
1157 m->wire_count ||
1158 (m->flags & PG_UNMANAGED) ||
1159 m->valid != VM_PAGE_BITS_ALL) {
1160 vm_page_unlock_queues();
1161 goto unlock_tobject;
1162 }
1163 if ((m->flags & PG_BUSY) || m->busy) {
1164 vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
1165 if (object != tobject)
1166 VM_OBJECT_UNLOCK(object);
1167 VM_OBJECT_UNLOCK(tobject);
1168 msleep(m, &vm_page_queue_mtx, PDROP | PVM, "madvpo", 0);
1169 VM_OBJECT_LOCK(object);
1170 goto relookup;
1171 }
1172 if (advise == MADV_WILLNEED) {
1173 vm_page_activate(m);
1174 } else if (advise == MADV_DONTNEED) {
1175 vm_page_dontneed(m);
1176 } else if (advise == MADV_FREE) {
1177 /*
1178 * Mark the page clean. This will allow the page
1179 * to be freed up by the system. However, such pages
1180 * are often reused quickly by malloc()/free()
1181 * so we do not do anything that would cause
1182 * a page fault if we can help it.
1183 *
1184 * Specifically, we do not try to actually free
1185 * the page now nor do we try to put it in the
1186 * cache (which would cause a page fault on reuse).
1187 *
1188  *	But we do make the page as freeable as we
1189 * can without actually taking the step of unmapping
1190 * it.
1191 */
1192 pmap_clear_modify(m);
1193 m->dirty = 0;
1194 m->act_count = 0;
1195 vm_page_dontneed(m);
1196 }
1197 vm_page_unlock_queues();
1198 if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1199 swap_pager_freespace(tobject, tpindex, 1);
1200 unlock_tobject:
1201 if (tobject != object)
1202 VM_OBJECT_UNLOCK(tobject);
1203 }
1204 VM_OBJECT_UNLOCK(object);
1205 }
1206
1207 /*
1208 * vm_object_shadow:
1209 *
1210 * Create a new object which is backed by the
1211 * specified existing object range. The source
1212 * object reference is deallocated.
1213 *
1214 * The new object and offset into that object
1215 * are returned in the source parameters.
1216 */
1217 void
1218 vm_object_shadow(
1219 vm_object_t *object, /* IN/OUT */
1220 vm_ooffset_t *offset, /* IN/OUT */
1221 vm_size_t length)
1222 {
1223 vm_object_t source;
1224 vm_object_t result;
1225
1226 source = *object;
1227
1228 /*
1229 * Don't create the new object if the old object isn't shared.
1230 */
1231 if (source != NULL) {
1232 VM_OBJECT_LOCK(source);
1233 if (source->ref_count == 1 &&
1234 source->handle == NULL &&
1235 (source->type == OBJT_DEFAULT ||
1236 source->type == OBJT_SWAP)) {
1237 VM_OBJECT_UNLOCK(source);
1238 return;
1239 }
1240 VM_OBJECT_UNLOCK(source);
1241 }
1242
1243 /*
1244 * Allocate a new object with the given length.
1245 */
1246 result = vm_object_allocate(OBJT_DEFAULT, length);
1247
1248 /*
1249 * The new object shadows the source object, adding a reference to it.
1250 * Our caller changes his reference to point to the new object,
1251 * removing a reference to the source object. Net result: no change
1252 * of reference count.
1253 *
1254 * Try to optimize the result object's page color when shadowing
1255 * in order to maintain page coloring consistency in the combined
1256 * shadowed object.
1257 */
1258 result->backing_object = source;
1259 /*
1260 * Store the offset into the source object, and fix up the offset into
1261 * the new object.
1262 */
1263 result->backing_object_offset = *offset;
1264 if (source != NULL) {
1265 VM_OBJECT_LOCK(source);
1266 LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
1267 source->shadow_count++;
1268 source->generation++;
1269 if (length < source->size)
1270 length = source->size;
1271 if (length > PQ_L2_SIZE / 3 + PQ_PRIME1 ||
1272 source->generation > 1)
1273 length = PQ_L2_SIZE / 3 + PQ_PRIME1;
1274 result->pg_color = (source->pg_color +
1275 length * source->generation) & PQ_L2_MASK;
1276 result->flags |= source->flags & OBJ_NEEDGIANT;
1277 VM_OBJECT_UNLOCK(source);
1278 next_index = (result->pg_color + PQ_L2_SIZE / 3 + PQ_PRIME1) &
1279 PQ_L2_MASK;
1280 }
1281
1282
1283 /*
1284  *	Return the new object and offset.
1285 */
1286 *offset = 0;
1287 *object = result;
1288 }
1289
1290 /*
1291 * vm_object_split:
1292 *
1293 * Split the pages in a map entry into a new object. This affords
1294 * easier removal of unused pages, and keeps object inheritance from
1295 * being a negative impact on memory usage.
1296 */
1297 void
1298 vm_object_split(vm_map_entry_t entry)
1299 {
1300 vm_page_t m;
1301 vm_object_t orig_object, new_object, source;
1302 vm_pindex_t offidxstart, offidxend;
1303 vm_size_t idx, size;
1304
1305 orig_object = entry->object.vm_object;
1306 if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
1307 return;
1308 if (orig_object->ref_count <= 1)
1309 return;
1310 VM_OBJECT_UNLOCK(orig_object);
1311
1312 offidxstart = OFF_TO_IDX(entry->offset);
1313 offidxend = offidxstart + OFF_TO_IDX(entry->end - entry->start);
1314 size = offidxend - offidxstart;
1315
1316 /*
1317 * If swap_pager_copy() is later called, it will convert new_object
1318 * into a swap object.
1319 */
1320 new_object = vm_object_allocate(OBJT_DEFAULT, size);
1321
1322 VM_OBJECT_LOCK(new_object);
1323 VM_OBJECT_LOCK(orig_object);
1324 source = orig_object->backing_object;
1325 if (source != NULL) {
1326 VM_OBJECT_LOCK(source);
1327 if ((source->flags & OBJ_DEAD) != 0) {
1328 VM_OBJECT_UNLOCK(source);
1329 VM_OBJECT_UNLOCK(orig_object);
1330 VM_OBJECT_UNLOCK(new_object);
1331 vm_object_deallocate(new_object);
1332 VM_OBJECT_LOCK(orig_object);
1333 return;
1334 }
1335 LIST_INSERT_HEAD(&source->shadow_head,
1336 new_object, shadow_list);
1337 source->shadow_count++;
1338 source->generation++;
1339 vm_object_reference_locked(source); /* for new_object */
1340 vm_object_clear_flag(source, OBJ_ONEMAPPING);
1341 VM_OBJECT_UNLOCK(source);
1342 new_object->backing_object_offset =
1343 orig_object->backing_object_offset + entry->offset;
1344 new_object->backing_object = source;
1345 }
1346 new_object->flags |= orig_object->flags & OBJ_NEEDGIANT;
1347 vm_page_lock_queues();
1348 for (idx = 0; idx < size; idx++) {
1349 retry:
1350 m = vm_page_lookup(orig_object, offidxstart + idx);
1351 if (m == NULL)
1352 continue;
1353
1354 /*
1355 * We must wait for pending I/O to complete before we can
1356 * rename the page.
1357 *
1358 * We do not have to VM_PROT_NONE the page as mappings should
1359 * not be changed by this operation.
1360 */
1361 if ((m->flags & PG_BUSY) || m->busy) {
1362 vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
1363 VM_OBJECT_UNLOCK(orig_object);
1364 VM_OBJECT_UNLOCK(new_object);
1365 msleep(m, &vm_page_queue_mtx, PDROP | PVM, "spltwt", 0);
1366 VM_OBJECT_LOCK(new_object);
1367 VM_OBJECT_LOCK(orig_object);
1368 vm_page_lock_queues();
1369 goto retry;
1370 }
1371 vm_page_rename(m, new_object, idx);
1372 /* page automatically made dirty by rename and cache handled */
1373 vm_page_busy(m);
1374 }
1375 vm_page_unlock_queues();
1376 if (orig_object->type == OBJT_SWAP) {
1377 /*
1378 * swap_pager_copy() can sleep, in which case the orig_object's
1379 * and new_object's locks are released and reacquired.
1380 */
1381 swap_pager_copy(orig_object, new_object, offidxstart, 0);
1382 }
1383 VM_OBJECT_UNLOCK(orig_object);
1384 vm_page_lock_queues();
1385 TAILQ_FOREACH(m, &new_object->memq, listq)
1386 vm_page_wakeup(m);
1387 vm_page_unlock_queues();
1388 VM_OBJECT_UNLOCK(new_object);
1389 entry->object.vm_object = new_object;
1390 entry->offset = 0LL;
1391 vm_object_deallocate(orig_object);
1392 VM_OBJECT_LOCK(new_object);
1393 }
1394
1395 #define OBSC_TEST_ALL_SHADOWED 0x0001
1396 #define OBSC_COLLAPSE_NOWAIT 0x0002
1397 #define OBSC_COLLAPSE_WAIT 0x0004
1398
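/*
 * vm_object_backing_scan:
 *
 *	Walk the resident pages of the object's backing object on behalf of
 *	a collapse or bypass attempt.  OBSC_TEST_ALL_SHADOWED checks whether
 *	every backing page is shadowed by the parent; the OBSC_COLLAPSE_*
 *	operations free or move backing pages into the parent, waiting for
 *	busy pages only in the _WAIT case.  Returns 0 if the shadow test
 *	fails, otherwise 1.
 */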
1399 static int
1400 vm_object_backing_scan(vm_object_t object, int op)
1401 {
1402 int r = 1;
1403 vm_page_t p;
1404 vm_object_t backing_object;
1405 vm_pindex_t backing_offset_index;
1406
1407 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1408 VM_OBJECT_LOCK_ASSERT(object->backing_object, MA_OWNED);
1409
1410 backing_object = object->backing_object;
1411 backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
1412
1413 /*
1414 * Initial conditions
1415 */
1416 if (op & OBSC_TEST_ALL_SHADOWED) {
1417 /*
1418 * We do not want to have to test for the existence of
1419 * swap pages in the backing object. XXX but with the
1420 * new swapper this would be pretty easy to do.
1421 *
1422 * XXX what about anonymous MAP_SHARED memory that hasn't
1423 * been ZFOD faulted yet? If we do not test for this, the
1424 * shadow test may succeed! XXX
1425 */
1426 if (backing_object->type != OBJT_DEFAULT) {
1427 return (0);
1428 }
1429 }
1430 if (op & OBSC_COLLAPSE_WAIT) {
1431 vm_object_set_flag(backing_object, OBJ_DEAD);
1432 }
1433
1434 /*
1435 * Our scan
1436 */
1437 p = TAILQ_FIRST(&backing_object->memq);
1438 while (p) {
1439 vm_page_t next = TAILQ_NEXT(p, listq);
1440 vm_pindex_t new_pindex = p->pindex - backing_offset_index;
1441
1442 if (op & OBSC_TEST_ALL_SHADOWED) {
1443 vm_page_t pp;
1444
1445 /*
1446 * Ignore pages outside the parent object's range
1447 * and outside the parent object's mapping of the
1448 * backing object.
1449 *
1450 * note that we do not busy the backing object's
1451 * page.
1452 */
1453 if (
1454 p->pindex < backing_offset_index ||
1455 new_pindex >= object->size
1456 ) {
1457 p = next;
1458 continue;
1459 }
1460
1461 /*
1462 * See if the parent has the page or if the parent's
1463 * object pager has the page. If the parent has the
1464 * page but the page is not valid, the parent's
1465 * object pager must have the page.
1466 *
1467 * If this fails, the parent does not completely shadow
1468 * the object and we might as well give up now.
1469 */
1470
1471 pp = vm_page_lookup(object, new_pindex);
1472 if (
1473 (pp == NULL || pp->valid == 0) &&
1474 !vm_pager_has_page(object, new_pindex, NULL, NULL)
1475 ) {
1476 r = 0;
1477 break;
1478 }
1479 }
1480
1481 /*
1482 * Check for busy page
1483 */
1484 if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
1485 vm_page_t pp;
1486
1487 if (op & OBSC_COLLAPSE_NOWAIT) {
1488 if ((p->flags & PG_BUSY) ||
1489 !p->valid ||
1490 p->busy) {
1491 p = next;
1492 continue;
1493 }
1494 } else if (op & OBSC_COLLAPSE_WAIT) {
1495 if ((p->flags & PG_BUSY) || p->busy) {
1496 vm_page_lock_queues();
1497 vm_page_flag_set(p,
1498 PG_WANTED | PG_REFERENCED);
1499 VM_OBJECT_UNLOCK(backing_object);
1500 VM_OBJECT_UNLOCK(object);
1501 msleep(p, &vm_page_queue_mtx,
1502 PDROP | PVM, "vmocol", 0);
1503 VM_OBJECT_LOCK(object);
1504 VM_OBJECT_LOCK(backing_object);
1505 /*
1506 * If we slept, anything could have
1507 * happened. Since the object is
1508 * marked dead, the backing offset
1509 * should not have changed so we
1510 * just restart our scan.
1511 */
1512 p = TAILQ_FIRST(&backing_object->memq);
1513 continue;
1514 }
1515 }
1516
1517 KASSERT(
1518 p->object == backing_object,
1519 ("vm_object_backing_scan: object mismatch")
1520 );
1521
1522 /*
1523 * Destroy any associated swap
1524 */
1525 if (backing_object->type == OBJT_SWAP) {
1526 swap_pager_freespace(
1527 backing_object,
1528 p->pindex,
1529 1
1530 );
1531 }
1532
1533 if (
1534 p->pindex < backing_offset_index ||
1535 new_pindex >= object->size
1536 ) {
1537 /*
1538 * Page is out of the parent object's range, we
1539 * can simply destroy it.
1540 */
1541 vm_page_lock_queues();
1542 KASSERT(!pmap_page_is_mapped(p),
1543 ("freeing mapped page %p", p));
1544 if (p->wire_count == 0)
1545 vm_page_free(p);
1546 else
1547 vm_page_remove(p);
1548 vm_page_unlock_queues();
1549 p = next;
1550 continue;
1551 }
1552
1553 pp = vm_page_lookup(object, new_pindex);
1554 if (
1555 pp != NULL ||
1556 vm_pager_has_page(object, new_pindex, NULL, NULL)
1557 ) {
1558 /*
1559 * page already exists in parent OR swap exists
1560 * for this location in the parent. Destroy
1561 * the original page from the backing object.
1562 *
1563 * Leave the parent's page alone
1564 */
1565 vm_page_lock_queues();
1566 KASSERT(!pmap_page_is_mapped(p),
1567 ("freeing mapped page %p", p));
1568 if (p->wire_count == 0)
1569 vm_page_free(p);
1570 else
1571 vm_page_remove(p);
1572 vm_page_unlock_queues();
1573 p = next;
1574 continue;
1575 }
1576
1577 /*
1578 * Page does not exist in parent, rename the
1579 * page from the backing object to the main object.
1580 *
1581 * If the page was mapped to a process, it can remain
1582 * mapped through the rename.
1583 */
1584 vm_page_lock_queues();
1585 vm_page_rename(p, object, new_pindex);
1586 vm_page_unlock_queues();
1587 /* page automatically made dirty by rename */
1588 }
1589 p = next;
1590 }
1591 return (r);
1592 }
1593
1594
1595 /*
1596 * this version of collapse allows the operation to occur earlier and
1597 * when paging_in_progress is true for an object... This is not a complete
1598 * operation, but should plug 99.9% of the rest of the leaks.
1599 */
1600 static void
1601 vm_object_qcollapse(vm_object_t object)
1602 {
1603 vm_object_t backing_object = object->backing_object;
1604
1605 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1606 VM_OBJECT_LOCK_ASSERT(backing_object, MA_OWNED);
1607
1608 if (backing_object->ref_count != 1)
1609 return;
1610
1611 vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
1612 }
1613
1614 /*
1615 * vm_object_collapse:
1616 *
1617 * Collapse an object with the object backing it.
1618 * Pages in the backing object are moved into the
1619 * parent, and the backing object is deallocated.
1620 */
1621 void
1622 vm_object_collapse(vm_object_t object)
1623 {
1624 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1625
1626 while (TRUE) {
1627 vm_object_t backing_object;
1628
1629 /*
1630 * Verify that the conditions are right for collapse:
1631 *
1632 * The object exists and the backing object exists.
1633 */
1634 if ((backing_object = object->backing_object) == NULL)
1635 break;
1636
1637 /*
1638 * we check the backing object first, because it is most likely
1639  *	not collapsible.
1640 */
1641 VM_OBJECT_LOCK(backing_object);
1642 if (backing_object->handle != NULL ||
1643 (backing_object->type != OBJT_DEFAULT &&
1644 backing_object->type != OBJT_SWAP) ||
1645 (backing_object->flags & OBJ_DEAD) ||
1646 object->handle != NULL ||
1647 (object->type != OBJT_DEFAULT &&
1648 object->type != OBJT_SWAP) ||
1649 (object->flags & OBJ_DEAD)) {
1650 VM_OBJECT_UNLOCK(backing_object);
1651 break;
1652 }
1653
1654 if (
1655 object->paging_in_progress != 0 ||
1656 backing_object->paging_in_progress != 0
1657 ) {
1658 vm_object_qcollapse(object);
1659 VM_OBJECT_UNLOCK(backing_object);
1660 break;
1661 }
1662 /*
1663 * We know that we can either collapse the backing object (if
1664 * the parent is the only reference to it) or (perhaps) have
1665 * the parent bypass the object if the parent happens to shadow
1666 * all the resident pages in the entire backing object.
1667 *
1668 * This is ignoring pager-backed pages such as swap pages.
1669 * vm_object_backing_scan fails the shadowing test in this
1670 * case.
1671 */
1672 if (backing_object->ref_count == 1) {
1673 /*
1674 * If there is exactly one reference to the backing
1675 * object, we can collapse it into the parent.
1676 */
1677 vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
1678
1679 /*
1680 * Move the pager from backing_object to object.
1681 */
1682 if (backing_object->type == OBJT_SWAP) {
1683 /*
1684 * swap_pager_copy() can sleep, in which case
1685 * the backing_object's and object's locks are
1686 * released and reacquired.
1687 */
1688 swap_pager_copy(
1689 backing_object,
1690 object,
1691 OFF_TO_IDX(object->backing_object_offset), TRUE);
1692 }
1693 /*
1694 * Object now shadows whatever backing_object did.
1695 * Note that the reference to
1696 * backing_object->backing_object moves from within
1697 * backing_object to within object.
1698 */
1699 LIST_REMOVE(object, shadow_list);
1700 backing_object->shadow_count--;
1701 backing_object->generation++;
1702 if (backing_object->backing_object) {
1703 VM_OBJECT_LOCK(backing_object->backing_object);
1704 LIST_REMOVE(backing_object, shadow_list);
1705 LIST_INSERT_HEAD(
1706 &backing_object->backing_object->shadow_head,
1707 object, shadow_list);
1708 /*
1709 * The shadow_count has not changed.
1710 */
1711 backing_object->backing_object->generation++;
1712 VM_OBJECT_UNLOCK(backing_object->backing_object);
1713 }
1714 object->backing_object = backing_object->backing_object;
1715 object->backing_object_offset +=
1716 backing_object->backing_object_offset;
1717
1718 /*
1719 * Discard backing_object.
1720 *
1721 * Since the backing object has no pages, no pager left,
1722 * and no object references within it, all that is
1723 * necessary is to dispose of it.
1724 */
1725 KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
1726 VM_OBJECT_UNLOCK(backing_object);
1727
1728 mtx_lock(&vm_object_list_mtx);
1729 TAILQ_REMOVE(
1730 &vm_object_list,
1731 backing_object,
1732 object_list
1733 );
1734 mtx_unlock(&vm_object_list_mtx);
1735
1736 uma_zfree(obj_zone, backing_object);
1737
1738 object_collapses++;
1739 } else {
1740 vm_object_t new_backing_object;
1741
1742 /*
1743 * If we do not entirely shadow the backing object,
1744 * there is nothing we can do so we give up.
1745 */
1746 if (object->resident_page_count != object->size &&
1747 vm_object_backing_scan(object,
1748 OBSC_TEST_ALL_SHADOWED) == 0) {
1749 VM_OBJECT_UNLOCK(backing_object);
1750 break;
1751 }
1752
1753 /*
1754 * Make the parent shadow the next object in the
1755 * chain. Deallocating backing_object will not remove
1756 * it, since its reference count is at least 2.
1757 */
1758 LIST_REMOVE(object, shadow_list);
1759 backing_object->shadow_count--;
1760 backing_object->generation++;
1761
1762 new_backing_object = backing_object->backing_object;
1763 if ((object->backing_object = new_backing_object) != NULL) {
1764 VM_OBJECT_LOCK(new_backing_object);
1765 LIST_INSERT_HEAD(
1766 &new_backing_object->shadow_head,
1767 object,
1768 shadow_list
1769 );
1770 new_backing_object->shadow_count++;
1771 new_backing_object->generation++;
1772 vm_object_reference_locked(new_backing_object);
1773 VM_OBJECT_UNLOCK(new_backing_object);
1774 object->backing_object_offset +=
1775 backing_object->backing_object_offset;
1776 }
1777
1778 /*
1779 * Drop the reference count on backing_object. Since
1780 * its ref_count was at least 2, it will not vanish.
1781 */
1782 backing_object->ref_count--;
1783 VM_OBJECT_UNLOCK(backing_object);
1784 object_bypasses++;
1785 }
1786
1787 /*
1788 * Try again with this object's new backing object.
1789 */
1790 }
1791 }
1792
1793 /*
1794 * vm_object_page_remove:
1795 *
1796 * For the given object, either frees or invalidates each of the
1797 * specified pages. In general, a page is freed. However, if a
1798 * page is wired for any reason other than the existence of a
1799 * managed, wired mapping, then it may be invalidated but not
1800 * removed from the object. Pages are specified by the given
1801 * range ["start", "end") and Boolean "clean_only". As a
1802 * special case, if "end" is zero, then the range extends from
1803 * "start" to the end of the object. If "clean_only" is TRUE,
1804 * then only the non-dirty pages within the specified range are
1805 * affected.
1806 *
1807 * In general, this operation should only be performed on objects
1808 * that contain managed pages. There are two exceptions. First,
1809 * it may be performed on the kernel and kmem objects. Second,
1810 * it may be used by msync(..., MS_INVALIDATE) to invalidate
1811 * device-backed pages.
1812 *
1813 * The object must be locked.
1814 */
1815 void
1816 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
1817 boolean_t clean_only)
1818 {
1819 vm_page_t p, next;
1820
1821 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1822 if (object->resident_page_count == 0)
1823 return;
1824
1825 /*
1826 * Since physically-backed objects do not use managed pages, we can't
1827 * remove pages from the object (we must instead remove the page
1828 * references, and then destroy the object).
1829 */
1830 KASSERT(object->type != OBJT_PHYS,
1831 ("attempt to remove pages from a physical object"));
1832
1833 vm_object_pip_add(object, 1);
1834 again:
1835 vm_page_lock_queues();
1836 if ((p = TAILQ_FIRST(&object->memq)) != NULL) {
1837 if (p->pindex < start) {
1838 p = vm_page_splay(start, object->root);
1839 if ((object->root = p)->pindex < start)
1840 p = TAILQ_NEXT(p, listq);
1841 }
1842 }
1843 /*
1844 * Assert: the variable p is either (1) the page with the
1845 * least pindex greater than or equal to the parameter pindex
1846 * or (2) NULL.
1847 */
1848 for (;
1849 p != NULL && (p->pindex < end || end == 0);
1850 p = next) {
1851 next = TAILQ_NEXT(p, listq);
1852
1853 if (p->wire_count != 0) {
1854 /* Fictitious pages do not have managed mappings. */
1855 if ((p->flags & PG_FICTITIOUS) == 0)
1856 pmap_remove_all(p);
1857 if (!clean_only)
1858 p->valid = 0;
1859 continue;
1860 }
1861 if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
1862 goto again;
1863 KASSERT((p->flags & PG_FICTITIOUS) == 0,
1864 ("vm_object_page_remove: page %p is fictitious", p));
1865 if (clean_only && p->valid) {
1866 pmap_page_protect(p, VM_PROT_READ | VM_PROT_EXECUTE);
1867 if (p->valid & p->dirty)
1868 continue;
1869 }
1870 pmap_remove_all(p);
1871 vm_page_free(p);
1872 }
1873 vm_page_unlock_queues();
1874 vm_object_pip_wakeup(object);
1875 }
1876
1877 /*
1878 * Routine: vm_object_coalesce
1879 * Function: Coalesces two objects backing up adjoining
1880 * regions of memory into a single object.
1881 *
1882 * returns TRUE if objects were combined.
1883 *
1884 * NOTE: Only works at the moment if the second object is NULL -
1885 * if it's not, which object do we lock first?
1886 *
1887 * Parameters:
1888 * prev_object First object to coalesce
1889 * prev_offset Offset into prev_object
1890 * prev_size Size of reference to prev_object
1891 * next_size Size of reference to the second object
1892 *
1893 * Conditions:
1894 * The object must *not* be locked.
1895 */
1896 boolean_t
1897 vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
1898 vm_size_t prev_size, vm_size_t next_size)
1899 {
1900 vm_pindex_t next_pindex;
1901
1902 if (prev_object == NULL)
1903 return (TRUE);
1904 VM_OBJECT_LOCK(prev_object);
1905 if (prev_object->type != OBJT_DEFAULT &&
1906 prev_object->type != OBJT_SWAP) {
1907 VM_OBJECT_UNLOCK(prev_object);
1908 return (FALSE);
1909 }
1910
1911 /*
1912 * Try to collapse the object first
1913 */
1914 vm_object_collapse(prev_object);
1915
1916 /*
1917  *	Can't coalesce if: more than one reference, paged out, shadows
1918  *	another object, or has a copy elsewhere (any of which mean that
1919  *	the pages not mapped to prev_entry may be in use anyway).
1920 */
1921 if (prev_object->backing_object != NULL) {
1922 VM_OBJECT_UNLOCK(prev_object);
1923 return (FALSE);
1924 }
1925
1926 prev_size >>= PAGE_SHIFT;
1927 next_size >>= PAGE_SHIFT;
1928 next_pindex = OFF_TO_IDX(prev_offset) + prev_size;
1929
1930 if ((prev_object->ref_count > 1) &&
1931 (prev_object->size != next_pindex)) {
1932 VM_OBJECT_UNLOCK(prev_object);
1933 return (FALSE);
1934 }
1935
1936 /*
1937 * Remove any pages that may still be in the object from a previous
1938 * deallocation.
1939 */
1940 if (next_pindex < prev_object->size) {
1941 vm_object_page_remove(prev_object,
1942 next_pindex,
1943 next_pindex + next_size, FALSE);
1944 if (prev_object->type == OBJT_SWAP)
1945 swap_pager_freespace(prev_object,
1946 next_pindex, next_size);
1947 }
1948
1949 /*
1950 * Extend the object if necessary.
1951 */
1952 if (next_pindex + next_size > prev_object->size)
1953 prev_object->size = next_pindex + next_size;
1954
1955 VM_OBJECT_UNLOCK(prev_object);
1956 return (TRUE);
1957 }
1958
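/*
 * vm_object_set_writeable_dirty:
 *
 *	Mark the object as writeable and possibly dirty, and set VI_OBJDIRTY
 *	on the associated vnode so other code can see that the vnode's pages
 *	may be dirty.
 */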
1959 void
1960 vm_object_set_writeable_dirty(vm_object_t object)
1961 {
1962 struct vnode *vp;
1963
1964 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1965 if ((object->flags & (OBJ_MIGHTBEDIRTY|OBJ_WRITEABLE)) ==
1966 (OBJ_MIGHTBEDIRTY|OBJ_WRITEABLE))
1967 return;
1968 vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
1969 if (object->type == OBJT_VNODE &&
1970 (vp = (struct vnode *)object->handle) != NULL) {
1971 VI_LOCK(vp);
1972 vp->v_iflag |= VI_OBJDIRTY;
1973 VI_UNLOCK(vp);
1974 }
1975 }
1976
1977 #include "opt_ddb.h"
1978 #ifdef DDB
1979 #include <sys/kernel.h>
1980
1981 #include <sys/cons.h>
1982
1983 #include <ddb/ddb.h>
1984
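/*
 * Helper for the DDB commands below: returns 1 if the object is mapped by
 * the given map entry (or by any entry of the given map when entry is
 * NULL), following backing-object chains; returns 0 otherwise.
 */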
1985 static int
1986 _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
1987 {
1988 vm_map_t tmpm;
1989 vm_map_entry_t tmpe;
1990 vm_object_t obj;
1991 int entcount;
1992
1993 if (map == 0)
1994 return 0;
1995
1996 if (entry == 0) {
1997 tmpe = map->header.next;
1998 entcount = map->nentries;
1999 while (entcount-- && (tmpe != &map->header)) {
2000 if (_vm_object_in_map(map, object, tmpe)) {
2001 return 1;
2002 }
2003 tmpe = tmpe->next;
2004 }
2005 } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2006 tmpm = entry->object.sub_map;
2007 tmpe = tmpm->header.next;
2008 entcount = tmpm->nentries;
2009 while (entcount-- && tmpe != &tmpm->header) {
2010 if (_vm_object_in_map(tmpm, object, tmpe)) {
2011 return 1;
2012 }
2013 tmpe = tmpe->next;
2014 }
2015 } else if ((obj = entry->object.vm_object) != NULL) {
2016 for (; obj; obj = obj->backing_object)
2017 if (obj == object) {
2018 return 1;
2019 }
2020 }
2021 return 0;
2022 }
2023
2024 static int
2025 vm_object_in_map(vm_object_t object)
2026 {
2027 struct proc *p;
2028
2029 /* sx_slock(&allproc_lock); */
2030 LIST_FOREACH(p, &allproc, p_list) {
2031 if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
2032 continue;
2033 if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
2034 /* sx_sunlock(&allproc_lock); */
2035 return 1;
2036 }
2037 }
2038 /* sx_sunlock(&allproc_lock); */
2039 if (_vm_object_in_map(kernel_map, object, 0))
2040 return 1;
2041 if (_vm_object_in_map(kmem_map, object, 0))
2042 return 1;
2043 if (_vm_object_in_map(pager_map, object, 0))
2044 return 1;
2045 if (_vm_object_in_map(buffer_map, object, 0))
2046 return 1;
2047 return 0;
2048 }
2049
2050 DB_SHOW_COMMAND(vmochk, vm_object_check)
2051 {
2052 vm_object_t object;
2053
2054 /*
2055 * make sure that internal objs are in a map somewhere
2056 * and none have zero ref counts.
2057 */
2058 TAILQ_FOREACH(object, &vm_object_list, object_list) {
2059 if (object->handle == NULL &&
2060 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2061 if (object->ref_count == 0) {
2062 db_printf("vmochk: internal obj has zero ref count: %ld\n",
2063 (long)object->size);
2064 }
2065 if (!vm_object_in_map(object)) {
2066 db_printf(
2067 "vmochk: internal obj is not in a map: "
2068 "ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
2069 object->ref_count, (u_long)object->size,
2070 (u_long)object->size,
2071 (void *)object->backing_object);
2072 }
2073 }
2074 }
2075 }
2076
2077 /*
2078 * vm_object_print: [ debug ]
2079 */
2080 DB_SHOW_COMMAND(object, vm_object_print_static)
2081 {
2082 /* XXX convert args. */
2083 vm_object_t object = (vm_object_t)addr;
2084 boolean_t full = have_addr;
2085
2086 vm_page_t p;
2087
2088 /* XXX count is an (unused) arg. Avoid shadowing it. */
2089 #define count was_count
2090
2091 int count;
2092
2093 if (object == NULL)
2094 return;
2095
2096 db_iprintf(
2097 "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x\n",
2098 object, (int)object->type, (uintmax_t)object->size,
2099 object->resident_page_count, object->ref_count, object->flags);
2100 db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
2101 object->shadow_count,
2102 object->backing_object ? object->backing_object->ref_count : 0,
2103 object->backing_object, (uintmax_t)object->backing_object_offset);
2104
2105 if (!full)
2106 return;
2107
2108 db_indent += 2;
2109 count = 0;
2110 TAILQ_FOREACH(p, &object->memq, listq) {
2111 if (count == 0)
2112 db_iprintf("memory:=");
2113 else if (count == 6) {
2114 db_printf("\n");
2115 db_iprintf(" ...");
2116 count = 0;
2117 } else
2118 db_printf(",");
2119 count++;
2120
2121 db_printf("(off=0x%jx,page=0x%jx)",
2122 (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
2123 }
2124 if (count != 0)
2125 db_printf("\n");
2126 db_indent -= 2;
2127 }
2128
2129 /* XXX. */
2130 #undef count
2131
2132 /* XXX need this non-static entry for calling from vm_map_print. */
2133 void
2134 vm_object_print(
2135 /* db_expr_t */ long addr,
2136 boolean_t have_addr,
2137 /* db_expr_t */ long count,
2138 char *modif)
2139 {
2140 vm_object_print_static(addr, have_addr, count, modif);
2141 }
2142
2143 DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
2144 {
2145 vm_object_t object;
2146 int nl = 0;
2147 int c;
2148
2149 TAILQ_FOREACH(object, &vm_object_list, object_list) {
2150 vm_pindex_t idx, fidx;
2151 vm_pindex_t osize;
2152 vm_paddr_t pa = -1, padiff;
2153 int rcount;
2154 vm_page_t m;
2155
2156 db_printf("new object: %p\n", (void *)object);
2157 if (nl > 18) {
2158 c = cngetc();
2159 if (c != ' ')
2160 return;
2161 nl = 0;
2162 }
2163 nl++;
2164 rcount = 0;
2165 fidx = 0;
2166 osize = object->size;
2167 if (osize > 128)
2168 osize = 128;
2169 for (idx = 0; idx < osize; idx++) {
2170 m = vm_page_lookup(object, idx);
2171 if (m == NULL) {
2172 if (rcount) {
2173 db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2174 (long)fidx, rcount, (long)pa);
2175 if (nl > 18) {
2176 c = cngetc();
2177 if (c != ' ')
2178 return;
2179 nl = 0;
2180 }
2181 nl++;
2182 rcount = 0;
2183 }
2184 continue;
2185 }
2186
2187
2188 if (rcount &&
2189 (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
2190 ++rcount;
2191 continue;
2192 }
2193 if (rcount) {
2194 padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
2195 padiff >>= PAGE_SHIFT;
2196 padiff &= PQ_L2_MASK;
2197 if (padiff == 0) {
2198 pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
2199 ++rcount;
2200 continue;
2201 }
2202 db_printf(" index(%ld)run(%d)pa(0x%lx)",
2203 (long)fidx, rcount, (long)pa);
2204 db_printf("pd(%ld)\n", (long)padiff);
2205 if (nl > 18) {
2206 c = cngetc();
2207 if (c != ' ')
2208 return;
2209 nl = 0;
2210 }
2211 nl++;
2212 }
2213 fidx = idx;
2214 pa = VM_PAGE_TO_PHYS(m);
2215 rcount = 1;
2216 }
2217 if (rcount) {
2218 db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2219 (long)fidx, rcount, (long)pa);
2220 if (nl > 18) {
2221 c = cngetc();
2222 if (c != ' ')
2223 return;
2224 nl = 0;
2225 }
2226 nl++;
2227 }
2228 }
2229 }
2230 #endif /* DDB */