vm_object.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * The Mach Operating System project at Carnegie-Mellon University.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      from: @(#)vm_object.c   8.5 (Berkeley) 3/22/94
   33  *
   34  *
   35  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   36  * All rights reserved.
   37  *
   38  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   39  *
   40  * Permission to use, copy, modify and distribute this software and
   41  * its documentation is hereby granted, provided that both the copyright
   42  * notice and this permission notice appear in all copies of the
   43  * software, derivative works or modified versions, and any portions
   44  * thereof, and that both notices appear in supporting documentation.
   45  *
   46  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   47  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   48  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   49  *
   50  * Carnegie Mellon requests users of this software to return to
   51  *
   52  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   53  *  School of Computer Science
   54  *  Carnegie Mellon University
   55  *  Pittsburgh PA 15213-3890
   56  *
   57  * any improvements or extensions that they make and grant Carnegie the
   58  * rights to redistribute these changes.
   59  */
   60 
   61 /*
   62  *      Virtual memory object module.
   63  */
   64 
   65 #include <sys/cdefs.h>
   66 __FBSDID("$FreeBSD$");
   67 
   68 #include "opt_vm.h"
   69 
   70 #include <sys/param.h>
   71 #include <sys/systm.h>
   72 #include <sys/lock.h>
   73 #include <sys/mman.h>
   74 #include <sys/mount.h>
   75 #include <sys/kernel.h>
   76 #include <sys/sysctl.h>
   77 #include <sys/mutex.h>
   78 #include <sys/proc.h>           /* for curproc, pageproc */
   79 #include <sys/socket.h>
   80 #include <sys/vnode.h>
   81 #include <sys/vmmeter.h>
   82 #include <sys/sx.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_param.h>
   86 #include <vm/pmap.h>
   87 #include <vm/vm_map.h>
   88 #include <vm/vm_object.h>
   89 #include <vm/vm_page.h>
   90 #include <vm/vm_pageout.h>
   91 #include <vm/vm_pager.h>
   92 #include <vm/swap_pager.h>
   93 #include <vm/vm_kern.h>
   94 #include <vm/vm_extern.h>
   95 #include <vm/vm_reserv.h>
   96 #include <vm/uma.h>
   97 
/*
 * Divisor applied to an object's resident page count by
 * vm_object_page_clean() to derive how many consecutive uninteresting
 * pages its sequential scan tolerates before giving up.
 */
#define EASY_SCAN_FACTOR       8

/*
 * Bits for msync_flush_flags.  MSYNC_FLUSH_HARDSEQ enables the sequential
 * range scan at the start of vm_object_page_clean().
 * NOTE(review): the consumer of MSYNC_FLUSH_SOFTSEQ is not visible in this
 * part of the file -- confirm its meaning where it is tested.
 */
#define MSYNC_FLUSH_HARDSEQ     0x01
#define MSYNC_FLUSH_SOFTSEQ     0x02

/*
 * msync / VM object flushing optimizations
 *
 * Both optimizations are enabled by default; the value is exported
 * read/write under the _vm sysctl node.
 */
static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ | MSYNC_FLUSH_SOFTSEQ;
SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags,
        CTLFLAG_RW, &msync_flush_flags, 0, "");

/*
 * Read/write knob under the _vm sysctl node; zero by default.
 * NOTE(review): the code that tests old_msync is not visible in this part
 * of the file.
 */
static int old_msync;
SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
    "Use old (insecure) msync behavior");

/* Forward declarations for file-local helpers defined later in this file. */
static void     vm_object_qcollapse(vm_object_t object);
static int      vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags);
static void     vm_object_vndeallocate(vm_object_t object);
  117 
  118 /*
  119  *      Virtual memory objects maintain the actual data
  120  *      associated with allocated virtual memory.  A given
  121  *      page of memory exists within exactly one object.
  122  *
  123  *      An object is only deallocated when all "references"
  124  *      are given up.  Only one "reference" to a given
  125  *      region of an object should be writeable.
  126  *
  127  *      Associated with each object is a list of all resident
  128  *      memory pages belonging to that object; this list is
  129  *      maintained by the "vm_page" module, and locked by the object's
  130  *      lock.
  131  *
  132  *      Each object also records a "pager" routine which is
  133  *      used to retrieve (and store) pages to the proper backing
  134  *      storage.  In addition, objects may be backed by other
  135  *      objects from which they were virtual-copied.
  136  *
  137  *      The only items within the object structure which are
  138  *      modified after time of creation are:
  139  *              reference count         locked by object's lock
  140  *              pager routine           locked by object's lock
  141  *
  142  */
  143 
/*
 * Global list of VM objects.  _vm_object_allocate() appends to it and
 * vm_object_destroy() removes from it, both while holding
 * vm_object_list_mtx.
 */
struct object_q vm_object_list;
struct mtx vm_object_list_mtx;  /* lock for object list and count */

/*
 * Statically allocated objects whose locks are initialized in
 * vm_object_init().
 * NOTE(review): kernel_object and kmem_object, as used in vm_object_init(),
 * presumably refer to these stores -- confirm against vm_object.h.
 */
struct vm_object kernel_object_store;
struct vm_object kmem_object_store;

SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0, "VM object stats");

/* Read-only statistic exported under the "object" stats node above. */
static long object_collapses;
SYSCTL_LONG(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD,
    &object_collapses, 0, "VM object collapses");

/* Read-only statistic exported under the "object" stats node above. */
static long object_bypasses;
SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
    &object_bypasses, 0, "VM object bypasses");

/*
 * UMA zone backing dynamically allocated objects; created in
 * vm_object_init(), used by vm_object_allocate() and vm_object_destroy().
 */
static uma_zone_t obj_zone;

/* Zone init routine handed to uma_zcreate() in vm_object_init(). */
static int vm_object_zinit(void *mem, int size, int flags);
  163 
  164 #ifdef INVARIANTS
  165 static void vm_object_zdtor(void *mem, int size, void *arg);
  166 
  167 static void
  168 vm_object_zdtor(void *mem, int size, void *arg)
  169 {
  170         vm_object_t object;
  171 
  172         object = (vm_object_t)mem;
  173         KASSERT(TAILQ_EMPTY(&object->memq),
  174             ("object %p has resident pages",
  175             object));
  176 #if VM_NRESERVLEVEL > 0
  177         KASSERT(LIST_EMPTY(&object->rvq),
  178             ("object %p has reservations",
  179             object));
  180 #endif
  181         KASSERT(object->cache == NULL,
  182             ("object %p has cached pages",
  183             object));
  184         KASSERT(object->paging_in_progress == 0,
  185             ("object %p paging_in_progress = %d",
  186             object, object->paging_in_progress));
  187         KASSERT(object->resident_page_count == 0,
  188             ("object %p resident_page_count = %d",
  189             object, object->resident_page_count));
  190         KASSERT(object->shadow_count == 0,
  191             ("object %p shadow_count = %d",
  192             object, object->shadow_count));
  193 }
  194 #endif
  195 
  196 static int
  197 vm_object_zinit(void *mem, int size, int flags)
  198 {
  199         vm_object_t object;
  200 
  201         object = (vm_object_t)mem;
  202         bzero(&object->mtx, sizeof(object->mtx));
  203         VM_OBJECT_LOCK_INIT(object, "standard object");
  204 
  205         /* These are true for any object that has been freed */
  206         object->paging_in_progress = 0;
  207         object->resident_page_count = 0;
  208         object->shadow_count = 0;
  209         return (0);
  210 }
  211 
  212 void
  213 _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
  214 {
  215 
  216         TAILQ_INIT(&object->memq);
  217         LIST_INIT(&object->shadow_head);
  218 
  219         object->root = NULL;
  220         object->type = type;
  221         object->size = size;
  222         object->generation = 1;
  223         object->ref_count = 1;
  224         object->flags = 0;
  225         if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
  226                 object->flags = OBJ_ONEMAPPING;
  227         object->pg_color = 0;
  228         object->handle = NULL;
  229         object->backing_object = NULL;
  230         object->backing_object_offset = (vm_ooffset_t) 0;
  231 #if VM_NRESERVLEVEL > 0
  232         LIST_INIT(&object->rvq);
  233 #endif
  234         object->cache = NULL;
  235 
  236         mtx_lock(&vm_object_list_mtx);
  237         TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
  238         mtx_unlock(&vm_object_list_mtx);
  239 }
  240 
  241 /*
  242  *      vm_object_init:
  243  *
  244  *      Initialize the VM objects module.
  245  */
  246 void
  247 vm_object_init(void)
  248 {
  249         TAILQ_INIT(&vm_object_list);
  250         mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);
  251         
  252         VM_OBJECT_LOCK_INIT(&kernel_object_store, "kernel object");
  253         _vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
  254             kernel_object);
  255 #if VM_NRESERVLEVEL > 0
  256         kernel_object->flags |= OBJ_COLORED;
  257         kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
  258 #endif
  259 
  260         VM_OBJECT_LOCK_INIT(&kmem_object_store, "kmem object");
  261         _vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
  262             kmem_object);
  263 #if VM_NRESERVLEVEL > 0
  264         kmem_object->flags |= OBJ_COLORED;
  265         kmem_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
  266 #endif
  267 
  268         /*
  269          * The lock portion of struct vm_object must be type stable due
  270          * to vm_pageout_fallback_object_lock locking a vm object
  271          * without holding any references to it.
  272          */
  273         obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
  274 #ifdef INVARIANTS
  275             vm_object_zdtor,
  276 #else
  277             NULL,
  278 #endif
  279             vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
  280 }
  281 
  282 void
  283 vm_object_clear_flag(vm_object_t object, u_short bits)
  284 {
  285 
  286         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
  287         object->flags &= ~bits;
  288 }
  289 
  290 void
  291 vm_object_pip_add(vm_object_t object, short i)
  292 {
  293 
  294         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
  295         object->paging_in_progress += i;
  296 }
  297 
  298 void
  299 vm_object_pip_subtract(vm_object_t object, short i)
  300 {
  301 
  302         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
  303         object->paging_in_progress -= i;
  304 }
  305 
  306 void
  307 vm_object_pip_wakeup(vm_object_t object)
  308 {
  309 
  310         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
  311         object->paging_in_progress--;
  312         if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
  313                 vm_object_clear_flag(object, OBJ_PIPWNT);
  314                 wakeup(object);
  315         }
  316 }
  317 
  318 void
  319 vm_object_pip_wakeupn(vm_object_t object, short i)
  320 {
  321 
  322         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
  323         if (i)
  324                 object->paging_in_progress -= i;
  325         if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
  326                 vm_object_clear_flag(object, OBJ_PIPWNT);
  327                 wakeup(object);
  328         }
  329 }
  330 
  331 void
  332 vm_object_pip_wait(vm_object_t object, char *waitid)
  333 {
  334 
  335         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
  336         while (object->paging_in_progress) {
  337                 object->flags |= OBJ_PIPWNT;
  338                 msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0);
  339         }
  340 }
  341 
  342 /*
  343  *      vm_object_allocate:
  344  *
  345  *      Returns a new object with the given size.
  346  */
  347 vm_object_t
  348 vm_object_allocate(objtype_t type, vm_pindex_t size)
  349 {
  350         vm_object_t object;
  351 
  352         object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
  353         _vm_object_allocate(type, size, object);
  354         return (object);
  355 }
  356 
  357 
  358 /*
  359  *      vm_object_reference:
  360  *
  361  *      Gets another reference to the given object.  Note: OBJ_DEAD
  362  *      objects can be referenced during final cleaning.
  363  */
  364 void
  365 vm_object_reference(vm_object_t object)
  366 {
  367         struct vnode *vp;
  368 
  369         if (object == NULL)
  370                 return;
  371         VM_OBJECT_LOCK(object);
  372         object->ref_count++;
  373         if (object->type == OBJT_VNODE) {
  374                 int vfslocked;
  375 
  376                 vp = object->handle;
  377                 VM_OBJECT_UNLOCK(object);
  378                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  379                 vget(vp, LK_RETRY, curthread);
  380                 VFS_UNLOCK_GIANT(vfslocked);
  381         } else
  382                 VM_OBJECT_UNLOCK(object);
  383 }
  384 
  385 /*
  386  *      vm_object_reference_locked:
  387  *
  388  *      Gets another reference to the given object.
  389  *
  390  *      The object must be locked.
  391  */
  392 void
  393 vm_object_reference_locked(vm_object_t object)
  394 {
  395         struct vnode *vp;
  396 
  397         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
  398         KASSERT((object->flags & OBJ_DEAD) == 0,
  399             ("vm_object_reference_locked: dead object referenced"));
  400         object->ref_count++;
  401         if (object->type == OBJT_VNODE) {
  402                 vp = object->handle;
  403                 vref(vp);
  404         }
  405 }
  406 
  407 /*
  408  * Handle deallocating an object of type OBJT_VNODE.
  409  */
  410 static void
  411 vm_object_vndeallocate(vm_object_t object)
  412 {
  413         struct vnode *vp = (struct vnode *) object->handle;
  414 
  415         VFS_ASSERT_GIANT(vp->v_mount);
  416         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
  417         KASSERT(object->type == OBJT_VNODE,
  418             ("vm_object_vndeallocate: not a vnode object"));
  419         KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
  420 #ifdef INVARIANTS
  421         if (object->ref_count == 0) {
  422                 vprint("vm_object_vndeallocate", vp);
  423                 panic("vm_object_vndeallocate: bad object reference count");
  424         }
  425 #endif
  426 
  427         object->ref_count--;
  428         if (object->ref_count == 0) {
  429                 mp_fixme("Unlocked vflag access.");
  430                 vp->v_vflag &= ~VV_TEXT;
  431         }
  432         VM_OBJECT_UNLOCK(object);
  433         /*
  434          * vrele may need a vop lock
  435          */
  436         vrele(vp);
  437 }
  438 
/*
 *      vm_object_deallocate:
 *
 *      Release a reference to the specified object,
 *      gained either through a vm_object_allocate
 *      or a vm_object_reference call.  When all references
 *      are gone, storage associated with this object
 *      may be relinquished.
 *
 *      No object may be locked.
 *
 *      The loop below releases one reference per iteration.  When an
 *      object is terminated the loop moves on to the object it was
 *      shadowing (its backing_object) and releases a reference on that
 *      one in turn.  Vnode-backed objects are handed to
 *      vm_object_vndeallocate() instead.
 */
void
vm_object_deallocate(vm_object_t object)
{
        vm_object_t temp;

        while (object != NULL) {
                int vfslocked;

                vfslocked = 0;
        restart:
                VM_OBJECT_LOCK(object);
                if (object->type == OBJT_VNODE) {
                        struct vnode *vp = (struct vnode *) object->handle;

                        /*
                         * Conditionally acquire Giant for a vnode-backed
                         * object.  We have to be careful since the type of
                         * a vnode object can change while the object is
                         * unlocked.
                         */
                        if (VFS_NEEDSGIANT(vp->v_mount) && !vfslocked) {
                                vfslocked = 1;
                                /*
                                 * Giant is only tried while the object lock
                                 * is held; if that fails, drop the object
                                 * lock, block on Giant, and re-examine the
                                 * object from the top.
                                 */
                                if (!mtx_trylock(&Giant)) {
                                        VM_OBJECT_UNLOCK(object);
                                        mtx_lock(&Giant);
                                        goto restart;
                                }
                        }
                        /* Unlocks the object and releases the vnode. */
                        vm_object_vndeallocate(object);
                        VFS_UNLOCK_GIANT(vfslocked);
                        return;
                } else
                        /*
                         * This is to handle the case that the object
                         * changed type while we dropped its lock to
                         * obtain Giant.
                         */
                        VFS_UNLOCK_GIANT(vfslocked);

                KASSERT(object->ref_count != 0,
                        ("vm_object_deallocate: object deallocated too many times: %d", object->type));

                /*
                 * If the reference count goes to 0 we start calling
                 * vm_object_terminate() on the object chain.
                 * A ref count of 1 may be a special case depending on the
                 * shadow count being 0 or 1.
                 */
                object->ref_count--;
                if (object->ref_count > 1) {
                        VM_OBJECT_UNLOCK(object);
                        return;
                } else if (object->ref_count == 1) {
                        /*
                         * One reference left.  An unshadowed anonymous
                         * (default/swap, no handle) object is flagged
                         * OBJ_ONEMAPPING; one with exactly one shadow is a
                         * candidate for collapsing into that shadow.
                         */
                        if (object->shadow_count == 0 &&
                            object->handle == NULL &&
                            (object->type == OBJT_DEFAULT ||
                             object->type == OBJT_SWAP)) {
                                vm_object_set_flag(object, OBJ_ONEMAPPING);
                        } else if ((object->shadow_count == 1) &&
                            (object->handle == NULL) &&
                            (object->type == OBJT_DEFAULT ||
                             object->type == OBJT_SWAP)) {
                                vm_object_t robject;

                                /* robject is the single object shadowing us. */
                                robject = LIST_FIRST(&object->shadow_head);
                                KASSERT(robject != NULL,
                                    ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
                                         object->ref_count,
                                         object->shadow_count));
                                if (!VM_OBJECT_TRYLOCK(robject)) {
                                        /*
                                         * Avoid a potential deadlock.
                                         *
                                         * The reference dropped above is
                                         * restored so that the retry via
                                         * "continue" starts from the same
                                         * state.
                                         */
                                        object->ref_count++;
                                        VM_OBJECT_UNLOCK(object);
                                        /*
                                         * More likely than not the thread
                                         * holding robject's lock has lower
                                         * priority than the current thread.
                                         * Let the lower priority thread run.
                                         */
                                        pause("vmo_de", 1);
                                        continue;
                                }
                                /*
                                 * Collapse object into its shadow unless its
                                 * shadow is dead.  In that case, object will
                                 * be deallocated by the thread that is
                                 * deallocating its shadow.
                                 */
                                if ((robject->flags & OBJ_DEAD) == 0 &&
                                    (robject->handle == NULL) &&
                                    (robject->type == OBJT_DEFAULT ||
                                     robject->type == OBJT_SWAP)) {

                                        /*
                                         * Hold a temporary reference on
                                         * robject while sleeping below.
                                         */
                                        robject->ref_count++;
retry:
                                        /*
                                         * Wait for paging on either object
                                         * to finish.  Each wait drops one of
                                         * the two locks; afterwards the
                                         * check is repeated only if robject
                                         * still shadows object.
                                         */
                                        if (robject->paging_in_progress) {
                                                VM_OBJECT_UNLOCK(object);
                                                vm_object_pip_wait(robject,
                                                    "objde1");
                                                temp = robject->backing_object;
                                                if (object == temp) {
                                                        VM_OBJECT_LOCK(object);
                                                        goto retry;
                                                }
                                        } else if (object->paging_in_progress) {
                                                VM_OBJECT_UNLOCK(robject);
                                                object->flags |= OBJ_PIPWNT;
                                                /* PDROP: object's lock is not reacquired on wakeup. */
                                                msleep(object,
                                                    VM_OBJECT_MTX(object),
                                                    PDROP | PVM, "objde2", 0);
                                                VM_OBJECT_LOCK(robject);
                                                temp = robject->backing_object;
                                                if (object == temp) {
                                                        VM_OBJECT_LOCK(object);
                                                        goto retry;
                                                }
                                        } else
                                                VM_OBJECT_UNLOCK(object);

                                        /*
                                         * From here on only robject is
                                         * locked.  If the temporary
                                         * reference is the only one left,
                                         * drop it and terminate robject.
                                         */
                                        if (robject->ref_count == 1) {
                                                robject->ref_count--;
                                                object = robject;
                                                goto doterm;
                                        }
                                        /*
                                         * Otherwise collapse and loop again
                                         * on robject; the next iteration's
                                         * decrement releases the temporary
                                         * reference taken above.
                                         */
                                        object = robject;
                                        vm_object_collapse(object);
                                        VM_OBJECT_UNLOCK(object);
                                        continue;
                                }
                                VM_OBJECT_UNLOCK(robject);
                        }
                        VM_OBJECT_UNLOCK(object);
                        return;
                }
doterm:
                /*
                 * The object (still locked) has no references left.  Unlink
                 * it from the shadow list of the object it shadows, if any.
                 */
                temp = object->backing_object;
                if (temp != NULL) {
                        VM_OBJECT_LOCK(temp);
                        LIST_REMOVE(object, shadow_list);
                        temp->shadow_count--;
                        temp->generation++;
                        VM_OBJECT_UNLOCK(temp);
                        object->backing_object = NULL;
                }
                /*
                 * Don't double-terminate, we could be in a termination
                 * recursion due to the terminate having to sync data
                 * to disk.
                 */
                if ((object->flags & OBJ_DEAD) == 0)
                        vm_object_terminate(object);
                else
                        VM_OBJECT_UNLOCK(object);
                /* Continue with the former backing object, if there was one. */
                object = temp;
        }
}
  608 
  609 /*
  610  *      vm_object_destroy removes the object from the global object list
  611  *      and frees the space for the object.
  612  */
  613 void
  614 vm_object_destroy(vm_object_t object)
  615 {
  616 
  617         /*
  618          * Remove the object from the global object list.
  619          */
  620         mtx_lock(&vm_object_list_mtx);
  621         TAILQ_REMOVE(&vm_object_list, object, object_list);
  622         mtx_unlock(&vm_object_list_mtx);
  623 
  624         /*
  625          * Free the space for the object.
  626          */
  627         uma_zfree(obj_zone, object);
  628 }
  629 
  630 /*
  631  *      vm_object_terminate actually destroys the specified object, freeing
  632  *      up all previously used resources.
  633  *
  634  *      The object must be locked.
  635  *      This routine may block.
  636  */
  637 void
  638 vm_object_terminate(vm_object_t object)
  639 {
  640         vm_page_t p;
  641 
  642         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
  643 
  644         /*
  645          * Make sure no one uses us.
  646          */
  647         vm_object_set_flag(object, OBJ_DEAD);
  648 
  649         /*
  650          * wait for the pageout daemon to be done with the object
  651          */
  652         vm_object_pip_wait(object, "objtrm");
  653 
  654         KASSERT(!object->paging_in_progress,
  655                 ("vm_object_terminate: pageout in progress"));
  656 
  657         /*
  658          * Clean and free the pages, as appropriate. All references to the
  659          * object are gone, so we don't need to lock it.
  660          */
  661         if (object->type == OBJT_VNODE) {
  662                 struct vnode *vp = (struct vnode *)object->handle;
  663 
  664                 /*
  665                  * Clean pages and flush buffers.
  666                  */
  667                 vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
  668                 VM_OBJECT_UNLOCK(object);
  669 
  670                 vinvalbuf(vp, V_SAVE, NULL, 0, 0);
  671 
  672                 VM_OBJECT_LOCK(object);
  673         }
  674 
  675         KASSERT(object->ref_count == 0, 
  676                 ("vm_object_terminate: object with references, ref_count=%d",
  677                 object->ref_count));
  678 
  679         /*
  680          * Now free any remaining pages. For internal objects, this also
  681          * removes them from paging queues. Don't free wired pages, just
  682          * remove them from the object. 
  683          */
  684         vm_page_lock_queues();
  685         while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
  686                 KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0,
  687                         ("vm_object_terminate: freeing busy page %p "
  688                         "p->busy = %d, p->flags %x\n", p, p->busy, p->flags));
  689                 if (p->wire_count == 0) {
  690                         vm_page_free(p);
  691                         cnt.v_pfree++;
  692                 } else {
  693                         vm_page_remove(p);
  694                 }
  695         }
  696         vm_page_unlock_queues();
  697 
  698 #if VM_NRESERVLEVEL > 0
  699         if (__predict_false(!LIST_EMPTY(&object->rvq)))
  700                 vm_reserv_break_all(object);
  701 #endif
  702         if (__predict_false(object->cache != NULL))
  703                 vm_page_cache_free(object, 0, 0);
  704 
  705         /*
  706          * Let the pager know object is dead.
  707          */
  708         vm_pager_deallocate(object);
  709         VM_OBJECT_UNLOCK(object);
  710 
  711         vm_object_destroy(object);
  712 }
  713 
  714 /*
  715  *      vm_object_page_clean
  716  *
  717  *      Clean all dirty pages in the specified range of object.  Leaves page 
  718  *      on whatever queue it is currently on.   If NOSYNC is set then do not
  719  *      write out pages with VPO_NOSYNC set (originally comes from MAP_NOSYNC),
  720  *      leaving the object dirty.
  721  *
  722  *      When stuffing pages asynchronously, allow clustering.  XXX we need a
  723  *      synchronous clustering mode implementation.
  724  *
  725  *      Odd semantics: if start == end, we clean everything.
  726  *
  727  *      The object must be locked.
  728  */
  729 void
  730 vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int flags)
  731 {
  732         vm_page_t p, np;
  733         vm_pindex_t tstart, tend;
  734         vm_pindex_t pi;
  735         int clearobjflags;
  736         int pagerflags;
  737         int curgeneration;
  738 
  739         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
  740         if (object->type != OBJT_VNODE ||
  741                 (object->flags & OBJ_MIGHTBEDIRTY) == 0)
  742                 return;
  743 
  744         pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
  745         pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;
  746 
  747         vm_object_set_flag(object, OBJ_CLEANING);
  748 
  749         tstart = start;
  750         if (end == 0) {
  751                 tend = object->size;
  752         } else {
  753                 tend = end;
  754         }
  755 
  756         vm_page_lock_queues();
  757         /*
  758          * If the caller is smart and only msync()s a range he knows is
  759          * dirty, we may be able to avoid an object scan.  This results in
  760          * a phenomenal improvement in performance.  We cannot do this
  761          * as a matter of course because the object may be huge - e.g.
  762          * the size might be in the gigabytes or terabytes.
  763          */
  764         if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) {
  765                 vm_pindex_t tscan;
  766                 int scanlimit;
  767                 int scanreset;
  768 
  769                 scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
  770                 if (scanreset < 16)
  771                         scanreset = 16;
  772                 pagerflags |= VM_PAGER_IGNORE_CLEANCHK;
  773 
  774                 scanlimit = scanreset;
  775                 tscan = tstart;
  776                 while (tscan < tend) {
  777                         curgeneration = object->generation;
  778                         p = vm_page_lookup(object, tscan);
  779                         if (p == NULL || p->valid == 0) {
  780                                 if (--scanlimit == 0)
  781                                         break;
  782                                 ++tscan;
  783                                 continue;
  784                         }
  785                         vm_page_test_dirty(p);
  786                         if ((p->dirty & p->valid) == 0) {
  787                                 if (--scanlimit == 0)
  788                                         break;
  789                                 ++tscan;
  790                                 continue;
  791                         }
  792                         /*
  793                          * If we have been asked to skip nosync pages and 
  794                          * this is a nosync page, we can't continue.
  795                          */
  796                         if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) {
  797                                 if (--scanlimit == 0)
  798                                         break;
  799                                 ++tscan;
  800                                 continue;
  801                         }
  802                         scanlimit = scanreset;
  803 
  804                         /*
  805                          * This returns 0 if it was unable to busy the first
  806                          * page (i.e. had to sleep).
  807                          */
  808                         tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags);
  809                 }
  810 
  811                 /*
  812                  * If everything was dirty and we flushed it successfully,
  813                  * and the requested range is not the entire object, we
  814                  * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can
  815                  * return immediately.
  816                  */
  817                 if (tscan >= tend && (tstart || tend < object->size)) {
  818                         vm_page_unlock_queues();
  819                         vm_object_clear_flag(object, OBJ_CLEANING);
  820                         return;
  821                 }
  822                 pagerflags &= ~VM_PAGER_IGNORE_CLEANCHK;
  823         }
  824 
  825         /*
  826          * Generally set CLEANCHK interlock and make the page read-only so
  827          * we can then clear the object flags.
  828          *
  829          * However, if this is a nosync mmap then the object is likely to 
  830          * stay dirty so do not mess with the page and do not clear the
  831          * object flags.
  832          */
  833         clearobjflags = 1;
  834         TAILQ_FOREACH(p, &object->memq, listq) {
  835                 p->oflags |= VPO_CLEANCHK;
  836                 if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC))
  837                         clearobjflags = 0;
  838                 else
  839                         pmap_remove_write(p);
  840         }
  841 
  842         if (clearobjflags && (tstart == 0) && (tend == object->size)) {
  843                 struct vnode *vp;
  844 
  845                 vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY);
  846                 if (object->type == OBJT_VNODE &&
  847                     (vp = (struct vnode *)object->handle) != NULL) {
  848                         VI_LOCK(vp);
  849                         if (vp->v_iflag & VI_OBJDIRTY)
  850                                 vp->v_iflag &= ~VI_OBJDIRTY;
  851                         VI_UNLOCK(vp);
  852                 }
  853         }
  854 
  855 rescan:
  856         curgeneration = object->generation;
  857 
  858         for (p = TAILQ_FIRST(&object->memq); p; p = np) {
  859                 int n;
  860 
  861                 np = TAILQ_NEXT(p, listq);
  862 
  863 again:
  864                 pi = p->pindex;
  865                 if ((p->oflags & VPO_CLEANCHK) == 0 ||
  866                         (pi < tstart) || (pi >= tend) ||
  867                     p->valid == 0) {
  868                         p->oflags &= ~VPO_CLEANCHK;
  869                         continue;
  870                 }
  871 
  872                 vm_page_test_dirty(p);
  873                 if ((p->dirty & p->valid) == 0) {
  874                         p->oflags &= ~VPO_CLEANCHK;
  875                         continue;
  876                 }
  877 
  878                 /*
  879                  * If we have been asked to skip nosync pages and this is a
  880                  * nosync page, skip it.  Note that the object flags were
  881                  * not cleared in this case so we do not have to set them.
  882                  */
  883                 if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) {
  884                         p->oflags &= ~VPO_CLEANCHK;
  885                         continue;
  886                 }
  887 
  888                 n = vm_object_page_collect_flush(object, p,
  889                         curgeneration, pagerflags);
  890                 if (n == 0)
  891                         goto rescan;
  892 
  893                 if (object->generation != curgeneration)
  894                         goto rescan;
  895 
  896                 /*
  897                  * Try to optimize the next page.  If we can't we pick up
  898                  * our (random) scan where we left off.
  899                  */
  900                 if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) {
  901                         if ((p = vm_page_lookup(object, pi + n)) != NULL)
  902                                 goto again;
  903                 }
  904         }
  905         vm_page_unlock_queues();
  906 #if 0
  907         VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
  908 #endif
  909 
  910         vm_object_clear_flag(object, OBJ_CLEANING);
  911         return;
  912 }
  913 
      /*
       *      vm_object_page_collect_flush:
       *
       *      Collect the dirty pages adjacent to p into one run and pass the
       *      run to vm_pageout_flush().  The page queues mutex must be held.
       *
       *      Returns 0, without flushing anything, if the object's generation
       *      no longer equals curgeneration after waiting for p to stop being
       *      busy.  Otherwise returns 1 plus the number of look-ahead pages in
       *      the run that precede the first one still dirty after the flush;
       *      look-behind pages are not counted.
       */
  914 static int
  915 vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags)
  916 {
  917         int runlen;
  918         int maxf;
  919         int chkb;
  920         int maxb;
  921         int i;
  922         vm_pindex_t pi;
  923         vm_page_t maf[vm_pageout_page_count];
  924         vm_page_t mab[vm_pageout_page_count];
  925         vm_page_t ma[vm_pageout_page_count];
  926 
  927         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
  928         pi = p->pindex;
              /*
               * Wait for p to stop being busy.  The page queues lock is taken
               * again after each wait (presumably vm_page_sleep_if_busy()
               * released it -- confirm); if the object changed generation in
               * the meantime, tell the caller to start over.
               */
  929         while (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) {
  930                 vm_page_lock_queues();
  931                 if (object->generation != curgeneration) {
  932                         return(0);
  933                 }
  934         }
              /*
               * Look ahead: gather up to vm_pageout_page_count - 1 pages that
               * directly follow p.  The scan stops at the first index that is
               * not resident, is busy, lacks VPO_CLEANCHK (unless the caller
               * passed VM_PAGER_IGNORE_CLEANCHK) or is not dirty.
               */
  935         maxf = 0;
  936         for(i = 1; i < vm_pageout_page_count; i++) {
  937                 vm_page_t tp;
  938 
  939                 if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
  940                         if ((tp->oflags & VPO_BUSY) ||
  941                                 ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
  942                                  (tp->oflags & VPO_CLEANCHK) == 0) ||
  943                                 (tp->busy != 0))
  944                                 break;
  945                         vm_page_test_dirty(tp);
  946                         if ((tp->dirty & tp->valid) == 0) {
  947                                 tp->oflags &= ~VPO_CLEANCHK;
  948                                 break;
  949                         }
  950                         maf[ i - 1 ] = tp;
  951                         maxf++;
  952                         continue;
  953                 }
  954                 break;
  955         }
  956 
              /*
               * Look behind with whatever room is left, applying the same
               * tests.  At most chkb - 1 pages are taken, so maxb + maxf + 1
               * never exceeds vm_pageout_page_count, the size of ma[].
               *
               * NOTE(review): pi - i is computed even when i > pi; if
               * vm_pindex_t is unsigned this wraps to a very large index --
               * confirm that such a lookup can only return NULL.
               */
  957         maxb = 0;
  958         chkb = vm_pageout_page_count -  maxf;
  959         if (chkb) {
  960                 for(i = 1; i < chkb;i++) {
  961                         vm_page_t tp;
  962 
  963                         if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
  964                                 if ((tp->oflags & VPO_BUSY) ||
  965                                         ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
  966                                          (tp->oflags & VPO_CLEANCHK) == 0) ||
  967                                         (tp->busy != 0))
  968                                         break;
  969                                 vm_page_test_dirty(tp);
  970                                 if ((tp->dirty & tp->valid) == 0) {
  971                                         tp->oflags &= ~VPO_CLEANCHK;
  972                                         break;
  973                                 }
  974                                 mab[ i - 1 ] = tp;
  975                                 maxb++;
  976                                 continue;
  977                         }
  978                         break;
  979                 }
  980         }
  981 
              /*
               * Lay the run out in ascending pindex order in ma[]: the
               * look-behind pages (mab[] holds them in descending order, so it
               * is reversed), then p, then the look-ahead pages.  VPO_CLEANCHK
               * is cleared on every page of the run.
               */
  982         for(i = 0; i < maxb; i++) {
  983                 int index = (maxb - i) - 1;
  984                 ma[index] = mab[i];
  985                 ma[index]->oflags &= ~VPO_CLEANCHK;
  986         }
  987         p->oflags &= ~VPO_CLEANCHK;
  988         ma[maxb] = p;
  989         for(i = 0; i < maxf; i++) {
  990                 int index = (maxb + i) + 1;
  991                 ma[index] = maf[i];
  992                 ma[index]->oflags &= ~VPO_CLEANCHK;
  993         }
  994         runlen = maxb + maxf + 1;
  995 
              /*
               * Flush the run.  Any page that is still dirty afterwards has
               * write access removed and is marked VPO_CLEANCHK again.
               */
  996         vm_pageout_flush(ma, runlen, pagerflags);
  997         for (i = 0; i < runlen; i++) {
  998                 if (ma[i]->valid & ma[i]->dirty) {
  999                         pmap_remove_write(ma[i]);
 1000                         ma[i]->oflags |= VPO_CLEANCHK;
 1001 
 1002                         /*
 1003                          * maxf will end up being the actual number of pages
 1004                          * we wrote out contiguously, non-inclusive of the
 1005                          * first page.  We do not count look-behind pages.
 1006                          */
 1007                         if (i >= maxb + 1 && (maxf > i - maxb - 1))
 1008                                 maxf = i - maxb - 1;
 1009                 }
 1010         }
 1011         return(maxf + 1);
 1012 }
 1013 
 1014 /*
 1015  * Note that there is absolutely no sense in writing out
 1016  * anonymous objects, so we track down the vnode object
 1017  * to write out.
 1018  * We invalidate (remove) all pages from the address space
 1019  * for semantic correctness.
 1020  *
 1021  * Note: certain anonymous maps, such as MAP_NOSYNC maps,
 1022  * may start out with a NULL object.
 1023  */
 1024 void
 1025 vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
 1026     boolean_t syncio, boolean_t invalidate)
 1027 {
 1028         vm_object_t backing_object;
 1029         struct vnode *vp;
 1030         struct mount *mp;
 1031         int flags;
 1032 
 1033         if (object == NULL)
 1034                 return;
 1035         VM_OBJECT_LOCK(object);
 1036         while ((backing_object = object->backing_object) != NULL) {
 1037                 VM_OBJECT_LOCK(backing_object);
 1038                 offset += object->backing_object_offset;
 1039                 VM_OBJECT_UNLOCK(object);
 1040                 object = backing_object;
 1041                 if (object->size < OFF_TO_IDX(offset + size))
 1042                         size = IDX_TO_OFF(object->size) - offset;
 1043         }
 1044         /*
 1045          * Flush pages if writing is allowed, invalidate them
 1046          * if invalidation requested.  Pages undergoing I/O
 1047          * will be ignored by vm_object_page_remove().
 1048          *
 1049          * We cannot lock the vnode and then wait for paging
 1050          * to complete without deadlocking against vm_fault.
 1051          * Instead we simply call vm_object_page_remove() and
 1052          * allow it to block internally on a page-by-page
 1053          * basis when it encounters pages undergoing async
 1054          * I/O.
 1055          */
 1056         if (object->type == OBJT_VNODE &&
 1057             (object->flags & OBJ_MIGHTBEDIRTY) != 0) {
 1058                 int vfslocked;
 1059                 vp = object->handle;
 1060                 VM_OBJECT_UNLOCK(object);
 1061                 (void) vn_start_write(vp, &mp, V_WAIT);
 1062                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 1063                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
 1064                 flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
 1065                 flags |= invalidate ? OBJPC_INVAL : 0;
 1066                 VM_OBJECT_LOCK(object);
 1067                 vm_object_page_clean(object,
 1068                     OFF_TO_IDX(offset),
 1069                     OFF_TO_IDX(offset + size + PAGE_MASK),
 1070                     flags);
 1071                 VM_OBJECT_UNLOCK(object);
 1072                 VOP_UNLOCK(vp, 0, curthread);
 1073                 VFS_UNLOCK_GIANT(vfslocked);
 1074                 vn_finished_write(mp);
 1075                 VM_OBJECT_LOCK(object);
 1076         }
 1077         if ((object->type == OBJT_VNODE ||
 1078              object->type == OBJT_DEVICE) && invalidate) {
 1079                 boolean_t purge;
 1080                 purge = old_msync || (object->type == OBJT_DEVICE);
 1081                 vm_object_page_remove(object,
 1082                     OFF_TO_IDX(offset),
 1083                     OFF_TO_IDX(offset + size + PAGE_MASK),
 1084                     purge ? FALSE : TRUE);
 1085         }
 1086         VM_OBJECT_UNLOCK(object);
 1087 }
 1088 
 1089 /*
 1090  *      vm_object_madvise:
 1091  *
 1092  *      Implements the madvise function at the object/page level.
 1093  *
 1094  *      MADV_WILLNEED   (any object)
 1095  *
 1096  *          Activate the specified pages if they are resident.
 1097  *
 1098  *      MADV_DONTNEED   (any object)
 1099  *
 1100  *          Deactivate the specified pages if they are resident.
 1101  *
 1102  *      MADV_FREE       (OBJT_DEFAULT/OBJT_SWAP objects,
 1103  *                       OBJ_ONEMAPPING only)
 1104  *
 1105  *          Deactivate and clean the specified pages if they are
 1106  *          resident.  This permits the process to reuse the pages
 1107  *          without faulting or the kernel to reclaim the pages
 1108  *          without I/O.
       *
       *      object  top-level object; a NULL object is a no-op
       *      pindex  first page index, relative to object
       *      count   number of consecutive page indices to process
       *      advise  MADV_WILLNEED, MADV_DONTNEED or MADV_FREE; for any
       *              other value the pages found are left as they are
       *
       *      The function locks object itself and unlocks it before
       *      returning.
 1109  */
 1110 void
 1111 vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
 1112 {
 1113         vm_pindex_t end, tpindex;
 1114         vm_object_t backing_object, tobject;
 1115         vm_page_t m;
 1116 
 1117         if (object == NULL)
 1118                 return;
 1119         VM_OBJECT_LOCK(object);
 1120         end = pindex + count;
 1121         /*
 1122          * Locate and adjust resident pages
 1123          */
 1124         for (; pindex < end; pindex += 1) {
              /*
               * Each index is searched for starting at the top-level object;
               * tobject/tpindex track the current position in the shadow
               * chain.
               */
 1125 relookup:
 1126                 tobject = object;
 1127                 tpindex = pindex;
 1128 shadowlookup:
 1129                 /*
 1130                  * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
 1131                  * and those pages must be OBJ_ONEMAPPING.
 1132                  */
 1133                 if (advise == MADV_FREE) {
 1134                         if ((tobject->type != OBJT_DEFAULT &&
 1135                              tobject->type != OBJT_SWAP) ||
 1136                             (tobject->flags & OBJ_ONEMAPPING) == 0) {
 1137                                 goto unlock_tobject;
 1138                         }
 1139                 }
 1140                 m = vm_page_lookup(tobject, tpindex);
 1141                 if (m == NULL && advise == MADV_WILLNEED) {
 1142                         /*
 1143                          * If the page is cached, reactivate it.
 1144                          */
 1145                         m = vm_page_alloc(tobject, tpindex, VM_ALLOC_IFCACHED |
 1146                             VM_ALLOC_NOBUSY);
 1147                 }
 1148                 if (m == NULL) {
 1149                         /*
 1150                          * There may be swap even if there is no backing page
 1151                          */
 1152                         if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
 1153                                 swap_pager_freespace(tobject, tpindex, 1);
 1154                         /*
 1155                          * next object
                               *
                               * The backing object is locked before the current
                               * one is released.  The top-level object is never
                               * unlocked here; it stays locked for the whole
                               * walk.
 1156                          */
 1157                         backing_object = tobject->backing_object;
 1158                         if (backing_object == NULL)
 1159                                 goto unlock_tobject;
 1160                         VM_OBJECT_LOCK(backing_object);
 1161                         tpindex += OFF_TO_IDX(tobject->backing_object_offset);
 1162                         if (tobject != object)
 1163                                 VM_OBJECT_UNLOCK(tobject);
 1164                         tobject = backing_object;
 1165                         goto shadowlookup;
 1166                 }
 1167                 /*
 1168                  * If the page is busy or not in a normal active state,
 1169                  * we skip it.  If the page is not managed there are no
 1170                  * page queues to mess with.  Things can break if we mess
 1171                  * with pages in any of the below states.
 1172                  */
 1173                 vm_page_lock_queues();
 1174                 if (m->hold_count ||
 1175                     m->wire_count ||
 1176                     (m->flags & PG_UNMANAGED) ||
 1177                     m->valid != VM_PAGE_BITS_ALL) {
 1178                         vm_page_unlock_queues();
 1179                         goto unlock_tobject;
 1180                 }
                      /*
                       * A busy page is waited for rather than skipped.  The
                       * msleep() uses PDROP on tobject's mutex, so that lock is
                       * not held on wakeup; the top-level object, when it is a
                       * different object, is unlocked by hand beforehand.  The
                       * top-level object is then locked again and the same
                       * pindex is retried from the top of the chain.
                       */
 1181                 if ((m->oflags & VPO_BUSY) || m->busy) {
 1182                         vm_page_flag_set(m, PG_REFERENCED);
 1183                         vm_page_unlock_queues();
 1184                         if (object != tobject)
 1185                                 VM_OBJECT_UNLOCK(object);
 1186                         m->oflags |= VPO_WANTED;
 1187                         msleep(m, VM_OBJECT_MTX(tobject), PDROP | PVM, "madvpo", 0);
 1188                         VM_OBJECT_LOCK(object);
 1189                         goto relookup;
 1190                 }
 1191                 if (advise == MADV_WILLNEED) {
 1192                         vm_page_activate(m);
 1193                 } else if (advise == MADV_DONTNEED) {
 1194                         vm_page_dontneed(m);
 1195                 } else if (advise == MADV_FREE) {
 1196                         /*
 1197                          * Mark the page clean.  This will allow the page
 1198                          * to be freed up by the system.  However, such pages
 1199                          * are often reused quickly by malloc()/free()
 1200                          * so we do not do anything that would cause
 1201                          * a page fault if we can help it.
 1202                          *
 1203                          * Specifically, we do not try to actually free
 1204                          * the page now nor do we try to put it in the
 1205                          * cache (which would cause a page fault on reuse).
 1206                          *
 1207                          * But we do make the page as freeable as we
 1208                          * can without actually taking the step of unmapping
 1209                          * it.
 1210                          */
 1211                         pmap_clear_modify(m);
 1212                         m->dirty = 0;
 1213                         m->act_count = 0;
 1214                         vm_page_dontneed(m);
 1215                 }
 1216                 vm_page_unlock_queues();
 1217                 if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
 1218                         swap_pager_freespace(tobject, tpindex, 1);
              /*
               * Release the chain object reached for this index, unless it is
               * the top-level object, which is unlocked only after the loop.
               */
 1219 unlock_tobject:
 1220                 if (tobject != object)
 1221                         VM_OBJECT_UNLOCK(tobject);
 1222         }       
 1223         VM_OBJECT_UNLOCK(object);
 1224 }
 1225 
 1226 /*
 1227  *      vm_object_shadow:
 1228  *
 1229  *      Create a new object which is backed by the
 1230  *      specified existing object range.  The source
 1231  *      object reference is deallocated.
 1232  *
 1233  *      The new object and offset into that object
 1234  *      are returned in the source parameters.
 1235  */
 1236 void
 1237 vm_object_shadow(
 1238         vm_object_t *object,    /* IN/OUT */
 1239         vm_ooffset_t *offset,   /* IN/OUT */
 1240         vm_size_t length)
 1241 {
 1242         vm_object_t source;
 1243         vm_object_t result;
 1244 
 1245         source = *object;
 1246 
 1247         /*
 1248          * Don't create the new object if the old object isn't shared.
 1249          */
 1250         if (source != NULL) {
 1251                 VM_OBJECT_LOCK(source);
 1252                 if (source->ref_count == 1 &&
 1253                     source->handle == NULL &&
 1254                     (source->type == OBJT_DEFAULT ||
 1255                      source->type == OBJT_SWAP)) {
 1256                         VM_OBJECT_UNLOCK(source);
 1257                         return;
 1258                 }
 1259                 VM_OBJECT_UNLOCK(source);
 1260         }
 1261 
 1262         /*
 1263          * Allocate a new object with the given length.
 1264          */
 1265         result = vm_object_allocate(OBJT_DEFAULT, length);
 1266 
 1267         /*
 1268          * The new object shadows the source object, adding a reference to it.
 1269          * Our caller changes his reference to point to the new object,
 1270          * removing a reference to the source object.  Net result: no change
 1271          * of reference count.
 1272          *
 1273          * Try to optimize the result object's page color when shadowing
 1274          * in order to maintain page coloring consistency in the combined 
 1275          * shadowed object.
 1276          */
 1277         result->backing_object = source;
 1278         /*
 1279          * Store the offset into the source object, and fix up the offset into
 1280          * the new object.
 1281          */
 1282         result->backing_object_offset = *offset;
 1283         if (source != NULL) {
 1284                 VM_OBJECT_LOCK(source);
 1285                 LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
 1286                 source->shadow_count++;
 1287                 source->generation++;
 1288 #if VM_NRESERVLEVEL > 0
 1289                 result->flags |= source->flags & (OBJ_NEEDGIANT | OBJ_COLORED);
 1290                 result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) &
 1291                     ((1 << (VM_NFREEORDER - 1)) - 1);
 1292 #else
 1293                 result->flags |= source->flags & OBJ_NEEDGIANT;
 1294 #endif
 1295                 VM_OBJECT_UNLOCK(source);
 1296         }
 1297 
 1298 
 1299         /*
 1300          * Return the new things
 1301          */
 1302         *offset = 0;
 1303         *object = result;
 1304 }
 1305 
 1306 /*
 1307  *      vm_object_split:
 1308  *
 1309  * Split the pages in a map entry into a new object.  This affords
 1310  * easier removal of unused pages, and keeps object inheritance from
 1311  * being a negative impact on memory usage.
       *
       * Only OBJT_DEFAULT and OBJT_SWAP objects with more than one
       * reference are split; otherwise the entry is left unchanged.
       *
       * The code unlocks the entry's object before allocating, so the
       * caller must hold that object's lock.  On return the lock of
       * whichever object the entry then references is held: the original
       * object when no split took place, the new object otherwise.
 1312  */
 1313 void
 1314 vm_object_split(vm_map_entry_t entry)
 1315 {
 1316         vm_page_t m, m_next;
 1317         vm_object_t orig_object, new_object, source;
 1318         vm_pindex_t idx, offidxstart;
 1319         vm_size_t size;
 1320 
 1321         orig_object = entry->object.vm_object;
 1322         if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
 1323                 return;
 1324         if (orig_object->ref_count <= 1)
 1325                 return;
              /*
               * The original object's lock is released while the new object is
               * allocated; it is taken again below.
               */
 1326         VM_OBJECT_UNLOCK(orig_object);
 1327 
              /*
               * offidxstart is the page index in orig_object at which the
               * entry begins; size is the entry's length in pages.
               */
 1328         offidxstart = OFF_TO_IDX(entry->offset);
 1329         size = atop(entry->end - entry->start);
 1330 
 1331         /*
 1332          * If swap_pager_copy() is later called, it will convert new_object
 1333          * into a swap object.
 1334          */
 1335         new_object = vm_object_allocate(OBJT_DEFAULT, size);
 1336 
 1337         /*
 1338          * At this point, the new object is still private, so the order in
 1339          * which the original and new objects are locked does not matter.
 1340          */
 1341         VM_OBJECT_LOCK(new_object);
 1342         VM_OBJECT_LOCK(orig_object);
 1343         source = orig_object->backing_object;
 1344         if (source != NULL) {
 1345                 VM_OBJECT_LOCK(source);
                      /*
                       * The backing object is flagged OBJ_DEAD: give up on the
                       * split, discard the new object and return with the
                       * original object locked again.
                       */
 1346                 if ((source->flags & OBJ_DEAD) != 0) {
 1347                         VM_OBJECT_UNLOCK(source);
 1348                         VM_OBJECT_UNLOCK(orig_object);
 1349                         VM_OBJECT_UNLOCK(new_object);
 1350                         vm_object_deallocate(new_object);
 1351                         VM_OBJECT_LOCK(orig_object);
 1352                         return;
 1353                 }
                      /*
                       * The new object shadows the same backing object as the
                       * original, at the original's backing offset plus the
                       * entry's offset.
                       */
 1354                 LIST_INSERT_HEAD(&source->shadow_head,
 1355                                   new_object, shadow_list);
 1356                 source->shadow_count++;
 1357                 source->generation++;
 1358                 vm_object_reference_locked(source);     /* for new_object */
 1359                 vm_object_clear_flag(source, OBJ_ONEMAPPING);
 1360                 VM_OBJECT_UNLOCK(source);
 1361                 new_object->backing_object_offset = 
 1362                         orig_object->backing_object_offset + entry->offset;
 1363                 new_object->backing_object = source;
 1364         }
 1365         new_object->flags |= orig_object->flags & OBJ_NEEDGIANT;
      /*
       * Find the first resident page of orig_object at or beyond offidxstart.
       * If the first page on the list lies below the range, the page tree is
       * splayed on offidxstart (presumably bringing the nearest page to the
       * root -- confirm) and, should that root still lie below the range, its
       * list successor is used.
       */
 1366 retry:
 1367         if ((m = TAILQ_FIRST(&orig_object->memq)) != NULL) {
 1368                 if (m->pindex < offidxstart) {
 1369                         m = vm_page_splay(offidxstart, orig_object->root);
 1370                         if ((orig_object->root = m)->pindex < offidxstart)
 1371                                 m = TAILQ_NEXT(m, listq);
 1372                 }
 1373         }
 1374         vm_page_lock_queues();
 1375         for (; m != NULL && (idx = m->pindex - offidxstart) < size;
 1376             m = m_next) {
 1377                 m_next = TAILQ_NEXT(m, listq);
 1378 
 1379                 /*
 1380                  * We must wait for pending I/O to complete before we can
 1381                  * rename the page.
 1382                  *
 1383                  * We do not have to VM_PROT_NONE the page as mappings should
 1384                  * not be changed by this operation.
                       *
                       * The msleep() is issued without PDROP, so orig_object's
                       * lock is held again when it returns; new_object is
                       * unlocked by hand around the sleep.  The scan then
                       * starts over at retry.
 1385                  */
 1386                 if ((m->oflags & VPO_BUSY) || m->busy) {
 1387                         vm_page_flag_set(m, PG_REFERENCED);
 1388                         vm_page_unlock_queues();
 1389                         VM_OBJECT_UNLOCK(new_object);
 1390                         m->oflags |= VPO_WANTED;
 1391                         msleep(m, VM_OBJECT_MTX(orig_object), PVM, "spltwt", 0);
 1392                         VM_OBJECT_LOCK(new_object);
 1393                         goto retry;
 1394                 }
 1395                 vm_page_rename(m, new_object, idx);
 1396                 /* page automatically made dirty by rename and cache handled */
                      /* The moved page stays busy until the wakeup loop below. */
 1397                 vm_page_busy(m);
 1398         }
 1399         vm_page_unlock_queues();
 1400         if (orig_object->type == OBJT_SWAP) {
 1401                 /*
 1402                  * swap_pager_copy() can sleep, in which case the orig_object's
 1403                  * and new_object's locks are released and reacquired. 
 1404                  */
 1405                 swap_pager_copy(orig_object, new_object, offidxstart, 0);
 1406 
 1407                 /*
 1408                  * Transfer any cached pages from orig_object to new_object.
 1409                  */
 1410                 if (__predict_false(orig_object->cache != NULL))
 1411                         vm_page_cache_transfer(orig_object, offidxstart,
 1412                             new_object);
 1413         }
 1414         VM_OBJECT_UNLOCK(orig_object);
              /*
               * Release the busy state set on the moved pages, then point the
               * entry at the new object and drop the entry's reference to the
               * original one.  The new object is left locked for the caller.
               */
 1415         TAILQ_FOREACH(m, &new_object->memq, listq)
 1416                 vm_page_wakeup(m);
 1417         VM_OBJECT_UNLOCK(new_object);
 1418         entry->object.vm_object = new_object;
 1419         entry->offset = 0LL;
 1420         vm_object_deallocate(orig_object);
 1421         VM_OBJECT_LOCK(new_object);
 1422 }
 1423 
      /*
       * Operation flags for the op argument of vm_object_backing_scan().
       *
       * OBSC_TEST_ALL_SHADOWED   for every in-range page of the backing
       *                          object, check that the parent has a valid
       *                          page or its pager has the page; the scan
       *                          result is 0 as soon as one is missing
       * OBSC_COLLAPSE_NOWAIT     backing pages that are busy or have no
       *                          valid bits are skipped
       * OBSC_COLLAPSE_WAIT       the backing object is flagged OBJ_DEAD and
       *                          busy backing pages are slept on
       */
 1424 #define OBSC_TEST_ALL_SHADOWED  0x0001
 1425 #define OBSC_COLLAPSE_NOWAIT    0x0002
 1426 #define OBSC_COLLAPSE_WAIT      0x0004
 1427 
 1428 static int
 1429 vm_object_backing_scan(vm_object_t object, int op)
 1430 {
 1431         int r = 1;
 1432         vm_page_t p;
 1433         vm_object_t backing_object;
 1434         vm_pindex_t backing_offset_index;
 1435 
 1436         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 1437         VM_OBJECT_LOCK_ASSERT(object->backing_object, MA_OWNED);
 1438 
 1439         backing_object = object->backing_object;
 1440         backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
 1441 
 1442         /*
 1443          * Initial conditions
 1444          */
 1445         if (op & OBSC_TEST_ALL_SHADOWED) {
 1446                 /*
 1447                  * We do not want to have to test for the existence of cache
 1448                  * or swap pages in the backing object.  XXX but with the
 1449                  * new swapper this would be pretty easy to do.
 1450                  *
 1451                  * XXX what about anonymous MAP_SHARED memory that hasn't
 1452                  * been ZFOD faulted yet?  If we do not test for this, the
 1453                  * shadow test may succeed! XXX
 1454                  */
 1455                 if (backing_object->type != OBJT_DEFAULT) {
 1456                         return (0);
 1457                 }
 1458         }
 1459         if (op & OBSC_COLLAPSE_WAIT) {
 1460                 vm_object_set_flag(backing_object, OBJ_DEAD);
 1461         }
 1462 
 1463         /*
 1464          * Our scan
 1465          */
 1466         p = TAILQ_FIRST(&backing_object->memq);
 1467         while (p) {
 1468                 vm_page_t next = TAILQ_NEXT(p, listq);
 1469                 vm_pindex_t new_pindex = p->pindex - backing_offset_index;
 1470 
 1471                 if (op & OBSC_TEST_ALL_SHADOWED) {
 1472                         vm_page_t pp;
 1473 
 1474                         /*
 1475                          * Ignore pages outside the parent object's range
 1476                          * and outside the parent object's mapping of the 
 1477                          * backing object.
 1478                          *
 1479                          * note that we do not busy the backing object's
 1480                          * page.
 1481                          */
 1482                         if (
 1483                             p->pindex < backing_offset_index ||
 1484                             new_pindex >= object->size
 1485                         ) {
 1486                                 p = next;
 1487                                 continue;
 1488                         }
 1489 
 1490                         /*
 1491                          * See if the parent has the page or if the parent's
 1492                          * object pager has the page.  If the parent has the
 1493                          * page but the page is not valid, the parent's
 1494                          * object pager must have the page.
 1495                          *
 1496                          * If this fails, the parent does not completely shadow
 1497                          * the object and we might as well give up now.
 1498                          */
 1499 
 1500                         pp = vm_page_lookup(object, new_pindex);
 1501                         if (
 1502                             (pp == NULL || pp->valid == 0) &&
 1503                             !vm_pager_has_page(object, new_pindex, NULL, NULL)
 1504                         ) {
 1505                                 r = 0;
 1506                                 break;
 1507                         }
 1508                 }
 1509 
 1510                 /*
 1511                  * Check for busy page
 1512                  */
 1513                 if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
 1514                         vm_page_t pp;
 1515 
 1516                         if (op & OBSC_COLLAPSE_NOWAIT) {
 1517                                 if ((p->oflags & VPO_BUSY) ||
 1518                                     !p->valid || 
 1519                                     p->busy) {
 1520                                         p = next;
 1521                                         continue;
 1522                                 }
 1523                         } else if (op & OBSC_COLLAPSE_WAIT) {
 1524                                 if ((p->oflags & VPO_BUSY) || p->busy) {
 1525                                         vm_page_lock_queues();
 1526                                         vm_page_flag_set(p, PG_REFERENCED);
 1527                                         vm_page_unlock_queues();
 1528                                         VM_OBJECT_UNLOCK(object);
 1529                                         p->oflags |= VPO_WANTED;
 1530                                         msleep(p, VM_OBJECT_MTX(backing_object),
 1531                                             PDROP | PVM, "vmocol", 0);
 1532                                         VM_OBJECT_LOCK(object);
 1533                                         VM_OBJECT_LOCK(backing_object);
 1534                                         /*
 1535                                          * If we slept, anything could have
 1536                                          * happened.  Since the object is
 1537                                          * marked dead, the backing offset
 1538                                          * should not have changed so we
 1539                                          * just restart our scan.
 1540                                          */
 1541                                         p = TAILQ_FIRST(&backing_object->memq);
 1542                                         continue;
 1543                                 }
 1544                         }
 1545 
 1546                         KASSERT(
 1547                             p->object == backing_object,
 1548                             ("vm_object_backing_scan: object mismatch")
 1549                         );
 1550 
 1551                         /*
 1552                          * Destroy any associated swap
 1553                          */
 1554                         if (backing_object->type == OBJT_SWAP) {
 1555                                 swap_pager_freespace(
 1556                                     backing_object, 
 1557                                     p->pindex,
 1558                                     1
 1559                                 );
 1560                         }
 1561 
 1562                         if (
 1563                             p->pindex < backing_offset_index ||
 1564                             new_pindex >= object->size
 1565                         ) {
 1566                                 /*
 1567                                  * Page is out of the parent object's range, we 
 1568                                  * can simply destroy it. 
 1569                                  */
 1570                                 vm_page_lock_queues();
 1571                                 KASSERT(!pmap_page_is_mapped(p),
 1572                                     ("freeing mapped page %p", p));
 1573                                 if (p->wire_count == 0)
 1574                                         vm_page_free(p);
 1575                                 else
 1576                                         vm_page_remove(p);
 1577                                 vm_page_unlock_queues();
 1578                                 p = next;
 1579                                 continue;
 1580                         }
 1581 
 1582                         pp = vm_page_lookup(object, new_pindex);
 1583                         if (
 1584                             pp != NULL ||
 1585                             vm_pager_has_page(object, new_pindex, NULL, NULL)
 1586                         ) {
 1587                                 /*
 1588                                  * page already exists in parent OR swap exists
 1589                                  * for this location in the parent.  Destroy 
 1590                                  * the original page from the backing object.
 1591                                  *
 1592                                  * Leave the parent's page alone
 1593                                  */
 1594                                 vm_page_lock_queues();
 1595                                 KASSERT(!pmap_page_is_mapped(p),
 1596                                     ("freeing mapped page %p", p));
 1597                                 if (p->wire_count == 0)
 1598                                         vm_page_free(p);
 1599                                 else
 1600                                         vm_page_remove(p);
 1601                                 vm_page_unlock_queues();
 1602                                 p = next;
 1603                                 continue;
 1604                         }
 1605 
 1606 #if VM_NRESERVLEVEL > 0
 1607                         /*
 1608                          * Rename the reservation.
 1609                          */
 1610                         vm_reserv_rename(p, object, backing_object,
 1611                             backing_offset_index);
 1612 #endif
 1613 
 1614                         /*
 1615                          * Page does not exist in parent, rename the
 1616                          * page from the backing object to the main object. 
 1617                          *
 1618                          * If the page was mapped to a process, it can remain 
 1619                          * mapped through the rename.
 1620                          */
 1621                         vm_page_lock_queues();
 1622                         vm_page_rename(p, object, new_pindex);
 1623                         vm_page_unlock_queues();
 1624                         /* page automatically made dirty by rename */
 1625                 }
 1626                 p = next;
 1627         }
 1628         return (r);
 1629 }
 1630 
 1631 
 1632 /*
 1633  * this version of collapse allows the operation to occur earlier and
 1634  * when paging_in_progress is true for an object...  This is not a complete
 1635  * operation, but should plug 99.9% of the rest of the leaks.
 1636  */
 1637 static void
 1638 vm_object_qcollapse(vm_object_t object)
 1639 {
 1640         vm_object_t backing_object = object->backing_object;
 1641 
 1642         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 1643         VM_OBJECT_LOCK_ASSERT(backing_object, MA_OWNED);
 1644 
 1645         if (backing_object->ref_count != 1)
 1646                 return;
 1647 
 1648         vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
 1649 }
 1650 
 1651 /*
 1652  *      vm_object_collapse:
 1653  *
 1654  *      Collapse an object with the object backing it.
 1655  *      Pages in the backing object are moved into the
 1656  *      parent, and the backing object is deallocated.
 1657  */
 1658 void
 1659 vm_object_collapse(vm_object_t object)
 1660 {
 1661         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 1662         
 1663         while (TRUE) {
 1664                 vm_object_t backing_object;
 1665 
 1666                 /*
 1667                  * Verify that the conditions are right for collapse:
 1668                  *
 1669                  * The object exists and the backing object exists.
 1670                  */
 1671                 if ((backing_object = object->backing_object) == NULL)
 1672                         break;
 1673 
 1674                 /*
 1675                  * we check the backing object first, because it is most likely
 1676                  * not collapsable.
 1677                  */
 1678                 VM_OBJECT_LOCK(backing_object);
 1679                 if (backing_object->handle != NULL ||
 1680                     (backing_object->type != OBJT_DEFAULT &&
 1681                      backing_object->type != OBJT_SWAP) ||
 1682                     (backing_object->flags & OBJ_DEAD) ||
 1683                     object->handle != NULL ||
 1684                     (object->type != OBJT_DEFAULT &&
 1685                      object->type != OBJT_SWAP) ||
 1686                     (object->flags & OBJ_DEAD)) {
 1687                         VM_OBJECT_UNLOCK(backing_object);
 1688                         break;
 1689                 }
 1690 
 1691                 if (
 1692                     object->paging_in_progress != 0 ||
 1693                     backing_object->paging_in_progress != 0
 1694                 ) {
 1695                         vm_object_qcollapse(object);
 1696                         VM_OBJECT_UNLOCK(backing_object);
 1697                         break;
 1698                 }
 1699                 /*
 1700                  * We know that we can either collapse the backing object (if
 1701                  * the parent is the only reference to it) or (perhaps) have
 1702                  * the parent bypass the object if the parent happens to shadow
 1703                  * all the resident pages in the entire backing object.
 1704                  *
 1705                  * This is ignoring pager-backed pages such as swap pages.
 1706                  * vm_object_backing_scan fails the shadowing test in this
 1707                  * case.
 1708                  */
 1709                 if (backing_object->ref_count == 1) {
 1710                         /*
 1711                          * If there is exactly one reference to the backing
 1712                          * object, we can collapse it into the parent.  
 1713                          */
 1714                         vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
 1715 
 1716 #if VM_NRESERVLEVEL > 0
 1717                         /*
 1718                          * Break any reservations from backing_object.
 1719                          */
 1720                         if (__predict_false(!LIST_EMPTY(&backing_object->rvq)))
 1721                                 vm_reserv_break_all(backing_object);
 1722 #endif
 1723 
 1724                         /*
 1725                          * Move the pager from backing_object to object.
 1726                          */
 1727                         if (backing_object->type == OBJT_SWAP) {
 1728                                 /*
 1729                                  * swap_pager_copy() can sleep, in which case
 1730                                  * the backing_object's and object's locks are
 1731                                  * released and reacquired.
 1732                                  */
 1733                                 swap_pager_copy(
 1734                                     backing_object,
 1735                                     object,
 1736                                     OFF_TO_IDX(object->backing_object_offset), TRUE);
 1737 
 1738                                 /*
 1739                                  * Free any cached pages from backing_object.
 1740                                  */
 1741                                 if (__predict_false(backing_object->cache != NULL))
 1742                                         vm_page_cache_free(backing_object, 0, 0);
 1743                         }
 1744                         /*
 1745                          * Object now shadows whatever backing_object did.
 1746                          * Note that the reference to 
 1747                          * backing_object->backing_object moves from within 
 1748                          * backing_object to within object.
 1749                          */
 1750                         LIST_REMOVE(object, shadow_list);
 1751                         backing_object->shadow_count--;
 1752                         backing_object->generation++;
 1753                         if (backing_object->backing_object) {
 1754                                 VM_OBJECT_LOCK(backing_object->backing_object);
 1755                                 LIST_REMOVE(backing_object, shadow_list);
 1756                                 LIST_INSERT_HEAD(
 1757                                     &backing_object->backing_object->shadow_head,
 1758                                     object, shadow_list);
 1759                                 /*
 1760                                  * The shadow_count has not changed.
 1761                                  */
 1762                                 backing_object->backing_object->generation++;
 1763                                 VM_OBJECT_UNLOCK(backing_object->backing_object);
 1764                         }
 1765                         object->backing_object = backing_object->backing_object;
 1766                         object->backing_object_offset +=
 1767                             backing_object->backing_object_offset;
 1768 
 1769                         /*
 1770                          * Discard backing_object.
 1771                          *
 1772                          * Since the backing object has no pages, no pager left,
 1773                          * and no object references within it, all that is
 1774                          * necessary is to dispose of it.
 1775                          */
 1776                         KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
 1777                         VM_OBJECT_UNLOCK(backing_object);
 1778 
 1779                         mtx_lock(&vm_object_list_mtx);
 1780                         TAILQ_REMOVE(
 1781                             &vm_object_list, 
 1782                             backing_object,
 1783                             object_list
 1784                         );
 1785                         mtx_unlock(&vm_object_list_mtx);
 1786 
 1787                         uma_zfree(obj_zone, backing_object);
 1788 
 1789                         object_collapses++;
 1790                 } else {
 1791                         vm_object_t new_backing_object;
 1792 
 1793                         /*
 1794                          * If we do not entirely shadow the backing object,
 1795                          * there is nothing we can do so we give up.
 1796                          */
 1797                         if (object->resident_page_count != object->size &&
 1798                             vm_object_backing_scan(object,
 1799                             OBSC_TEST_ALL_SHADOWED) == 0) {
 1800                                 VM_OBJECT_UNLOCK(backing_object);
 1801                                 break;
 1802                         }
 1803 
 1804                         /*
 1805                          * Make the parent shadow the next object in the
 1806                          * chain.  Deallocating backing_object will not remove
 1807                          * it, since its reference count is at least 2.
 1808                          */
 1809                         LIST_REMOVE(object, shadow_list);
 1810                         backing_object->shadow_count--;
 1811                         backing_object->generation++;
 1812 
 1813                         new_backing_object = backing_object->backing_object;
 1814                         if ((object->backing_object = new_backing_object) != NULL) {
 1815                                 VM_OBJECT_LOCK(new_backing_object);
 1816                                 LIST_INSERT_HEAD(
 1817                                     &new_backing_object->shadow_head,
 1818                                     object,
 1819                                     shadow_list
 1820                                 );
 1821                                 new_backing_object->shadow_count++;
 1822                                 new_backing_object->generation++;
 1823                                 vm_object_reference_locked(new_backing_object);
 1824                                 VM_OBJECT_UNLOCK(new_backing_object);
 1825                                 object->backing_object_offset +=
 1826                                         backing_object->backing_object_offset;
 1827                         }
 1828 
 1829                         /*
 1830                          * Drop the reference count on backing_object. Since
 1831                          * its ref_count was at least 2, it will not vanish.
 1832                          */
 1833                         backing_object->ref_count--;
 1834                         VM_OBJECT_UNLOCK(backing_object);
 1835                         object_bypasses++;
 1836                 }
 1837 
 1838                 /*
 1839                  * Try again with this object's new backing object.
 1840                  */
 1841         }
 1842 }
 1843 
 1844 /*
 1845  *      vm_object_page_remove:
 1846  *
 1847  *      For the given object, either frees or invalidates each of the
 1848  *      specified pages.  In general, a page is freed.  However, if a
 1849  *      page is wired for any reason other than the existence of a
 1850  *      managed, wired mapping, then it may be invalidated but not
 1851  *      removed from the object.  Pages are specified by the given
 1852  *      range ["start", "end") and Boolean "clean_only".  As a
 1853  *      special case, if "end" is zero, then the range extends from
 1854  *      "start" to the end of the object.  If "clean_only" is TRUE,
 1855  *      then only the non-dirty pages within the specified range are
 1856  *      affected.
 1857  *
 1858  *      In general, this operation should only be performed on objects
 1859  *      that contain managed pages.  There are two exceptions.  First,
 1860  *      it may be performed on the kernel and kmem objects.  Second,
 1861  *      it may be used by msync(..., MS_INVALIDATE) to invalidate
 1862  *      device-backed pages.
 1863  *
 1864  *      The object must be locked.
 1865  */
 1866 void
 1867 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
 1868     boolean_t clean_only)
 1869 {
 1870         vm_page_t p, next;
 1871 
 1872         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 1873         if (object->resident_page_count == 0)
 1874                 goto skipmemq;
 1875 
 1876         /*
 1877          * Since physically-backed objects do not use managed pages, we can't
 1878          * remove pages from the object (we must instead remove the page
 1879          * references, and then destroy the object).
 1880          */
 1881         KASSERT(object->type != OBJT_PHYS || object == kernel_object ||
 1882             object == kmem_object,
 1883             ("attempt to remove pages from a physical object"));
 1884 
 1885         vm_object_pip_add(object, 1);
 1886 again:
 1887         vm_page_lock_queues();
 1888         if ((p = TAILQ_FIRST(&object->memq)) != NULL) {
 1889                 if (p->pindex < start) {
 1890                         p = vm_page_splay(start, object->root);
 1891                         if ((object->root = p)->pindex < start)
 1892                                 p = TAILQ_NEXT(p, listq);
 1893                 }
 1894         }
 1895         /*
 1896          * Assert: the variable p is either (1) the page with the
 1897          * least pindex greater than or equal to the parameter pindex
 1898          * or (2) NULL.
 1899          */
 1900         for (;
 1901              p != NULL && (p->pindex < end || end == 0);
 1902              p = next) {
 1903                 next = TAILQ_NEXT(p, listq);
 1904 
 1905                 if (p->wire_count != 0) {
 1906                         /* Fictitious pages do not have managed mappings. */
 1907                         if ((p->flags & PG_FICTITIOUS) == 0)
 1908                                 pmap_remove_all(p);
 1909                         if (!clean_only)
 1910                                 p->valid = 0;
 1911                         continue;
 1912                 }
 1913                 if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
 1914                         goto again;
 1915                 KASSERT((p->flags & PG_FICTITIOUS) == 0,
 1916                     ("vm_object_page_remove: page %p is fictitious", p));
 1917                 if (clean_only && p->valid) {
 1918                         pmap_remove_write(p);
 1919                         if (p->valid & p->dirty)
 1920                                 continue;
 1921                 }
 1922                 pmap_remove_all(p);
 1923                 vm_page_free(p);
 1924         }
 1925         vm_page_unlock_queues();
 1926         vm_object_pip_wakeup(object);
 1927 skipmemq:
 1928         if (__predict_false(object->cache != NULL))
 1929                 vm_page_cache_free(object, start, end);
 1930 }
 1931 
 1932 /*
 1933  *      Routine:        vm_object_coalesce
 1934  *      Function:       Coalesces two objects backing up adjoining
 1935  *                      regions of memory into a single object.
 1936  *
 1937  *      returns TRUE if objects were combined.
 1938  *
 1939  *      NOTE:   Only works at the moment if the second object is NULL -
 1940  *              if it's not, which object do we lock first?
 1941  *
 1942  *      Parameters:
 1943  *              prev_object     First object to coalesce
 1944  *              prev_offset     Offset into prev_object
 1945  *              prev_size       Size of reference to prev_object
 1946  *              next_size       Size of reference to the second object
 1947  *
 1948  *      Conditions:
 1949  *      The object must *not* be locked.
 1950  */
 1951 boolean_t
 1952 vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
 1953         vm_size_t prev_size, vm_size_t next_size)
 1954 {
 1955         vm_pindex_t next_pindex;
 1956 
 1957         if (prev_object == NULL)
 1958                 return (TRUE);
 1959         VM_OBJECT_LOCK(prev_object);
 1960         if (prev_object->type != OBJT_DEFAULT &&
 1961             prev_object->type != OBJT_SWAP) {
 1962                 VM_OBJECT_UNLOCK(prev_object);
 1963                 return (FALSE);
 1964         }
 1965 
 1966         /*
 1967          * Try to collapse the object first
 1968          */
 1969         vm_object_collapse(prev_object);
 1970 
 1971         /*
 1972          * Can't coalesce if: . more than one reference . paged out . shadows
 1973          * another object . has a copy elsewhere (any of which mean that the
 1974          * pages not mapped to prev_entry may be in use anyway)
 1975          */
 1976         if (prev_object->backing_object != NULL) {
 1977                 VM_OBJECT_UNLOCK(prev_object);
 1978                 return (FALSE);
 1979         }
 1980 
 1981         prev_size >>= PAGE_SHIFT;
 1982         next_size >>= PAGE_SHIFT;
 1983         next_pindex = OFF_TO_IDX(prev_offset) + prev_size;
 1984 
 1985         if ((prev_object->ref_count > 1) &&
 1986             (prev_object->size != next_pindex)) {
 1987                 VM_OBJECT_UNLOCK(prev_object);
 1988                 return (FALSE);
 1989         }
 1990 
 1991         /*
 1992          * Remove any pages that may still be in the object from a previous
 1993          * deallocation.
 1994          */
 1995         if (next_pindex < prev_object->size) {
 1996                 vm_object_page_remove(prev_object,
 1997                                       next_pindex,
 1998                                       next_pindex + next_size, FALSE);
 1999                 if (prev_object->type == OBJT_SWAP)
 2000                         swap_pager_freespace(prev_object,
 2001                                              next_pindex, next_size);
 2002         }
 2003 
 2004         /*
 2005          * Extend the object if necessary.
 2006          */
 2007         if (next_pindex + next_size > prev_object->size)
 2008                 prev_object->size = next_pindex + next_size;
 2009 
 2010         VM_OBJECT_UNLOCK(prev_object);
 2011         return (TRUE);
 2012 }
 2013 
 2014 void
 2015 vm_object_set_writeable_dirty(vm_object_t object)
 2016 {
 2017         struct vnode *vp;
 2018 
 2019         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 2020         if ((object->flags & OBJ_MIGHTBEDIRTY) != 0)
 2021                 return;
 2022         vm_object_set_flag(object, OBJ_MIGHTBEDIRTY);
 2023         if (object->type == OBJT_VNODE &&
 2024             (vp = (struct vnode *)object->handle) != NULL) {
 2025                 VI_LOCK(vp);
 2026                 vp->v_iflag |= VI_OBJDIRTY;
 2027                 VI_UNLOCK(vp);
 2028         }
 2029 }
 2030 
 2031 #include "opt_ddb.h"
 2032 #ifdef DDB
 2033 #include <sys/kernel.h>
 2034 
 2035 #include <sys/cons.h>
 2036 
 2037 #include <ddb/ddb.h>
 2038 
 2039 static int
 2040 _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
 2041 {
 2042         vm_map_t tmpm;
 2043         vm_map_entry_t tmpe;
 2044         vm_object_t obj;
 2045         int entcount;
 2046 
 2047         if (map == 0)
 2048                 return 0;
 2049 
 2050         if (entry == 0) {
 2051                 tmpe = map->header.next;
 2052                 entcount = map->nentries;
 2053                 while (entcount-- && (tmpe != &map->header)) {
 2054                         if (_vm_object_in_map(map, object, tmpe)) {
 2055                                 return 1;
 2056                         }
 2057                         tmpe = tmpe->next;
 2058                 }
 2059         } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 2060                 tmpm = entry->object.sub_map;
 2061                 tmpe = tmpm->header.next;
 2062                 entcount = tmpm->nentries;
 2063                 while (entcount-- && tmpe != &tmpm->header) {
 2064                         if (_vm_object_in_map(tmpm, object, tmpe)) {
 2065                                 return 1;
 2066                         }
 2067                         tmpe = tmpe->next;
 2068                 }
 2069         } else if ((obj = entry->object.vm_object) != NULL) {
 2070                 for (; obj; obj = obj->backing_object)
 2071                         if (obj == object) {
 2072                                 return 1;
 2073                         }
 2074         }
 2075         return 0;
 2076 }
 2077 
 2078 static int
 2079 vm_object_in_map(vm_object_t object)
 2080 {
 2081         struct proc *p;
 2082 
 2083         /* sx_slock(&allproc_lock); */
 2084         FOREACH_PROC_IN_SYSTEM(p) {
 2085                 if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
 2086                         continue;
 2087                 if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
 2088                         /* sx_sunlock(&allproc_lock); */
 2089                         return 1;
 2090                 }
 2091         }
 2092         /* sx_sunlock(&allproc_lock); */
 2093         if (_vm_object_in_map(kernel_map, object, 0))
 2094                 return 1;
 2095         if (_vm_object_in_map(kmem_map, object, 0))
 2096                 return 1;
 2097         if (_vm_object_in_map(pager_map, object, 0))
 2098                 return 1;
 2099         if (_vm_object_in_map(buffer_map, object, 0))
 2100                 return 1;
 2101         return 0;
 2102 }
 2103 
 2104 DB_SHOW_COMMAND(vmochk, vm_object_check)
 2105 {
 2106         vm_object_t object;
 2107 
 2108         /*
 2109          * make sure that internal objs are in a map somewhere
 2110          * and none have zero ref counts.
 2111          */
 2112         TAILQ_FOREACH(object, &vm_object_list, object_list) {
 2113                 if (object->handle == NULL &&
 2114                     (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
 2115                         if (object->ref_count == 0) {
 2116                                 db_printf("vmochk: internal obj has zero ref count: %ld\n",
 2117                                         (long)object->size);
 2118                         }
 2119                         if (!vm_object_in_map(object)) {
 2120                                 db_printf(
 2121                         "vmochk: internal obj is not in a map: "
 2122                         "ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
 2123                                     object->ref_count, (u_long)object->size, 
 2124                                     (u_long)object->size,
 2125                                     (void *)object->backing_object);
 2126                         }
 2127                 }
 2128         }
 2129 }
 2130 
 2131 /*
 2132  *      vm_object_print:        [ debug ]
 2133  */
 2134 DB_SHOW_COMMAND(object, vm_object_print_static)
 2135 {
 2136         /* XXX convert args. */
 2137         vm_object_t object = (vm_object_t)addr;
 2138         boolean_t full = have_addr;
 2139 
 2140         vm_page_t p;
 2141 
 2142         /* XXX count is an (unused) arg.  Avoid shadowing it. */
 2143 #define count   was_count
 2144 
 2145         int count;
 2146 
 2147         if (object == NULL)
 2148                 return;
 2149 
 2150         db_iprintf(
 2151             "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x\n",
 2152             object, (int)object->type, (uintmax_t)object->size,
 2153             object->resident_page_count, object->ref_count, object->flags);
 2154         db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
 2155             object->shadow_count, 
 2156             object->backing_object ? object->backing_object->ref_count : 0,
 2157             object->backing_object, (uintmax_t)object->backing_object_offset);
 2158 
 2159         if (!full)
 2160                 return;
 2161 
 2162         db_indent += 2;
 2163         count = 0;
 2164         TAILQ_FOREACH(p, &object->memq, listq) {
 2165                 if (count == 0)
 2166                         db_iprintf("memory:=");
 2167                 else if (count == 6) {
 2168                         db_printf("\n");
 2169                         db_iprintf(" ...");
 2170                         count = 0;
 2171                 } else
 2172                         db_printf(",");
 2173                 count++;
 2174 
 2175                 db_printf("(off=0x%jx,page=0x%jx)",
 2176                     (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
 2177         }
 2178         if (count != 0)
 2179                 db_printf("\n");
 2180         db_indent -= 2;
 2181 }
 2182 
 2183 /* XXX. */
 2184 #undef count
 2185 
 2186 /* XXX need this non-static entry for calling from vm_map_print. */
 2187 void
 2188 vm_object_print(
 2189         /* db_expr_t */ long addr,
 2190         boolean_t have_addr,
 2191         /* db_expr_t */ long count,
 2192         char *modif)
 2193 {
 2194         vm_object_print_static(addr, have_addr, count, modif);
 2195 }
 2196 
 2197 DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
 2198 {
 2199         vm_object_t object;
 2200         int nl = 0;
 2201         int c;
 2202 
 2203         TAILQ_FOREACH(object, &vm_object_list, object_list) {
 2204                 vm_pindex_t idx, fidx;
 2205                 vm_pindex_t osize;
 2206                 vm_paddr_t pa = -1;
 2207                 int rcount;
 2208                 vm_page_t m;
 2209 
 2210                 db_printf("new object: %p\n", (void *)object);
 2211                 if (nl > 18) {
 2212                         c = cngetc();
 2213                         if (c != ' ')
 2214                                 return;
 2215                         nl = 0;
 2216                 }
 2217                 nl++;
 2218                 rcount = 0;
 2219                 fidx = 0;
 2220                 osize = object->size;
 2221                 if (osize > 128)
 2222                         osize = 128;
 2223                 for (idx = 0; idx < osize; idx++) {
 2224                         m = vm_page_lookup(object, idx);
 2225                         if (m == NULL) {
 2226                                 if (rcount) {
 2227                                         db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
 2228                                                 (long)fidx, rcount, (long)pa);
 2229                                         if (nl > 18) {
 2230                                                 c = cngetc();
 2231                                                 if (c != ' ')
 2232                                                         return;
 2233                                                 nl = 0;
 2234                                         }
 2235                                         nl++;
 2236                                         rcount = 0;
 2237                                 }
 2238                                 continue;
 2239                         }
 2240 
 2241                                 
 2242                         if (rcount &&
 2243                                 (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
 2244                                 ++rcount;
 2245                                 continue;
 2246                         }
 2247                         if (rcount) {
 2248                                 db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
 2249                                         (long)fidx, rcount, (long)pa);
 2250                                 if (nl > 18) {
 2251                                         c = cngetc();
 2252                                         if (c != ' ')
 2253                                                 return;
 2254                                         nl = 0;
 2255                                 }
 2256                                 nl++;
 2257                         }
 2258                         fidx = idx;
 2259                         pa = VM_PAGE_TO_PHYS(m);
 2260                         rcount = 1;
 2261                 }
 2262                 if (rcount) {
 2263                         db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
 2264                                 (long)fidx, rcount, (long)pa);
 2265                         if (nl > 18) {
 2266                                 c = cngetc();
 2267                                 if (c != ' ')
 2268                                         return;
 2269                                 nl = 0;
 2270                         }
 2271                         nl++;
 2272                 }
 2273         }
 2274 }
 2275 #endif /* DDB */
Cache object: acc6d111fb37c1c415449f6c9973adad
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/vm/vm_object.c

FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_object.c