FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_object.c

    1 /*
    2  * Copyright (c) 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * The Mach Operating System project at Carnegie-Mellon University.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by the University of
   19  *      California, Berkeley and its contributors.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      from: @(#)vm_object.c   8.5 (Berkeley) 3/22/94
   37  *
   38  *
   39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   40  * All rights reserved.
   41  *
   42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   43  *
   44  * Permission to use, copy, modify and distribute this software and
   45  * its documentation is hereby granted, provided that both the copyright
   46  * notice and this permission notice appear in all copies of the
   47  * software, derivative works or modified versions, and any portions
   48  * thereof, and that both notices appear in supporting documentation.
   49  *
   50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   53  *
   54  * Carnegie Mellon requests users of this software to return to
   55  *
   56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   57  *  School of Computer Science
   58  *  Carnegie Mellon University
   59  *  Pittsburgh PA 15213-3890
   60  *
   61  * any improvements or extensions that they make and grant Carnegie the
   62  * rights to redistribute these changes.
   63  *
   64  * $FreeBSD: releng/5.0/sys/vm/vm_object.c 107304 2002-11-27 08:03:24Z alc $
   65  */
   66 
   67 /*
   68  *      Virtual memory object module.
   69  */
   70 
   71 #include <sys/param.h>
   72 #include <sys/systm.h>
   73 #include <sys/lock.h>
   74 #include <sys/mman.h>
   75 #include <sys/mount.h>
   76 #include <sys/kernel.h>
   77 #include <sys/sysctl.h>
   78 #include <sys/mutex.h>
   79 #include <sys/proc.h>           /* for curproc, pageproc */
   80 #include <sys/socket.h>
   81 #include <sys/stdint.h>
   82 #include <sys/vnode.h>
   83 #include <sys/vmmeter.h>
   84 #include <sys/sx.h>
   85 
   86 #include <vm/vm.h>
   87 #include <vm/vm_param.h>
   88 #include <vm/pmap.h>
   89 #include <vm/vm_map.h>
   90 #include <vm/vm_object.h>
   91 #include <vm/vm_page.h>
   92 #include <vm/vm_pageout.h>
   93 #include <vm/vm_pager.h>
   94 #include <vm/swap_pager.h>
   95 #include <vm/vm_kern.h>
   96 #include <vm/vm_extern.h>
   97 #include <vm/uma.h>
   98 
   99 #define EASY_SCAN_FACTOR       8
  100 
  101 #define MSYNC_FLUSH_HARDSEQ     0x01
  102 #define MSYNC_FLUSH_SOFTSEQ     0x02
  103 
  104 /*
  105  * msync / VM object flushing optimizations
  106  */
  107 static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ | MSYNC_FLUSH_SOFTSEQ;
  108 SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags,
  109         CTLFLAG_RW, &msync_flush_flags, 0, "");
  110 
  111 static void     vm_object_qcollapse(vm_object_t object);
  112 static int      vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags);
  113 
  114 /*
  115  *      Virtual memory objects maintain the actual data
  116  *      associated with allocated virtual memory.  A given
  117  *      page of memory exists within exactly one object.
  118  *
  119  *      An object is only deallocated when all "references"
  120  *      are given up.  Only one "reference" to a given
  121  *      region of an object should be writeable.
  122  *
  123  *      Associated with each object is a list of all resident
  124  *      memory pages belonging to that object; this list is
  125  *      maintained by the "vm_page" module, and locked by the object's
  126  *      lock.
  127  *
  128  *      Each object also records a "pager" routine which is
  129  *      used to retrieve (and store) pages to the proper backing
  130  *      storage.  In addition, objects may be backed by other
  131  *      objects from which they were virtual-copied.
  132  *
  133  *      The only items within the object structure which are
  134  *      modified after time of creation are:
  135  *              reference count         locked by object's lock
  136  *              pager routine           locked by object's lock
  137  *
  138  */
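/*
 * Editor's illustrative sketch (not part of the original file): the shadow
 * chain described above, expressed as a lookup loop.  A miss in an object
 * falls through to its backing object with the page index shifted by that
 * object's backing_object_offset, much as the fault and collapse code below
 * walk backing_object links.  The example_chain_lookup() helper is
 * hypothetical.
 */
#if 0
static vm_page_t
example_chain_lookup(vm_object_t object, vm_pindex_t pindex)
{
        vm_page_t m;

        while (object != NULL) {
                m = vm_page_lookup(object, pindex);
                if (m != NULL)
                        return (m);
                pindex += OFF_TO_IDX(object->backing_object_offset);
                object = object->backing_object;
        }
        return (NULL);
}
#endif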
  139 
  140 struct object_q vm_object_list;
  141 struct mtx vm_object_list_mtx;  /* lock for object list and count */
  142 vm_object_t kernel_object;
  143 vm_object_t kmem_object;
  144 static struct vm_object kernel_object_store;
  145 static struct vm_object kmem_object_store;
  146 extern int vm_pageout_page_count;
  147 
  148 static long object_collapses;
  149 static long object_bypasses;
  150 static int next_index;
  151 static uma_zone_t obj_zone;
  152 #define VM_OBJECTS_INIT 256
  153 
  154 static void vm_object_zinit(void *mem, int size);
  155 
  156 #ifdef INVARIANTS
  157 static void vm_object_zdtor(void *mem, int size, void *arg);
  158 
  159 static void
  160 vm_object_zdtor(void *mem, int size, void *arg)
  161 {
  162         vm_object_t object;
  163 
  164         object = (vm_object_t)mem;
  165         KASSERT(object->paging_in_progress == 0,
  166             ("object %p paging_in_progress = %d",
  167             object, object->paging_in_progress));
  168         KASSERT(object->resident_page_count == 0,
  169             ("object %p resident_page_count = %d",
  170             object, object->resident_page_count));
  171         KASSERT(object->shadow_count == 0,
  172             ("object %p shadow_count = %d",
  173             object, object->shadow_count));
  174 }
  175 #endif
  176 
  177 static void
  178 vm_object_zinit(void *mem, int size)
  179 {
  180         vm_object_t object;
  181 
  182         object = (vm_object_t)mem;
  183 
  184         /* These are true for any object that has been freed */
  185         object->paging_in_progress = 0;
  186         object->resident_page_count = 0;
  187         object->shadow_count = 0;
  188 }
  189 
  190 void
  191 _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
  192 {
  193         static int object_hash_rand;
  194         int exp, incr;
  195 
  196         TAILQ_INIT(&object->memq);
  197         TAILQ_INIT(&object->shadow_head);
  198 
  199         object->root = NULL;
  200         object->type = type;
  201         object->size = size;
  202         object->ref_count = 1;
  203         object->flags = 0;
  204         if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
  205                 vm_object_set_flag(object, OBJ_ONEMAPPING);
  206         if (size > (PQ_L2_SIZE / 3 + PQ_PRIME1))
  207                 incr = PQ_L2_SIZE / 3 + PQ_PRIME1;
  208         else
  209                 incr = size;
  210         do
  211                 object->pg_color = next_index;
  212         while (!atomic_cmpset_int(&next_index, object->pg_color,
  213                                   (object->pg_color + incr) & PQ_L2_MASK));
  214         object->handle = NULL;
  215         object->backing_object = NULL;
  216         object->backing_object_offset = (vm_ooffset_t) 0;
  217         /*
  218          * Try to generate a number that will spread objects out in the
  219          * hash table.  We 'wipe' new objects across the hash in 128 page
  220          * increments plus 1 more to offset it a little more by the time
  221          * it wraps around.
  222          */
  223         do {
  224                 exp = object_hash_rand;
  225                 object->hash_rand = exp - 129;
  226         } while (!atomic_cmpset_int(&object_hash_rand, exp, object->hash_rand));
  227 
  228         atomic_add_int(&object->generation, 1);
  229 
  230         mtx_lock(&vm_object_list_mtx);
  231         TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
  232         mtx_unlock(&vm_object_list_mtx);
  233 }
  234 
  235 /*
  236  *      vm_object_init:
  237  *
  238  *      Initialize the VM objects module.
  239  */
  240 void
  241 vm_object_init(void)
  242 {
  243         TAILQ_INIT(&vm_object_list);
  244         mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);
  245         
  246         kernel_object = &kernel_object_store;
  247         _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
  248             kernel_object);
  249 
  250         kmem_object = &kmem_object_store;
  251         _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
  252             kmem_object);
  253         obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
  254 #ifdef INVARIANTS
  255             vm_object_zdtor,
  256 #else
  257             NULL,
  258 #endif
  259             vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
  260         uma_prealloc(obj_zone, VM_OBJECTS_INIT);
  261 }
  262 
  263 void
  264 vm_object_init2(void)
  265 {
  266 }
  267 
  268 void
  269 vm_object_set_flag(vm_object_t object, u_short bits)
  270 {
  271         object->flags |= bits;
  272 }
  273 
  274 void
  275 vm_object_clear_flag(vm_object_t object, u_short bits)
  276 {
  277         GIANT_REQUIRED;
  278         object->flags &= ~bits;
  279 }
  280 
  281 void
  282 vm_object_pip_add(vm_object_t object, short i)
  283 {
  284         GIANT_REQUIRED;
  285         object->paging_in_progress += i;
  286 }
  287 
  288 void
  289 vm_object_pip_subtract(vm_object_t object, short i)
  290 {
  291         GIANT_REQUIRED;
  292         object->paging_in_progress -= i;
  293 }
  294 
  295 void
  296 vm_object_pip_wakeup(vm_object_t object)
  297 {
  298         GIANT_REQUIRED;
  299         object->paging_in_progress--;
  300         if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
  301                 vm_object_clear_flag(object, OBJ_PIPWNT);
  302                 wakeup(object);
  303         }
  304 }
  305 
  306 void
  307 vm_object_pip_wakeupn(vm_object_t object, short i)
  308 {
  309         GIANT_REQUIRED;
  310         if (i)
  311                 object->paging_in_progress -= i;
  312         if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
  313                 vm_object_clear_flag(object, OBJ_PIPWNT);
  314                 wakeup(object);
  315         }
  316 }
  317 
  318 void
  319 vm_object_pip_sleep(vm_object_t object, char *waitid)
  320 {
  321         GIANT_REQUIRED;
  322         if (object->paging_in_progress) {
  323                 int s = splvm();
  324                 if (object->paging_in_progress) {
  325                         vm_object_set_flag(object, OBJ_PIPWNT);
  326                         tsleep(object, PVM, waitid, 0);
  327                 }
  328                 splx(s);
  329         }
  330 }
  331 
  332 void
  333 vm_object_pip_wait(vm_object_t object, char *waitid)
  334 {
  335         GIANT_REQUIRED;
  336         while (object->paging_in_progress)
  337                 vm_object_pip_sleep(object, waitid);
  338 }
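/*
 * Editor's illustrative sketch (not part of the original file): the usual
 * paging-in-progress protocol built from the helpers above.  A thread that
 * starts pager activity bumps the counter, wakes waiters when it finishes,
 * and a thread that needs the object quiescent blocks in
 * vm_object_pip_wait().  The example_pip_protocol() helper and the "exmpip"
 * wait channel are hypothetical.
 */
#if 0
static void
example_pip_protocol(vm_object_t object)
{
        vm_object_pip_add(object, 1);           /* announce pending pager I/O */
        /* ... perform the pager I/O on the object's pages ... */
        vm_object_pip_wakeup(object);           /* drop the count, wake waiters */

        vm_object_pip_wait(object, "exmpip");   /* block until all I/O drains */
}
#endif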
  339 
  340 /*
  341  *      vm_object_allocate_wait
  342  *
  343  *      Return a new object with the given size, and give the user the
  344  *      option of waiting for it to complete or failing if the needed
  345  *      memory isn't available.
  346  */
  347 vm_object_t
  348 vm_object_allocate_wait(objtype_t type, vm_pindex_t size, int flags)
  349 {
  350         vm_object_t result;
  351 
  352         result = (vm_object_t) uma_zalloc(obj_zone, flags);
  353 
  354         if (result != NULL)
  355                 _vm_object_allocate(type, size, result);
  356 
  357         return (result);
  358 }
  359 
  360 /*
  361  *      vm_object_allocate:
  362  *
  363  *      Returns a new object with the given size.
  364  */
  365 vm_object_t
  366 vm_object_allocate(objtype_t type, vm_pindex_t size)
  367 {
  368         return(vm_object_allocate_wait(type, size, M_WAITOK));
  369 }
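/*
 * Editor's illustrative sketch (not part of the original file): the
 * reference life cycle implied by the allocation routines above.  The
 * object comes back with ref_count == 1; every vm_object_reference() must
 * be balanced by a vm_object_deallocate(), and the final deallocate
 * terminates the object.  The example_object_lifecycle() helper and its
 * "len" byte length are hypothetical.
 */
#if 0
static void
example_object_lifecycle(vm_size_t len)
{
        vm_object_t obj;

        obj = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(len));
        vm_object_reference(obj);       /* e.g. a second mapping of the object */
        vm_object_deallocate(obj);      /* drop the extra reference */
        vm_object_deallocate(obj);      /* last reference: object is terminated */
}
#endif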
  370 
  371 
  372 /*
  373  *      vm_object_reference:
  374  *
  375  *      Gets another reference to the given object.
  376  */
  377 void
  378 vm_object_reference(vm_object_t object)
  379 {
  380         if (object == NULL)
  381                 return;
  382 
  383         vm_object_lock(object);
  384 #if 0
  385         /* object can be re-referenced during final cleaning */
  386         KASSERT(!(object->flags & OBJ_DEAD),
  387             ("vm_object_reference: attempting to reference dead obj"));
  388 #endif
  389 
  390         object->ref_count++;
  391         if (object->type == OBJT_VNODE) {
  392                 while (vget((struct vnode *) object->handle, LK_RETRY, curthread)) {
  393                         printf("vm_object_reference: delay in getting object\n");
  394                 }
  395         }
  396         vm_object_unlock(object);
  397 }
  398 
  399 /*
  400  * handle deallocating an object of type OBJT_VNODE
  401  */
  402 void
  403 vm_object_vndeallocate(vm_object_t object)
  404 {
  405         struct vnode *vp = (struct vnode *) object->handle;
  406 
  407         GIANT_REQUIRED;
  408         KASSERT(object->type == OBJT_VNODE,
  409             ("vm_object_vndeallocate: not a vnode object"));
  410         KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
  411 #ifdef INVARIANTS
  412         if (object->ref_count == 0) {
  413                 vprint("vm_object_vndeallocate", vp);
  414                 panic("vm_object_vndeallocate: bad object reference count");
  415         }
  416 #endif
  417 
  418         object->ref_count--;
  419         if (object->ref_count == 0) {
  420                 mp_fixme("Unlocked vflag access.");
  421                 vp->v_vflag &= ~VV_TEXT;
  422 #ifdef ENABLE_VFS_IOOPT
  423                 vm_object_clear_flag(object, OBJ_OPT);
  424 #endif
  425         }
  426         /*
  427          * vrele may need a vop lock
  428          */
  429         vrele(vp);
  430 }
  431 
  432 /*
  433  *      vm_object_deallocate:
  434  *
  435  *      Release a reference to the specified object,
  436  *      gained either through a vm_object_allocate
  437  *      or a vm_object_reference call.  When all references
  438  *      are gone, storage associated with this object
  439  *      may be relinquished.
  440  *
  441  *      No object may be locked.
  442  */
  443 void
  444 vm_object_deallocate(vm_object_t object)
  445 {
  446         vm_object_t temp;
  447 
  448         mtx_lock(&Giant);
  449         while (object != NULL) {
  450 
  451                 if (object->type == OBJT_VNODE) {
  452                         vm_object_vndeallocate(object);
  453                         mtx_unlock(&Giant);
  454                         return;
  455                 }
  456 
  457                 KASSERT(object->ref_count != 0,
  458                         ("vm_object_deallocate: object deallocated too many times: %d", object->type));
  459 
  460                 /*
  461                  * If the reference count goes to 0 we start calling
  462                  * vm_object_terminate() on the object chain.
  463                  * A ref count of 1 may be a special case depending on the
  464                  * shadow count being 0 or 1.
  465                  */
  466                 object->ref_count--;
  467                 if (object->ref_count > 1) {
  468                         mtx_unlock(&Giant);
  469                         return;
  470                 } else if (object->ref_count == 1) {
  471                         if (object->shadow_count == 0) {
  472                                 vm_object_set_flag(object, OBJ_ONEMAPPING);
  473                         } else if ((object->shadow_count == 1) &&
  474                             (object->handle == NULL) &&
  475                             (object->type == OBJT_DEFAULT ||
  476                              object->type == OBJT_SWAP)) {
  477                                 vm_object_t robject;
  478 
  479                                 robject = TAILQ_FIRST(&object->shadow_head);
  480                                 KASSERT(robject != NULL,
  481                                     ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
  482                                          object->ref_count,
  483                                          object->shadow_count));
  484                                 if ((robject->handle == NULL) &&
  485                                     (robject->type == OBJT_DEFAULT ||
  486                                      robject->type == OBJT_SWAP)) {
  487 
  488                                         robject->ref_count++;
  489 
  490                                         while (
  491                                                 robject->paging_in_progress ||
  492                                                 object->paging_in_progress
  493                                         ) {
  494                                                 vm_object_pip_sleep(robject, "objde1");
  495                                                 vm_object_pip_sleep(object, "objde2");
  496                                         }
  497 
  498                                         if (robject->ref_count == 1) {
  499                                                 robject->ref_count--;
  500                                                 object = robject;
  501                                                 goto doterm;
  502                                         }
  503 
  504                                         object = robject;
  505                                         vm_object_collapse(object);
  506                                         continue;
  507                                 }
  508                         }
  509                         mtx_unlock(&Giant);
  510                         return;
  511                 }
  512 doterm:
  513                 temp = object->backing_object;
  514                 if (temp) {
  515                         TAILQ_REMOVE(&temp->shadow_head, object, shadow_list);
  516                         temp->shadow_count--;
  517 #ifdef ENABLE_VFS_IOOPT
  518                         if (temp->ref_count == 0)
  519                                 vm_object_clear_flag(temp, OBJ_OPT);
  520 #endif
  521                         temp->generation++;
  522                         object->backing_object = NULL;
  523                 }
  524                 /*
  525                  * Don't double-terminate, we could be in a termination
  526                  * recursion due to the terminate having to sync data
  527                  * to disk.
  528                  */
  529                 if ((object->flags & OBJ_DEAD) == 0)
  530                         vm_object_terminate(object);
  531                 object = temp;
  532         }
  533         mtx_unlock(&Giant);
  534 }
  535 
  536 /*
  537  *      vm_object_terminate actually destroys the specified object, freeing
  538  *      up all previously used resources.
  539  *
  540  *      The object must be locked.
  541  *      This routine may block.
  542  */
  543 void
  544 vm_object_terminate(vm_object_t object)
  545 {
  546         vm_page_t p;
  547         int s;
  548 
  549         GIANT_REQUIRED;
  550 
  551         /*
  552          * Make sure no one uses us.
  553          */
  554         vm_object_set_flag(object, OBJ_DEAD);
  555 
  556         /*
  557          * wait for the pageout daemon to be done with the object
  558          */
  559         vm_object_pip_wait(object, "objtrm");
  560 
  561         KASSERT(!object->paging_in_progress,
  562                 ("vm_object_terminate: pageout in progress"));
  563 
  564         /*
  565          * Clean and free the pages, as appropriate. All references to the
  566          * object are gone, so we don't need to lock it.
  567          */
  568         if (object->type == OBJT_VNODE) {
  569                 struct vnode *vp;
  570 
  571 #ifdef ENABLE_VFS_IOOPT
  572                 /*
  573                  * Freeze optimized copies.
  574                  */
  575                 vm_freeze_copyopts(object, 0, object->size);
  576 #endif
  577                 /*
  578                  * Clean pages and flush buffers.
  579                  */
  580                 vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
  581 
  582                 vp = (struct vnode *) object->handle;
  583                 vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
  584         }
  585 
  586         KASSERT(object->ref_count == 0, 
  587                 ("vm_object_terminate: object with references, ref_count=%d",
  588                 object->ref_count));
  589 
  590         /*
  591          * Now free any remaining pages. For internal objects, this also
  592          * removes them from paging queues. Don't free wired pages, just
  593          * remove them from the object. 
  594          */
  595         s = splvm();
  596         vm_page_lock_queues();
  597         while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
  598                 KASSERT(!p->busy && (p->flags & PG_BUSY) == 0,
  599                         ("vm_object_terminate: freeing busy page %p "
  600                         "p->busy = %d, p->flags %x\n", p, p->busy, p->flags));
  601                 if (p->wire_count == 0) {
  602                         vm_page_busy(p);
  603                         vm_page_free(p);
  604                         cnt.v_pfree++;
  605                 } else {
  606                         vm_page_busy(p);
  607                         vm_page_remove(p);
  608                 }
  609         }
  610         vm_page_unlock_queues();
  611         splx(s);
  612 
  613         /*
  614          * Let the pager know object is dead.
  615          */
  616         vm_pager_deallocate(object);
  617 
  618         /*
  619          * Remove the object from the global object list.
  620          */
  621         mtx_lock(&vm_object_list_mtx);
  622         TAILQ_REMOVE(&vm_object_list, object, object_list);
  623         mtx_unlock(&vm_object_list_mtx);
  624 
  625         wakeup(object);
  626 
  627         /*
  628          * Free the space for the object.
  629          */
  630         uma_zfree(obj_zone, object);
  631 }
  632 
  633 /*
  634  *      vm_object_page_clean
  635  *
  636  *      Clean all dirty pages in the specified range of object.  Leaves page 
  637  *      on whatever queue it is currently on.   If NOSYNC is set then do not
  638  *      write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
  639  *      leaving the object dirty.
  640  *
  641  *      Odd semantics: if start == end, we clean everything.
  642  *
  643  *      The object must be locked.
  644  */
  645 void
  646 vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int flags)
  647 {
  648         vm_page_t p, np;
  649         vm_pindex_t tstart, tend;
  650         vm_pindex_t pi;
  651         struct vnode *vp;
  652         int clearobjflags;
  653         int pagerflags;
  654         int curgeneration;
  655 
  656         GIANT_REQUIRED;
  657 
  658         if (object->type != OBJT_VNODE ||
  659                 (object->flags & OBJ_MIGHTBEDIRTY) == 0)
  660                 return;
  661 
  662         pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : 0;
  663         pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;
  664 
  665         vp = object->handle;
  666 
  667         vm_object_set_flag(object, OBJ_CLEANING);
  668 
  669         tstart = start;
  670         if (end == 0) {
  671                 tend = object->size;
  672         } else {
  673                 tend = end;
  674         }
  675 
  676         /*
  677          * If the caller is smart and only msync()s a range he knows is
  678          * dirty, we may be able to avoid an object scan.  This results in
  679  * a phenomenal improvement in performance.  We cannot do this
  680          * as a matter of course because the object may be huge - e.g.
  681  * the size might be in the gigabytes or terabytes.
  682          */
  683         if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) {
  684                 vm_pindex_t tscan;
  685                 int scanlimit;
  686                 int scanreset;
  687 
  688                 scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
  689                 if (scanreset < 16)
  690                         scanreset = 16;
  691 
  692                 scanlimit = scanreset;
  693                 tscan = tstart;
  694                 while (tscan < tend) {
  695                         curgeneration = object->generation;
  696                         p = vm_page_lookup(object, tscan);
  697                         if (p == NULL || p->valid == 0 ||
  698                             (p->queue - p->pc) == PQ_CACHE) {
  699                                 if (--scanlimit == 0)
  700                                         break;
  701                                 ++tscan;
  702                                 continue;
  703                         }
  704                         vm_page_test_dirty(p);
  705                         if ((p->dirty & p->valid) == 0) {
  706                                 if (--scanlimit == 0)
  707                                         break;
  708                                 ++tscan;
  709                                 continue;
  710                         }
  711                         /*
  712                          * If we have been asked to skip nosync pages and 
  713                          * this is a nosync page, we can't continue.
  714                          */
  715                         if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
  716                                 if (--scanlimit == 0)
  717                                         break;
  718                                 ++tscan;
  719                                 continue;
  720                         }
  721                         scanlimit = scanreset;
  722 
  723                         /*
  724                          * This returns 0 if it was unable to busy the first
  725                          * page (i.e. had to sleep).
  726                          */
  727                         tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags);
  728                 }
  729 
  730                 /*
  731                  * If everything was dirty and we flushed it successfully,
  732                  * and the requested range is not the entire object, we
  733                  * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can
  734                  * return immediately.
  735                  */
  736                 if (tscan >= tend && (tstart || tend < object->size)) {
  737                         vm_object_clear_flag(object, OBJ_CLEANING);
  738                         return;
  739                 }
  740         }
  741 
  742         /*
  743          * Generally set CLEANCHK interlock and make the page read-only so
  744          * we can then clear the object flags.
  745          *
  746          * However, if this is a nosync mmap then the object is likely to 
  747          * stay dirty so do not mess with the page and do not clear the
  748          * object flags.
  749          */
  750         clearobjflags = 1;
  751         vm_page_lock_queues();
  752         TAILQ_FOREACH(p, &object->memq, listq) {
  753                 vm_page_flag_set(p, PG_CLEANCHK);
  754                 if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
  755                         clearobjflags = 0;
  756                 else
  757                         pmap_page_protect(p, VM_PROT_READ);
  758         }
  759         vm_page_unlock_queues();
  760 
  761         if (clearobjflags && (tstart == 0) && (tend == object->size)) {
  762                 struct vnode *vp;
  763 
  764                 vm_object_clear_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
  765                 if (object->type == OBJT_VNODE &&
  766                     (vp = (struct vnode *)object->handle) != NULL) {
  767                         VI_LOCK(vp);
  768                         if (vp->v_iflag & VI_OBJDIRTY)
  769                                 vp->v_iflag &= ~VI_OBJDIRTY;
  770                         VI_UNLOCK(vp);
  771                 }
  772         }
  773 
  774 rescan:
  775         curgeneration = object->generation;
  776 
  777         for (p = TAILQ_FIRST(&object->memq); p; p = np) {
  778                 int n;
  779 
  780                 np = TAILQ_NEXT(p, listq);
  781 
  782 again:
  783                 pi = p->pindex;
  784                 if (((p->flags & PG_CLEANCHK) == 0) ||
  785                         (pi < tstart) || (pi >= tend) ||
  786                         (p->valid == 0) ||
  787                         ((p->queue - p->pc) == PQ_CACHE)) {
  788                         vm_page_flag_clear(p, PG_CLEANCHK);
  789                         continue;
  790                 }
  791 
  792                 vm_page_test_dirty(p);
  793                 if ((p->dirty & p->valid) == 0) {
  794                         vm_page_flag_clear(p, PG_CLEANCHK);
  795                         continue;
  796                 }
  797 
  798                 /*
  799                  * If we have been asked to skip nosync pages and this is a
  800                  * nosync page, skip it.  Note that the object flags were
  801                  * not cleared in this case so we do not have to set them.
  802                  */
  803                 if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
  804                         vm_page_flag_clear(p, PG_CLEANCHK);
  805                         continue;
  806                 }
  807 
  808                 n = vm_object_page_collect_flush(object, p,
  809                         curgeneration, pagerflags);
  810                 if (n == 0)
  811                         goto rescan;
  812 
  813                 if (object->generation != curgeneration)
  814                         goto rescan;
  815 
  816                 /*
  817                  * Try to optimize the next page.  If we can't we pick up
  818                  * our (random) scan where we left off.
  819                  */
  820                 if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) {
  821                         if ((p = vm_page_lookup(object, pi + n)) != NULL)
  822                                 goto again;
  823                 }
  824         }
  825 
  826 #if 0
  827         VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
  828 #endif
  829 
  830         vm_object_clear_flag(object, OBJ_CLEANING);
  831         return;
  832 }
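/*
 * Editor's illustrative sketch (not part of the original file): how an
 * msync()-style caller might flush a byte range of a vnode-backed object
 * synchronously.  The indices are page numbers, so byte offsets go through
 * OFF_TO_IDX(); per the odd semantics noted above, passing start == end == 0
 * cleans the whole object.  The example_flush_range() helper is hypothetical.
 */
#if 0
static void
example_flush_range(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end)
{
        vm_object_page_clean(object, OFF_TO_IDX(start), OFF_TO_IDX(end),
            OBJPC_SYNC);
}
#endif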
  833 
  834 static int
  835 vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags)
  836 {
  837         int runlen;
  838         int s;
  839         int maxf;
  840         int chkb;
  841         int maxb;
  842         int i;
  843         vm_pindex_t pi;
  844         vm_page_t maf[vm_pageout_page_count];
  845         vm_page_t mab[vm_pageout_page_count];
  846         vm_page_t ma[vm_pageout_page_count];
  847 
  848         s = splvm();
  849         pi = p->pindex;
  850         while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
  851                 if (object->generation != curgeneration) {
  852                         splx(s);
  853                         return(0);
  854                 }
  855         }
  856         vm_page_lock_queues();
  857         maxf = 0;
  858         for(i = 1; i < vm_pageout_page_count; i++) {
  859                 vm_page_t tp;
  860 
  861                 if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
  862                         if ((tp->flags & PG_BUSY) ||
  863                                 (tp->flags & PG_CLEANCHK) == 0 ||
  864                                 (tp->busy != 0))
  865                                 break;
  866                         if((tp->queue - tp->pc) == PQ_CACHE) {
  867                                 vm_page_flag_clear(tp, PG_CLEANCHK);
  868                                 break;
  869                         }
  870                         vm_page_test_dirty(tp);
  871                         if ((tp->dirty & tp->valid) == 0) {
  872                                 vm_page_flag_clear(tp, PG_CLEANCHK);
  873                                 break;
  874                         }
  875                         maf[ i - 1 ] = tp;
  876                         maxf++;
  877                         continue;
  878                 }
  879                 break;
  880         }
  881 
  882         maxb = 0;
  883         chkb = vm_pageout_page_count -  maxf;
  884         if (chkb) {
  885                 for(i = 1; i < chkb;i++) {
  886                         vm_page_t tp;
  887 
  888                         if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
  889                                 if ((tp->flags & PG_BUSY) ||
  890                                         (tp->flags & PG_CLEANCHK) == 0 ||
  891                                         (tp->busy != 0))
  892                                         break;
  893                                 if ((tp->queue - tp->pc) == PQ_CACHE) {
  894                                         vm_page_flag_clear(tp, PG_CLEANCHK);
  895                                         break;
  896                                 }
  897                                 vm_page_test_dirty(tp);
  898                                 if ((tp->dirty & tp->valid) == 0) {
  899                                         vm_page_flag_clear(tp, PG_CLEANCHK);
  900                                         break;
  901                                 }
  902                                 mab[ i - 1 ] = tp;
  903                                 maxb++;
  904                                 continue;
  905                         }
  906                         break;
  907                 }
  908         }
  909 
  910         for(i = 0; i < maxb; i++) {
  911                 int index = (maxb - i) - 1;
  912                 ma[index] = mab[i];
  913                 vm_page_flag_clear(ma[index], PG_CLEANCHK);
  914         }
  915         vm_page_flag_clear(p, PG_CLEANCHK);
  916         ma[maxb] = p;
  917         for(i = 0; i < maxf; i++) {
  918                 int index = (maxb + i) + 1;
  919                 ma[index] = maf[i];
  920                 vm_page_flag_clear(ma[index], PG_CLEANCHK);
  921         }
  922         runlen = maxb + maxf + 1;
  923 
  924         splx(s);
  925         vm_pageout_flush(ma, runlen, pagerflags);
  926         for (i = 0; i < runlen; i++) {
  927                 if (ma[i]->valid & ma[i]->dirty) {
  928                         pmap_page_protect(ma[i], VM_PROT_READ);
  929                         vm_page_flag_set(ma[i], PG_CLEANCHK);
  930 
  931                         /*
  932                          * maxf will end up being the actual number of pages
  933                          * we wrote out contiguously, non-inclusive of the
  934                          * first page.  We do not count look-behind pages.
  935                          */
  936                         if (i >= maxb + 1 && (maxf > i - maxb - 1))
  937                                 maxf = i - maxb - 1;
  938                 }
  939         }
  940         vm_page_unlock_queues();
  941         return(maxf + 1);
  942 }
  943 
  944 #ifdef ENABLE_VFS_IOOPT
  945 /*
  946  * Same as vm_object_pmap_copy, except range checking really
  947  * works, and is meant for small sections of an object.
  948  *
  949  * This code protects resident pages by making them read-only
  950  * and is typically called on a fork or split when a page
  951  * is converted to copy-on-write.  
  952  *
  953  * NOTE: If the page is already at VM_PROT_NONE, calling
  954  * pmap_page_protect will have no effect.
  955  */
  956 void
  957 vm_object_pmap_copy_1(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
  958 {
  959         vm_pindex_t idx;
  960         vm_page_t p;
  961 
  962         GIANT_REQUIRED;
  963 
  964         if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0)
  965                 return;
  966         vm_page_lock_queues();
  967         for (idx = start; idx < end; idx++) {
  968                 p = vm_page_lookup(object, idx);
  969                 if (p == NULL)
  970                         continue;
  971                 pmap_page_protect(p, VM_PROT_READ);
  972         }
  973         vm_page_unlock_queues();
  974 }
  975 #endif
  976 
  977 /*
  978  *      vm_object_madvise:
  979  *
  980  *      Implements the madvise function at the object/page level.
  981  *
  982  *      MADV_WILLNEED   (any object)
  983  *
  984  *          Activate the specified pages if they are resident.
  985  *
  986  *      MADV_DONTNEED   (any object)
  987  *
  988  *          Deactivate the specified pages if they are resident.
  989  *
  990  *      MADV_FREE       (OBJT_DEFAULT/OBJT_SWAP objects,
  991  *                       OBJ_ONEMAPPING only)
  992  *
  993  *          Deactivate and clean the specified pages if they are
  994  *          resident.  This permits the process to reuse the pages
  995  *          without faulting or the kernel to reclaim the pages
  996  *          without I/O.
  997  */
  998 void
  999 vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
 1000 {
 1001         vm_pindex_t end, tpindex;
 1002         vm_object_t tobject;
 1003         vm_page_t m;
 1004 
 1005         if (object == NULL)
 1006                 return;
 1007 
 1008         vm_object_lock(object);
 1009 
 1010         end = pindex + count;
 1011 
 1012         /*
 1013          * Locate and adjust resident pages
 1014          */
 1015         for (; pindex < end; pindex += 1) {
 1016 relookup:
 1017                 tobject = object;
 1018                 tpindex = pindex;
 1019 shadowlookup:
 1020                 /*
 1021                  * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
 1022                  * and those pages must be OBJ_ONEMAPPING.
 1023                  */
 1024                 if (advise == MADV_FREE) {
 1025                         if ((tobject->type != OBJT_DEFAULT &&
 1026                              tobject->type != OBJT_SWAP) ||
 1027                             (tobject->flags & OBJ_ONEMAPPING) == 0) {
 1028                                 continue;
 1029                         }
 1030                 }
 1031 
 1032                 m = vm_page_lookup(tobject, tpindex);
 1033 
 1034                 if (m == NULL) {
 1035                         /*
 1036                          * There may be swap even if there is no backing page
 1037                          */
 1038                         if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
 1039                                 swap_pager_freespace(tobject, tpindex, 1);
 1040 
 1041                         /*
 1042                          * next object
 1043                          */
 1044                         tobject = tobject->backing_object;
 1045                         if (tobject == NULL)
 1046                                 continue;
 1047                         tpindex += OFF_TO_IDX(tobject->backing_object_offset);
 1048                         goto shadowlookup;
 1049                 }
 1050 
 1051                 /*
 1052                  * If the page is busy or not in a normal active state,
 1053                  * we skip it.  If the page is not managed there are no
 1054                  * page queues to mess with.  Things can break if we mess
 1055                  * with pages in any of the below states.
 1056                  */
 1057                 vm_page_lock_queues();
 1058                 if (m->hold_count ||
 1059                     m->wire_count ||
 1060                     (m->flags & PG_UNMANAGED) ||
 1061                     m->valid != VM_PAGE_BITS_ALL) {
 1062                         vm_page_unlock_queues();
 1063                         continue;
 1064                 }
 1065                 if (vm_page_sleep_if_busy(m, TRUE, "madvpo"))
 1066                         goto relookup;
 1067                 if (advise == MADV_WILLNEED) {
 1068                         vm_page_activate(m);
 1069                 } else if (advise == MADV_DONTNEED) {
 1070                         vm_page_dontneed(m);
 1071                 } else if (advise == MADV_FREE) {
 1072                         /*
 1073                          * Mark the page clean.  This will allow the page
 1074                          * to be freed up by the system.  However, such pages
 1075                          * are often reused quickly by malloc()/free()
 1076                          * so we do not do anything that would cause
 1077                          * a page fault if we can help it.
 1078                          *
 1079                          * Specifically, we do not try to actually free
 1080                          * the page now nor do we try to put it in the
 1081                          * cache (which would cause a page fault on reuse).
 1082                          *
 1083                          * But we do make the page as freeable as we
 1084                          * can without actually taking the step of unmapping
 1085                          * it.
 1086                          */
 1087                         pmap_clear_modify(m);
 1088                         m->dirty = 0;
 1089                         m->act_count = 0;
 1090                         vm_page_dontneed(m);
 1091                 }
 1092                 vm_page_unlock_queues();
 1093                 if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
 1094                         swap_pager_freespace(tobject, tpindex, 1);
 1095         }       
 1096         vm_object_unlock(object);
 1097 }
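/*
 * Editor's illustrative sketch (not part of the original file): the shape of
 * the per-entry call a map-level madvise implementation would make.  The
 * pindex and count arguments are page granular, derived here from the
 * entry's byte range with atop(); "advise" is MADV_WILLNEED, MADV_DONTNEED
 * or MADV_FREE as documented above.  The example_madvise_entry() helper is
 * hypothetical.
 */
#if 0
static void
example_madvise_entry(vm_map_entry_t entry, int advise)
{
        vm_object_madvise(entry->object.vm_object, OFF_TO_IDX(entry->offset),
            atop(entry->end - entry->start), advise);
}
#endif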
 1098 
 1099 /*
 1100  *      vm_object_shadow:
 1101  *
 1102  *      Create a new object which is backed by the
 1103  *      specified existing object range.  The source
 1104  *      object reference is deallocated.
 1105  *
 1106  *      The new object and offset into that object
 1107  *      are returned in the source parameters.
 1108  */
 1109 void
 1110 vm_object_shadow(
 1111         vm_object_t *object,    /* IN/OUT */
 1112         vm_ooffset_t *offset,   /* IN/OUT */
 1113         vm_size_t length)
 1114 {
 1115         vm_object_t source;
 1116         vm_object_t result;
 1117 
 1118         source = *object;
 1119 
 1120         vm_object_lock(source);
 1121         /*
 1122          * Don't create the new object if the old object isn't shared.
 1123          */
 1124         if (source != NULL &&
 1125             source->ref_count == 1 &&
 1126             source->handle == NULL &&
 1127             (source->type == OBJT_DEFAULT ||
 1128              source->type == OBJT_SWAP)) {
 1129                 vm_object_unlock(source);
 1130                 return;
 1131         }
 1132 
 1133         /*
 1134          * Allocate a new object with the given length
 1135          */
 1136         result = vm_object_allocate(OBJT_DEFAULT, length);
 1137         KASSERT(result != NULL, ("vm_object_shadow: no object for shadowing"));
 1138 
 1139         /*
 1140          * The new object shadows the source object, adding a reference to it.
 1141          * Our caller changes his reference to point to the new object,
 1142          * removing a reference to the source object.  Net result: no change
 1143          * of reference count.
 1144          *
 1145          * Try to optimize the result object's page color when shadowing
 1146          * in order to maintain page coloring consistency in the combined 
 1147          * shadowed object.
 1148          */
 1149         result->backing_object = source;
 1150         if (source) {
 1151                 TAILQ_INSERT_TAIL(&source->shadow_head, result, shadow_list);
 1152                 source->shadow_count++;
 1153                 source->generation++;
 1154                 if (length < source->size)
 1155                         length = source->size;
 1156                 if (length > PQ_L2_SIZE / 3 + PQ_PRIME1 ||
 1157                     source->generation > 1)
 1158                         length = PQ_L2_SIZE / 3 + PQ_PRIME1;
 1159                 result->pg_color = (source->pg_color +
 1160                     length * source->generation) & PQ_L2_MASK;
 1161                 next_index = (result->pg_color + PQ_L2_SIZE / 3 + PQ_PRIME1) &
 1162                     PQ_L2_MASK;
 1163         }
 1164 
 1165         /*
 1166          * Store the offset into the source object, and fix up the offset into
 1167          * the new object.
 1168          */
 1169         result->backing_object_offset = *offset;
 1170 
 1171         /*
 1172          * Return the new things
 1173          */
 1174         *offset = 0;
 1175         *object = result;
 1176 
 1177         vm_object_unlock(source);
 1178 }
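/*
 * Editor's illustrative sketch (not part of the original file): typical
 * copy-on-write setup using vm_object_shadow().  The caller passes its
 * current object pointer and offset by reference; on return they name the
 * new shadow object, which now holds the reference the caller used to hold
 * on the source, so the net reference count is unchanged.  atop() converts
 * the entry's byte length to pages; example_shadow_entry() is hypothetical.
 */
#if 0
static void
example_shadow_entry(vm_map_entry_t entry)
{
        vm_object_shadow(&entry->object.vm_object, &entry->offset,
            atop(entry->end - entry->start));
}
#endif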
 1179 
 1180 /*
 1181  *      vm_object_split:
 1182  *
 1183  * Split the pages in a map entry into a new object.  This affords
 1184  * easier removal of unused pages, and keeps object inheritance from
 1185  * having a negative impact on memory usage.
 1186  */
 1187 void
 1188 vm_object_split(vm_map_entry_t entry)
 1189 {
 1190         vm_page_t m;
 1191         vm_object_t orig_object, new_object, source;
 1192         vm_offset_t s, e;
 1193         vm_pindex_t offidxstart, offidxend;
 1194         vm_size_t idx, size;
 1195         vm_ooffset_t offset;
 1196 
 1197         GIANT_REQUIRED;
 1198 
 1199         orig_object = entry->object.vm_object;
 1200         if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
 1201                 return;
 1202         if (orig_object->ref_count <= 1)
 1203                 return;
 1204 
 1205         offset = entry->offset;
 1206         s = entry->start;
 1207         e = entry->end;
 1208 
 1209         offidxstart = OFF_TO_IDX(offset);
 1210         offidxend = offidxstart + OFF_TO_IDX(e - s);
 1211         size = offidxend - offidxstart;
 1212 
 1213         new_object = vm_pager_allocate(orig_object->type,
 1214                 NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
 1215         if (new_object == NULL)
 1216                 return;
 1217 
 1218         source = orig_object->backing_object;
 1219         if (source != NULL) {
 1220                 vm_object_reference(source);    /* Referenced by new_object */
 1221                 TAILQ_INSERT_TAIL(&source->shadow_head,
 1222                                   new_object, shadow_list);
 1223                 vm_object_clear_flag(source, OBJ_ONEMAPPING);
 1224                 new_object->backing_object_offset = 
 1225                         orig_object->backing_object_offset + offset;
 1226                 new_object->backing_object = source;
 1227                 source->shadow_count++;
 1228                 source->generation++;
 1229         }
 1230         for (idx = 0; idx < size; idx++) {
 1231         retry:
 1232                 m = vm_page_lookup(orig_object, offidxstart + idx);
 1233                 if (m == NULL)
 1234                         continue;
 1235 
 1236                 /*
 1237                  * We must wait for pending I/O to complete before we can
 1238                  * rename the page.
 1239                  *
 1240                  * We do not have to VM_PROT_NONE the page as mappings should
 1241                  * not be changed by this operation.
 1242                  */
 1243                 vm_page_lock_queues();
 1244                 if (vm_page_sleep_if_busy(m, TRUE, "spltwt"))
 1245                         goto retry;
 1246                         
 1247                 vm_page_busy(m);
 1248                 vm_page_unlock_queues();
 1249                 vm_page_rename(m, new_object, idx);
 1250                 /* page automatically made dirty by rename and cache handled */
 1251                 vm_page_busy(m);
 1252         }
 1253         if (orig_object->type == OBJT_SWAP) {
 1254                 vm_object_pip_add(orig_object, 1);
 1255                 /*
 1256                  * copy orig_object pages into new_object
 1257                  * and destroy unneeded pages in
 1258                  * shadow object.
 1259                  */
 1260                 swap_pager_copy(orig_object, new_object, offidxstart, 0);
 1261                 vm_object_pip_wakeup(orig_object);
 1262         }
 1263         TAILQ_FOREACH(m, &new_object->memq, listq)
 1264                 vm_page_wakeup(m);
 1265         entry->object.vm_object = new_object;
 1266         entry->offset = 0LL;
 1267         vm_object_deallocate(orig_object);
 1268 }
 1269 
 1270 #define OBSC_TEST_ALL_SHADOWED  0x0001
 1271 #define OBSC_COLLAPSE_NOWAIT    0x0002
 1272 #define OBSC_COLLAPSE_WAIT      0x0004
 1273 
 1274 static __inline int
 1275 vm_object_backing_scan(vm_object_t object, int op)
 1276 {
 1277         int s;
 1278         int r = 1;
 1279         vm_page_t p;
 1280         vm_object_t backing_object;
 1281         vm_pindex_t backing_offset_index;
 1282 
 1283         s = splvm();
 1284         GIANT_REQUIRED;
 1285 
 1286         backing_object = object->backing_object;
 1287         backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
 1288 
 1289         /*
 1290          * Initial conditions
 1291          */
 1292         if (op & OBSC_TEST_ALL_SHADOWED) {
 1293                 /*
 1294                  * We do not want to have to test for the existence of
 1295                  * swap pages in the backing object.  XXX but with the
 1296                  * new swapper this would be pretty easy to do.
 1297                  *
 1298                  * XXX what about anonymous MAP_SHARED memory that hasn't
 1299                  * been ZFOD faulted yet?  If we do not test for this, the
 1300                  * shadow test may succeed! XXX
 1301                  */
 1302                 if (backing_object->type != OBJT_DEFAULT) {
 1303                         splx(s);
 1304                         return (0);
 1305                 }
 1306         }
 1307         if (op & OBSC_COLLAPSE_WAIT) {
 1308                 vm_object_set_flag(backing_object, OBJ_DEAD);
 1309         }
 1310 
 1311         /*
 1312          * Our scan
 1313          */
 1314         p = TAILQ_FIRST(&backing_object->memq);
 1315         while (p) {
 1316                 vm_page_t next = TAILQ_NEXT(p, listq);
 1317                 vm_pindex_t new_pindex = p->pindex - backing_offset_index;
 1318 
 1319                 if (op & OBSC_TEST_ALL_SHADOWED) {
 1320                         vm_page_t pp;
 1321 
 1322                         /*
 1323                          * Ignore pages outside the parent object's range
 1324                          * and outside the parent object's mapping of the 
 1325                          * backing object.
 1326                          *
 1327                          * note that we do not busy the backing object's
 1328                          * page.
 1329                          */
 1330                         if (
 1331                             p->pindex < backing_offset_index ||
 1332                             new_pindex >= object->size
 1333                         ) {
 1334                                 p = next;
 1335                                 continue;
 1336                         }
 1337 
 1338                         /*
 1339                          * See if the parent has the page or if the parent's
 1340                          * object pager has the page.  If the parent has the
 1341                          * page but the page is not valid, the parent's
 1342                          * object pager must have the page.
 1343                          *
 1344                          * If this fails, the parent does not completely shadow
 1345                          * the object and we might as well give up now.
 1346                          */
 1347 
 1348                         pp = vm_page_lookup(object, new_pindex);
 1349                         if (
 1350                             (pp == NULL || pp->valid == 0) &&
 1351                             !vm_pager_has_page(object, new_pindex, NULL, NULL)
 1352                         ) {
 1353                                 r = 0;
 1354                                 break;
 1355                         }
 1356                 }
 1357 
 1358                 /*
 1359                  * Check for busy page
 1360                  */
 1361                 if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
 1362                         vm_page_t pp;
 1363 
 1364                         vm_page_lock_queues();
 1365                         if (op & OBSC_COLLAPSE_NOWAIT) {
 1366                                 if ((p->flags & PG_BUSY) ||
 1367                                     !p->valid || 
 1368                                     p->hold_count || 
 1369                                     p->wire_count ||
 1370                                     p->busy) {
 1371                                         vm_page_unlock_queues();
 1372                                         p = next;
 1373                                         continue;
 1374                                 }
 1375                         } else if (op & OBSC_COLLAPSE_WAIT) {
 1376                                 if (vm_page_sleep_if_busy(p, TRUE, "vmocol")) {
 1377                                         /*
 1378                                          * If we slept, anything could have
 1379                                          * happened.  Since the object is
 1380                                          * marked dead, the backing offset
 1381                                          * should not have changed so we
 1382                                          * just restart our scan.
 1383                                          */
 1384                                         p = TAILQ_FIRST(&backing_object->memq);
 1385                                         continue;
 1386                                 }
 1387                         }
 1388 
 1389                         /* 
 1390                          * Busy the page
 1391                          */
 1392                         vm_page_busy(p);
 1393                         vm_page_unlock_queues();
 1394 
 1395                         KASSERT(
 1396                             p->object == backing_object,
 1397                             ("vm_object_backing_scan(): object mismatch")
 1398                         );
 1399 
 1400                         /*
 1401                          * Destroy any associated swap
 1402                          */
 1403                         if (backing_object->type == OBJT_SWAP) {
 1404                                 swap_pager_freespace(
 1405                                     backing_object, 
 1406                                     p->pindex,
 1407                                     1
 1408                                 );
 1409                         }
 1410 
 1411                         if (
 1412                             p->pindex < backing_offset_index ||
 1413                             new_pindex >= object->size
 1414                         ) {
 1415                                 /*
 1416                                  * Page is out of the parent object's range, so
 1417                                  * we can simply destroy it.
 1418                                  */
 1419                                 vm_page_lock_queues();
 1420                                 pmap_remove_all(p);
 1421                                 vm_page_free(p);
 1422                                 vm_page_unlock_queues();
 1423                                 p = next;
 1424                                 continue;
 1425                         }
 1426 
 1427                         pp = vm_page_lookup(object, new_pindex);
 1428                         if (
 1429                             pp != NULL ||
 1430                             vm_pager_has_page(object, new_pindex, NULL, NULL)
 1431                         ) {
 1432                                 /*
 1433                                  * page already exists in parent OR swap exists
 1434                                  * for this location in the parent.  Destroy 
 1435                                  * the original page from the backing object.
 1436                                  *
 1437                                  * Leave the parent's page alone
 1438                                  */
 1439                                 vm_page_lock_queues();
 1440                                 pmap_remove_all(p);
 1441                                 vm_page_free(p);
 1442                                 vm_page_unlock_queues();
 1443                                 p = next;
 1444                                 continue;
 1445                         }
 1446 
 1447                         /*
 1448                          * Page does not exist in parent, rename the
 1449                          * page from the backing object to the main object. 
 1450                          *
 1451                          * If the page was mapped to a process, it can remain 
 1452                          * mapped through the rename.
 1453                          */
 1454                         vm_page_rename(p, object, new_pindex);
 1455                         /* page automatically made dirty by rename */
 1456                 }
 1457                 p = next;
 1458         }
 1459         splx(s);
 1460         return (r);
 1461 }
 1462 
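The OBSC_TEST_ALL_SHADOWED pass above answers a single yes/no question: does the parent already cover, with a valid page or with pager-backed data, every resident backing-object page that falls inside the parent's window onto the backing object?  A minimal userspace sketch of that walk follows; the toy types and the parent_covers() helper are hypothetical stand-ins for vm_page_lookup()/vm_pager_has_page(), not kernel interfaces.

#include <stdbool.h>
#include <stddef.h>

struct toy_page {
        size_t  pindex;         /* logical index of the page in its object */
        bool    valid;          /* page contents are valid */
};

/*
 * Hypothetical stand-in for "the parent has the page, or the parent's
 * pager can supply it".
 */
static bool
parent_covers(const struct toy_page *parent, size_t nparent, size_t pindex)
{
        for (size_t i = 0; i < nparent; i++)
                if (parent[i].pindex == pindex && parent[i].valid)
                        return (true);
        return (false);
}

/*
 * Mirror of the "r = 0; break;" logic above: the parent completely
 * shadows the backing object only if every backing page that maps into
 * the parent's range is covered there.
 */
static bool
all_shadowed(const struct toy_page *backing, size_t nbacking,
    const struct toy_page *parent, size_t nparent,
    size_t backing_offset_index, size_t parent_size)
{
        for (size_t i = 0; i < nbacking; i++) {
                size_t new_pindex;

                /* Ignore pages outside the parent's mapping of the backer. */
                if (backing[i].pindex < backing_offset_index)
                        continue;
                new_pindex = backing[i].pindex - backing_offset_index;
                if (new_pindex >= parent_size)
                        continue;
                if (!parent_covers(parent, nparent, new_pindex))
                        return (false);
        }
        return (true);
}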
 1463 
 1464 /*
 1465  * This version of collapse allows the operation to occur earlier, even
 1466  * while paging_in_progress is true for an object.  It is not a complete
 1467  * operation, but it should plug 99.9% of the remaining leaks.
 1468  */
 1469 static void
 1470 vm_object_qcollapse(vm_object_t object)
 1471 {
 1472         vm_object_t backing_object = object->backing_object;
 1473 
 1474         GIANT_REQUIRED;
 1475 
 1476         if (backing_object->ref_count != 1)
 1477                 return;
 1478 
 1479         backing_object->ref_count += 2;
 1480 
 1481         vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
 1482 
 1483         backing_object->ref_count -= 2;
 1484 }
 1485 
 1486 /*
 1487  *      vm_object_collapse:
 1488  *
 1489  *      Collapse an object with the object backing it.
 1490  *      Pages in the backing object are moved into the
 1491  *      parent, and the backing object is deallocated.
 1492  */
 1493 void
 1494 vm_object_collapse(vm_object_t object)
 1495 {
 1496         GIANT_REQUIRED;
 1497         
 1498         while (TRUE) {
 1499                 vm_object_t backing_object;
 1500 
 1501                 /*
 1502                  * Verify that the conditions are right for collapse:
 1503                  *
 1504                  * The object exists and the backing object exists.
 1505                  */
 1506                 if (object == NULL)
 1507                         break;
 1508 
 1509                 if ((backing_object = object->backing_object) == NULL)
 1510                         break;
 1511 
 1512                 /*
 1513                  * We check the backing object first, because it is most likely
 1514                  * not collapsible.
 1515                  */
 1516                 if (backing_object->handle != NULL ||
 1517                     (backing_object->type != OBJT_DEFAULT &&
 1518                      backing_object->type != OBJT_SWAP) ||
 1519                     (backing_object->flags & OBJ_DEAD) ||
 1520                     object->handle != NULL ||
 1521                     (object->type != OBJT_DEFAULT &&
 1522                      object->type != OBJT_SWAP) ||
 1523                     (object->flags & OBJ_DEAD)) {
 1524                         break;
 1525                 }
 1526 
 1527                 if (
 1528                     object->paging_in_progress != 0 ||
 1529                     backing_object->paging_in_progress != 0
 1530                 ) {
 1531                         vm_object_qcollapse(object);
 1532                         break;
 1533                 }
 1534 
 1535                 /*
 1536                  * We know that we can either collapse the backing object (if
 1537                  * the parent is the only reference to it) or (perhaps) have
 1538                  * the parent bypass the object if the parent happens to shadow
 1539                  * all the resident pages in the entire backing object.
 1540                  *
 1541                  * This is ignoring pager-backed pages such as swap pages.
 1542                  * vm_object_backing_scan fails the shadowing test in this
 1543                  * case.
 1544                  */
 1545                 if (backing_object->ref_count == 1) {
 1546                         /*
 1547                          * If there is exactly one reference to the backing
 1548                          * object, we can collapse it into the parent.  
 1549                          */
 1550                         vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
 1551 
 1552                         /*
 1553                          * Move the pager from backing_object to object.
 1554                          */
 1555                         if (backing_object->type == OBJT_SWAP) {
 1556                                 vm_object_pip_add(backing_object, 1);
 1557 
 1558                                 /*
 1559                                  * Scrap the paging_offset junk and do a
 1560                                  * discrete copy.  This also removes major
 1561                                  * assumptions about how the swap pager
 1562                                  * works from code where they do not
 1563                                  * belong.  The new swapper is able to
 1564                                  * optimize the destroy-source case.
 1565                                  */
 1566                                 vm_object_pip_add(object, 1);
 1567                                 swap_pager_copy(
 1568                                     backing_object,
 1569                                     object,
 1570                                     OFF_TO_IDX(object->backing_object_offset), TRUE);
 1571                                 vm_object_pip_wakeup(object);
 1572 
 1573                                 vm_object_pip_wakeup(backing_object);
 1574                         }
 1575                         /*
 1576                          * Object now shadows whatever backing_object did.
 1577                          * Note that the reference to 
 1578                          * backing_object->backing_object moves from within 
 1579                          * backing_object to within object.
 1580                          */
 1581                         TAILQ_REMOVE(
 1582                             &object->backing_object->shadow_head, 
 1583                             object,
 1584                             shadow_list
 1585                         );
 1586                         object->backing_object->shadow_count--;
 1587                         object->backing_object->generation++;
 1588                         if (backing_object->backing_object) {
 1589                                 TAILQ_REMOVE(
 1590                                     &backing_object->backing_object->shadow_head,
 1591                                     backing_object, 
 1592                                     shadow_list
 1593                                 );
 1594                                 backing_object->backing_object->shadow_count--;
 1595                                 backing_object->backing_object->generation++;
 1596                         }
 1597                         object->backing_object = backing_object->backing_object;
 1598                         if (object->backing_object) {
 1599                                 TAILQ_INSERT_TAIL(
 1600                                     &object->backing_object->shadow_head,
 1601                                     object, 
 1602                                     shadow_list
 1603                                 );
 1604                                 object->backing_object->shadow_count++;
 1605                                 object->backing_object->generation++;
 1606                         }
 1607 
 1608                         object->backing_object_offset +=
 1609                             backing_object->backing_object_offset;
 1610 
 1611                         /*
 1612                          * Discard backing_object.
 1613                          *
 1614                          * Since the backing object has no pages, no pager left,
 1615                          * and no object references within it, all that is
 1616                          * necessary is to dispose of it.
 1617                          */
 1618                         KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
 1619                         KASSERT(TAILQ_FIRST(&backing_object->memq) == NULL, ("backing_object %p somehow has left over pages during collapse!", backing_object));
 1620 
 1621                         mtx_lock(&vm_object_list_mtx);
 1622                         TAILQ_REMOVE(
 1623                             &vm_object_list, 
 1624                             backing_object,
 1625                             object_list
 1626                         );
 1627                         mtx_unlock(&vm_object_list_mtx);
 1628 
 1629                         uma_zfree(obj_zone, backing_object);
 1630 
 1631                         object_collapses++;
 1632                 } else {
 1633                         vm_object_t new_backing_object;
 1634 
 1635                         /*
 1636                          * If we do not entirely shadow the backing object,
 1637                          * there is nothing we can do, so we give up.
 1638                          */
 1639                         if (vm_object_backing_scan(object, OBSC_TEST_ALL_SHADOWED) == 0) {
 1640                                 break;
 1641                         }
 1642 
 1643                         /*
 1644                          * Make the parent shadow the next object in the
 1645                          * chain.  Deallocating backing_object will not remove
 1646                          * it, since its reference count is at least 2.
 1647                          */
 1648                         TAILQ_REMOVE(
 1649                             &backing_object->shadow_head,
 1650                             object,
 1651                             shadow_list
 1652                         );
 1653                         backing_object->shadow_count--;
 1654                         backing_object->generation++;
 1655 
 1656                         new_backing_object = backing_object->backing_object;
 1657                         if ((object->backing_object = new_backing_object) != NULL) {
 1658                                 vm_object_reference(new_backing_object);
 1659                                 TAILQ_INSERT_TAIL(
 1660                                     &new_backing_object->shadow_head,
 1661                                     object,
 1662                                     shadow_list
 1663                                 );
 1664                                 new_backing_object->shadow_count++;
 1665                                 new_backing_object->generation++;
 1666                                 object->backing_object_offset +=
 1667                                         backing_object->backing_object_offset;
 1668                         }
 1669 
 1670                         /*
 1671                          * Drop the reference count on backing_object. Since
 1672                          * its ref_count was at least 2, it will not vanish,
 1673                          * so a bare decrement would suffice; we call
 1674                          * vm_object_deallocate() anyway.
 1675                          */
 1676                         vm_object_deallocate(backing_object);
 1677                         object_bypasses++;
 1678                 }
 1679 
 1680                 /*
 1681                  * Try again with this object's new backing object.
 1682                  */
 1683         }
 1684 }
 1685 
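Besides migrating the pages, the full-collapse branch above performs some shadow-list surgery: the parent is unhooked from the dying backing object, re-attached to whatever that object was itself backed by, and the two backing offsets are composed.  The sketch below isolates just that bookkeeping with hypothetical toy types; generation counters, reference counting, and locking are deliberately omitted.

#include <sys/queue.h>

TAILQ_HEAD(toy_shadow_head, toy_object);

struct toy_object {
        struct toy_shadow_head   shadow_head;          /* objects shadowing this one */
        TAILQ_ENTRY(toy_object)  shadow_list;          /* link in the backer's list */
        struct toy_object       *backing_object;
        unsigned long            backing_object_offset;
        int                      shadow_count;
};

/* Relink "object" past "backing", which is about to be destroyed. */
static void
toy_collapse_relink(struct toy_object *object, struct toy_object *backing)
{
        /* The parent no longer shadows the backing object... */
        TAILQ_REMOVE(&backing->shadow_head, object, shadow_list);
        backing->shadow_count--;

        /* ...and the backing object no longer shadows its own backer. */
        if (backing->backing_object != NULL) {
                TAILQ_REMOVE(&backing->backing_object->shadow_head,
                    backing, shadow_list);
                backing->backing_object->shadow_count--;
        }

        /* The parent now shadows whatever the backing object shadowed. */
        object->backing_object = backing->backing_object;
        if (object->backing_object != NULL) {
                TAILQ_INSERT_TAIL(&object->backing_object->shadow_head,
                    object, shadow_list);
                object->backing_object->shadow_count++;
        }

        /* Offsets compose along the chain. */
        object->backing_object_offset += backing->backing_object_offset;
}
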
 1686 /*
 1687  *      vm_object_page_remove: [internal]
 1688  *
 1689  *      Removes all physical pages in the specified
 1690  *      object range from the object's list of pages.
 1691  *
 1692  *      The object must be locked.
 1693  */
 1694 void
 1695 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, boolean_t clean_only)
 1696 {
 1697         vm_page_t p, next;
 1698         vm_pindex_t size;
 1699         int all;
 1700 
 1701         if (object == NULL)
 1702                 return;
 1703 
 1704         mtx_lock(&Giant);
 1705         if (object->resident_page_count == 0) {
 1706                 mtx_unlock(&Giant);
 1707                 return;
 1708         }
 1709         all = ((end == 0) && (start == 0));
 1710 
 1711         /*
 1712          * Since physically-backed objects do not use managed pages, we can't
 1713          * remove pages from the object (we must instead remove the page
 1714          * references, and then destroy the object).
 1715          */
 1716         KASSERT(object->type != OBJT_PHYS, ("attempt to remove pages from a physical object"));
 1717 
 1718         vm_object_pip_add(object, 1);
 1719 again:
 1720         vm_page_lock_queues();
 1721         size = end - start;
 1722         if (all || size > object->resident_page_count / 4) {
 1723                 for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) {
 1724                         next = TAILQ_NEXT(p, listq);
 1725                         if (all || ((start <= p->pindex) && (p->pindex < end))) {
 1726                                 if (p->wire_count != 0) {
 1727                                         pmap_remove_all(p);
 1728                                         if (!clean_only)
 1729                                                 p->valid = 0;
 1730                                         continue;
 1731                                 }
 1732 
 1733                                 /*
 1734                                  * The busy flags are only cleared at
 1735                                  * interrupt -- minimize the spl transitions
 1736                                  */
 1737                                 if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
 1738                                         goto again;
 1739 
 1740                                 if (clean_only && p->valid) {
 1741                                         vm_page_test_dirty(p);
 1742                                         if (p->valid & p->dirty)
 1743                                                 continue;
 1744                                 }
 1745                                 vm_page_busy(p);
 1746                                 pmap_remove_all(p);
 1747                                 vm_page_free(p);
 1748                         }
 1749                 }
 1750         } else {
 1751                 while (size > 0) {
 1752                         if ((p = vm_page_lookup(object, start)) != NULL) {
 1753                                 if (p->wire_count != 0) {
 1754                                         pmap_remove_all(p);
 1755                                         if (!clean_only)
 1756                                                 p->valid = 0;
 1757                                         start += 1;
 1758                                         size -= 1;
 1759                                         continue;
 1760                                 }
 1761 
 1762                                 /*
 1763                                  * The busy flags are only cleared at
 1764                                  * interrupt -- minimize the spl transitions
 1765                                  */
 1766                                 if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
 1767                                         goto again;
 1768 
 1769                                 if (clean_only && p->valid) {
 1770                                         vm_page_test_dirty(p);
 1771                                         if (p->valid & p->dirty) {
 1772                                                 start += 1;
 1773                                                 size -= 1;
 1774                                                 continue;
 1775                                         }
 1776                                 }
 1777                                 vm_page_busy(p);
 1778                                 pmap_remove_all(p);
 1779                                 vm_page_free(p);
 1780                         }
 1781                         start += 1;
 1782                         size -= 1;
 1783                 }
 1784         }
 1785         vm_page_unlock_queues();
 1786         vm_object_pip_wakeup(object);
 1787         mtx_unlock(&Giant);
 1788 }
 1789 
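The removal loop above chooses between two strategies: when the caller asks for everything (start == end == 0), or when the range spans more than a quarter of the object's resident pages, a single pass over the object's page list is cheaper than a vm_page_lookup() per index.  A one-function sketch of that decision, with hypothetical names:

#include <stdbool.h>
#include <stddef.h>

/* Cheaper to walk the whole resident-page list than to look up each index? */
static bool
prefer_list_scan(bool all, size_t range_pages, size_t resident_pages)
{
        return (all || range_pages > resident_pages / 4);
}
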
 1790 /*
 1791  *      Routine:        vm_object_coalesce
 1792  *      Function:       Coalesces two objects backing up adjoining
 1793  *                      regions of memory into a single object.
 1794  *
 1795  *      returns TRUE if objects were combined.
 1796  *
 1797  *      NOTE:   Only works at the moment if the second object is NULL -
 1798  *              if it's not, which object do we lock first?
 1799  *
 1800  *      Parameters:
 1801  *              prev_object     First object to coalesce
 1802  *              prev_offset     Offset into prev_object
 1803  *              next_object     Second object to coalesce
 1804  *              next_offset     Offset into next_object
 1805  *
 1806  *              prev_size       Size of reference to prev_object
 1807  *              next_size       Size of reference to next_object
 1808  *
 1809  *      Conditions:
 1810  *      The object must *not* be locked.
 1811  */
 1812 boolean_t
 1813 vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
 1814         vm_size_t prev_size, vm_size_t next_size)
 1815 {
 1816         vm_pindex_t next_pindex;
 1817 
 1818         if (prev_object == NULL)
 1819                 return (TRUE);
 1820         vm_object_lock(prev_object);
 1821         if (prev_object->type != OBJT_DEFAULT &&
 1822             prev_object->type != OBJT_SWAP) {
 1823                 vm_object_unlock(prev_object);
 1824                 return (FALSE);
 1825         }
 1826 
 1827         /*
 1828          * Try to collapse the object first
 1829          */
 1830         vm_object_collapse(prev_object);
 1831 
 1832         /*
 1833          * Can't coalesce if: more than one reference, paged out, shadows
 1834          * another object, or has a copy elsewhere (any of which means that
 1835          * the pages not mapped to prev_entry may be in use anyway).
 1836          */
 1837         if (prev_object->backing_object != NULL) {
 1838                 vm_object_unlock(prev_object);
 1839                 return (FALSE);
 1840         }
 1841 
 1842         prev_size >>= PAGE_SHIFT;
 1843         next_size >>= PAGE_SHIFT;
 1844         next_pindex = prev_pindex + prev_size;
 1845 
 1846         if ((prev_object->ref_count > 1) &&
 1847             (prev_object->size != next_pindex)) {
 1848                 vm_object_unlock(prev_object);
 1849                 return (FALSE);
 1850         }
 1851 
 1852         /*
 1853          * Remove any pages that may still be in the object from a previous
 1854          * deallocation.
 1855          */
 1856         if (next_pindex < prev_object->size) {
 1857                 vm_object_page_remove(prev_object,
 1858                                       next_pindex,
 1859                                       next_pindex + next_size, FALSE);
 1860                 if (prev_object->type == OBJT_SWAP)
 1861                         swap_pager_freespace(prev_object,
 1862                                              next_pindex, next_size);
 1863         }
 1864 
 1865         /*
 1866          * Extend the object if necessary.
 1867          */
 1868         if (next_pindex + next_size > prev_object->size)
 1869                 prev_object->size = next_pindex + next_size;
 1870 
 1871         vm_object_unlock(prev_object);
 1872         return (TRUE);
 1873 }
 1874 
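vm_object_coalesce() works in page indices: the byte sizes are shifted down by PAGE_SHIFT, the new region must begin exactly at prev_pindex plus the previous region's page count, and the object is grown only when the combined extent runs past its current size.  A small sketch of that arithmetic follows; the 4 KB page size (PAGE_SHIFT of 12) is assumed purely for illustration.

#include <stdint.h>

#define TOY_PAGE_SHIFT  12      /* assumption: 4 KB pages */

/*
 * Return the page index where the next region must start and, via
 * *new_size, the object size (in pages) needed to cover both regions.
 */
static uint64_t
toy_coalesce_extent(uint64_t prev_pindex, uint64_t prev_size_bytes,
    uint64_t next_size_bytes, uint64_t old_size, uint64_t *new_size)
{
        uint64_t prev_pages = prev_size_bytes >> TOY_PAGE_SHIFT;
        uint64_t next_pages = next_size_bytes >> TOY_PAGE_SHIFT;
        uint64_t next_pindex = prev_pindex + prev_pages;

        *new_size = old_size;
        if (next_pindex + next_pages > old_size)
                *new_size = next_pindex + next_pages;
        return (next_pindex);
}
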
 1875 void
 1876 vm_object_set_writeable_dirty(vm_object_t object)
 1877 {
 1878         struct vnode *vp;
 1879 
 1880         vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
 1881         if (object->type == OBJT_VNODE &&
 1882             (vp = (struct vnode *)object->handle) != NULL) {
 1883                 VI_LOCK(vp);
 1884                 if ((vp->v_iflag & VI_OBJDIRTY) == 0)
 1885                         vp->v_iflag |= VI_OBJDIRTY;
 1886                 VI_UNLOCK(vp);
 1887         }
 1888 }
 1889 
 1890 #ifdef ENABLE_VFS_IOOPT
 1891 /*
 1892  * Experimental support for zero-copy I/O
 1893  *
 1894  * Performs the copy_on_write operations necessary to allow the virtual copies
 1895  * into user space to work.  This has to be called for write(2) system calls
 1896  * from other processes, file unlinking, and file size shrinkage.
 1897  */
 1898 void
 1899 vm_freeze_copyopts(vm_object_t object, vm_pindex_t froma, vm_pindex_t toa)
 1900 {
 1901         int rv;
 1902         vm_object_t robject;
 1903         vm_pindex_t idx;
 1904 
 1905         GIANT_REQUIRED;
 1906         if ((object == NULL) ||
 1907                 ((object->flags & OBJ_OPT) == 0))
 1908                 return;
 1909 
 1910         if (object->shadow_count > object->ref_count)
 1911                 panic("vm_freeze_copyopts: sc > rc");
 1912 
 1913         while ((robject = TAILQ_FIRST(&object->shadow_head)) != NULL) {
 1914                 vm_pindex_t bo_pindex;
 1915                 vm_page_t m_in, m_out;
 1916 
 1917                 bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
 1918 
 1919                 vm_object_reference(robject);
 1920 
 1921                 vm_object_pip_wait(robject, "objfrz");
 1922 
 1923                 if (robject->ref_count == 1) {
 1924                         vm_object_deallocate(robject);
 1925                         continue;
 1926                 }
 1927 
 1928                 vm_object_pip_add(robject, 1);
 1929 
 1930                 for (idx = 0; idx < robject->size; idx++) {
 1931 
 1932                         m_out = vm_page_grab(robject, idx,
 1933                                                 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 1934 
 1935                         if (m_out->valid == 0) {
 1936                                 m_in = vm_page_grab(object, bo_pindex + idx,
 1937                                                 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 1938                                 vm_page_lock_queues();
 1939                                 if (m_in->valid == 0) {
 1940                                         vm_page_unlock_queues();
 1941                                         rv = vm_pager_get_pages(object, &m_in, 1, 0);
 1942                                         if (rv != VM_PAGER_OK) {
 1943                                                 printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex);
 1944                                                 continue;
 1945                                         }
 1946                                         vm_page_lock_queues();
 1947                                         vm_page_deactivate(m_in);
 1948                                 }
 1949 
 1950                                 pmap_remove_all(m_in);
 1951                                 vm_page_unlock_queues();
 1952                                 pmap_copy_page(m_in, m_out);
 1953                                 m_out->valid = m_in->valid;
 1954                                 vm_page_dirty(m_out);
 1955                                 vm_page_lock_queues();
 1956                                 vm_page_activate(m_out);
 1957                                 vm_page_unlock_queues();
 1958                                 vm_page_wakeup(m_in);
 1959                         }
 1960                         vm_page_wakeup(m_out);
 1961                 }
 1962 
 1963                 object->shadow_count--;
 1964                 object->ref_count--;
 1965                 TAILQ_REMOVE(&object->shadow_head, robject, shadow_list);
 1966                 robject->backing_object = NULL;
 1967                 robject->backing_object_offset = 0;
 1968 
 1969                 vm_object_pip_wakeup(robject);
 1970                 vm_object_deallocate(robject);
 1971         }
 1972 
 1973         vm_object_clear_flag(object, OBJ_OPT);
 1974 }
 1975 #endif
 1976 
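vm_freeze_copyopts() above walks every shadow (robject) of the object being modified and forces any page the shadow has not yet copied privately to be filled from the corresponding backing page, so a later write through the file cannot change data a process has already virtually copied.  The userspace sketch below shows only the per-page copy; the toy types stand in for vm_page_grab()/pmap_copy_page() and are not kernel interfaces.

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

#define TOY_PAGE_SIZE   4096    /* assumption for the sketch */

struct toy_object {
        unsigned char  (*pages)[TOY_PAGE_SIZE]; /* page contents */
        bool            *valid;                 /* page already private? */
        size_t           size;                  /* object size, in pages */
        size_t           backing_off;           /* page offset into the backer */
};

/* Give "shadow" a private copy of every page it still shares with "frozen". */
static void
toy_freeze_copyopts(struct toy_object *shadow, const struct toy_object *frozen)
{
        for (size_t idx = 0; idx < shadow->size; idx++) {
                if (shadow->valid[idx])
                        continue;       /* already copied on an earlier fault */
                memcpy(shadow->pages[idx],
                    frozen->pages[shadow->backing_off + idx], TOY_PAGE_SIZE);
                shadow->valid[idx] = true;
        }
}
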
 1977 #include "opt_ddb.h"
 1978 #ifdef DDB
 1979 #include <sys/kernel.h>
 1980 
 1981 #include <sys/cons.h>
 1982 
 1983 #include <ddb/ddb.h>
 1984 
 1985 static int
 1986 _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
 1987 {
 1988         vm_map_t tmpm;
 1989         vm_map_entry_t tmpe;
 1990         vm_object_t obj;
 1991         int entcount;
 1992 
 1993         if (map == 0)
 1994                 return 0;
 1995 
 1996         if (entry == 0) {
 1997                 tmpe = map->header.next;
 1998                 entcount = map->nentries;
 1999                 while (entcount-- && (tmpe != &map->header)) {
 2000                         if (_vm_object_in_map(map, object, tmpe)) {
 2001                                 return 1;
 2002                         }
 2003                         tmpe = tmpe->next;
 2004                 }
 2005         } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 2006                 tmpm = entry->object.sub_map;
 2007                 tmpe = tmpm->header.next;
 2008                 entcount = tmpm->nentries;
 2009                 while (entcount-- && tmpe != &tmpm->header) {
 2010                         if (_vm_object_in_map(tmpm, object, tmpe)) {
 2011                                 return 1;
 2012                         }
 2013                         tmpe = tmpe->next;
 2014                 }
 2015         } else if ((obj = entry->object.vm_object) != NULL) {
 2016                 for (; obj; obj = obj->backing_object)
 2017                         if (obj == object) {
 2018                                 return 1;
 2019                         }
 2020         }
 2021         return 0;
 2022 }
 2023 
 2024 static int
 2025 vm_object_in_map(vm_object_t object)
 2026 {
 2027         struct proc *p;
 2028 
 2029         /* sx_slock(&allproc_lock); */
 2030         LIST_FOREACH(p, &allproc, p_list) {
 2031                 if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
 2032                         continue;
 2033                 if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
 2034                         /* sx_sunlock(&allproc_lock); */
 2035                         return 1;
 2036                 }
 2037         }
 2038         /* sx_sunlock(&allproc_lock); */
 2039         if (_vm_object_in_map(kernel_map, object, 0))
 2040                 return 1;
 2041         if (_vm_object_in_map(kmem_map, object, 0))
 2042                 return 1;
 2043         if (_vm_object_in_map(pager_map, object, 0))
 2044                 return 1;
 2045         if (_vm_object_in_map(buffer_map, object, 0))
 2046                 return 1;
 2047         return 0;
 2048 }
 2049 
 2050 DB_SHOW_COMMAND(vmochk, vm_object_check)
 2051 {
 2052         vm_object_t object;
 2053 
 2054         /*
 2055          * Make sure that internal objects are in a map somewhere
 2056          * and that none have zero reference counts.
 2057          */
 2058         TAILQ_FOREACH(object, &vm_object_list, object_list) {
 2059                 if (object->handle == NULL &&
 2060                     (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
 2061                         if (object->ref_count == 0) {
 2062                                 db_printf("vmochk: internal obj has zero ref count: %ld\n",
 2063                                         (long)object->size);
 2064                         }
 2065                         if (!vm_object_in_map(object)) {
 2066                                 db_printf(
 2067                         "vmochk: internal obj is not in a map: "
 2068                         "ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
 2069                                     object->ref_count, (u_long)object->size, 
 2070                                     (u_long)object->size,
 2071                                     (void *)object->backing_object);
 2072                         }
 2073                 }
 2074         }
 2075 }
 2076 
 2077 /*
 2078  *      vm_object_print:        [ debug ]
 2079  */
 2080 DB_SHOW_COMMAND(object, vm_object_print_static)
 2081 {
 2082         /* XXX convert args. */
 2083         vm_object_t object = (vm_object_t)addr;
 2084         boolean_t full = have_addr;
 2085 
 2086         vm_page_t p;
 2087 
 2088         /* XXX count is an (unused) arg.  Avoid shadowing it. */
 2089 #define count   was_count
 2090 
 2091         int count;
 2092 
 2093         if (object == NULL)
 2094                 return;
 2095 
 2096         db_iprintf(
 2097             "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x\n",
 2098             object, (int)object->type, (uintmax_t)object->size,
 2099             object->resident_page_count, object->ref_count, object->flags);
 2100         db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
 2101             object->shadow_count, 
 2102             object->backing_object ? object->backing_object->ref_count : 0,
 2103             object->backing_object, (uintmax_t)object->backing_object_offset);
 2104 
 2105         if (!full)
 2106                 return;
 2107 
 2108         db_indent += 2;
 2109         count = 0;
 2110         TAILQ_FOREACH(p, &object->memq, listq) {
 2111                 if (count == 0)
 2112                         db_iprintf("memory:=");
 2113                 else if (count == 6) {
 2114                         db_printf("\n");
 2115                         db_iprintf(" ...");
 2116                         count = 0;
 2117                 } else
 2118                         db_printf(",");
 2119                 count++;
 2120 
 2121                 db_printf("(off=0x%jx,page=0x%jx)",
 2122                     (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
 2123         }
 2124         if (count != 0)
 2125                 db_printf("\n");
 2126         db_indent -= 2;
 2127 }
 2128 
 2129 /* XXX. */
 2130 #undef count
 2131 
 2132 /* XXX need this non-static entry for calling from vm_map_print. */
 2133 void
 2134 vm_object_print(
 2135         /* db_expr_t */ long addr,
 2136         boolean_t have_addr,
 2137         /* db_expr_t */ long count,
 2138         char *modif)
 2139 {
 2140         vm_object_print_static(addr, have_addr, count, modif);
 2141 }
 2142 
 2143 DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
 2144 {
 2145         vm_object_t object;
 2146         int nl = 0;
 2147         int c;
 2148 
 2149         TAILQ_FOREACH(object, &vm_object_list, object_list) {
 2150                 vm_pindex_t idx, fidx;
 2151                 vm_pindex_t osize;
 2152                 vm_offset_t pa = -1, padiff;
 2153                 int rcount;
 2154                 vm_page_t m;
 2155 
 2156                 db_printf("new object: %p\n", (void *)object);
 2157                 if (nl > 18) {
 2158                         c = cngetc();
 2159                         if (c != ' ')
 2160                                 return;
 2161                         nl = 0;
 2162                 }
 2163                 nl++;
 2164                 rcount = 0;
 2165                 fidx = 0;
 2166                 osize = object->size;
 2167                 if (osize > 128)
 2168                         osize = 128;
 2169                 for (idx = 0; idx < osize; idx++) {
 2170                         m = vm_page_lookup(object, idx);
 2171                         if (m == NULL) {
 2172                                 if (rcount) {
 2173                                         db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
 2174                                                 (long)fidx, rcount, (long)pa);
 2175                                         if (nl > 18) {
 2176                                                 c = cngetc();
 2177                                                 if (c != ' ')
 2178                                                         return;
 2179                                                 nl = 0;
 2180                                         }
 2181                                         nl++;
 2182                                         rcount = 0;
 2183                                 }
 2184                                 continue;
 2185                         }
 2186 
 2187                                 
 2188                         if (rcount &&
 2189                                 (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
 2190                                 ++rcount;
 2191                                 continue;
 2192                         }
 2193                         if (rcount) {
 2194                                 padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
 2195                                 padiff >>= PAGE_SHIFT;
 2196                                 padiff &= PQ_L2_MASK;
 2197                                 if (padiff == 0) {
 2198                                         pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
 2199                                         ++rcount;
 2200                                         continue;
 2201                                 }
 2202                                 db_printf(" index(%ld)run(%d)pa(0x%lx)",
 2203                                         (long)fidx, rcount, (long)pa);
 2204                                 db_printf("pd(%ld)\n", (long)padiff);
 2205                                 if (nl > 18) {
 2206                                         c = cngetc();
 2207                                         if (c != ' ')
 2208                                                 return;
 2209                                         nl = 0;
 2210                                 }
 2211                                 nl++;
 2212                         }
 2213                         fidx = idx;
 2214                         pa = VM_PAGE_TO_PHYS(m);
 2215                         rcount = 1;
 2216                 }
 2217                 if (rcount) {
 2218                         db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
 2219                                 (long)fidx, rcount, (long)pa);
 2220                         if (nl > 18) {
 2221                                 c = cngetc();
 2222                                 if (c != ' ')
 2223                                         return;
 2224                                 nl = 0;
 2225                         }
 2226                         nl++;
 2227                 }
 2228         }
 2229 }
 2230 #endif /* DDB */
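
The three DB_SHOW_COMMAND entries above register under DDB's "show" verb, so from the in-kernel debugger they should be reachable roughly as follows (prompt and address are illustrative):

    db> show vmochk
    db> show object 0xc1234567
    db> show vmopag

"show vmochk" flags internal (unnamed default or swap) objects that are not reachable from any map or that have a zero reference count; "show object <addr>" prints one object and, because an address was supplied, its resident-page list; "show vmopag" prints every object's pages as index/run/physical-address runs, pausing about every 18 lines until a space is typed.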
