FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_object.c
1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94
33 *
34 *
35 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
36 * All rights reserved.
37 *
38 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
39 *
40 * Permission to use, copy, modify and distribute this software and
41 * its documentation is hereby granted, provided that both the copyright
42 * notice and this permission notice appear in all copies of the
43 * software, derivative works or modified versions, and any portions
44 * thereof, and that both notices appear in supporting documentation.
45 *
46 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
47 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
48 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
49 *
50 * Carnegie Mellon requests users of this software to return to
51 *
52 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
53 * School of Computer Science
54 * Carnegie Mellon University
55 * Pittsburgh PA 15213-3890
56 *
57 * any improvements or extensions that they make and grant Carnegie the
58 * rights to redistribute these changes.
59 */
60
61 /*
62 * Virtual memory object module.
63 */
64
65 #include <sys/cdefs.h>
66 __FBSDID("$FreeBSD$");
67
68 #include "opt_vm.h"
69
70 #include <sys/param.h>
71 #include <sys/systm.h>
72 #include <sys/lock.h>
73 #include <sys/mman.h>
74 #include <sys/mount.h>
75 #include <sys/kernel.h>
76 #include <sys/sysctl.h>
77 #include <sys/mutex.h>
78 #include <sys/proc.h> /* for curproc, pageproc */
79 #include <sys/socket.h>
80 #include <sys/vnode.h>
81 #include <sys/vmmeter.h>
82 #include <sys/sx.h>
83
84 #include <vm/vm.h>
85 #include <vm/vm_param.h>
86 #include <vm/pmap.h>
87 #include <vm/vm_map.h>
88 #include <vm/vm_object.h>
89 #include <vm/vm_page.h>
90 #include <vm/vm_pageout.h>
91 #include <vm/vm_pager.h>
92 #include <vm/swap_pager.h>
93 #include <vm/vm_kern.h>
94 #include <vm/vm_extern.h>
95 #include <vm/vm_reserv.h>
96 #include <vm/uma.h>
97
98 #define EASY_SCAN_FACTOR 8
99
100 #define MSYNC_FLUSH_HARDSEQ 0x01
101 #define MSYNC_FLUSH_SOFTSEQ 0x02
102
103 /*
104 * msync / VM object flushing optimizations
105 */
106 static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ | MSYNC_FLUSH_SOFTSEQ;
107 SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags,
108 CTLFLAG_RW, &msync_flush_flags, 0, "");
109
110 static int old_msync;
111 SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
112 "Use old (insecure) msync behavior");
113
114 static void vm_object_qcollapse(vm_object_t object);
115 static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags);
116 static void vm_object_vndeallocate(vm_object_t object);
117
118 /*
119 * Virtual memory objects maintain the actual data
120 * associated with allocated virtual memory. A given
121 * page of memory exists within exactly one object.
122 *
123 * An object is only deallocated when all "references"
124 * are given up. Only one "reference" to a given
125 * region of an object should be writeable.
126 *
127 * Associated with each object is a list of all resident
128 * memory pages belonging to that object; this list is
129 * maintained by the "vm_page" module, and locked by the object's
130 * lock.
131 *
132 * Each object also records a "pager" routine which is
133 * used to retrieve (and store) pages to the proper backing
134 * storage. In addition, objects may be backed by other
135 * objects from which they were virtual-copied.
136 *
137 * The only items within the object structure which are
138 * modified after time of creation are:
139 * reference count locked by object's lock
140 * pager routine locked by object's lock
141 *
142 */
143
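/*
 * Illustrative sketch (not part of the original file): the reference
 * discipline described above, as seen from a caller.  The helper name
 * is hypothetical; only interfaces defined in this file are used.
 */
#if 0	/* example only */
static void
example_object_ref_cycle(vm_object_t obj)
{

	vm_object_reference(obj);	/* take an extra reference */
	/* ... use the object's resident pages ... */
	vm_object_deallocate(obj);	/* drop it; storage may be freed */
}
#endif
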
144 struct object_q vm_object_list;
145 struct mtx vm_object_list_mtx; /* lock for object list and count */
146
147 struct vm_object kernel_object_store;
148 struct vm_object kmem_object_store;
149
150 SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0, "VM object stats");
151
152 static long object_collapses;
153 SYSCTL_LONG(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD,
154 &object_collapses, 0, "VM object collapses");
155
156 static long object_bypasses;
157 SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
158 &object_bypasses, 0, "VM object bypasses");
159
160 static uma_zone_t obj_zone;
161
162 static int vm_object_zinit(void *mem, int size, int flags);
163
164 #ifdef INVARIANTS
165 static void vm_object_zdtor(void *mem, int size, void *arg);
166
167 static void
168 vm_object_zdtor(void *mem, int size, void *arg)
169 {
170 vm_object_t object;
171
172 object = (vm_object_t)mem;
173 KASSERT(TAILQ_EMPTY(&object->memq),
174 ("object %p has resident pages",
175 object));
176 #if VM_NRESERVLEVEL > 0
177 KASSERT(LIST_EMPTY(&object->rvq),
178 ("object %p has reservations",
179 object));
180 #endif
181 KASSERT(object->cache == NULL,
182 ("object %p has cached pages",
183 object));
184 KASSERT(object->paging_in_progress == 0,
185 ("object %p paging_in_progress = %d",
186 object, object->paging_in_progress));
187 KASSERT(object->resident_page_count == 0,
188 ("object %p resident_page_count = %d",
189 object, object->resident_page_count));
190 KASSERT(object->shadow_count == 0,
191 ("object %p shadow_count = %d",
192 object, object->shadow_count));
193 }
194 #endif
195
196 static int
197 vm_object_zinit(void *mem, int size, int flags)
198 {
199 vm_object_t object;
200
201 object = (vm_object_t)mem;
202 bzero(&object->mtx, sizeof(object->mtx));
203 VM_OBJECT_LOCK_INIT(object, "standard object");
204
205 /* These are true for any object that has been freed */
206 object->paging_in_progress = 0;
207 object->resident_page_count = 0;
208 object->shadow_count = 0;
209 return (0);
210 }
211
212 void
213 _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
214 {
215
216 TAILQ_INIT(&object->memq);
217 LIST_INIT(&object->shadow_head);
218
219 object->root = NULL;
220 object->type = type;
221 object->size = size;
222 object->generation = 1;
223 object->ref_count = 1;
224 object->memattr = VM_MEMATTR_DEFAULT;
225 object->flags = 0;
226 if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
227 object->flags = OBJ_ONEMAPPING;
228 object->pg_color = 0;
229 object->handle = NULL;
230 object->backing_object = NULL;
231 object->backing_object_offset = (vm_ooffset_t) 0;
232 #if VM_NRESERVLEVEL > 0
233 LIST_INIT(&object->rvq);
234 #endif
235 object->cache = NULL;
236
237 mtx_lock(&vm_object_list_mtx);
238 TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
239 mtx_unlock(&vm_object_list_mtx);
240 }
241
242 /*
243 * vm_object_init:
244 *
245 * Initialize the VM objects module.
246 */
247 void
248 vm_object_init(void)
249 {
250 TAILQ_INIT(&vm_object_list);
251 mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF);
252
253 VM_OBJECT_LOCK_INIT(&kernel_object_store, "kernel object");
254 _vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
255 kernel_object);
256 #if VM_NRESERVLEVEL > 0
257 kernel_object->flags |= OBJ_COLORED;
258 kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
259 #endif
260
261 VM_OBJECT_LOCK_INIT(&kmem_object_store, "kmem object");
262 _vm_object_allocate(OBJT_PHYS, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
263 kmem_object);
264 #if VM_NRESERVLEVEL > 0
265 kmem_object->flags |= OBJ_COLORED;
266 kmem_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
267 #endif
268
269 /*
270 * The lock portion of struct vm_object must be type stable due
271 * to vm_pageout_fallback_object_lock locking a vm object
272 * without holding any references to it.
273 */
274 obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
275 #ifdef INVARIANTS
276 vm_object_zdtor,
277 #else
278 NULL,
279 #endif
280 vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
281 }
282
283 void
284 vm_object_clear_flag(vm_object_t object, u_short bits)
285 {
286
287 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
288 object->flags &= ~bits;
289 }
290
291 /*
292 * Sets the default memory attribute for the specified object. Pages
293 * that are allocated to this object are by default assigned this memory
294 * attribute.
295 *
296 * Presently, this function must be called before any pages are allocated
297 * to the object. In the future, this requirement may be relaxed for
298 * "default" and "swap" objects.
299 */
300 int
301 vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr)
302 {
303
304 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
305 switch (object->type) {
306 case OBJT_DEFAULT:
307 case OBJT_DEVICE:
308 case OBJT_PHYS:
309 case OBJT_SG:
310 case OBJT_SWAP:
311 case OBJT_VNODE:
312 if (!TAILQ_EMPTY(&object->memq))
313 return (KERN_FAILURE);
314 break;
315 case OBJT_DEAD:
316 return (KERN_INVALID_ARGUMENT);
317 }
318 object->memattr = memattr;
319 return (KERN_SUCCESS);
320 }
321
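/*
 * Illustrative sketch (not part of the original file): setting the
 * default memory attribute on a freshly allocated object, before any
 * pages are allocated to it.  "npages" is hypothetical, and
 * VM_MEMATTR_UNCACHEABLE is assumed to be provided by the platform.
 */
#if 0	/* example only */
	vm_object_t obj;

	obj = vm_object_allocate(OBJT_DEFAULT, npages);
	VM_OBJECT_LOCK(obj);
	(void)vm_object_set_memattr(obj, VM_MEMATTR_UNCACHEABLE);
	VM_OBJECT_UNLOCK(obj);
#endif
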
322 void
323 vm_object_pip_add(vm_object_t object, short i)
324 {
325
326 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
327 object->paging_in_progress += i;
328 }
329
330 void
331 vm_object_pip_subtract(vm_object_t object, short i)
332 {
333
334 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
335 object->paging_in_progress -= i;
336 }
337
338 void
339 vm_object_pip_wakeup(vm_object_t object)
340 {
341
342 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
343 object->paging_in_progress--;
344 if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
345 vm_object_clear_flag(object, OBJ_PIPWNT);
346 wakeup(object);
347 }
348 }
349
350 void
351 vm_object_pip_wakeupn(vm_object_t object, short i)
352 {
353
354 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
355 if (i)
356 object->paging_in_progress -= i;
357 if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
358 vm_object_clear_flag(object, OBJ_PIPWNT);
359 wakeup(object);
360 }
361 }
362
363 void
364 vm_object_pip_wait(vm_object_t object, char *waitid)
365 {
366
367 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
368 while (object->paging_in_progress) {
369 object->flags |= OBJ_PIPWNT;
370 msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0);
371 }
372 }
373
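/*
 * Illustrative sketch (not part of the original file): the typical
 * paging-in-progress protocol.  A thread raises the count while it
 * drops the object lock for I/O, so vm_object_terminate() blocks in
 * vm_object_pip_wait() until the count drains back to zero.
 */
#if 0	/* example only */
	VM_OBJECT_LOCK(object);
	vm_object_pip_add(object, 1);	/* announce pending paging I/O */
	VM_OBJECT_UNLOCK(object);
	/* ... perform the I/O without holding the object lock ... */
	VM_OBJECT_LOCK(object);
	vm_object_pip_wakeup(object);	/* drop the count, wake waiters */
	VM_OBJECT_UNLOCK(object);
#endif
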
374 /*
375 * vm_object_allocate:
376 *
377 * Returns a new object with the given size.
378 */
379 vm_object_t
380 vm_object_allocate(objtype_t type, vm_pindex_t size)
381 {
382 vm_object_t object;
383
384 object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK);
385 _vm_object_allocate(type, size, object);
386 return (object);
387 }
388
389
390 /*
391 * vm_object_reference:
392 *
393 * Gets another reference to the given object. Note: OBJ_DEAD
394 * objects can be referenced during final cleaning.
395 */
396 void
397 vm_object_reference(vm_object_t object)
398 {
399 struct vnode *vp;
400
401 if (object == NULL)
402 return;
403 VM_OBJECT_LOCK(object);
404 object->ref_count++;
405 if (object->type == OBJT_VNODE) {
406 int vfslocked;
407
408 vp = object->handle;
409 VM_OBJECT_UNLOCK(object);
410 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
411 vget(vp, LK_RETRY, curthread);
412 VFS_UNLOCK_GIANT(vfslocked);
413 } else
414 VM_OBJECT_UNLOCK(object);
415 }
416
417 /*
418 * vm_object_reference_locked:
419 *
420 * Gets another reference to the given object.
421 *
422 * The object must be locked.
423 */
424 void
425 vm_object_reference_locked(vm_object_t object)
426 {
427 struct vnode *vp;
428
429 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
430 KASSERT((object->flags & OBJ_DEAD) == 0,
431 ("vm_object_reference_locked: dead object referenced"));
432 object->ref_count++;
433 if (object->type == OBJT_VNODE) {
434 vp = object->handle;
435 vref(vp);
436 }
437 }
438
439 /*
440 * Handle deallocating an object of type OBJT_VNODE.
441 */
442 static void
443 vm_object_vndeallocate(vm_object_t object)
444 {
445 struct vnode *vp = (struct vnode *) object->handle;
446
447 VFS_ASSERT_GIANT(vp->v_mount);
448 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
449 KASSERT(object->type == OBJT_VNODE,
450 ("vm_object_vndeallocate: not a vnode object"));
451 KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
452 #ifdef INVARIANTS
453 if (object->ref_count == 0) {
454 vprint("vm_object_vndeallocate", vp);
455 panic("vm_object_vndeallocate: bad object reference count");
456 }
457 #endif
458
459 object->ref_count--;
460 if (object->ref_count == 0) {
461 mp_fixme("Unlocked vflag access.");
462 vp->v_vflag &= ~VV_TEXT;
463 }
464 VM_OBJECT_UNLOCK(object);
465 /*
466 * vrele may need a vop lock
467 */
468 vrele(vp);
469 }
470
471 /*
472 * vm_object_deallocate:
473 *
474 * Release a reference to the specified object,
475 * gained either through a vm_object_allocate
476 * or a vm_object_reference call. When all references
477 * are gone, storage associated with this object
478 * may be relinquished.
479 *
480 * No object may be locked.
481 */
482 void
483 vm_object_deallocate(vm_object_t object)
484 {
485 vm_object_t temp;
486
487 while (object != NULL) {
488 int vfslocked;
489
490 vfslocked = 0;
491 restart:
492 VM_OBJECT_LOCK(object);
493 if (object->type == OBJT_VNODE) {
494 struct vnode *vp = (struct vnode *) object->handle;
495
496 /*
497 * Conditionally acquire Giant for a vnode-backed
498 * object. We have to be careful since the type of
499 * a vnode object can change while the object is
500 * unlocked.
501 */
502 if (VFS_NEEDSGIANT(vp->v_mount) && !vfslocked) {
503 vfslocked = 1;
504 if (!mtx_trylock(&Giant)) {
505 VM_OBJECT_UNLOCK(object);
506 mtx_lock(&Giant);
507 goto restart;
508 }
509 }
510 vm_object_vndeallocate(object);
511 VFS_UNLOCK_GIANT(vfslocked);
512 return;
513 } else
514 /*
515 * This is to handle the case that the object
516 * changed type while we dropped its lock to
517 * obtain Giant.
518 */
519 VFS_UNLOCK_GIANT(vfslocked);
520
521 KASSERT(object->ref_count != 0,
522 ("vm_object_deallocate: object deallocated too many times: %d", object->type));
523
524 /*
525 * If the reference count goes to 0 we start calling
526 * vm_object_terminate() on the object chain.
527 * A ref count of 1 may be a special case depending on the
528 * shadow count being 0 or 1.
529 */
530 object->ref_count--;
531 if (object->ref_count > 1) {
532 VM_OBJECT_UNLOCK(object);
533 return;
534 } else if (object->ref_count == 1) {
535 if (object->shadow_count == 0 &&
536 object->handle == NULL &&
537 (object->type == OBJT_DEFAULT ||
538 object->type == OBJT_SWAP)) {
539 vm_object_set_flag(object, OBJ_ONEMAPPING);
540 } else if ((object->shadow_count == 1) &&
541 (object->handle == NULL) &&
542 (object->type == OBJT_DEFAULT ||
543 object->type == OBJT_SWAP)) {
544 vm_object_t robject;
545
546 robject = LIST_FIRST(&object->shadow_head);
547 KASSERT(robject != NULL,
548 ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
549 object->ref_count,
550 object->shadow_count));
551 if (!VM_OBJECT_TRYLOCK(robject)) {
552 /*
553 * Avoid a potential deadlock.
554 */
555 object->ref_count++;
556 VM_OBJECT_UNLOCK(object);
557 /*
558 * More likely than not the thread
559 * holding robject's lock has lower
560 * priority than the current thread.
561 * Let the lower priority thread run.
562 */
563 pause("vmo_de", 1);
564 continue;
565 }
566 /*
567 * Collapse object into its shadow unless its
568 * shadow is dead. In that case, object will
569 * be deallocated by the thread that is
570 * deallocating its shadow.
571 */
572 if ((robject->flags & OBJ_DEAD) == 0 &&
573 (robject->handle == NULL) &&
574 (robject->type == OBJT_DEFAULT ||
575 robject->type == OBJT_SWAP)) {
576
577 robject->ref_count++;
578 retry:
579 if (robject->paging_in_progress) {
580 VM_OBJECT_UNLOCK(object);
581 vm_object_pip_wait(robject,
582 "objde1");
583 temp = robject->backing_object;
584 if (object == temp) {
585 VM_OBJECT_LOCK(object);
586 goto retry;
587 }
588 } else if (object->paging_in_progress) {
589 VM_OBJECT_UNLOCK(robject);
590 object->flags |= OBJ_PIPWNT;
591 msleep(object,
592 VM_OBJECT_MTX(object),
593 PDROP | PVM, "objde2", 0);
594 VM_OBJECT_LOCK(robject);
595 temp = robject->backing_object;
596 if (object == temp) {
597 VM_OBJECT_LOCK(object);
598 goto retry;
599 }
600 } else
601 VM_OBJECT_UNLOCK(object);
602
603 if (robject->ref_count == 1) {
604 robject->ref_count--;
605 object = robject;
606 goto doterm;
607 }
608 object = robject;
609 vm_object_collapse(object);
610 VM_OBJECT_UNLOCK(object);
611 continue;
612 }
613 VM_OBJECT_UNLOCK(robject);
614 }
615 VM_OBJECT_UNLOCK(object);
616 return;
617 }
618 doterm:
619 temp = object->backing_object;
620 if (temp != NULL) {
621 VM_OBJECT_LOCK(temp);
622 LIST_REMOVE(object, shadow_list);
623 temp->shadow_count--;
624 temp->generation++;
625 VM_OBJECT_UNLOCK(temp);
626 object->backing_object = NULL;
627 }
628 /*
629 * Don't double-terminate; we could be in a termination
630 * recursion due to the terminate having to sync data
631 * to disk.
632 */
633 if ((object->flags & OBJ_DEAD) == 0)
634 vm_object_terminate(object);
635 else
636 VM_OBJECT_UNLOCK(object);
637 object = temp;
638 }
639 }
640
641 /*
642 * vm_object_destroy removes the object from the global object list
643 * and frees the space for the object.
644 */
645 void
646 vm_object_destroy(vm_object_t object)
647 {
648
649 /*
650 * Remove the object from the global object list.
651 */
652 mtx_lock(&vm_object_list_mtx);
653 TAILQ_REMOVE(&vm_object_list, object, object_list);
654 mtx_unlock(&vm_object_list_mtx);
655
656 /*
657 * Free the space for the object.
658 */
659 uma_zfree(obj_zone, object);
660 }
661
662 /*
663 * vm_object_terminate actually destroys the specified object, freeing
664 * up all previously used resources.
665 *
666 * The object must be locked.
667 * This routine may block.
668 */
669 void
670 vm_object_terminate(vm_object_t object)
671 {
672 vm_page_t p;
673
674 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
675
676 /*
677 * Make sure no one uses us.
678 */
679 vm_object_set_flag(object, OBJ_DEAD);
680
681 /*
682 * wait for the pageout daemon to be done with the object
683 */
684 vm_object_pip_wait(object, "objtrm");
685
686 KASSERT(!object->paging_in_progress,
687 ("vm_object_terminate: pageout in progress"));
688
689 /*
690 * Clean and free the pages, as appropriate. All references to the
691 * object are gone, so we don't need to lock it.
692 */
693 if (object->type == OBJT_VNODE) {
694 struct vnode *vp = (struct vnode *)object->handle;
695
696 /*
697 * Clean pages and flush buffers.
698 */
699 vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
700 VM_OBJECT_UNLOCK(object);
701
702 vinvalbuf(vp, V_SAVE, NULL, 0, 0);
703
704 VM_OBJECT_LOCK(object);
705 }
706
707 KASSERT(object->ref_count == 0,
708 ("vm_object_terminate: object with references, ref_count=%d",
709 object->ref_count));
710
711 /*
712 * Now free any remaining pages. For internal objects, this also
713 * removes them from paging queues. Don't free wired pages, just
714 * remove them from the object.
715 */
716 vm_page_lock_queues();
717 while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
718 KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0,
719 ("vm_object_terminate: freeing busy page %p "
720 "p->busy = %d, p->flags %x\n", p, p->busy, p->flags));
721 if (p->wire_count == 0) {
722 vm_page_free(p);
723 cnt.v_pfree++;
724 } else {
725 vm_page_remove(p);
726 }
727 }
728 vm_page_unlock_queues();
729
730 #if VM_NRESERVLEVEL > 0
731 if (__predict_false(!LIST_EMPTY(&object->rvq)))
732 vm_reserv_break_all(object);
733 #endif
734 if (__predict_false(object->cache != NULL))
735 vm_page_cache_free(object, 0, 0);
736
737 /*
738 * Let the pager know object is dead.
739 */
740 vm_pager_deallocate(object);
741 VM_OBJECT_UNLOCK(object);
742
743 vm_object_destroy(object);
744 }
745
746 /*
747 * vm_object_page_clean
748 *
749 * Clean all dirty pages in the specified range of object. Leaves page
750 * on whatever queue it is currently on. If NOSYNC is set then do not
751 * write out pages with VPO_NOSYNC set (originally comes from MAP_NOSYNC),
752 * leaving the object dirty.
753 *
754 * When stuffing pages asynchronously, allow clustering. XXX we need a
755 * synchronous clustering mode implementation.
756 *
757 * Odd semantics: if start == end, we clean everything.
758 *
759 * The object must be locked.
760 */
761 void
762 vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int flags)
763 {
764 vm_page_t p, np;
765 vm_pindex_t tstart, tend;
766 vm_pindex_t pi;
767 int clearobjflags;
768 int pagerflags;
769 int curgeneration;
770
771 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
772 if (object->type != OBJT_VNODE ||
773 (object->flags & OBJ_MIGHTBEDIRTY) == 0)
774 return;
775
776 pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
777 pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;
778
779 vm_object_set_flag(object, OBJ_CLEANING);
780
781 tstart = start;
782 if (end == 0) {
783 tend = object->size;
784 } else {
785 tend = end;
786 }
787
788 vm_page_lock_queues();
789 /*
790 * If the caller is smart and only msync()s a range he knows is
791 * dirty, we may be able to avoid an object scan. This results in
792 * a phenomenal improvement in performance. We cannot do this
793 * as a matter of course because the object may be huge - e.g.
794 * the size might be in the gigabytes or terabytes.
795 */
796 if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) {
797 vm_pindex_t tscan;
798 int scanlimit;
799 int scanreset;
800
801 scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
802 if (scanreset < 16)
803 scanreset = 16;
804 pagerflags |= VM_PAGER_IGNORE_CLEANCHK;
805
806 scanlimit = scanreset;
807 tscan = tstart;
808 while (tscan < tend) {
809 curgeneration = object->generation;
810 p = vm_page_lookup(object, tscan);
811 if (p == NULL || p->valid == 0) {
812 if (--scanlimit == 0)
813 break;
814 ++tscan;
815 continue;
816 }
817 vm_page_test_dirty(p);
818 if ((p->dirty & p->valid) == 0) {
819 if (--scanlimit == 0)
820 break;
821 ++tscan;
822 continue;
823 }
824 /*
825 * If we have been asked to skip nosync pages and
826 * this is a nosync page, we can't continue.
827 */
828 if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) {
829 if (--scanlimit == 0)
830 break;
831 ++tscan;
832 continue;
833 }
834 scanlimit = scanreset;
835
836 /*
837 * This returns 0 if it was unable to busy the first
838 * page (i.e. had to sleep).
839 */
840 tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags);
841 }
842
843 /*
844 * If everything was dirty and we flushed it successfully,
845 * and the requested range is not the entire object, we
846 * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can
847 * return immediately.
848 */
849 if (tscan >= tend && (tstart || tend < object->size)) {
850 vm_page_unlock_queues();
851 vm_object_clear_flag(object, OBJ_CLEANING);
852 return;
853 }
854 pagerflags &= ~VM_PAGER_IGNORE_CLEANCHK;
855 }
856
857 /*
858 * Generally set CLEANCHK interlock and make the page read-only so
859 * we can then clear the object flags.
860 *
861 * However, if this is a nosync mmap then the object is likely to
862 * stay dirty so do not mess with the page and do not clear the
863 * object flags.
864 */
865 clearobjflags = 1;
866 TAILQ_FOREACH(p, &object->memq, listq) {
867 p->oflags |= VPO_CLEANCHK;
868 if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC))
869 clearobjflags = 0;
870 else
871 pmap_remove_write(p);
872 }
873
874 if (clearobjflags && (tstart == 0) && (tend == object->size)) {
875 struct vnode *vp;
876
877 vm_object_clear_flag(object, OBJ_MIGHTBEDIRTY);
878 if (object->type == OBJT_VNODE &&
879 (vp = (struct vnode *)object->handle) != NULL) {
880 VI_LOCK(vp);
881 if (vp->v_iflag & VI_OBJDIRTY)
882 vp->v_iflag &= ~VI_OBJDIRTY;
883 VI_UNLOCK(vp);
884 }
885 }
886
887 rescan:
888 curgeneration = object->generation;
889
890 for (p = TAILQ_FIRST(&object->memq); p; p = np) {
891 int n;
892
893 np = TAILQ_NEXT(p, listq);
894
895 again:
896 pi = p->pindex;
897 if ((p->oflags & VPO_CLEANCHK) == 0 ||
898 (pi < tstart) || (pi >= tend) ||
899 p->valid == 0) {
900 p->oflags &= ~VPO_CLEANCHK;
901 continue;
902 }
903
904 vm_page_test_dirty(p);
905 if ((p->dirty & p->valid) == 0) {
906 p->oflags &= ~VPO_CLEANCHK;
907 continue;
908 }
909
910 /*
911 * If we have been asked to skip nosync pages and this is a
912 * nosync page, skip it. Note that the object flags were
913 * not cleared in this case so we do not have to set them.
914 */
915 if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) {
916 p->oflags &= ~VPO_CLEANCHK;
917 continue;
918 }
919
920 n = vm_object_page_collect_flush(object, p,
921 curgeneration, pagerflags);
922 if (n == 0)
923 goto rescan;
924
925 if (object->generation != curgeneration)
926 goto rescan;
927
928 /*
929 * Try to optimize the next page. If we can't, we pick up
930 * our (random) scan where we left off.
931 */
932 if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) {
933 if ((p = vm_page_lookup(object, pi + n)) != NULL)
934 goto again;
935 }
936 }
937 vm_page_unlock_queues();
938 #if 0
939 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
940 #endif
941
942 vm_object_clear_flag(object, OBJ_CLEANING);
943 return;
944 }
945
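/*
 * Illustrative sketch (not part of the original file): cleaning a
 * specific page range versus the whole object, per the "start == end
 * cleans everything" convention above.  "pstart" and "pend" are
 * hypothetical page indices.
 */
#if 0	/* example only */
	VM_OBJECT_LOCK(object);
	vm_object_page_clean(object, pstart, pend, OBJPC_SYNC);
	vm_object_page_clean(object, 0, 0, OBJPC_SYNC);	/* entire object */
	VM_OBJECT_UNLOCK(object);
#endif
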
946 static int
947 vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags)
948 {
949 int runlen;
950 int maxf;
951 int chkb;
952 int maxb;
953 int i;
954 vm_pindex_t pi;
955 vm_page_t maf[vm_pageout_page_count];
956 vm_page_t mab[vm_pageout_page_count];
957 vm_page_t ma[vm_pageout_page_count];
958
959 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
960 pi = p->pindex;
961 while (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) {
962 vm_page_lock_queues();
963 if (object->generation != curgeneration) {
964 return(0);
965 }
966 }
967 maxf = 0;
968 for(i = 1; i < vm_pageout_page_count; i++) {
969 vm_page_t tp;
970
971 if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
972 if ((tp->oflags & VPO_BUSY) ||
973 ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
974 (tp->oflags & VPO_CLEANCHK) == 0) ||
975 (tp->busy != 0))
976 break;
977 vm_page_test_dirty(tp);
978 if ((tp->dirty & tp->valid) == 0) {
979 tp->oflags &= ~VPO_CLEANCHK;
980 break;
981 }
982 maf[ i - 1 ] = tp;
983 maxf++;
984 continue;
985 }
986 break;
987 }
988
989 maxb = 0;
990 chkb = vm_pageout_page_count - maxf;
991 if (chkb) {
992 for(i = 1; i < chkb;i++) {
993 vm_page_t tp;
994
995 if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
996 if ((tp->oflags & VPO_BUSY) ||
997 ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
998 (tp->oflags & VPO_CLEANCHK) == 0) ||
999 (tp->busy != 0))
1000 break;
1001 vm_page_test_dirty(tp);
1002 if ((tp->dirty & tp->valid) == 0) {
1003 tp->oflags &= ~VPO_CLEANCHK;
1004 break;
1005 }
1006 mab[ i - 1 ] = tp;
1007 maxb++;
1008 continue;
1009 }
1010 break;
1011 }
1012 }
1013
1014 for(i = 0; i < maxb; i++) {
1015 int index = (maxb - i) - 1;
1016 ma[index] = mab[i];
1017 ma[index]->oflags &= ~VPO_CLEANCHK;
1018 }
1019 p->oflags &= ~VPO_CLEANCHK;
1020 ma[maxb] = p;
1021 for(i = 0; i < maxf; i++) {
1022 int index = (maxb + i) + 1;
1023 ma[index] = maf[i];
1024 ma[index]->oflags &= ~VPO_CLEANCHK;
1025 }
1026 runlen = maxb + maxf + 1;
1027
1028 vm_pageout_flush(ma, runlen, pagerflags);
1029 for (i = 0; i < runlen; i++) {
1030 if (ma[i]->valid & ma[i]->dirty) {
1031 pmap_remove_write(ma[i]);
1032 ma[i]->oflags |= VPO_CLEANCHK;
1033
1034 /*
1035 * maxf will end up being the actual number of pages
1036 * we wrote out contiguously, non-inclusive of the
1037 * first page. We do not count look-behind pages.
1038 */
1039 if (i >= maxb + 1 && (maxf > i - maxb - 1))
1040 maxf = i - maxb - 1;
1041 }
1042 }
1043 return(maxf + 1);
1044 }
1045
1046 /*
1047 * Note that there is absolutely no sense in writing out
1048 * anonymous objects, so we track down the vnode object
1049 * to write out.
1050 * We invalidate (remove) all pages from the address space
1051 * for semantic correctness.
1052 *
1053 * Note: certain anonymous maps, such as MAP_NOSYNC maps,
1054 * may start out with a NULL object.
1055 */
1056 void
1057 vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size,
1058 boolean_t syncio, boolean_t invalidate)
1059 {
1060 vm_object_t backing_object;
1061 struct vnode *vp;
1062 struct mount *mp;
1063 int flags;
1064
1065 if (object == NULL)
1066 return;
1067 VM_OBJECT_LOCK(object);
1068 while ((backing_object = object->backing_object) != NULL) {
1069 VM_OBJECT_LOCK(backing_object);
1070 offset += object->backing_object_offset;
1071 VM_OBJECT_UNLOCK(object);
1072 object = backing_object;
1073 if (object->size < OFF_TO_IDX(offset + size))
1074 size = IDX_TO_OFF(object->size) - offset;
1075 }
1076 /*
1077 * Flush pages if writing is allowed, invalidate them
1078 * if invalidation requested. Pages undergoing I/O
1079 * will be ignored by vm_object_page_remove().
1080 *
1081 * We cannot lock the vnode and then wait for paging
1082 * to complete without deadlocking against vm_fault.
1083 * Instead we simply call vm_object_page_remove() and
1084 * allow it to block internally on a page-by-page
1085 * basis when it encounters pages undergoing async
1086 * I/O.
1087 */
1088 if (object->type == OBJT_VNODE &&
1089 (object->flags & OBJ_MIGHTBEDIRTY) != 0) {
1090 int vfslocked;
1091 vp = object->handle;
1092 VM_OBJECT_UNLOCK(object);
1093 (void) vn_start_write(vp, &mp, V_WAIT);
1094 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1095 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
1096 flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
1097 flags |= invalidate ? OBJPC_INVAL : 0;
1098 VM_OBJECT_LOCK(object);
1099 vm_object_page_clean(object,
1100 OFF_TO_IDX(offset),
1101 OFF_TO_IDX(offset + size + PAGE_MASK),
1102 flags);
1103 VM_OBJECT_UNLOCK(object);
1104 VOP_UNLOCK(vp, 0, curthread);
1105 VFS_UNLOCK_GIANT(vfslocked);
1106 vn_finished_write(mp);
1107 VM_OBJECT_LOCK(object);
1108 }
1109 if ((object->type == OBJT_VNODE ||
1110 object->type == OBJT_DEVICE) && invalidate) {
1111 boolean_t purge;
1112 purge = old_msync || (object->type == OBJT_DEVICE);
1113 vm_object_page_remove(object,
1114 OFF_TO_IDX(offset),
1115 OFF_TO_IDX(offset + size + PAGE_MASK),
1116 purge ? FALSE : TRUE);
1117 }
1118 VM_OBJECT_UNLOCK(object);
1119 }
1120
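/*
 * Illustrative sketch (not part of the original file): roughly how an
 * msync(2)-style caller drives this interface for one map entry.  The
 * "entry", "addr", "size", and "flags" variables are hypothetical.
 */
#if 0	/* example only */
	vm_object_sync(entry->object.vm_object,
	    entry->offset + (addr - entry->start), size,
	    (flags & MS_SYNC) != 0,		/* syncio */
	    (flags & MS_INVALIDATE) != 0);	/* invalidate */
#endif
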
1121 /*
1122 * vm_object_madvise:
1123 *
1124 * Implements the madvise function at the object/page level.
1125 *
1126 * MADV_WILLNEED (any object)
1127 *
1128 * Activate the specified pages if they are resident.
1129 *
1130 * MADV_DONTNEED (any object)
1131 *
1132 * Deactivate the specified pages if they are resident.
1133 *
1134 * MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects,
1135 * OBJ_ONEMAPPING only)
1136 *
1137 * Deactivate and clean the specified pages if they are
1138 * resident. This permits the process to reuse the pages
1139 * without faulting or the kernel to reclaim the pages
1140 * without I/O.
1141 */
1142 void
1143 vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
1144 {
1145 vm_pindex_t end, tpindex;
1146 vm_object_t backing_object, tobject;
1147 vm_page_t m;
1148
1149 if (object == NULL)
1150 return;
1151 VM_OBJECT_LOCK(object);
1152 end = pindex + count;
1153 /*
1154 * Locate and adjust resident pages
1155 */
1156 for (; pindex < end; pindex += 1) {
1157 relookup:
1158 tobject = object;
1159 tpindex = pindex;
1160 shadowlookup:
1161 /*
1162 * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
1163 * and those pages must be OBJ_ONEMAPPING.
1164 */
1165 if (advise == MADV_FREE) {
1166 if ((tobject->type != OBJT_DEFAULT &&
1167 tobject->type != OBJT_SWAP) ||
1168 (tobject->flags & OBJ_ONEMAPPING) == 0) {
1169 goto unlock_tobject;
1170 }
1171 }
1172 m = vm_page_lookup(tobject, tpindex);
1173 if (m == NULL && advise == MADV_WILLNEED) {
1174 /*
1175 * If the page is cached, reactivate it.
1176 */
1177 m = vm_page_alloc(tobject, tpindex, VM_ALLOC_IFCACHED |
1178 VM_ALLOC_NOBUSY);
1179 }
1180 if (m == NULL) {
1181 /*
1182 * There may be swap even if there is no backing page
1183 */
1184 if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1185 swap_pager_freespace(tobject, tpindex, 1);
1186 /*
1187 * next object
1188 */
1189 backing_object = tobject->backing_object;
1190 if (backing_object == NULL)
1191 goto unlock_tobject;
1192 VM_OBJECT_LOCK(backing_object);
1193 tpindex += OFF_TO_IDX(tobject->backing_object_offset);
1194 if (tobject != object)
1195 VM_OBJECT_UNLOCK(tobject);
1196 tobject = backing_object;
1197 goto shadowlookup;
1198 }
1199 /*
1200 * If the page is busy or not in a normal active state,
1201 * we skip it. If the page is not managed there are no
1202 * page queues to mess with. Things can break if we mess
1203 * with pages in any of the below states.
1204 */
1205 vm_page_lock_queues();
1206 if (m->hold_count ||
1207 m->wire_count ||
1208 (m->flags & PG_UNMANAGED) ||
1209 m->valid != VM_PAGE_BITS_ALL) {
1210 vm_page_unlock_queues();
1211 goto unlock_tobject;
1212 }
1213 if ((m->oflags & VPO_BUSY) || m->busy) {
1214 vm_page_flag_set(m, PG_REFERENCED);
1215 vm_page_unlock_queues();
1216 if (object != tobject)
1217 VM_OBJECT_UNLOCK(object);
1218 m->oflags |= VPO_WANTED;
1219 msleep(m, VM_OBJECT_MTX(tobject), PDROP | PVM, "madvpo", 0);
1220 VM_OBJECT_LOCK(object);
1221 goto relookup;
1222 }
1223 if (advise == MADV_WILLNEED) {
1224 vm_page_activate(m);
1225 } else if (advise == MADV_DONTNEED) {
1226 vm_page_dontneed(m);
1227 } else if (advise == MADV_FREE) {
1228 /*
1229 * Mark the page clean. This will allow the page
1230 * to be freed up by the system. However, such pages
1231 * are often reused quickly by malloc()/free()
1232 * so we do not do anything that would cause
1233 * a page fault if we can help it.
1234 *
1235 * Specifically, we do not try to actually free
1236 * the page now nor do we try to put it in the
1237 * cache (which would cause a page fault on reuse).
1238 *
1239 * But we do make the page as freeable as we
1240 * can without actually taking the step of unmapping
1241 * it.
1242 */
1243 pmap_clear_modify(m);
1244 m->dirty = 0;
1245 m->act_count = 0;
1246 vm_page_dontneed(m);
1247 }
1248 vm_page_unlock_queues();
1249 if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
1250 swap_pager_freespace(tobject, tpindex, 1);
1251 unlock_tobject:
1252 if (tobject != object)
1253 VM_OBJECT_UNLOCK(tobject);
1254 }
1255 VM_OBJECT_UNLOCK(object);
1256 }
1257
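/*
 * Illustrative sketch (not part of the original file): madvise(2)
 * ultimately reduces to calls of this shape on the map entry's
 * object.  "obj", "pstart", and "npages" are hypothetical.
 */
#if 0	/* example only */
	vm_object_madvise(obj, pstart, npages, MADV_WILLNEED);	/* activate */
	vm_object_madvise(obj, pstart, npages, MADV_DONTNEED);	/* deactivate */
	vm_object_madvise(obj, pstart, npages, MADV_FREE);	/* mark clean */
#endif
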
1258 /*
1259 * vm_object_shadow:
1260 *
1261 * Create a new object which is backed by the
1262 * specified existing object range. The source
1263 * object reference is deallocated.
1264 *
1265 * The new object and offset into that object
1266 * are returned in the source parameters.
1267 */
1268 void
1269 vm_object_shadow(
1270 vm_object_t *object, /* IN/OUT */
1271 vm_ooffset_t *offset, /* IN/OUT */
1272 vm_size_t length)
1273 {
1274 vm_object_t source;
1275 vm_object_t result;
1276
1277 source = *object;
1278
1279 /*
1280 * Don't create the new object if the old object isn't shared.
1281 */
1282 if (source != NULL) {
1283 VM_OBJECT_LOCK(source);
1284 if (source->ref_count == 1 &&
1285 source->handle == NULL &&
1286 (source->type == OBJT_DEFAULT ||
1287 source->type == OBJT_SWAP)) {
1288 VM_OBJECT_UNLOCK(source);
1289 return;
1290 }
1291 VM_OBJECT_UNLOCK(source);
1292 }
1293
1294 /*
1295 * Allocate a new object with the given length.
1296 */
1297 result = vm_object_allocate(OBJT_DEFAULT, length);
1298
1299 /*
1300 * The new object shadows the source object, adding a reference to it.
1301 * Our caller changes his reference to point to the new object,
1302 * removing a reference to the source object. Net result: no change
1303 * of reference count.
1304 *
1305 * Try to optimize the result object's page color when shadowing
1306 * in order to maintain page coloring consistency in the combined
1307 * shadowed object.
1308 */
1309 result->backing_object = source;
1310 /*
1311 * Store the offset into the source object, and fix up the offset into
1312 * the new object.
1313 */
1314 result->backing_object_offset = *offset;
1315 if (source != NULL) {
1316 VM_OBJECT_LOCK(source);
1317 LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
1318 source->shadow_count++;
1319 source->generation++;
1320 #if VM_NRESERVLEVEL > 0
1321 result->flags |= source->flags & (OBJ_NEEDGIANT | OBJ_COLORED);
1322 result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) &
1323 ((1 << (VM_NFREEORDER - 1)) - 1);
1324 #else
1325 result->flags |= source->flags & OBJ_NEEDGIANT;
1326 #endif
1327 VM_OBJECT_UNLOCK(source);
1328 }
1329
1330
1331 /*
1332 * Return the new things
1333 */
1334 *offset = 0;
1335 *object = result;
1336 }
1337
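/*
 * Illustrative sketch (not part of the original file): how a
 * copy-on-write path pushes a shadow in front of a map entry using
 * the IN/OUT convention above.  "entry" is a hypothetical
 * vm_map_entry_t.
 */
#if 0	/* example only */
	vm_object_t obj = entry->object.vm_object;
	vm_ooffset_t off = entry->offset;

	vm_object_shadow(&obj, &off, atop(entry->end - entry->start));
	entry->object.vm_object = obj;	/* the new shadow object */
	entry->offset = off;		/* now an offset into the shadow */
#endif
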
1338 /*
1339 * vm_object_split:
1340 *
1341 * Split the pages in a map entry into a new object. This affords
1342 * easier removal of unused pages, and keeps object inheritance from
1343 * having a negative impact on memory usage.
1344 */
1345 void
1346 vm_object_split(vm_map_entry_t entry)
1347 {
1348 vm_page_t m, m_next;
1349 vm_object_t orig_object, new_object, source;
1350 vm_pindex_t idx, offidxstart;
1351 vm_size_t size;
1352
1353 orig_object = entry->object.vm_object;
1354 if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
1355 return;
1356 if (orig_object->ref_count <= 1)
1357 return;
1358 VM_OBJECT_UNLOCK(orig_object);
1359
1360 offidxstart = OFF_TO_IDX(entry->offset);
1361 size = atop(entry->end - entry->start);
1362
1363 /*
1364 * If swap_pager_copy() is later called, it will convert new_object
1365 * into a swap object.
1366 */
1367 new_object = vm_object_allocate(OBJT_DEFAULT, size);
1368
1369 /*
1370 * At this point, the new object is still private, so the order in
1371 * which the original and new objects are locked does not matter.
1372 */
1373 VM_OBJECT_LOCK(new_object);
1374 VM_OBJECT_LOCK(orig_object);
1375 source = orig_object->backing_object;
1376 if (source != NULL) {
1377 VM_OBJECT_LOCK(source);
1378 if ((source->flags & OBJ_DEAD) != 0) {
1379 VM_OBJECT_UNLOCK(source);
1380 VM_OBJECT_UNLOCK(orig_object);
1381 VM_OBJECT_UNLOCK(new_object);
1382 vm_object_deallocate(new_object);
1383 VM_OBJECT_LOCK(orig_object);
1384 return;
1385 }
1386 LIST_INSERT_HEAD(&source->shadow_head,
1387 new_object, shadow_list);
1388 source->shadow_count++;
1389 source->generation++;
1390 vm_object_reference_locked(source); /* for new_object */
1391 vm_object_clear_flag(source, OBJ_ONEMAPPING);
1392 VM_OBJECT_UNLOCK(source);
1393 new_object->backing_object_offset =
1394 orig_object->backing_object_offset + entry->offset;
1395 new_object->backing_object = source;
1396 }
1397 new_object->flags |= orig_object->flags & OBJ_NEEDGIANT;
1398 retry:
1399 if ((m = TAILQ_FIRST(&orig_object->memq)) != NULL) {
1400 if (m->pindex < offidxstart) {
1401 m = vm_page_splay(offidxstart, orig_object->root);
1402 if ((orig_object->root = m)->pindex < offidxstart)
1403 m = TAILQ_NEXT(m, listq);
1404 }
1405 }
1406 vm_page_lock_queues();
1407 for (; m != NULL && (idx = m->pindex - offidxstart) < size;
1408 m = m_next) {
1409 m_next = TAILQ_NEXT(m, listq);
1410
1411 /*
1412 * We must wait for pending I/O to complete before we can
1413 * rename the page.
1414 *
1415 * We do not have to VM_PROT_NONE the page as mappings should
1416 * not be changed by this operation.
1417 */
1418 if ((m->oflags & VPO_BUSY) || m->busy) {
1419 vm_page_flag_set(m, PG_REFERENCED);
1420 vm_page_unlock_queues();
1421 VM_OBJECT_UNLOCK(new_object);
1422 m->oflags |= VPO_WANTED;
1423 msleep(m, VM_OBJECT_MTX(orig_object), PVM, "spltwt", 0);
1424 VM_OBJECT_LOCK(new_object);
1425 goto retry;
1426 }
1427 vm_page_rename(m, new_object, idx);
1428 /* page automatically made dirty by rename and cache handled */
1429 vm_page_busy(m);
1430 }
1431 vm_page_unlock_queues();
1432 if (orig_object->type == OBJT_SWAP) {
1433 /*
1434 * swap_pager_copy() can sleep, in which case the orig_object's
1435 * and new_object's locks are released and reacquired.
1436 */
1437 swap_pager_copy(orig_object, new_object, offidxstart, 0);
1438
1439 /*
1440 * Transfer any cached pages from orig_object to new_object.
1441 */
1442 if (__predict_false(orig_object->cache != NULL))
1443 vm_page_cache_transfer(orig_object, offidxstart,
1444 new_object);
1445 }
1446 VM_OBJECT_UNLOCK(orig_object);
1447 TAILQ_FOREACH(m, &new_object->memq, listq)
1448 vm_page_wakeup(m);
1449 VM_OBJECT_UNLOCK(new_object);
1450 entry->object.vm_object = new_object;
1451 entry->offset = 0LL;
1452 vm_object_deallocate(orig_object);
1453 VM_OBJECT_LOCK(new_object);
1454 }
1455
1456 #define OBSC_TEST_ALL_SHADOWED 0x0001
1457 #define OBSC_COLLAPSE_NOWAIT 0x0002
1458 #define OBSC_COLLAPSE_WAIT 0x0004
1459
1460 static int
1461 vm_object_backing_scan(vm_object_t object, int op)
1462 {
1463 int r = 1;
1464 vm_page_t p;
1465 vm_object_t backing_object;
1466 vm_pindex_t backing_offset_index;
1467
1468 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1469 VM_OBJECT_LOCK_ASSERT(object->backing_object, MA_OWNED);
1470
1471 backing_object = object->backing_object;
1472 backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
1473
1474 /*
1475 * Initial conditions
1476 */
1477 if (op & OBSC_TEST_ALL_SHADOWED) {
1478 /*
1479 * We do not want to have to test for the existence of cache
1480 * or swap pages in the backing object. XXX but with the
1481 * new swapper this would be pretty easy to do.
1482 *
1483 * XXX what about anonymous MAP_SHARED memory that hasn't
1484 * been ZFOD faulted yet? If we do not test for this, the
1485 * shadow test may succeed! XXX
1486 */
1487 if (backing_object->type != OBJT_DEFAULT) {
1488 return (0);
1489 }
1490 }
1491 if (op & OBSC_COLLAPSE_WAIT) {
1492 vm_object_set_flag(backing_object, OBJ_DEAD);
1493 }
1494
1495 /*
1496 * Our scan
1497 */
1498 p = TAILQ_FIRST(&backing_object->memq);
1499 while (p) {
1500 vm_page_t next = TAILQ_NEXT(p, listq);
1501 vm_pindex_t new_pindex = p->pindex - backing_offset_index;
1502
1503 if (op & OBSC_TEST_ALL_SHADOWED) {
1504 vm_page_t pp;
1505
1506 /*
1507 * Ignore pages outside the parent object's range
1508 * and outside the parent object's mapping of the
1509 * backing object.
1510 *
1511 * note that we do not busy the backing object's
1512 * page.
1513 */
1514 if (
1515 p->pindex < backing_offset_index ||
1516 new_pindex >= object->size
1517 ) {
1518 p = next;
1519 continue;
1520 }
1521
1522 /*
1523 * See if the parent has the page or if the parent's
1524 * object pager has the page. If the parent has the
1525 * page but the page is not valid, the parent's
1526 * object pager must have the page.
1527 *
1528 * If this fails, the parent does not completely shadow
1529 * the object and we might as well give up now.
1530 */
1531
1532 pp = vm_page_lookup(object, new_pindex);
1533 if (
1534 (pp == NULL || pp->valid == 0) &&
1535 !vm_pager_has_page(object, new_pindex, NULL, NULL)
1536 ) {
1537 r = 0;
1538 break;
1539 }
1540 }
1541
1542 /*
1543 * Check for busy page
1544 */
1545 if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
1546 vm_page_t pp;
1547
1548 if (op & OBSC_COLLAPSE_NOWAIT) {
1549 if ((p->oflags & VPO_BUSY) ||
1550 !p->valid ||
1551 p->busy) {
1552 p = next;
1553 continue;
1554 }
1555 } else if (op & OBSC_COLLAPSE_WAIT) {
1556 if ((p->oflags & VPO_BUSY) || p->busy) {
1557 vm_page_lock_queues();
1558 vm_page_flag_set(p, PG_REFERENCED);
1559 vm_page_unlock_queues();
1560 VM_OBJECT_UNLOCK(object);
1561 p->oflags |= VPO_WANTED;
1562 msleep(p, VM_OBJECT_MTX(backing_object),
1563 PDROP | PVM, "vmocol", 0);
1564 VM_OBJECT_LOCK(object);
1565 VM_OBJECT_LOCK(backing_object);
1566 /*
1567 * If we slept, anything could have
1568 * happened. Since the object is
1569 * marked dead, the backing offset
1570 * should not have changed so we
1571 * just restart our scan.
1572 */
1573 p = TAILQ_FIRST(&backing_object->memq);
1574 continue;
1575 }
1576 }
1577
1578 KASSERT(
1579 p->object == backing_object,
1580 ("vm_object_backing_scan: object mismatch")
1581 );
1582
1583 /*
1584 * Destroy any associated swap
1585 */
1586 if (backing_object->type == OBJT_SWAP) {
1587 swap_pager_freespace(
1588 backing_object,
1589 p->pindex,
1590 1
1591 );
1592 }
1593
1594 if (
1595 p->pindex < backing_offset_index ||
1596 new_pindex >= object->size
1597 ) {
1598 /*
1599 * Page is out of the parent object's range, we
1600 * can simply destroy it.
1601 */
1602 vm_page_lock_queues();
1603 KASSERT(!pmap_page_is_mapped(p),
1604 ("freeing mapped page %p", p));
1605 if (p->wire_count == 0)
1606 vm_page_free(p);
1607 else
1608 vm_page_remove(p);
1609 vm_page_unlock_queues();
1610 p = next;
1611 continue;
1612 }
1613
1614 pp = vm_page_lookup(object, new_pindex);
1615 if (
1616 (op & OBSC_COLLAPSE_NOWAIT) != 0 &&
1617 (pp != NULL && pp->valid == 0)
1618 ) {
1619 /*
1620 * The page in the parent is not (yet) valid.
1621 * We don't know anything about the state of
1622 * the original page. It might be mapped,
1623 * so we must avoid the next if here.
1624 *
1625 * This is due to a race in vm_fault() where
1626 * we must unbusy the original (backing_obj)
1627 * page before we can (re)lock the parent.
1628 * Hence we can get here.
1629 */
1630 p = next;
1631 continue;
1632 }
1633 if (
1634 pp != NULL ||
1635 vm_pager_has_page(object, new_pindex, NULL, NULL)
1636 ) {
1637 /*
1638 * page already exists in parent OR swap exists
1639 * for this location in the parent. Destroy
1640 * the original page from the backing object.
1641 *
1642 * Leave the parent's page alone
1643 */
1644 vm_page_lock_queues();
1645 KASSERT(!pmap_page_is_mapped(p),
1646 ("freeing mapped page %p", p));
1647 if (p->wire_count == 0)
1648 vm_page_free(p);
1649 else
1650 vm_page_remove(p);
1651 vm_page_unlock_queues();
1652 p = next;
1653 continue;
1654 }
1655
1656 #if VM_NRESERVLEVEL > 0
1657 /*
1658 * Rename the reservation.
1659 */
1660 vm_reserv_rename(p, object, backing_object,
1661 backing_offset_index);
1662 #endif
1663
1664 /*
1665 * Page does not exist in parent, rename the
1666 * page from the backing object to the main object.
1667 *
1668 * If the page was mapped to a process, it can remain
1669 * mapped through the rename.
1670 */
1671 vm_page_lock_queues();
1672 vm_page_rename(p, object, new_pindex);
1673 vm_page_unlock_queues();
1674 /* page automatically made dirty by rename */
1675 }
1676 p = next;
1677 }
1678 return (r);
1679 }
1680
1681
1682 /*
1683 * this version of collapse allows the operation to occur earlier and
1684 * when paging_in_progress is true for an object... This is not a complete
1685 * operation, but should plug 99.9% of the rest of the leaks.
1686 */
1687 static void
1688 vm_object_qcollapse(vm_object_t object)
1689 {
1690 vm_object_t backing_object = object->backing_object;
1691
1692 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1693 VM_OBJECT_LOCK_ASSERT(backing_object, MA_OWNED);
1694
1695 if (backing_object->ref_count != 1)
1696 return;
1697
1698 vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);
1699 }
1700
1701 /*
1702 * vm_object_collapse:
1703 *
1704 * Collapse an object with the object backing it.
1705 * Pages in the backing object are moved into the
1706 * parent, and the backing object is deallocated.
1707 */
1708 void
1709 vm_object_collapse(vm_object_t object)
1710 {
1711 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1712
1713 while (TRUE) {
1714 vm_object_t backing_object;
1715
1716 /*
1717 * Verify that the conditions are right for collapse:
1718 *
1719 * The object exists and the backing object exists.
1720 */
1721 if ((backing_object = object->backing_object) == NULL)
1722 break;
1723
1724 /*
1725 * we check the backing object first, because it is most likely
1726 * not collapsible.
1727 */
1728 VM_OBJECT_LOCK(backing_object);
1729 if (backing_object->handle != NULL ||
1730 (backing_object->type != OBJT_DEFAULT &&
1731 backing_object->type != OBJT_SWAP) ||
1732 (backing_object->flags & OBJ_DEAD) ||
1733 object->handle != NULL ||
1734 (object->type != OBJT_DEFAULT &&
1735 object->type != OBJT_SWAP) ||
1736 (object->flags & OBJ_DEAD)) {
1737 VM_OBJECT_UNLOCK(backing_object);
1738 break;
1739 }
1740
1741 if (
1742 object->paging_in_progress != 0 ||
1743 backing_object->paging_in_progress != 0
1744 ) {
1745 vm_object_qcollapse(object);
1746 VM_OBJECT_UNLOCK(backing_object);
1747 break;
1748 }
1749 /*
1750 * We know that we can either collapse the backing object (if
1751 * the parent is the only reference to it) or (perhaps) have
1752 * the parent bypass the object if the parent happens to shadow
1753 * all the resident pages in the entire backing object.
1754 *
1755 * This is ignoring pager-backed pages such as swap pages.
1756 * vm_object_backing_scan fails the shadowing test in this
1757 * case.
1758 */
1759 if (backing_object->ref_count == 1) {
1760 /*
1761 * If there is exactly one reference to the backing
1762 * object, we can collapse it into the parent.
1763 */
1764 vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);
1765
1766 #if VM_NRESERVLEVEL > 0
1767 /*
1768 * Break any reservations from backing_object.
1769 */
1770 if (__predict_false(!LIST_EMPTY(&backing_object->rvq)))
1771 vm_reserv_break_all(backing_object);
1772 #endif
1773
1774 /*
1775 * Move the pager from backing_object to object.
1776 */
1777 if (backing_object->type == OBJT_SWAP) {
1778 /*
1779 * swap_pager_copy() can sleep, in which case
1780 * the backing_object's and object's locks are
1781 * released and reacquired.
1782 */
1783 swap_pager_copy(
1784 backing_object,
1785 object,
1786 OFF_TO_IDX(object->backing_object_offset), TRUE);
1787
1788 /*
1789 * Free any cached pages from backing_object.
1790 */
1791 if (__predict_false(backing_object->cache != NULL))
1792 vm_page_cache_free(backing_object, 0, 0);
1793 }
1794 /*
1795 * Object now shadows whatever backing_object did.
1796 * Note that the reference to
1797 * backing_object->backing_object moves from within
1798 * backing_object to within object.
1799 */
1800 LIST_REMOVE(object, shadow_list);
1801 backing_object->shadow_count--;
1802 backing_object->generation++;
1803 if (backing_object->backing_object) {
1804 VM_OBJECT_LOCK(backing_object->backing_object);
1805 LIST_REMOVE(backing_object, shadow_list);
1806 LIST_INSERT_HEAD(
1807 &backing_object->backing_object->shadow_head,
1808 object, shadow_list);
1809 /*
1810 * The shadow_count has not changed.
1811 */
1812 backing_object->backing_object->generation++;
1813 VM_OBJECT_UNLOCK(backing_object->backing_object);
1814 }
1815 object->backing_object = backing_object->backing_object;
1816 object->backing_object_offset +=
1817 backing_object->backing_object_offset;
1818
1819 /*
1820 * Discard backing_object.
1821 *
1822 * Since the backing object has no pages, no pager left,
1823 * and no object references within it, all that is
1824 * necessary is to dispose of it.
1825 */
1826 KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
1827 VM_OBJECT_UNLOCK(backing_object);
1828
1829 mtx_lock(&vm_object_list_mtx);
1830 TAILQ_REMOVE(
1831 &vm_object_list,
1832 backing_object,
1833 object_list
1834 );
1835 mtx_unlock(&vm_object_list_mtx);
1836
1837 uma_zfree(obj_zone, backing_object);
1838
1839 object_collapses++;
1840 } else {
1841 vm_object_t new_backing_object;
1842
1843 /*
1844 * If we do not entirely shadow the backing object,
1845 * there is nothing we can do so we give up.
1846 */
1847 if (object->resident_page_count != object->size &&
1848 vm_object_backing_scan(object,
1849 OBSC_TEST_ALL_SHADOWED) == 0) {
1850 VM_OBJECT_UNLOCK(backing_object);
1851 break;
1852 }
1853
1854 /*
1855 * Make the parent shadow the next object in the
1856 * chain. Deallocating backing_object will not remove
1857 * it, since its reference count is at least 2.
1858 */
1859 LIST_REMOVE(object, shadow_list);
1860 backing_object->shadow_count--;
1861 backing_object->generation++;
1862
1863 new_backing_object = backing_object->backing_object;
1864 if ((object->backing_object = new_backing_object) != NULL) {
1865 VM_OBJECT_LOCK(new_backing_object);
1866 LIST_INSERT_HEAD(
1867 &new_backing_object->shadow_head,
1868 object,
1869 shadow_list
1870 );
1871 new_backing_object->shadow_count++;
1872 new_backing_object->generation++;
1873 vm_object_reference_locked(new_backing_object);
1874 VM_OBJECT_UNLOCK(new_backing_object);
1875 object->backing_object_offset +=
1876 backing_object->backing_object_offset;
1877 }
1878
1879 /*
1880 * Drop the reference count on backing_object. Since
1881 * its ref_count was at least 2, it will not vanish.
1882 */
1883 backing_object->ref_count--;
1884 VM_OBJECT_UNLOCK(backing_object);
1885 object_bypasses++;
1886 }
1887
1888 /*
1889 * Try again with this object's new backing object.
1890 */
1891 }
1892 }
1893
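/*
 * Illustrative sketch (not part of the original file): a shadow chain
 *
 *	object -> backing_object -> bottom
 *
 * whose backing_object has ref_count == 1 collapses to
 *
 *	object -> bottom
 *
 * after which backing_object is destroyed.  A caller simply invokes:
 */
#if 0	/* example only */
	VM_OBJECT_LOCK(object);
	vm_object_collapse(object);	/* fold the whole chain inward */
	VM_OBJECT_UNLOCK(object);
#endif
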
1894 /*
1895 * vm_object_page_remove:
1896 *
1897 * For the given object, either frees or invalidates each of the
1898 * specified pages. In general, a page is freed. However, if a
1899 * page is wired for any reason other than the existence of a
1900 * managed, wired mapping, then it may be invalidated but not
1901 * removed from the object. Pages are specified by the given
1902 * range ["start", "end") and Boolean "clean_only". As a
1903 * special case, if "end" is zero, then the range extends from
1904 * "start" to the end of the object. If "clean_only" is TRUE,
1905 * then only the non-dirty pages within the specified range are
1906 * affected.
1907 *
1908 * In general, this operation should only be performed on objects
1909 * that contain managed pages. There are two exceptions. First,
1910 * it may be performed on the kernel and kmem objects. Second,
1911 * it may be used by msync(..., MS_INVALIDATE) to invalidate
1912 * device-backed pages.
1913 *
1914 * The object must be locked.
1915 */
1916 void
1917 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
1918 boolean_t clean_only)
1919 {
1920 vm_page_t p, next;
1921
1922 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1923 if (object->resident_page_count == 0)
1924 goto skipmemq;
1925
1926 /*
1927 * Since physically-backed objects do not use managed pages, we can't
1928 * remove pages from the object (we must instead remove the page
1929 * references, and then destroy the object).
1930 */
1931 KASSERT(object->type != OBJT_PHYS || object == kernel_object ||
1932 object == kmem_object,
1933 ("attempt to remove pages from a physical object"));
1934
1935 vm_object_pip_add(object, 1);
1936 again:
1937 vm_page_lock_queues();
1938 if ((p = TAILQ_FIRST(&object->memq)) != NULL) {
1939 if (p->pindex < start) {
1940 p = vm_page_splay(start, object->root);
1941 if ((object->root = p)->pindex < start)
1942 p = TAILQ_NEXT(p, listq);
1943 }
1944 }
1945 /*
1946 * Assert: the variable p is either (1) the page with the
1947 * least pindex greater than or equal to the parameter pindex
1948 * or (2) NULL.
1949 */
1950 for (;
1951 p != NULL && (p->pindex < end || end == 0);
1952 p = next) {
1953 next = TAILQ_NEXT(p, listq);
1954
1955 if (p->wire_count != 0) {
1956 /* Fictitious pages do not have managed mappings. */
1957 if ((p->flags & PG_FICTITIOUS) == 0)
1958 pmap_remove_all(p);
1959 if (!clean_only)
1960 p->valid = 0;
1961 continue;
1962 }
1963 if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
1964 goto again;
1965 KASSERT((p->flags & PG_FICTITIOUS) == 0,
1966 ("vm_object_page_remove: page %p is fictitious", p));
1967 if (clean_only && p->valid) {
1968 pmap_remove_write(p);
1969 if (p->valid & p->dirty)
1970 continue;
1971 }
1972 pmap_remove_all(p);
1973 vm_page_free(p);
1974 }
1975 vm_page_unlock_queues();
1976 vm_object_pip_wakeup(object);
1977 skipmemq:
1978 if (__predict_false(object->cache != NULL))
1979 vm_page_cache_free(object, start, end);
1980 }
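/*
 * Illustrative only: a minimal sketch of how a caller typically drives
 * vm_object_page_remove(), here shrinking an object to a new size in the
 * way the vnode pager does on truncation.  The helper name is invented;
 * only the VM_OBJECT_LOCK/vm_object_page_remove calls are real KPI.
 */
static void
example_object_truncate(vm_object_t object, vm_pindex_t newsize)
{

	VM_OBJECT_LOCK(object);
	/* Passing end == 0 selects the range from "newsize" to the end. */
	vm_object_page_remove(object, newsize, 0, FALSE);
	if (object->size > newsize)
		object->size = newsize;
	VM_OBJECT_UNLOCK(object);
}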
1981
1982 /*
1983 * Populate the specified range of the object with valid pages. Returns
1984 * TRUE if the range is successfully populated and FALSE otherwise.
1985 *
1986 * Note: This function should be optimized to pass a larger array of
1987 * pages to vm_pager_get_pages() before it is applied to a non-
1988 * OBJT_DEVICE object.
1989 *
1990 * The object must be locked.
1991 */
1992 boolean_t
1993 vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
1994 {
1995 vm_page_t m, ma[1];
1996 vm_pindex_t pindex;
1997 int rv;
1998
1999 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2000 for (pindex = start; pindex < end; pindex++) {
2001 m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL |
2002 VM_ALLOC_RETRY);
2003 if (m->valid != VM_PAGE_BITS_ALL) {
2004 ma[0] = m;
2005 rv = vm_pager_get_pages(object, ma, 1, 0);
2006 m = vm_page_lookup(object, pindex);
2007 if (m == NULL)
2008 break;
2009 if (rv != VM_PAGER_OK) {
2010 vm_page_lock_queues();
2011 vm_page_free(m);
2012 vm_page_unlock_queues();
2013 break;
2014 }
2015 }
2016 /*
2017 * Keep "m" busy because a subsequent iteration may unlock
2018 * the object.
2019 */
2020 }
2021 if (pindex > start) {
2022 m = vm_page_lookup(object, start);
2023 while (m != NULL && m->pindex < pindex) {
2024 vm_page_wakeup(m);
2025 m = TAILQ_NEXT(m, listq);
2026 }
2027 }
2028 return (pindex == end);
2029 }
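/*
 * Illustrative only: vm_object_populate() asserts that the caller holds
 * the object lock, so a minimal wrapper looks like the sketch below.
 * The wrapper name is invented; a FALSE return means some page in the
 * range could not be paged in.
 */
static boolean_t
example_object_prefault(vm_object_t object)
{
	boolean_t rv;

	VM_OBJECT_LOCK(object);
	rv = vm_object_populate(object, 0, object->size);
	VM_OBJECT_UNLOCK(object);
	return (rv);
}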
2030
2031 /*
2032 * Routine: vm_object_coalesce
2033 * Function: Coalesces two objects that back adjoining
2034 * regions of memory into a single object.
2035 *
2036 * Returns TRUE if the objects were combined.
2037 *
2038 * NOTE: At present this only works when the second object
2039 * is NULL; if it were not, which object would we lock first?
2040 *
2041 * Parameters:
2042 * prev_object First object to coalesce
2043 * prev_offset Offset into prev_object
2044 * prev_size Size of reference to prev_object
2045 * next_size Size of reference to the second object
2046 *
2047 * Conditions:
2048 * prev_object must *not* be locked on entry.
2049 */
2050 boolean_t
2051 vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
2052 vm_size_t prev_size, vm_size_t next_size)
2053 {
2054 vm_pindex_t next_pindex;
2055
2056 if (prev_object == NULL)
2057 return (TRUE);
2058 VM_OBJECT_LOCK(prev_object);
2059 if (prev_object->type != OBJT_DEFAULT &&
2060 prev_object->type != OBJT_SWAP) {
2061 VM_OBJECT_UNLOCK(prev_object);
2062 return (FALSE);
2063 }
2064
2065 /*
2066 * Try to collapse the object first
2067 */
2068 vm_object_collapse(prev_object);
2069
2070 /*
2071 * Can't coalesce if the object has more than one reference, is paged
2072 * out, shadows another object, or has a copy elsewhere (any of which
2073 * mean that pages not mapped to prev_entry may be in use anyway).
2074 */
2075 if (prev_object->backing_object != NULL) {
2076 VM_OBJECT_UNLOCK(prev_object);
2077 return (FALSE);
2078 }
2079
2080 prev_size >>= PAGE_SHIFT;
2081 next_size >>= PAGE_SHIFT;
2082 next_pindex = OFF_TO_IDX(prev_offset) + prev_size;
2083
2084 if ((prev_object->ref_count > 1) &&
2085 (prev_object->size != next_pindex)) {
2086 VM_OBJECT_UNLOCK(prev_object);
2087 return (FALSE);
2088 }
2089
2090 /*
2091 * Remove any pages that may still be in the object from a previous
2092 * deallocation.
2093 */
2094 if (next_pindex < prev_object->size) {
2095 vm_object_page_remove(prev_object,
2096 next_pindex,
2097 next_pindex + next_size, FALSE);
2098 if (prev_object->type == OBJT_SWAP)
2099 swap_pager_freespace(prev_object,
2100 next_pindex, next_size);
2101 }
2102
2103 /*
2104 * Extend the object if necessary.
2105 */
2106 if (next_pindex + next_size > prev_object->size)
2107 prev_object->size = next_pindex + next_size;
2108
2109 VM_OBJECT_UNLOCK(prev_object);
2110 return (TRUE);
2111 }
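/*
 * Illustrative only: vm_object_coalesce() is meant for callers such as
 * vm_map_insert() that are extending an anonymous mapping.  The helper
 * below is a sketch with invented names; note that prev_object must be
 * unlocked on entry, since vm_object_coalesce() takes the lock itself.
 */
static vm_object_t
example_extend_anon(vm_object_t prev_object, vm_ooffset_t prev_offset,
    vm_size_t prev_size, vm_size_t next_size)
{

	if (vm_object_coalesce(prev_object, prev_offset, prev_size,
	    next_size))
		return (prev_object);	/* grown in place; reuse it */
	return (NULL);			/* caller must allocate a new object */
}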
2112
2113 void
2114 vm_object_set_writeable_dirty(vm_object_t object)
2115 {
2116 struct vnode *vp;
2117
2118 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2119 if ((object->flags & OBJ_MIGHTBEDIRTY) != 0)
2120 return;
2121 vm_object_set_flag(object, OBJ_MIGHTBEDIRTY);
2122 if (object->type == OBJT_VNODE &&
2123 (vp = (struct vnode *)object->handle) != NULL) {
2124 VI_LOCK(vp);
2125 vp->v_iflag |= VI_OBJDIRTY;
2126 VI_UNLOCK(vp);
2127 }
2128 }
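/*
 * Illustrative only: vm_object_set_writeable_dirty() is typically called
 * from the fault and mmap paths just before a mapping is made writeable.
 * A sketch of the calling convention (the wrapper name is invented):
 */
static void
example_mark_writeable(vm_object_t object)
{

	VM_OBJECT_LOCK(object);
	vm_object_set_writeable_dirty(object);
	VM_OBJECT_UNLOCK(object);
}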
2129
2130 #include "opt_ddb.h"
2131 #ifdef DDB
2132 #include <sys/kernel.h>
2133
2134 #include <sys/cons.h>
2135
2136 #include <ddb/ddb.h>
2137
2138 static int
2139 _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
2140 {
2141 vm_map_t tmpm;
2142 vm_map_entry_t tmpe;
2143 vm_object_t obj;
2144 int entcount;
2145
2146 if (map == NULL)
2147 return 0;
2148
2149 if (entry == NULL) {
2150 tmpe = map->header.next;
2151 entcount = map->nentries;
2152 while (entcount-- && (tmpe != &map->header)) {
2153 if (_vm_object_in_map(map, object, tmpe)) {
2154 return 1;
2155 }
2156 tmpe = tmpe->next;
2157 }
2158 } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
2159 tmpm = entry->object.sub_map;
2160 tmpe = tmpm->header.next;
2161 entcount = tmpm->nentries;
2162 while (entcount-- && tmpe != &tmpm->header) {
2163 if (_vm_object_in_map(tmpm, object, tmpe)) {
2164 return 1;
2165 }
2166 tmpe = tmpe->next;
2167 }
2168 } else if ((obj = entry->object.vm_object) != NULL) {
2169 for (; obj; obj = obj->backing_object)
2170 if (obj == object) {
2171 return 1;
2172 }
2173 }
2174 return 0;
2175 }
2176
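/*
 * The commented-out allproc locking below reflects the ddb context:
 * debugger commands run with the rest of the system stopped, where
 * blocking on a lock could hang the debugger, so the scan is unlocked.
 */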
2177 static int
2178 vm_object_in_map(vm_object_t object)
2179 {
2180 struct proc *p;
2181
2182 /* sx_slock(&allproc_lock); */
2183 FOREACH_PROC_IN_SYSTEM(p) {
2184 if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
2185 continue;
2186 if (_vm_object_in_map(&p->p_vmspace->vm_map, object, NULL)) {
2187 /* sx_sunlock(&allproc_lock); */
2188 return 1;
2189 }
2190 }
2191 /* sx_sunlock(&allproc_lock); */
2192 if (_vm_object_in_map(kernel_map, object, NULL))
2193 return 1;
2194 if (_vm_object_in_map(kmem_map, object, NULL))
2195 return 1;
2196 if (_vm_object_in_map(pager_map, object, NULL))
2197 return 1;
2198 if (_vm_object_in_map(buffer_map, object, NULL))
2199 return 1;
2200 return 0;
2201 }
2202
2203 DB_SHOW_COMMAND(vmochk, vm_object_check)
2204 {
2205 vm_object_t object;
2206
2207 /*
2208 * Make sure that internal objects are in a map somewhere
2209 * and that none have a zero ref count.
2210 */
2211 TAILQ_FOREACH(object, &vm_object_list, object_list) {
2212 if (object->handle == NULL &&
2213 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
2214 if (object->ref_count == 0) {
2215 db_printf("vmochk: internal obj has zero ref count, size: %ld\n",
2216 (long)object->size);
2217 }
2218 if (!vm_object_in_map(object)) {
2219 db_printf(
2220 "vmochk: internal obj is not in a map: "
2221 "ref: %d, size: %lu (0x%lx), backing_object: %p\n",
2222 object->ref_count, (u_long)object->size,
2223 (u_long)object->size,
2224 (void *)object->backing_object);
2225 }
2226 }
2227 }
2228 }
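/*
 * Usage from the ddb prompt (the command takes no argument):
 *
 *	db> show vmochk
 */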
2229
2230 /*
2231 * vm_object_print: [ debug ]
2232 */
2233 DB_SHOW_COMMAND(object, vm_object_print_static)
2234 {
2235 /* XXX convert args. */
2236 vm_object_t object = (vm_object_t)addr;
2237 boolean_t full = have_addr;
2238
2239 vm_page_t p;
2240
2241 /* XXX count is an (unused) arg. Avoid shadowing it. */
2242 #define count was_count
2243
2244 int count;
2245
2246 if (object == NULL)
2247 return;
2248
2249 db_iprintf(
2250 "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x\n",
2251 object, (int)object->type, (uintmax_t)object->size,
2252 object->resident_page_count, object->ref_count, object->flags);
2253 db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n",
2254 object->shadow_count,
2255 object->backing_object ? object->backing_object->ref_count : 0,
2256 object->backing_object, (uintmax_t)object->backing_object_offset);
2257
2258 if (!full)
2259 return;
2260
2261 db_indent += 2;
2262 count = 0;
2263 TAILQ_FOREACH(p, &object->memq, listq) {
2264 if (count == 0)
2265 db_iprintf("memory:=");
2266 else if (count == 6) {
2267 db_printf("\n");
2268 db_iprintf(" ...");
2269 count = 0;
2270 } else
2271 db_printf(",");
2272 count++;
2273
2274 db_printf("(off=0x%jx,page=0x%jx)",
2275 (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
2276 }
2277 if (count != 0)
2278 db_printf("\n");
2279 db_indent -= 2;
2280 }
2281
2282 /* XXX. */
2283 #undef count
2284
2285 /* XXX need this non-static entry for calling from vm_map_print. */
2286 void
2287 vm_object_print(
2288 /* db_expr_t */ long addr,
2289 boolean_t have_addr,
2290 /* db_expr_t */ long count,
2291 char *modif)
2292 {
2293 vm_object_print_static(addr, have_addr, count, modif);
2294 }
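/*
 * Usage from the ddb prompt; supplying an address selects the object and
 * enables the full page listing (the address below is illustrative):
 *
 *	db> show object 0xc12e3a50
 */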
2295
2296 DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
2297 {
2298 vm_object_t object;
2299 vm_pindex_t fidx;
2300 vm_paddr_t pa;
2301 vm_page_t m, prev_m;
2302 int rcount, nl, c;
2303
2304 nl = 0;
2305 TAILQ_FOREACH(object, &vm_object_list, object_list) {
2306 db_printf("new object: %p\n", (void *)object);
2307 if (nl > 18) {
2308 c = cngetc();
2309 if (c != ' ')
2310 return;
2311 nl = 0;
2312 }
2313 nl++;
2314 rcount = 0;
2315 fidx = 0;
2316 pa = -1;
2317 TAILQ_FOREACH(m, &object->memq, listq) {
2318 if (m->pindex > 128)
2319 break;
2320 if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL &&
2321 prev_m->pindex + 1 != m->pindex) {
2322 if (rcount) {
2323 db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2324 (long)fidx, rcount, (long)pa);
2325 if (nl > 18) {
2326 c = cngetc();
2327 if (c != ' ')
2328 return;
2329 nl = 0;
2330 }
2331 nl++;
2332 rcount = 0;
2333 }
2334 }
2335 if (rcount &&
2336 (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
2337 ++rcount;
2338 continue;
2339 }
2340 if (rcount) {
2341 db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2342 (long)fidx, rcount, (long)pa);
2343 if (nl > 18) {
2344 c = cngetc();
2345 if (c != ' ')
2346 return;
2347 nl = 0;
2348 }
2349 nl++;
2350 }
2351 fidx = m->pindex;
2352 pa = VM_PAGE_TO_PHYS(m);
2353 rcount = 1;
2354 }
2355 if (rcount) {
2356 db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
2357 (long)fidx, rcount, (long)pa);
2358 if (nl > 18) {
2359 c = cngetc();
2360 if (c != ' ')
2361 return;
2362 nl = 0;
2363 }
2364 nl++;
2365 }
2366 }
2367 }
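/*
 * Usage from the ddb prompt: "show vmopag" walks every object and prints
 * runs of physically contiguous resident pages; pressing any key other
 * than space at a pause aborts the listing.
 */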
2368 #endif /* DDB */