FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_pageout.c
/*
 * Mach Operating System
 * Copyright (c) 1993,1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * HISTORY
 * $Log: vm_pageout.c,v $
 * Revision 2.24 93/01/14 18:02:07 danner
 *      64bit cleanup.
 *      [92/12/01 af]
 *
 * Revision 2.23 92/08/03 18:02:11 jfriedl
 *      removed silly prototypes
 *      [92/08/02 jfriedl]
 *
 * Revision 2.22 92/05/21 17:26:44 jfriedl
 *      Cleanup to quiet gcc warnings.
 *      [92/05/16 jfriedl]
 *
 * Revision 2.21 91/12/11 08:44:16 jsb
 *      Added vm_pageout_active, vm_pageout_inactive,
 *      and other measurement counters.  Fixed the log.
 *      [91/11/24 rpd]
 *
 * Revision 2.20 91/10/09 16:20:36 af
 *      Added vm_pageout_pause_count, vm_pageout_pause_max technology
 *      so that vm_pageout_burst_wait can decrease as well as increase.
 *      [91/10/04 rpd]
 *
 * Revision 2.19 91/08/28 11:18:54 jsb
 *      Fixed vm_pageout_scan to send pages to the default pager
 *      when memory gets very tight.  This is the same idea as the old
 *      vm_pageout_reserved_external and vm_pageout_reserved_internal,
 *      but with a different implementation that forcibly double-pages.
 *      [91/08/07 rpd]
 *      Precious page support: return precious pages on pageout, use
 *      memory_object_data_return instead of memory_object_data_write
 *      when appropriate.
 *      [91/07/03 14:20:57 dlb]
 *
 * Revision 2.18 91/07/01 08:28:13 jsb
 *      Add missing includes of vm/vm_map.h and kern/thread.h.
 *      [91/06/29 16:53:36 jsb]
 *
 * Revision 2.17 91/06/17 15:49:37 jsb
 *      NORMA_VM: declare definitions for memory_object_data_{initialize,write}
 *      since they won't be provided by interposed-on memory_object_user.h.
 *      [91/06/17 11:13:22 jsb]
 *
 * Revision 2.16 91/05/18 14:41:49 rpd
 *      Fixed vm_pageout_continue to always call vm_pageout_scan.
 *      Revamped vm_pageout_scan.  Now it recalculates vm_page_inactive_target,
 *      gradually moves pages from the active list to the inactive list,
 *      looks at vm_page_free_wanted, handles absent and fictitious pages,
 *      and blocks with a continuation (vm_pageout_scan_continue), which
 *      uses vm_page_laundry_count to adjust vm_pageout_burst_wait.
 *      [91/04/06 rpd]
 *
 *      Changed vm_page_free_wanted to unsigned int.
 *      [91/04/05 rpd]
 *      Added vm_page_grab_fictitious.
 *      [91/03/29 rpd]
 *      Changed vm_page_init.
 *      [91/03/24 rpd]
 *
 * Revision 2.15 91/05/14 17:50:59 mrt
 *      Correcting copyright
 *
 * Revision 2.14 91/03/16 15:06:50 rpd
 *      Modified vm_pageout_scan for further vm_page_deactivate changes.
 *      Also changed it to ignore pages in dead objects.
 *      [91/03/11 rpd]
 *      Added vm_pageout_continue.
 *      [91/01/20 rpd]
 *
 * Revision 2.13 91/02/05 17:59:57 mrt
 *      Changed to new Mach copyright
 *      [91/02/01 16:34:17 mrt]
 *
 * Revision 2.12 91/01/08 16:45:57 rpd
 *      Added net_kmsg_collect.
 *      [91/01/05 rpd]
 *      Added consider_task_collect, consider_thread_collect.
 *      [91/01/03 rpd]
 *
 *      Added stack_collect.
 *      [90/12/31 rpd]
 *      Added continuation argument to thread_block.
 *      [90/12/08 rpd]
 *
 *      Ensure that vm_page_free_target is at least five pages
 *      larger than vm_page_free_min, to avoid vm_page_wait deadlock.
 *      [90/11/19 rpd]
 *
 *      Replaced swapout_threads with consider_zone_gc.
 *      [90/11/11 rpd]
 *
 * Revision 2.11 90/11/05 14:35:03 rpd
 *      Modified vm_pageout_scan for new vm_page_deactivate protocol.
 *      [90/11/04 rpd]
 *
 * Revision 2.10 90/10/12 13:06:53 rpd
 *      Fixed vm_pageout_page to take busy pages.
 *      [90/10/09 rpd]
 *
 *      In vm_pageout_scan, check for new software reference bit
 *      in addition to using pmap_is_referenced.  Remove busy pages
 *      found on the active and inactive queues.
 *      [90/10/08 rpd]
 *
 * Revision 2.9 90/08/27 22:16:02 dbg
 *      Fix error in initial assumptions: vm_pageout_setup must take a
 *      BUSY page, to prevent the page from being scrambled by pagein.
 *      [90/07/26 dbg]
 *
 * Revision 2.8 90/06/19 23:03:22 rpd
 *      Locking fix for vm_pageout_page from dlb/dbg.
 *      [90/06/11 rpd]
 *
 *      Correct initial condition in vm_pageout_page (page is NOT busy).
 *      Fix documentation for vm_pageout_page and vm_pageout_setup.
 *      [90/06/05 dbg]
 *
 *      Fixed vm_object_unlock type in vm_pageout_page.
 *      [90/06/04 rpd]
 *
 * Revision 2.7 90/06/02 15:11:56 rpd
 *      Removed pageout_task and references to kernel_vm_space.
 *      [90/04/29 rpd]
 *
 *      Made vm_pageout_burst_max, vm_pageout_burst_wait tunable.
 *      [90/04/18 rpd]
 *      Converted to new IPC and vm_map_copyin_object.
 *      [90/03/26 23:18:10 rpd]
 *
 * Revision 2.6 90/05/29 18:39:52 rwd
 *      Picked up new vm_pageout_page from dbg.
 *      [90/05/17 rwd]
 *      Rfr change to send multiple pages to pager at once.
 *      [90/04/12 13:49:13 rwd]
 *
 * Revision 2.5 90/05/03 15:53:21 dbg
 *      vm_pageout_page flushes page only if asked; otherwise, it copies
 *      the page.
 *      [90/03/28 dbg]
 *
 *      If an object's pager is not initialized, don't page out to it.
 *      [90/03/21 dbg]
 *
 * Revision 2.4 90/02/22 20:06:48 dbg
 *      PAGE_WAKEUP --> PAGE_WAKEUP_DONE to reflect the fact that it
 *      clears the busy flag.
 *      [89/12/13 dlb]
 *
 * Revision 2.3 90/01/11 11:48:27 dbg
 *      Pick up recent changes from mainline:
 *
 *      Eliminate page lock when writing back a page.
 *      [89/11/09 mwyoung]
 *
 *      Account for paging_offset when setting external page state.
 *      [89/10/16 15:29:08 af]
 *
 *      Improve reserve tuning... it was a little too low.
 *
 *      Remove laundry count computations, as the count is never used.
 *      [89/10/10 mwyoung]
 *
 *      Only attempt to collapse if a memory object has not
 *      been initialized.  Don't bother to PAGE_WAKEUP in
 *      vm_pageout_scan() before writing back a page -- it
 *      gets done in vm_pageout_page().
 *      [89/10/10 mwyoung]
 *
 *      Don't reactivate a page when creating a new memory
 *      object... continue on to page it out immediately.
 *      [89/09/20 mwyoung]
 *
 *      Reverse the sensing of the desperately-short-on-pages tests.
 *      [89/09/19 mwyoung]
 *      Check for absent pages before busy pages in vm_pageout_page().
 *      [89/07/10 00:01:22 mwyoung]
 *
 *      Allow dirty pages to be reactivated.
 *      [89/04/22 mwyoung]
 *
 *      Don't clean pages that are absent, in error, or not dirty in
 *      vm_pageout_page().  These checks were previously issued
 *      elsewhere.
 *      [89/04/22 mwyoung]
 *
 * Revision 2.2 89/09/08 11:28:55 dbg
 *      Reverse test for internal_only pages.  Old sense left pageout
 *      daemon spinning.
 *      [89/08/15 dbg]
 *
 * 18-Jul-89 David Golub (dbg) at Carnegie-Mellon University
 *      Changes for MACH_KERNEL:
 *      . Removed non-XP code.
 *      Count page wiring when sending page to default pager.
 *      Increase reserved page count.
 *      Make 'internal-only' count LARGER than reserved page count.
 *
 * Revision 2.18 89/06/12 14:53:05 jsb
 *      Picked up bug fix (missing splimps) from Sequent via dlb.
 *      [89/06/12 14:39:28 jsb]
 *
 * Revision 2.17 89/04/18 21:27:08 mwyoung
 *      Recent history [mwyoung]:
 *      Keep hint when pages are written out (call
 *      vm_external_state_set).
 *      Use wired-down fictitious page data structure for "holding_page".
 *      History condensation:
 *      Avoid flooding memory managers by using timing [mwyoung].
 *      New pageout daemon for external memory management
 *      system [mwyoung].
 *      [89/04/18 mwyoung]
 *
 */
/*
 * File:        vm/vm_pageout.c
 * Author:      Avadis Tevanian, Jr., Michael Wayne Young
 * Date:        1985
 *
 * The proverbial page-out daemon.
 */

#include <mach_pagemap.h>
#include <norma_vm.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_user.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>
#include <kern/counters.h>
#include <kern/thread.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <machine/vm_tuning.h>
#include <machine/thread.h>             /* for KEEP_STACKS */


#ifndef VM_PAGEOUT_BURST_MAX
#define VM_PAGEOUT_BURST_MAX    10      /* number of pages */
#endif /* VM_PAGEOUT_BURST_MAX */

#ifndef VM_PAGEOUT_BURST_MIN
#define VM_PAGEOUT_BURST_MIN    5       /* number of pages */
#endif /* VM_PAGEOUT_BURST_MIN */

#ifndef VM_PAGEOUT_BURST_WAIT
#define VM_PAGEOUT_BURST_WAIT   30      /* milliseconds per page */
#endif /* VM_PAGEOUT_BURST_WAIT */

#ifndef VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT   200     /* milliseconds */
#endif /* VM_PAGEOUT_EMPTY_WAIT */

#ifndef VM_PAGEOUT_PAUSE_MAX
#define VM_PAGEOUT_PAUSE_MAX    10      /* number of pauses */
#endif /* VM_PAGEOUT_PAUSE_MAX */

/*
 * To obtain a reasonable LRU approximation, the inactive queue
 * needs to be large enough to give pages on it a chance to be
 * referenced a second time.  This macro defines the fraction
 * of active+inactive pages that should be inactive.
 * The pageout daemon uses it to update vm_page_inactive_target.
 *
 * If vm_page_free_count falls below vm_page_free_target and
 * vm_page_inactive_count is below vm_page_inactive_target,
 * then the pageout daemon starts running.
 */

#ifndef VM_PAGE_INACTIVE_TARGET
#define VM_PAGE_INACTIVE_TARGET(avail)  ((avail) * 2 / 3)
#endif /* VM_PAGE_INACTIVE_TARGET */
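
/*
 * Worked example (editorial; not in the original source): with
 * vm_page_active_count + vm_page_inactive_count == 12000, the macro
 * above sets vm_page_inactive_target to 12000 * 2 / 3 == 8000, i.e.
 * two thirds of the pageable pages should sit on the inactive queue.
 */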

/*
 * Once the pageout daemon starts running, it keeps going
 * until vm_page_free_count meets or exceeds vm_page_free_target.
 */

#ifndef VM_PAGE_FREE_TARGET
#define VM_PAGE_FREE_TARGET(free)       (15 + (free) / 80)
#endif /* VM_PAGE_FREE_TARGET */

/*
 * The pageout daemon always starts running once vm_page_free_count
 * falls below vm_page_free_min.
 */

#ifndef VM_PAGE_FREE_MIN
#define VM_PAGE_FREE_MIN(free)          (10 + (free) / 100)
#endif /* VM_PAGE_FREE_MIN */

/*
 * When vm_page_free_count falls below vm_page_free_reserved,
 * only vm-privileged threads can allocate pages.  vm-privilege
 * allows the pageout daemon and default pager (and any other
 * associated threads needed for default pageout) to continue
 * operation by dipping into the reserved pool of pages.
 */

#ifndef VM_PAGE_FREE_RESERVED
#define VM_PAGE_FREE_RESERVED           15
#endif /* VM_PAGE_FREE_RESERVED */

/*
 * When vm_page_free_count falls below vm_pageout_reserved_internal,
 * the pageout daemon no longer trusts external pagers to clean pages.
 * External pagers are probably all wedged waiting for a free page.
 * It forcibly double-pages dirty pages belonging to external objects,
 * getting the pages to the default pager to clean.
 */

#ifndef VM_PAGEOUT_RESERVED_INTERNAL
#define VM_PAGEOUT_RESERVED_INTERNAL(reserve)   ((reserve) - 5)
#endif /* VM_PAGEOUT_RESERVED_INTERNAL */

/*
 * When vm_page_free_count falls below vm_pageout_reserved_really,
 * the pageout daemon stops work entirely to let the default pager
 * catch up (assuming the default pager has pages to clean).
 * Beyond this point, it is too dangerous to consume memory
 * even for memory_object_data_write messages to the default pager.
 */

#ifndef VM_PAGEOUT_RESERVED_REALLY
#define VM_PAGEOUT_RESERVED_REALLY(reserve)     ((reserve) - 10)
#endif /* VM_PAGEOUT_RESERVED_REALLY */
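
/*
 * Putting the thresholds together (editorial; numbers assume the
 * default VM_PAGE_FREE_RESERVED of 15 pages and the macros above):
 * vm_page_free_reserved = 15, vm_pageout_reserved_internal = 10,
 * vm_pageout_reserved_really = 5.  As vm_page_free_count falls, the
 * daemon first stops trusting external pagers (below 10 free pages,
 * dirty external pages are double-paged to the default pager) and
 * finally stops consuming memory altogether (below 5 free pages,
 * while the default pager still has laundry outstanding).
 */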

extern void vm_pageout_continue();
extern void vm_pageout_scan_continue();

unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;

unsigned int vm_pageout_burst_max = 0;
unsigned int vm_pageout_burst_min = 0;
unsigned int vm_pageout_burst_wait = 0;         /* milliseconds per page */
unsigned int vm_pageout_empty_wait = 0;         /* milliseconds */
unsigned int vm_pageout_pause_count = 0;
unsigned int vm_pageout_pause_max = 0;

/*
 * These variables record the pageout daemon's actions:
 * how many pages it looks at and what happens to those pages.
 * No locking needed because only one thread modifies the variables.
 */

unsigned int vm_pageout_active = 0;             /* debugging */
unsigned int vm_pageout_inactive = 0;           /* debugging */
unsigned int vm_pageout_inactive_nolock = 0;    /* debugging */
unsigned int vm_pageout_inactive_busy = 0;      /* debugging */
unsigned int vm_pageout_inactive_absent = 0;    /* debugging */
unsigned int vm_pageout_inactive_used = 0;      /* debugging */
unsigned int vm_pageout_inactive_clean = 0;     /* debugging */
unsigned int vm_pageout_inactive_dirty = 0;     /* debugging */
unsigned int vm_pageout_inactive_double = 0;    /* debugging */

#if NORMA_VM
/*
 * Define them here, since they won't be defined by memory_object_user.h.
 */
extern kern_return_t memory_object_data_initialize();
extern kern_return_t memory_object_data_write();
#endif /* NORMA_VM */

/*
 * Routine:     vm_pageout_setup
 * Purpose:
 *      Set up a page for pageout.
 *
 *      Move or copy the page to a new object, as part
 *      of which it will be sent to its memory manager
 *      in a memory_object_data_write or
 *      memory_object_data_initialize message.
 *
 *      The "paging_offset" argument specifies the offset
 *      of the page within its external memory object.
 *
 *      The "new_object" and "new_offset" arguments
 *      indicate where the page should be moved.
 *
 *      The "flush" argument specifies whether the page
 *      should be flushed from its object.  If not, a
 *      copy of the page is moved to the new object.
 *
 * In/Out conditions:
 *      The page in question must not be on any pageout queues,
 *      and must be busy.  The object to which it belongs
 *      must be unlocked, and the caller must hold a paging
 *      reference to it.  The new_object must not be locked.
 *
 *      If the page is flushed from its original object,
 *      this routine returns a pointer to a place-holder page,
 *      inserted at the same offset, to block out-of-order
 *      requests for the page.  The place-holder page must
 *      be freed after the data_write or initialize message
 *      has been sent.  If the page is copied,
 *      the holding page is VM_PAGE_NULL.
 *
 *      The original page is put on a paging queue and marked
 *      not busy on exit.
 */
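/*
 * Usage sketch (editorial; vm_pageout_page below is the real caller).
 * In the flush case the returned holding page blocks out-of-order
 * requests and must be freed once the message has been sent:
 *
 *      holding_page = vm_pageout_setup(m, paging_offset,
 *                                      new_object, 0, TRUE);
 *      ... send memory_object_data_write/data_return ...
 *      if (holding_page != VM_PAGE_NULL)
 *              VM_PAGE_FREE(holding_page);
 */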
vm_page_t
vm_pageout_setup(m, paging_offset, new_object, new_offset, flush)
    register vm_page_t m;
    vm_offset_t paging_offset;
    register vm_object_t new_object;
    vm_offset_t new_offset;
    boolean_t flush;
{
    register vm_object_t old_object = m->object;
    register vm_page_t holding_page = 0;    /* '= 0' to quiet gcc warnings */
    register vm_page_t new_m;

    assert(m->busy && !m->absent && !m->fictitious);

    /*
     * If we are not flushing the page, allocate a
     * page in the object.  If we cannot get the
     * page, flush instead.
     */
    if (!flush) {
        vm_object_lock(new_object);
        new_m = vm_page_alloc(new_object, new_offset);
        if (new_m == VM_PAGE_NULL)
            flush = TRUE;
        vm_object_unlock(new_object);
    }

    if (flush) {
        /*
         * Create a place-holder page where the old one was,
         * to prevent anyone from attempting to page in this
         * page while we're unlocked.
         */
        while ((holding_page = vm_page_grab_fictitious())
                == VM_PAGE_NULL)
            vm_page_more_fictitious();

        vm_object_lock(old_object);
        vm_page_lock_queues();
        vm_page_remove(m);
        vm_page_unlock_queues();
        PAGE_WAKEUP_DONE(m);

        vm_page_lock_queues();
        vm_page_insert(holding_page, old_object, m->offset);
        vm_page_unlock_queues();

        /*
         * Record that this page has been written out
         */
#if MACH_PAGEMAP
        vm_external_state_set(old_object->existence_info,
                              paging_offset,
                              VM_EXTERNAL_STATE_EXISTS);
#endif /* MACH_PAGEMAP */

        vm_object_unlock(old_object);

        vm_object_lock(new_object);

        /*
         * Move this page into the new object
         */
        vm_page_lock_queues();
        vm_page_insert(m, new_object, new_offset);
        vm_page_unlock_queues();

        m->dirty = TRUE;
        m->precious = FALSE;
        m->page_lock = VM_PROT_NONE;
        m->unlock_request = VM_PROT_NONE;
    }
    else {
        /*
         * Copy the data into the new page,
         * and mark the new page as clean.
         */
        vm_page_copy(m, new_m);

        vm_object_lock(old_object);
        m->dirty = FALSE;
        pmap_clear_modify(m->phys_addr);

        /*
         * Deactivate old page.
         */
        vm_page_lock_queues();
        vm_page_deactivate(m);
        vm_page_unlock_queues();

        PAGE_WAKEUP_DONE(m);

        /*
         * Record that this page has been written out
         */
#if MACH_PAGEMAP
        vm_external_state_set(old_object->existence_info,
                              paging_offset,
                              VM_EXTERNAL_STATE_EXISTS);
#endif /* MACH_PAGEMAP */

        vm_object_unlock(old_object);

        vm_object_lock(new_object);

        /*
         * Use the new page below.
         */
        m = new_m;
        m->dirty = TRUE;
        assert(!m->precious);
        PAGE_WAKEUP_DONE(m);
    }

    /*
     * Make the old page eligible for replacement again; if a
     * user-supplied memory manager fails to release the page,
     * it will be paged out again to the default memory manager.
     *
     * Note that pages written to the default memory manager
     * must be wired down -- in return, it guarantees to free
     * this page, rather than reusing it.
     */
    vm_page_lock_queues();
    vm_stat.pageouts++;
    if (m->laundry) {
        /*
         * vm_pageout_scan is telling us to put this page
         * at the front of the inactive queue, so it will
         * be immediately paged out to the default pager.
         */
        assert(!old_object->internal);
        m->laundry = FALSE;

        queue_enter_first(&vm_page_queue_inactive, m,
                          vm_page_t, pageq);
        m->inactive = TRUE;
        vm_page_inactive_count++;
    } else if (old_object->internal) {
        m->laundry = TRUE;
        vm_page_laundry_count++;

        vm_page_wire(m);
    } else
        vm_page_activate(m);
    vm_page_unlock_queues();

    /*
     * Since IPC operations may block, we drop locks now.
     * [The placeholder page is busy, and we still have
     * paging_in_progress incremented.]
     */
    vm_object_unlock(new_object);

    /*
     * Return the placeholder page to simplify cleanup.
     */
    return (flush ? holding_page : VM_PAGE_NULL);
}

/*
 * Routine:     vm_pageout_page
 * Purpose:
 *      Causes the specified page to be written back to
 *      the appropriate memory object.
 *
 *      The "initial" argument specifies whether this
 *      data is an initialization only, and should use
 *      memory_object_data_initialize instead of
 *      memory_object_data_write.
 *
 *      The "flush" argument specifies whether the page
 *      should be flushed from the object.  If not, a
 *      copy of the data is sent to the memory object.
 *
 * In/out conditions:
 *      The page in question must not be on any pageout queues.
 *      The object to which it belongs must be locked.
 * Implementation:
 *      Move this page to a completely new object, if flushing;
 *      copy to a new page in a new object, if not.
 */
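/*
 * Message selection in the code below (editorial summary):
 *
 *      initial                         -> memory_object_data_initialize
 *      !initial && use_old_pageout     -> memory_object_data_write
 *      otherwise                       -> memory_object_data_return
 */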
void
vm_pageout_page(m, initial, flush)
    register vm_page_t m;
    boolean_t initial;
    boolean_t flush;
{
    vm_map_copy_t copy;
    register vm_object_t old_object;
    register vm_object_t new_object;
    register vm_page_t holding_page;
    vm_offset_t paging_offset;
    kern_return_t rc;
    boolean_t precious_clean;

    assert(m->busy);

    /*
     * Cleaning but not flushing a clean precious page is a
     * no-op.  Remember whether page is clean and precious now
     * because vm_pageout_setup will mark it dirty and not precious.
     *
     * XXX Check if precious_clean && !flush can really happen.
     */
    precious_clean = (!m->dirty) && m->precious;
    if (precious_clean && !flush) {
        PAGE_WAKEUP_DONE(m);
        return;
    }

    /*
     * Verify that we really want to clean this page.
     */
    if (m->absent || m->error || (!m->dirty && !m->precious)) {
        VM_PAGE_FREE(m);
        return;
    }

    /*
     * Create a paging reference to let us play with the object.
     */
    old_object = m->object;
    paging_offset = m->offset + old_object->paging_offset;
    vm_object_paging_begin(old_object);
    vm_object_unlock(old_object);

    /*
     * Allocate a new object into which we can put the page.
     */
    new_object = vm_object_allocate(PAGE_SIZE);

    /*
     * Move the page into the new object.
     */
    holding_page = vm_pageout_setup(m,
                                    paging_offset,
                                    new_object,
                                    0,        /* new offset */
                                    flush);   /* flush */

    rc = vm_map_copyin_object(new_object, 0, PAGE_SIZE, &copy);
    assert(rc == KERN_SUCCESS);

    if (initial || old_object->use_old_pageout) {
        rc = (*(initial ? memory_object_data_initialize
                        : memory_object_data_write))
                (old_object->pager,
                 old_object->pager_request,
                 paging_offset, (pointer_t) copy, PAGE_SIZE);
    }
    else {
        rc = memory_object_data_return(
                 old_object->pager,
                 old_object->pager_request,
                 paging_offset, (pointer_t) copy, PAGE_SIZE,
                 !precious_clean, !flush);
    }

    if (rc != KERN_SUCCESS)
        vm_map_copy_discard(copy);

    /*
     * Clean up.
     */
    vm_object_lock(old_object);
    if (holding_page != VM_PAGE_NULL)
        VM_PAGE_FREE(holding_page);
    vm_object_paging_end(old_object);
}

/*
 * vm_pageout_scan does the dirty work for the pageout daemon.
 * It returns with vm_page_queue_free_lock held and
 * vm_page_free_wanted == 0.
 */
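/*
 * Outline of the loop below (editorial summary):
 *
 *      collect reusable memory (stacks, net kmsgs, tasks, threads,
 *          zones);
 *      loop:
 *          move pages from the active to the inactive queue until
 *              vm_page_inactive_target is met;
 *          if vm_page_free_count >= vm_page_free_target and nobody
 *              is waiting for a page, return (holding the free lock);
 *          if there is nothing to do, or the pagers are behind,
 *              pause (block with a continuation);
 *          examine the first inactive page: reclaim it if absent or
 *              clean-and-not-precious, reactivate it if referenced,
 *              otherwise page it out (double-paging when very tight).
 */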

void vm_pageout_scan()
{
    unsigned int burst_count;

    /*
     * We want to gradually dribble pages from the active queue
     * to the inactive queue.  If we let the inactive queue get
     * very small, and then suddenly dump many pages into it,
     * those pages won't get a sufficient chance to be referenced
     * before we start taking them from the inactive queue.
     *
     * We must limit the rate at which we send pages to the pagers.
     * data_write messages consume memory, for message buffers and
     * for map-copy objects.  If we get too far ahead of the pagers,
     * we can potentially run out of memory.
     *
     * We can use the laundry count to limit directly the number
     * of pages outstanding to the default pager.  A similar
     * strategy for external pagers doesn't work, because
     * external pagers don't have to deallocate the pages sent them,
     * and because we might have to send pages to external pagers
     * even if they aren't processing writes.  So we also
     * use a burst count to limit writes to external pagers.
     *
     * When memory is very tight, we can't rely on external pagers to
     * clean pages.  They probably aren't running, because they
     * aren't vm-privileged.  If we kept sending dirty pages to them,
     * we could exhaust the free list.  However, we can't just ignore
     * pages belonging to external objects, because there might be no
     * pages belonging to internal objects.  Hence, we get the page
     * into an internal object and then immediately double-page it,
     * sending it to the default pager.
     *
     * consider_zone_gc should be last, because the other operations
     * might return memory to zones.  When we pause we use
     * vm_pageout_scan_continue as our continuation, so we will
     * reenter vm_pageout_scan periodically and attempt to reclaim
     * internal memory even if we never reach vm_page_free_target.
     */

#ifdef KEEP_STACKS
    Restart:
#endif /* KEEP_STACKS */
    stack_collect();
    net_kmsg_collect();
    consider_task_collect();
    consider_thread_collect();
    consider_zone_gc();

    for (burst_count = 0;;) {
        register vm_page_t m;
        register vm_object_t object;
        unsigned int free_count;

        /*
         * Recalculate vm_page_inactive_target.
         */
        vm_page_lock_queues();
        vm_page_inactive_target =
            VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
                                    vm_page_inactive_count);

        /*
         * Move pages from active to inactive.
         */
        while ((vm_page_inactive_count < vm_page_inactive_target) &&
               !queue_empty(&vm_page_queue_active)) {
            register vm_object_t obj;

            vm_pageout_active++;
            m = (vm_page_t) queue_first(&vm_page_queue_active);
            assert(m->active && !m->inactive);

            obj = m->object;
            if (!vm_object_lock_try(obj)) {
                /*
                 * Move page to end and continue.
                 */
                queue_remove(&vm_page_queue_active, m,
                             vm_page_t, pageq);
                queue_enter(&vm_page_queue_active, m,
                            vm_page_t, pageq);
                vm_page_unlock_queues();
                vm_page_lock_queues();
                continue;
            }

            /*
             * If the page is busy, then we pull it
             * off the active queue and leave it alone.
             */
            if (m->busy) {
                vm_object_unlock(obj);
                queue_remove(&vm_page_queue_active, m,
                             vm_page_t, pageq);
                m->active = FALSE;
                vm_page_active_count--;
                continue;
            }

            /*
             * Deactivate the page while holding the object
             * locked, so we know the page is still not busy.
             * This should prevent races between pmap_enter
             * and pmap_clear_reference.  The page might be
             * absent or fictitious, but vm_page_deactivate
             * can handle that.
             */
            vm_page_deactivate(m);
            vm_object_unlock(obj);
        }

        /*
         * We are done if we have met our target *and*
         * nobody is still waiting for a page.
         */
        simple_lock(&vm_page_queue_free_lock);
        free_count = vm_page_free_count;
        if ((free_count >= vm_page_free_target) &&
            (vm_page_free_wanted == 0)) {
            vm_page_unlock_queues();
            break;
        }
        simple_unlock(&vm_page_queue_free_lock);

        /*
         * Sometimes we have to pause:
         *   1) No inactive pages - nothing to do.
         *   2) Flow control - wait for pagers to catch up.
         *   3) Extremely low memory - sending out dirty pages
         *      consumes memory.  We don't take the risk of doing
         *      this if the default pager already has work to do.
         */
        if (queue_empty(&vm_page_queue_inactive) ||
            (burst_count >= vm_pageout_burst_max) ||
            (vm_page_laundry_count >= vm_pageout_burst_max) ||
            ((free_count < vm_pageout_reserved_really) &&
             (vm_page_laundry_count > 0))) {
            unsigned int pages, msecs;

            /*
             * vm_pageout_burst_wait is msecs/page.
             * If there is nothing for us to do, we wait
             * at least vm_pageout_empty_wait msecs.
             */
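
            /*
             * Worked example (editorial), using the default
             * tunables above: with vm_page_laundry_count = 10
             * and vm_pageout_burst_wait = 30, we sleep for
             * 10 * 30 = 300 msecs; if instead there is simply
             * nothing to do, we sleep for at least
             * vm_pageout_empty_wait = 200 msecs.
             */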

            if (vm_page_laundry_count > burst_count)
                pages = vm_page_laundry_count;
            else
                pages = burst_count;
            msecs = pages * vm_pageout_burst_wait;

            if (queue_empty(&vm_page_queue_inactive) &&
                (msecs < vm_pageout_empty_wait))
                msecs = vm_pageout_empty_wait;
            vm_page_unlock_queues();

            thread_will_wait_with_timeout(current_thread(), msecs);
            counter(c_vm_pageout_scan_block++);
            thread_block(vm_pageout_scan_continue);
#ifdef KEEP_STACKS
            /*
             * Unfortunately, we don't have call_continuation
             * so we can't rely on tail-recursion.
             */
            vm_pageout_scan_continue();
            goto Restart;
#else /* KEEP_STACKS */
            call_continuation(vm_pageout_scan_continue);
            /*NOTREACHED*/
#endif /* KEEP_STACKS */
        }

        vm_pageout_inactive++;
        m = (vm_page_t) queue_first(&vm_page_queue_inactive);
        assert(!m->active && m->inactive);
        object = m->object;

        /*
         * Try to lock object; since we've got the
         * page queues lock, we can only try for this one.
         */
        if (!vm_object_lock_try(object)) {
            /*
             * Move page to end and continue.
             */
            queue_remove(&vm_page_queue_inactive, m,
                         vm_page_t, pageq);
            queue_enter(&vm_page_queue_inactive, m,
                        vm_page_t, pageq);
            vm_page_unlock_queues();
            vm_pageout_inactive_nolock++;
            continue;
        }

        /*
         * Remove the page from the inactive list.
         */
        queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
        vm_page_inactive_count--;
        m->inactive = FALSE;

        if (m->busy || !object->alive) {
            /*
             * Somebody is already playing with this page.
             * Leave it off the pageout queues.
             */
            vm_page_unlock_queues();
            vm_object_unlock(object);
            vm_pageout_inactive_busy++;
            continue;
        }

        /*
         * If it's absent, we can reclaim the page.
         */
        if (m->absent) {
            vm_pageout_inactive_absent++;
        reclaim_page:
            vm_page_free(m);
            vm_page_unlock_queues();
            vm_object_unlock(object);
            continue;
        }

        /*
         * If it's being used, reactivate.
         * (Fictitious pages are either busy or absent.)
         */
        assert(!m->fictitious);
        if (m->reference || pmap_is_referenced(m->phys_addr)) {
            vm_object_unlock(object);
            vm_page_activate(m);
            vm_stat.reactivations++;
            vm_page_unlock_queues();
            vm_pageout_inactive_used++;
            continue;
        }

        /*
         * Eliminate all mappings.
         */
        m->busy = TRUE;
        pmap_page_protect(m->phys_addr, VM_PROT_NONE);
        if (!m->dirty)
            m->dirty = pmap_is_modified(m->phys_addr);

        /*
         * If it's clean and not precious, we can free the page.
         */
        if (!m->dirty && !m->precious) {
            vm_pageout_inactive_clean++;
            goto reclaim_page;
        }

        /*
         * If we are very low on memory, then we can't
         * rely on an external pager to clean a dirty page,
         * because external pagers are not vm-privileged.
         *
         * The laundry bit tells vm_pageout_setup to
         * put the page back at the front of the inactive
         * queue instead of activating the page.  Hence,
         * we will pick the page up again immediately and
         * resend it to the default pager.
         */
        assert(!m->laundry);
        if ((free_count < vm_pageout_reserved_internal) &&
            !object->internal) {
            m->laundry = TRUE;
            vm_pageout_inactive_double++;
        }
        vm_page_unlock_queues();

        /*
         * If there is no memory object for the page, create
         * one and hand it to the default pager.
         * [First try to collapse, so we don't create
         * one unnecessarily.]
         */
        if (!object->pager_initialized)
            vm_object_collapse(object);
        if (!object->pager_initialized)
            vm_object_pager_create(object);
        if (!object->pager_initialized)
            panic("vm_pageout_scan");

        vm_pageout_inactive_dirty++;
        vm_pageout_page(m, FALSE, TRUE);    /* flush it */
        vm_object_unlock(object);
        burst_count++;
    }
}

void vm_pageout_scan_continue()
{
    /*
     * We just paused to let the pagers catch up.
     * If vm_page_laundry_count is still high,
     * then we aren't waiting long enough.
     * If we have paused some vm_pageout_pause_max times without
     * adjusting vm_pageout_burst_wait, it might be too big,
     * so we decrease it.
     */
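
    /*
     * Worked example (editorial): starting from the default
     * vm_pageout_burst_wait of 30 msecs, each decrease below
     * decays it by a factor of 3/4: 30 -> 22 -> 16 -> 12 -> ...,
     * never going below 1 msec.
     */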

    vm_page_lock_queues();
    if (vm_page_laundry_count > vm_pageout_burst_min) {
        vm_pageout_burst_wait++;
        vm_pageout_pause_count = 0;
    } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
        vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
        if (vm_pageout_burst_wait < 1)
            vm_pageout_burst_wait = 1;
        vm_pageout_pause_count = 0;
    }
    vm_page_unlock_queues();

#ifndef KEEP_STACKS
    vm_pageout_continue();
    /*NOTREACHED*/
#endif /* KEEP_STACKS */
}

/*
 * vm_pageout is the high level pageout daemon.
 */

void vm_pageout_continue()
{
    /*
     * The pageout daemon is never done, so loop forever.
     * We should call vm_pageout_scan at least once each
     * time we are woken, even if vm_page_free_wanted is
     * zero, to check vm_page_free_target and
     * vm_page_inactive_target.
     */
    for (;;) {
        vm_pageout_scan();
        /* we hold vm_page_queue_free_lock now */
        assert(vm_page_free_wanted == 0);

        assert_wait(&vm_page_free_wanted, FALSE);
        simple_unlock(&vm_page_queue_free_lock);
        counter(c_vm_pageout_block++);
        thread_block(vm_pageout_continue);
    }
}
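
/*
 * Editorial note: thread_block above is handed vm_pageout_continue
 * itself as the continuation, so when the daemon is resumed its
 * stack may be discarded and execution restarts at the top of
 * vm_pageout_continue rather than returning through the loop.
 */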

void vm_pageout()
{
    int free_after_reserve;

    current_thread()->vm_privilege = TRUE;
    stack_privilege(current_thread());

    /*
     * Initialize some paging parameters.
     */
    if (vm_pageout_burst_max == 0)
        vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;

    if (vm_pageout_burst_min == 0)
        vm_pageout_burst_min = VM_PAGEOUT_BURST_MIN;

    if (vm_pageout_burst_wait == 0)
        vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;

    if (vm_pageout_empty_wait == 0)
        vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;

    if (vm_page_free_reserved == 0)
        vm_page_free_reserved = VM_PAGE_FREE_RESERVED;

    if (vm_pageout_pause_max == 0)
        vm_pageout_pause_max = VM_PAGEOUT_PAUSE_MAX;

    if (vm_pageout_reserved_internal == 0)
        vm_pageout_reserved_internal =
            VM_PAGEOUT_RESERVED_INTERNAL(vm_page_free_reserved);

    if (vm_pageout_reserved_really == 0)
        vm_pageout_reserved_really =
            VM_PAGEOUT_RESERVED_REALLY(vm_page_free_reserved);

    free_after_reserve = vm_page_free_count - vm_page_free_reserved;

    if (vm_page_free_min == 0)
        vm_page_free_min = vm_page_free_reserved +
            VM_PAGE_FREE_MIN(free_after_reserve);

    if (vm_page_free_target == 0)
        vm_page_free_target = vm_page_free_reserved +
            VM_PAGE_FREE_TARGET(free_after_reserve);

    if (vm_page_free_target < vm_page_free_min + 5)
        vm_page_free_target = vm_page_free_min + 5;
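
    /*
     * Worked example (editorial; the page count is assumed for
     * illustration): on a machine with vm_page_free_count = 8192
     * at startup and the default reserve of 15 pages,
     * free_after_reserve = 8177, so
     * vm_page_free_min = 15 + (10 + 8177/100) = 106 pages and
     * vm_page_free_target = 15 + (15 + 8177/80) = 132 pages;
     * the target already exceeds vm_page_free_min + 5, so it
     * is left alone.
     */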

    /*
     * vm_pageout_scan will set vm_page_inactive_target.
     */

    vm_pageout_continue();
    /*NOTREACHED*/
}