sys/vm/vm_pageout.c
1 /*
2 * Mach Operating System
3 * Copyright (c) 1987-1993 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
19 * School of Computer Science
20 * Carnegie Mellon University
21 * Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie Mellon
24 * the rights to redistribute these changes.
25 */
26 /*
27 * HISTORY
28 * $Log: vm_pageout.c,v $
29 * Revision 2.25 93/11/17 18:57:15 dbg
30 * Declared non-returning functions as type 'no_return'.
31 * [93/05/04 dbg]
32 *
33 * Added ANSI prototypes. Added AST_KERNEL_CHECK in appropriate
34 * places in pageout scan loops. Removed KEEP_STACKS code.
35 * [93/01/28 dbg]
36 *
37 * Revision 2.24 93/01/14 18:02:07 danner
38 * 64bit cleanup.
39 * [92/12/01 af]
40 *
41 * Revision 2.23 92/08/03 18:02:11 jfriedl
42 * removed silly prototypes
43 * [92/08/02 jfriedl]
44 *
45 * Revision 2.22 92/05/21 17:26:44 jfriedl
46 * Cleanup to quiet gcc warnings.
47 * [92/05/16 jfriedl]
48 *
49 * Revision 2.21 91/12/11 08:44:16 jsb
50 * Added vm_pageout_active, vm_pageout_inactive,
51 * and other measurement counters. Fixed the log.
52 * [91/11/24 rpd]
53 *
54 * Revision 2.20 91/10/09 16:20:36 af
55 * Added vm_pageout_pause_count, vm_pageout_pause_max technology
56 * so that vm_pageout_burst_wait can decrease as well as increase.
57 * [91/10/04 rpd]
58 *
59 * Revision 2.19 91/08/28 11:18:54 jsb
60 * Fixed vm_pageout_scan to send pages to the default pager
61 * when memory gets very tight. This is the same idea as the old
62 * vm_pageout_reserved_external and vm_pageout_reserved_internal,
63 * but with a different implementation that forcibly double-pages.
64 * [91/08/07 rpd]
65 * Precious page support: return precious pages on pageout, use
66 * memory_object_data_return instead of memory_object_data_write
67 * when appropriate,
68 * [91/07/03 14:20:57 dlb]
69 *
70 * Revision 2.18 91/07/01 08:28:13 jsb
71 * Add missing includes of vm/vm_map.h and kern/thread.h.
72 * [91/06/29 16:53:36 jsb]
73 *
74 * Revision 2.17 91/06/17 15:49:37 jsb
75 * NORMA_VM: declare definitions for memory_object_data_{initialize,write}
76 * since they won't be provided by interposed-on memory_object_user.h.
77 * [91/06/17 11:13:22 jsb]
78 *
79 * Revision 2.16 91/05/18 14:41:49 rpd
80 * Fixed vm_pageout_continue to always call vm_pageout_scan.
81 * Revamped vm_pageout_scan. Now it recalculates vm_page_inactive_target,
82 * gradually moves pages from the active list to the inactive list,
83 * looks at vm_page_free_wanted, handles absent and fictitious pages,
84 * and blocks with a continuation (vm_pageout_scan_continue), which
85 * uses vm_page_laundry_count to adjust vm_pageout_burst_wait.
86 * [91/04/06 rpd]
87 *
88 * Changed vm_page_free_wanted to unsigned int.
89 * [91/04/05 rpd]
90 * Added vm_page_grab_fictitious.
91 * [91/03/29 rpd]
92 * Changed vm_page_init.
93 * [91/03/24 rpd]
94 *
95 * Revision 2.15 91/05/14 17:50:59 mrt
96 * Correcting copyright
97 *
98 * Revision 2.14 91/03/16 15:06:50 rpd
99 * Modified vm_pageout_scan for further vm_page_deactivate changes.
100 * Also changed it to ignore pages in dead objects.
101 * [91/03/11 rpd]
102 * Added vm_pageout_continue.
103 * [91/01/20 rpd]
104 *
105 * Revision 2.13 91/02/05 17:59:57 mrt
106 * Changed to new Mach copyright
107 * [91/02/01 16:34:17 mrt]
108 *
109 * Revision 2.12 91/01/08 16:45:57 rpd
110 * Added net_kmsg_collect.
111 * [91/01/05 rpd]
112 * Added consider_task_collect, consider_thread_collect.
113 * [91/01/03 rpd]
114 *
115 * Added stack_collect.
116 * [90/12/31 rpd]
117 * Added continuation argument to thread_block.
118 * [90/12/08 rpd]
119 *
120 * Ensure that vm_page_free_target is at least five pages
121 * larger than vm_page_free_min, to avoid vm_page_wait deadlock.
122 * [90/11/19 rpd]
123 *
124 * Replaced swapout_threads with consider_zone_gc.
125 * [90/11/11 rpd]
126 *
127 * Revision 2.11 90/11/05 14:35:03 rpd
128 * Modified vm_pageout_scan for new vm_page_deactivate protocol.
129 * [90/11/04 rpd]
130 *
131 * Revision 2.10 90/10/12 13:06:53 rpd
132 * Fixed vm_pageout_page to take busy pages.
133 * [90/10/09 rpd]
134 *
135 * In vm_pageout_scan, check for new software reference bit
136 * in addition to using pmap_is_referenced. Remove busy pages
137 * found on the active and inactive queues.
138 * [90/10/08 rpd]
139 *
140 * Revision 2.9 90/08/27 22:16:02 dbg
141 * Fix error in initial assumptions: vm_pageout_setup must take a
142 * BUSY page, to prevent the page from being scrambled by pagein.
143 * [90/07/26 dbg]
144 *
145 * Revision 2.8 90/06/19 23:03:22 rpd
146 * Locking fix for vm_pageout_page from dlb/dbg.
147 * [90/06/11 rpd]
148 *
149 * Correct initial condition in vm_pageout_page (page is NOT busy).
150 * Fix documentation for vm_pageout_page and vm_pageout_setup.
151 * [90/06/05 dbg]
152 *
153 * Fixed vm_object_unlock type in vm_pageout_page.
154 * [90/06/04 rpd]
155 *
156 * Revision 2.7 90/06/02 15:11:56 rpd
157 * Removed pageout_task and references to kernel_vm_space.
158 * [90/04/29 rpd]
159 *
160 * Made vm_pageout_burst_max, vm_pageout_burst_wait tunable.
161 * [90/04/18 rpd]
162 * Converted to new IPC and vm_map_copyin_object.
163 * [90/03/26 23:18:10 rpd]
164 *
165 * Revision 2.6 90/05/29 18:39:52 rwd
166 * Picked up new vm_pageout_page from dbg.
167 * [90/05/17 rwd]
168 * Rfr change to send multiple pages to pager at once.
169 * [90/04/12 13:49:13 rwd]
170 *
171 * Revision 2.5 90/05/03 15:53:21 dbg
172 * vm_pageout_page flushes page only if asked; otherwise, it copies
173 * the page.
174 * [90/03/28 dbg]
175 *
176 * If an object's pager is not initialized, don't page out to it.
177 * [90/03/21 dbg]
178 *
179 * Revision 2.4 90/02/22 20:06:48 dbg
180 * PAGE_WAKEUP --> PAGE_WAKEUP_DONE to reflect the fact that it
181 * clears the busy flag.
182 * [89/12/13 dlb]
183 *
184 * Revision 2.3 90/01/11 11:48:27 dbg
185 * Pick up recent changes from mainline:
186 *
187 * Eliminate page lock when writing back a page.
188 * [89/11/09 mwyoung]
189 *
190 * Account for paging_offset when setting external page state.
191 * [89/10/16 15:29:08 af]
192 *
193 * Improve reserve tuning... it was a little too low.
194 *
195 * Remove laundry count computations, as the count is never used.
196 * [89/10/10 mwyoung]
197 *
198 * Only attempt to collapse if a memory object has not
199 * been initialized. Don't bother to PAGE_WAKEUP in
200 * vm_pageout_scan() before writing back a page -- it
201 * gets done in vm_pageout_page().
202 * [89/10/10 mwyoung]
203 *
204 * Don't reactivate a page when creating a new memory
205 * object... continue on to page it out immediately.
206 * [89/09/20 mwyoung]
207 *
208 * Reverse the sensing of the desperately-short-on-pages tests.
209 * [89/09/19 mwyoung]
210 * Check for absent pages before busy pages in vm_pageout_page().
211 * [89/07/10 00:01:22 mwyoung]
212 *
213 * Allow dirty pages to be reactivated.
214 * [89/04/22 mwyoung]
215 *
216 * Don't clean pages that are absent, in error, or not dirty in
217 * vm_pageout_page(). These checks were previously issued
218 * elsewhere.
219 * [89/04/22 mwyoung]
220 *
221 * Revision 2.2 89/09/08 11:28:55 dbg
222 * Reverse test for internal_only pages. Old sense left pageout
223 * daemon spinning.
224 * [89/08/15 dbg]
225 *
226 * 18-Jul-89 David Golub (dbg) at Carnegie-Mellon University
227 * Changes for MACH_KERNEL:
228 * . Removed non-XP code.
229 * Count page wiring when sending page to default pager.
230 * Increase reserved page count.
231 * Make 'internal-only' count LARGER than reserved page count.
232 *
233 * Revision 2.18 89/06/12 14:53:05 jsb
234 * Picked up bug fix (missing splimps) from Sequent via dlb.
235 * [89/06/12 14:39:28 jsb]
236 *
237 * Revision 2.17 89/04/18 21:27:08 mwyoung
238 * Recent history [mwyoung]:
239 * Keep hint when pages are written out (call
240 * vm_external_state_set).
241 * Use wired-down fictitious page data structure for "holding_page".
242 * History condensation:
243 * Avoid flooding memory managers by using timing [mwyoung].
244 * New pageout daemon for external memory management
245 * system [mwyoung].
246 * [89/04/18 mwyoung]
247 *
248 */
249 /*
250 * File: vm/vm_pageout.c
251 * Author: Avadis Tevanian, Jr., Michael Wayne Young
252 * Date: 1985
253 *
254 * The proverbial page-out daemon.
255 */
256
257 #include <mach_pagemap.h>
258 #include <norma_vm.h>
259
260 #include <mach/mach_types.h>
261 #include <mach/memory_object.h>
262 #include <mach/memory_object_default.h>
263 #include <mach/memory_object_user.h>
264 #include <mach/vm_param.h>
265 #include <mach/vm_statistics.h>
266 #include <kern/counters.h>
267 #include <kern/stack.h>
268 #include <kern/thread.h>
269 #include <vm/pmap.h>
270 #include <vm/vm_map.h>
271 #include <vm/vm_object.h>
272 #include <vm/vm_page.h>
273 #include <vm/vm_pageout.h>
274 #include <device/net_io.h> /* for net_kmsg_collect */
275 #include <machine/vm_tuning.h>
276
277
278
279 #ifndef VM_PAGEOUT_BURST_MAX
280 #define VM_PAGEOUT_BURST_MAX 10 /* number of pages */
281 #endif /* VM_PAGEOUT_BURST_MAX */
282
283 #ifndef VM_PAGEOUT_BURST_MIN
284 #define VM_PAGEOUT_BURST_MIN 5 /* number of pages */
285 #endif /* VM_PAGEOUT_BURST_MIN */
286
287 #ifndef VM_PAGEOUT_BURST_WAIT
288 #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */
289 #endif /* VM_PAGEOUT_BURST_WAIT */
290
291 #ifndef VM_PAGEOUT_EMPTY_WAIT
292 #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */
293 #endif /* VM_PAGEOUT_EMPTY_WAIT */
294
295 #ifndef VM_PAGEOUT_PAUSE_MAX
296 #define VM_PAGEOUT_PAUSE_MAX 10 /* number of pauses */
297 #endif /* VM_PAGEOUT_PAUSE_MAX */
298
299 /*
300 * To obtain a reasonable LRU approximation, the inactive queue
301 * needs to be large enough to give pages on it a chance to be
302 * referenced a second time. This macro defines the fraction
303 * of active+inactive pages that should be inactive.
304 * The pageout daemon uses it to update vm_page_inactive_target.
305 *
306 * If vm_page_free_count falls below vm_page_free_target and
307 * vm_page_inactive_count is below vm_page_inactive_target,
308 * then the pageout daemon starts running.
309 */
310
311 #ifndef VM_PAGE_INACTIVE_TARGET
312 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 2 / 3)
313 #endif /* VM_PAGE_INACTIVE_TARGET */
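/*
 * Worked example (arbitrary page counts, for illustration only): with
 * 2400 active and 600 inactive pages, avail = 3000 and the target
 * becomes 3000 * 2 / 3 = 2000, so the daemon keeps deactivating pages
 * until roughly two thirds of the pageable pages sit on the inactive
 * queue.
 */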
314
315 /*
316 * Once the pageout daemon starts running, it keeps going
317 * until vm_page_free_count meets or exceeds vm_page_free_target.
318 */
319
320 #ifndef VM_PAGE_FREE_TARGET
321 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
322 #endif /* VM_PAGE_FREE_TARGET */
323
324 /*
325 * The pageout daemon always starts running once vm_page_free_count
326 * falls below vm_page_free_min.
327 */
328
329 #ifndef VM_PAGE_FREE_MIN
330 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
331 #endif /* VM_PAGE_FREE_MIN */
332
333 /*
334 * When vm_page_free_count falls below vm_page_free_reserved,
335 * only vm-privileged threads can allocate pages. vm-privilege
336 * allows the pageout daemon and default pager (and any other
337 * associated threads needed for default pageout) to continue
338 * operation by dipping into the reserved pool of pages.
339 */
340
341 #ifndef VM_PAGE_FREE_RESERVED
342 #define VM_PAGE_FREE_RESERVED 15
343 #endif /* VM_PAGE_FREE_RESERVED */
344
345 /*
346 * When vm_page_free_count falls below vm_pageout_reserved_internal,
347 * the pageout daemon no longer trusts external pagers to clean pages.
348 * External pagers are probably all wedged waiting for a free page.
349 * It forcibly double-pages dirty pages belonging to external objects,
350 * getting the pages to the default pager to clean.
351 */
352
353 #ifndef VM_PAGEOUT_RESERVED_INTERNAL
354 #define VM_PAGEOUT_RESERVED_INTERNAL(reserve) ((reserve) - 5)
355 #endif /* VM_PAGEOUT_RESERVED_INTERNAL */
356
357 /*
358 * When vm_page_free_count falls below vm_pageout_reserved_really,
359 * the pageout daemon stops work entirely to let the default pager
360 * catch up (assuming the default pager has pages to clean).
361 * Beyond this point, it is too dangerous to consume memory
362 * even for memory_object_data_write messages to the default pager.
363 */
364
365 #ifndef VM_PAGEOUT_RESERVED_REALLY
366 #define VM_PAGEOUT_RESERVED_REALLY(reserve) ((reserve) - 10)
367 #endif /* VM_PAGEOUT_RESERVED_REALLY */
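/*
 * With the default values, and assuming none of these macros are
 * overridden at build time, the thresholds form a ladder:
 * vm_page_free_reserved = 15, vm_pageout_reserved_internal = 15 - 5 = 10,
 * vm_pageout_reserved_really = 15 - 10 = 5. As the free count drops
 * through 15, 10 and 5 pages, first only vm-privileged threads may
 * allocate, then dirty external pages are double-paged to the default
 * pager, and finally the daemon pauses until the default pager catches up.
 */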
368
369 no_return vm_pageout_continue(void); /* forward */
370 no_return vm_pageout_scan_continue(void); /* forward */
371
372 unsigned int vm_pageout_reserved_internal = 0;
373 unsigned int vm_pageout_reserved_really = 0;
374
375 unsigned int vm_pageout_burst_max = 0;
376 unsigned int vm_pageout_burst_min = 0;
377 unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
378 unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
379 unsigned int vm_pageout_pause_count = 0;
380 unsigned int vm_pageout_pause_max = 0;
381
382 /*
383 * These variables record the pageout daemon's actions:
384 * how many pages it looks at and what happens to those pages.
385 * No locking needed because only one thread modifies the variables.
386 */
387
388 unsigned int vm_pageout_active = 0; /* debugging */
389 unsigned int vm_pageout_inactive = 0; /* debugging */
390 unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
391 unsigned int vm_pageout_inactive_busy = 0; /* debugging */
392 unsigned int vm_pageout_inactive_absent = 0; /* debugging */
393 unsigned int vm_pageout_inactive_used = 0; /* debugging */
394 unsigned int vm_pageout_inactive_clean = 0; /* debugging */
395 unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
396 unsigned int vm_pageout_inactive_double = 0; /* debugging */
397
398 #if NORMA_VM
399 /*
400 * Define them here, since they won't be defined by memory_object_user.h.
401 */
402 extern kern_return_t memory_object_data_initialize();
403 extern kern_return_t memory_object_data_write();
404 #endif /* NORMA_VM */
405
406 /*
407 * Routine: vm_pageout_setup
408 * Purpose:
409 * Set up a page for pageout.
410 *
411 * Move or copy the page to a new object, as part
412 * of which it will be sent to its memory manager
413 * in a memory_object_data_write or memory_object_data_initialize
414 * message.
415 *
416 * The "paging_offset" argument specifies the offset
417 * of the page within its external memory object.
418 *
419 * The "new_object" and "new_offset" arguments
420 * indicate where the page should be moved.
421 *
422 * The "flush" argument specifies whether the page
423 * should be flushed from its object. If not, a
424 * copy of the page is moved to the new object.
425 *
426 * In/Out conditions:
427 * The page in question must not be on any pageout queues,
428 * and must be busy. The object to which it belongs
429 * must be unlocked, and the caller must hold a paging
430 * reference to it. The new_object must not be locked.
431 *
432 * If the page is flushed from its original object,
433 * this routine returns a pointer to a place-holder page,
434 * inserted at the same offset, to block out-of-order
435 * requests for the page. The place-holder page must
436 * be freed after the data_write or initialize message
437 * has been sent. If the page is copied,
438 * the holding page is VM_PAGE_NULL.
439 *
440 * The original page is put on a paging queue and marked
441 * not busy on exit.
442 */
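/*
 * Sketch of the expected call sequence (in this file the only caller is
 * vm_pageout_page, below): the caller marks the page busy, takes a paging
 * reference on its object and unlocks it, allocates a one-page new_object,
 * then calls vm_pageout_setup(m, paging_offset, new_object, 0, flush).
 * After sending the data_write/data_return/initialize message it re-locks
 * the old object, frees the returned holding page (if any), and drops the
 * paging reference.
 */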
443 vm_page_t
444 vm_pageout_setup(
445 register vm_page_t m,
446 vm_offset_t paging_offset,
447 register vm_object_t new_object,
448 vm_offset_t new_offset,
449 boolean_t flush)
450 {
451 register vm_object_t old_object = m->object;
452 register vm_page_t holding_page = 0; /*'=0'to quiet gcc warnings*/
453 register vm_page_t new_m;
454
455 assert(m->busy && !m->absent && !m->fictitious);
456
457 /*
458 * If we are not flushing the page, allocate a
459 * page in the object. If we cannot get the
460 * page, flush instead.
461 */
462 if (!flush) {
463 vm_object_lock(new_object);
464 new_m = vm_page_alloc(new_object, new_offset);
465 if (new_m == VM_PAGE_NULL)
466 flush = TRUE;
467 vm_object_unlock(new_object);
468 }
469
470 if (flush) {
471 /*
472 * Create a place-holder page where the old one was,
473 * to prevent anyone from attempting to page in this
474 * page while we`re unlocked.
475 */
476 while ((holding_page = vm_page_grab_fictitious())
477 == VM_PAGE_NULL)
478 vm_page_more_fictitious();
479
480 vm_object_lock(old_object);
481 vm_page_lock_queues();
482 vm_page_remove(m);
483 vm_page_unlock_queues();
484 PAGE_WAKEUP_DONE(m);
485
486 vm_page_lock_queues();
487 vm_page_insert(holding_page, old_object, m->offset);
488 vm_page_unlock_queues();
489
490 /*
491 * Record that this page has been written out
492 */
493 #if MACH_PAGEMAP
494 vm_external_state_set(old_object->existence_info,
495 paging_offset,
496 VM_EXTERNAL_STATE_EXISTS);
497 #endif /* MACH_PAGEMAP */
498
499 vm_object_unlock(old_object);
500
501 vm_object_lock(new_object);
502
503 /*
504 * Move this page into the new object
505 */
506
507 vm_page_lock_queues();
508 vm_page_insert(m, new_object, new_offset);
509 vm_page_unlock_queues();
510
511 m->dirty = TRUE;
512 m->precious = FALSE;
513 m->page_lock = VM_PROT_NONE;
514 m->unlock_request = VM_PROT_NONE;
515 }
516 else {
517 /*
518 * Copy the data into the new page,
519 * and mark the new page as clean.
520 */
521 vm_page_copy(m, new_m);
522
523 vm_object_lock(old_object);
524 m->dirty = FALSE;
525 pmap_clear_modify(m->phys_addr);
526
527 /*
528 * Deactivate old page.
529 */
530 vm_page_lock_queues();
531 vm_page_deactivate(m);
532 vm_page_unlock_queues();
533
534 PAGE_WAKEUP_DONE(m);
535
536 /*
537 * Record that this page has been written out
538 */
539
540 #if MACH_PAGEMAP
541 vm_external_state_set(old_object->existence_info,
542 paging_offset,
543 VM_EXTERNAL_STATE_EXISTS);
544 #endif /* MACH_PAGEMAP */
545
546 vm_object_unlock(old_object);
547
548 vm_object_lock(new_object);
549
550 /*
551 * Use the new page below.
552 */
553 m = new_m;
554 m->dirty = TRUE;
555 assert(!m->precious);
556 PAGE_WAKEUP_DONE(m);
557 }
558
559 /*
560 * Make the old page eligible for replacement again; if a
561 * user-supplied memory manager fails to release the page,
562 * it will be paged out again to the default memory manager.
563 *
564 * Note that pages written to the default memory manager
565 * must be wired down -- in return, it guarantees to free
566 * this page, rather than reusing it.
567 */
568
569 vm_page_lock_queues();
570 vm_stat.pageouts++;
571 if (m->laundry) {
572 /*
573 * vm_pageout_scan is telling us to put this page
574 * at the front of the inactive queue, so it will
575 * be immediately paged out to the default pager.
576 */
577
578 assert(!old_object->internal);
579 m->laundry = FALSE;
580
581 queue_enter_first(&vm_page_queue_inactive, m,
582 vm_page_t, pageq);
583 m->inactive = TRUE;
584 vm_page_inactive_count++;
585 } else if (old_object->internal) {
586 m->laundry = TRUE;
587 vm_page_laundry_count++;
588
589 vm_page_wire(m);
590 } else
591 vm_page_activate(m);
592 vm_page_unlock_queues();
593
594 /*
595 * Since IPC operations may block, we drop locks now.
596 * [The placeholder page is busy, and we still have
597 * paging_in_progress incremented.]
598 */
599
600 vm_object_unlock(new_object);
601
602 /*
603 * Return the placeholder page to simplify cleanup.
604 */
605 return (flush ? holding_page : VM_PAGE_NULL);
606 }
607
608 /*
609 * Routine: vm_pageout_page
610 * Purpose:
611 * Causes the specified page to be written back to
612 * the appropriate memory object.
613 *
614 * The "initial" argument specifies whether this
615 * data is an initialization only, and should use
616 * memory_object_data_initialize instead of
617 * memory_object_data_write.
618 *
619 * The "flush" argument specifies whether the page
620 * should be flushed from the object. If not, a
621 * copy of the data is sent to the memory object.
622 *
623 * In/out conditions:
624 * The page in question must not be on any pageout queues.
625 * The object to which it belongs must be locked.
626 * Implementation:
627 * Move this page to a completely new object, if flushing;
628 * copy to a new page in a new object, if not.
629 */
630 void
631 vm_pageout_page(
632 register vm_page_t m,
633 boolean_t initial,
634 boolean_t flush)
635 {
636 vm_map_copy_t copy;
637 register vm_object_t old_object;
638 register vm_object_t new_object;
639 register vm_page_t holding_page;
640 vm_offset_t paging_offset;
641 kern_return_t rc;
642 boolean_t precious_clean;
643
644 assert(m->busy);
645
646 /*
647 * Cleaning but not flushing a clean precious page is a
648 * no-op. Remember whether page is clean and precious now
649 * because vm_pageout_setup will mark it dirty and not precious.
650 *
651 * XXX Check if precious_clean && !flush can really happen.
652 */
653 precious_clean = (!m->dirty) && m->precious;
654 if (precious_clean && !flush) {
655 PAGE_WAKEUP_DONE(m);
656 return;
657 }
658
659 /*
660 * Verify that we really want to clean this page.
661 */
662 if (m->absent || m->error || (!m->dirty && !m->precious)) {
663 VM_PAGE_FREE(m);
664 return;
665 }
666
667 /*
668 * Create a paging reference to let us play with the object.
669 */
670 old_object = m->object;
671 paging_offset = m->offset + old_object->paging_offset;
672 vm_object_paging_begin(old_object);
673 vm_object_unlock(old_object);
674
675 /*
676 * Allocate a new object into which we can put the page.
677 */
678 new_object = vm_object_allocate(PAGE_SIZE);
679
680 /*
681 * Move the page into the new object.
682 */
683 holding_page = vm_pageout_setup(m,
684 paging_offset,
685 new_object,
686 0, /* new offset */
687 flush); /* flush */
688
689 rc = vm_map_copyin_object(new_object, 0, PAGE_SIZE, &copy);
690 assert(rc == KERN_SUCCESS);
691
692 if (initial || old_object->use_old_pageout) {
693 rc = (*(initial ? memory_object_data_initialize
694 : memory_object_data_write))
695 (old_object->pager,
696 old_object->pager_request,
697 paging_offset, (pointer_t) copy, PAGE_SIZE);
698 }
699 else {
700 rc = memory_object_data_return(
701 old_object->pager,
702 old_object->pager_request,
703 paging_offset, (pointer_t) copy, PAGE_SIZE,
704 !precious_clean, !flush);
705 }
706
707 if (rc != KERN_SUCCESS)
708 vm_map_copy_discard(copy);
709
710 /*
711 * Clean up.
712 */
713 vm_object_lock(old_object);
714 if (holding_page != VM_PAGE_NULL)
715 VM_PAGE_FREE(holding_page);
716 vm_object_paging_end(old_object);
717 }
718
719 /*
720 * vm_pageout_scan does the dirty work for the pageout daemon.
721 * It returns with vm_page_queue_free_lock held and
722 * vm_page_free_wanted == 0.
723 */
724
725 void vm_pageout_scan(void)
726 {
727 unsigned int burst_count;
728
729 /*
730 * We want to gradually dribble pages from the active queue
731 * to the inactive queue. If we let the inactive queue get
732 * very small, and then suddenly dump many pages into it,
733 * those pages won't get a sufficient chance to be referenced
734 * before we start taking them from the inactive queue.
735 *
736 * We must limit the rate at which we send pages to the pagers.
737 * data_write messages consume memory, for message buffers and
738 * for map-copy objects. If we get too far ahead of the pagers,
739 * we can potentially run out of memory.
740 *
741 * We can use the laundry count to limit directly the number
742 * of pages outstanding to the default pager. A similar
743 * strategy for external pagers doesn't work, because
744 * external pagers don't have to deallocate the pages sent them,
745 * and because we might have to send pages to external pagers
746 * even if they aren't processing writes. So we also
747 * use a burst count to limit writes to external pagers.
748 *
749 * When memory is very tight, we can't rely on external pagers to
750 * clean pages. They probably aren't running, because they
751 * aren't vm-privileged. If we kept sending dirty pages to them,
752 * we could exhaust the free list. However, we can't just ignore
753 * pages belonging to external objects, because there might be no
754 * pages belonging to internal objects. Hence, we get the page
755 * into an internal object and then immediately double-page it,
756 * sending it to the default pager.
757 *
758 * consider_zone_gc should be last, because the other operations
759 * might return memory to zones. When we pause we use
760 * vm_pageout_scan_continue as our continuation, so we will
761 * reenter vm_pageout_scan periodically and attempt to reclaim
762 * internal memory even if we never reach vm_page_free_target.
763 */
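/*
 * Flow-control arithmetic under the default tuning (illustrative): at
 * most vm_pageout_burst_max (10) pages are written out per burst before
 * the loop below pauses, and the pause is computed as
 * pages * vm_pageout_burst_wait, e.g. 10 pages * 30 ms = 300 ms, or at
 * least vm_pageout_empty_wait (200 ms) when the inactive queue is empty.
 */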
764
765 stack_collect();
766 net_kmsg_collect();
767 consider_task_collect();
768 consider_thread_collect();
769 consider_zone_gc();
770
771 for (burst_count = 0;;) {
772 register vm_page_t m;
773 register vm_object_t object;
774 unsigned int free_count;
775
776 /*
777 * Check for ASTs here, while unlocked.
778 * We may run through this loop many times before
779 * blocking.
780 */
781 AST_KERNEL_CHECK(cpu_number());
782
783 /*
784 * Recalculate vm_page_inactive_target.
785 */
786
787 vm_page_lock_queues();
788 vm_page_inactive_target =
789 VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
790 vm_page_inactive_count);
791
792 /*
793 * Move pages from active to inactive.
794 */
795
796 while ((vm_page_inactive_count < vm_page_inactive_target) &&
797 !queue_empty(&vm_page_queue_active)) {
798 register vm_object_t obj;
799
800 vm_pageout_active++;
801 m = (vm_page_t) queue_first(&vm_page_queue_active);
802 assert(m->active && !m->inactive);
803
804 obj = m->object;
805 if (!vm_object_lock_try(obj)) {
806 /*
807 * Move page to end and continue.
808 */
809
810 queue_remove(&vm_page_queue_active, m,
811 vm_page_t, pageq);
812 queue_enter(&vm_page_queue_active, m,
813 vm_page_t, pageq);
814 vm_page_unlock_queues();
815
816 /* Check for ASTs here, while unlocked */
817 AST_KERNEL_CHECK(cpu_number());
818
819 vm_page_lock_queues();
820 continue;
821 }
822
823 /*
824 * If the page is busy, then we pull it
825 * off the active queue and leave it alone.
826 */
827
828 if (m->busy) {
829 vm_object_unlock(obj);
830 queue_remove(&vm_page_queue_active, m,
831 vm_page_t, pageq);
832 m->active = FALSE;
833 vm_page_active_count--;
834 continue;
835 }
836
837 /*
838 * Deactivate the page while holding the object
839 * locked, so we know the page is still not busy.
840 * This should prevent races between pmap_enter
841 * and pmap_clear_reference. The page might be
842 * absent or fictitious, but vm_page_deactivate
843 * can handle that.
844 */
845
846 vm_page_deactivate(m);
847 vm_object_unlock(obj);
848 }
849
850 /*
851 * We are done if we have met our target *and*
852 * nobody is still waiting for a page.
853 */
854
855 simple_lock(&vm_page_queue_free_lock);
856 free_count = vm_page_free_count;
857 if ((free_count >= vm_page_free_target) &&
858 (vm_page_free_wanted == 0)) {
859 vm_page_unlock_queues();
860 break;
861 }
862 simple_unlock(&vm_page_queue_free_lock);
863
864 /*
865 * Sometimes we have to pause:
866 * 1) No inactive pages - nothing to do.
867 * 2) Flow control - wait for pagers to catch up.
868 * 3) Extremely low memory - sending out dirty pages
869 * consumes memory. We don't take the risk of doing
870 * this if the default pager already has work to do.
871 */
872
873 if (queue_empty(&vm_page_queue_inactive) ||
874 (burst_count >= vm_pageout_burst_max) ||
875 (vm_page_laundry_count >= vm_pageout_burst_max) ||
876 ((free_count < vm_pageout_reserved_really) &&
877 (vm_page_laundry_count > 0))) {
878 unsigned int pages, msecs;
879
880 /*
881 * vm_pageout_burst_wait is msecs/page.
882 * If there is nothing for us to do, we wait
883 * at least vm_pageout_empty_wait msecs.
884 */
885
886 if (vm_page_laundry_count > burst_count)
887 pages = vm_page_laundry_count;
888 else
889 pages = burst_count;
890 msecs = pages * vm_pageout_burst_wait;
891
892 if (queue_empty(&vm_page_queue_inactive) &&
893 (msecs < vm_pageout_empty_wait))
894 msecs = vm_pageout_empty_wait;
895 vm_page_unlock_queues();
896
897 thread_will_wait_with_timeout(current_thread(), msecs);
898 counter(c_vm_pageout_scan_block++);
899 thread_block(vm_pageout_scan_continue);
900 /*NOTREACHED*/
901 }
902
903 vm_pageout_inactive++;
904 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
905 assert(!m->active && m->inactive);
906 object = m->object;
907
908 /*
909 * Try to lock object; since we've got the
910 * page queues lock, we can only try for this one.
911 */
912
913 if (!vm_object_lock_try(object)) {
914 /*
915 * Move page to end and continue.
916 */
917
918 queue_remove(&vm_page_queue_inactive, m,
919 vm_page_t, pageq);
920 queue_enter(&vm_page_queue_inactive, m,
921 vm_page_t, pageq);
922 vm_page_unlock_queues();
923 vm_pageout_inactive_nolock++;
924 continue;
925 }
926
927 /*
928 * Remove the page from the inactive list.
929 */
930
931 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
932 vm_page_inactive_count--;
933 m->inactive = FALSE;
934
935 if (m->busy || !object->alive) {
936 /*
937 * Somebody is already playing with this page.
938 * Leave it off the pageout queues.
939 */
940
941 vm_page_unlock_queues();
942 vm_object_unlock(object);
943 vm_pageout_inactive_busy++;
944 continue;
945 }
946
947 /*
948 * If it's absent, we can reclaim the page.
949 */
950
951 if (m->absent) {
952 vm_pageout_inactive_absent++;
953 reclaim_page:
954 vm_page_free(m);
955 vm_page_unlock_queues();
956 vm_object_unlock(object);
957 continue;
958 }
959
960 /*
961 * If it's being used, reactivate.
962 * (Fictitious pages are either busy or absent.)
963 */
964
965 assert(!m->fictitious);
966 if (m->reference || pmap_is_referenced(m->phys_addr)) {
967 vm_object_unlock(object);
968 vm_page_activate(m);
969 vm_stat.reactivations++;
970 vm_page_unlock_queues();
971 vm_pageout_inactive_used++;
972 continue;
973 }
974
975 /*
976 * Eliminate all mappings.
977 */
978
979 m->busy = TRUE;
980 pmap_page_protect(m->phys_addr, VM_PROT_NONE);
981 if (!m->dirty)
982 m->dirty = pmap_is_modified(m->phys_addr);
983
984 /*
985 * If it's clean and not precious, we can free the page.
986 */
987
988 if (!m->dirty && !m->precious) {
989 vm_pageout_inactive_clean++;
990 goto reclaim_page;
991 }
992
993 /*
994 * If we are very low on memory, then we can't
995 * rely on an external pager to clean a dirty page,
996 * because external pagers are not vm-privileged.
997 *
998 * The laundry bit tells vm_pageout_setup to
999 * put the page back at the front of the inactive
1000 * queue instead of activating the page. Hence,
1001 * we will pick the page up again immediately and
1002 * resend it to the default pager.
1003 */
1004
1005 assert(!m->laundry);
1006 if ((free_count < vm_pageout_reserved_internal) &&
1007 !object->internal) {
1008 m->laundry = TRUE;
1009 vm_pageout_inactive_double++;
1010 }
1011 vm_page_unlock_queues();
1012
1013 /*
1014 * If there is no memory object for the page, create
1015 * one and hand it to the default pager.
1016 * [First try to collapse, so we don`t create
1017 * one unnecessarily.]
1018 */
1019
1020 if (!object->pager_initialized)
1021 vm_object_collapse(object);
1022 if (!object->pager_initialized)
1023 vm_object_pager_create(object);
1024 if (!object->pager_initialized)
1025 panic("vm_pageout_scan");
1026
1027 vm_pageout_inactive_dirty++;
1028 vm_pageout_page(m, FALSE, TRUE); /* flush it */
1029 vm_object_unlock(object);
1030 burst_count++;
1031 }
1032 }
1033
1034 no_return vm_pageout_scan_continue(void)
1035 {
1036 /*
1037 * We just paused to let the pagers catch up.
1038 * If vm_page_laundry_count is still high,
1039 * then we aren't waiting long enough.
1040 * If we have paused some vm_pageout_pause_max times without
1041 * adjusting vm_pageout_burst_wait, it might be too big,
1042 * so we decrease it.
1043 */
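/*
 * Example of this adaptation with the default tuning: starting from
 * vm_pageout_burst_wait = 30 ms, each pause that still finds more than
 * vm_pageout_burst_min (5) pages in the laundry adds 1 ms to the wait;
 * after more than vm_pageout_pause_max (10) consecutive pauses with a
 * short laundry, the wait is cut to 3/4 of its value, e.g. 30 ms becomes
 * 22 ms (never below 1 ms).
 */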
1044
1045 vm_page_lock_queues();
1046 if (vm_page_laundry_count > vm_pageout_burst_min) {
1047 vm_pageout_burst_wait++;
1048 vm_pageout_pause_count = 0;
1049 } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
1050 vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
1051 if (vm_pageout_burst_wait < 1)
1052 vm_pageout_burst_wait = 1;
1053 vm_pageout_pause_count = 0;
1054 }
1055 vm_page_unlock_queues();
1056
1057 vm_pageout_continue();
1058 /*NOTREACHED*/
1059 }
1060
1061 /*
1062 * vm_pageout is the high level pageout daemon.
1063 */
1064
1065 no_return vm_pageout_continue(void)
1066 {
1067 /*
1068 * The pageout daemon is never done, so loop forever.
1069 * We should call vm_pageout_scan at least once each
1070 * time we are woken, even if vm_page_free_wanted is
1071 * zero, to check vm_page_free_target and
1072 * vm_page_inactive_target.
1073 */
1074
1075 for (;;) {
1076 vm_pageout_scan();
1077 /* we hold vm_page_queue_free_lock now */
1078 assert(vm_page_free_wanted == 0);
1079
1080 assert_wait(&vm_page_free_wanted, FALSE);
1081 simple_unlock(&vm_page_queue_free_lock);
1082 counter(c_vm_pageout_block++);
1083 thread_block(vm_pageout_continue);
1084 }
1085 }
1086
1087 no_return vm_pageout(void)
1088 {
1089 int free_after_reserve;
1090
1091 current_thread()->vm_privilege = TRUE;
1092 stack_privilege(current_thread());
1093
1094 /*
1095 * Initialize some paging parameters.
1096 */
1097
1098 if (vm_pageout_burst_max == 0)
1099 vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
1100
1101 if (vm_pageout_burst_min == 0)
1102 vm_pageout_burst_min = VM_PAGEOUT_BURST_MIN;
1103
1104 if (vm_pageout_burst_wait == 0)
1105 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
1106
1107 if (vm_pageout_empty_wait == 0)
1108 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
1109
1110 if (vm_page_free_reserved == 0)
1111 vm_page_free_reserved = VM_PAGE_FREE_RESERVED;
1112
1113 if (vm_pageout_pause_max == 0)
1114 vm_pageout_pause_max = VM_PAGEOUT_PAUSE_MAX;
1115
1116 if (vm_pageout_reserved_internal == 0)
1117 vm_pageout_reserved_internal =
1118 VM_PAGEOUT_RESERVED_INTERNAL(vm_page_free_reserved);
1119
1120 if (vm_pageout_reserved_really == 0)
1121 vm_pageout_reserved_really =
1122 VM_PAGEOUT_RESERVED_REALLY(vm_page_free_reserved);
1123
1124 free_after_reserve = vm_page_free_count - vm_page_free_reserved;
1125
1126 if (vm_page_free_min == 0)
1127 vm_page_free_min = vm_page_free_reserved +
1128 VM_PAGE_FREE_MIN(free_after_reserve);
1129
1130 if (vm_page_free_target == 0)
1131 vm_page_free_target = vm_page_free_reserved +
1132 VM_PAGE_FREE_TARGET(free_after_reserve);
1133
1134 if (vm_page_free_target < vm_page_free_min + 5)
1135 vm_page_free_target = vm_page_free_min + 5;
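/*
 * Worked example of the defaults (hypothetical machine with 1024 free
 * pages at this point): vm_page_free_reserved = 15, free_after_reserve =
 * 1009, vm_page_free_min = 15 + (10 + 1009/100) = 35, and
 * vm_page_free_target = 15 + (15 + 1009/80) = 42, which already clears
 * the vm_page_free_min + 5 floor enforced above.
 */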
1136
1137 /*
1138 * vm_pageout_scan will set vm_page_inactive_target.
1139 */
1140
1141 vm_pageout_continue();
1142 /*NOTREACHED*/
1143 }