sys/uvm/uvm_pdaemon.c
/*	$OpenBSD: uvm_pdaemon.c,v 1.105 2022/09/10 20:35:29 miod Exp $	*/
/*	$NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/atomic.h>

#ifdef HIBERNATE
#include <sys/hibernate.h>
#endif

#include <uvm/uvm.h>

#include "drm.h"

#if NDRM > 0
extern void drmbackoff(long);
#endif
/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced (and thus avoid being freed).
 */

#define UVMPD_NUMDIRTYREACTS	16

/*
 * local prototypes
 */

struct rwlock	*uvmpd_trylockowner(struct vm_page *);
void		uvmpd_scan(struct uvm_pmalloc *);
void		uvmpd_scan_inactive(struct uvm_pmalloc *, struct pglist *);
void		uvmpd_tune(void);
void		uvmpd_drop(struct pglist *);
void		uvmpd_dropswap(struct vm_page *);

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	uint64_t timo = INFSLP;

#ifdef DIAGNOSTIC
	if (curproc == &proc0)
		panic("%s: cannot sleep for memory during boot", __func__);
#endif

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */
	if (curproc == uvm.pagedaemon_proc) {
		printf("uvm_wait emergency bufbackoff\n");
		if (bufbackoff(NULL, 4) == 0)
			return;
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = MSEC_TO_NSEC(125);	/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_lock_fpageq();
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	msleep_nsec(&uvmexp.free, &uvm.fpageqlock, PVM | PNORELOCK, wmsg, timo);
}
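
/*
 * Illustrative sketch (not part of this file): callers elsewhere in the
 * kernel typically consume uvm_wait() as a retry loop around a failed
 * page allocation: drop your locks, sleep until the daemon reclaims
 * memory, retry.  The function name and wait-channel string below are
 * hypothetical, for illustration only.
 */
#if 0	/* example only, not built */
struct vm_page *
uvm_wait_alloc_example(void)
{
	struct vm_page *pg;

	/* keep retrying; uvm_wait() blocks until pages are reclaimed */
	while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) == NULL)
		uvm_wait("examplewait");
	return pg;
}
#endif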

/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 * => caller must call with page queues locked
 */

void
uvmpd_tune(void)
{
	uvmexp.freemin = uvmexp.npages / 30;

	/* between 16k and 512k */
	/* XXX:  what are these values good for? */
	uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
#if 0
	uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT);
#endif

	/* Make sure there's always a user page free. */
	if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
		uvmexp.freemin = uvmexp.reserve_kernel + 1;

	uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
	if (uvmexp.freetarg <= uvmexp.freemin)
		uvmexp.freetarg = uvmexp.freemin + 1;

	/* uvmexp.inactarg: computed in main daemon loop */

	uvmexp.wiredmax = uvmexp.npages / 3;
}
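
/*
 * Worked example of the tuning math above (illustrative only; assumes a
 * hypothetical machine with 4 KiB pages and 1 GiB of RAM, so
 * npages = 262144, and a reserve_kernel small enough not to matter):
 *
 *	freemin  = 262144 / 30                 = 8738 pages (~34 MiB)
 *	freemin  = max(8738, 16384 >> 12 = 4)  = 8738
 *	freetarg = (8738 * 4) / 3              = 11650 pages (~45 MiB)
 *	         (11650 > 8738, so the +1 adjustment does not fire)
 *	wiredmax = 262144 / 3                  = 87381 pages
 */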

/*
 * Indicate to the page daemon that a nowait call failed and it should
 * recover at least some memory in the most restricted region (assumed
 * to be dma_constraint).
 */
volatile int uvm_nowait_failed;
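
/*
 * Illustrative sketch (not part of this file): a nowait allocation path
 * that comes up empty is expected to set uvm_nowait_failed and kick the
 * daemon, roughly as below.  The function name is hypothetical; the real
 * producer lives in the page allocator.
 */
#if 0	/* example only, not built */
void
uvm_nowait_fail_example(void)
{
	uvm_nowait_failed = 1;
	wakeup(&uvm.pagedaemon);	/* rouse uvm_pageout() */
}
#endif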

/*
 * uvm_pageout: the main loop for the pagedaemon
 */
void
uvm_pageout(void *arg)
{
	struct uvm_constraint_range constraint;
	struct uvm_pmalloc *pma;
	int npages = 0;

	/* ensure correct priority and set paging parameters... */
	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvm_lock_pageq();
	npages = uvmexp.npages;
	uvmpd_tune();
	uvm_unlock_pageq();

	for (;;) {
		long size;

		uvm_lock_fpageq();
		if (!uvm_nowait_failed && TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
			msleep_nsec(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
			    "pgdaemon", INFSLP);
			uvmexp.pdwoke++;
		}

		if ((pma = TAILQ_FIRST(&uvm.pmr_control.allocs)) != NULL) {
			pma->pm_flags |= UVM_PMA_BUSY;
			constraint = pma->pm_constraint;
		} else {
			if (uvm_nowait_failed) {
				/*
				 * XXX realistically, this is what our
				 * nowait callers probably care about
				 */
				constraint = dma_constraint;
				uvm_nowait_failed = 0;
			} else
				constraint = no_constraint;
		}

		uvm_unlock_fpageq();

		/*
		 * now lock page queues and recompute inactive count
		 */
		uvm_lock_pageq();
		if (npages != uvmexp.npages) {	/* check for new pages? */
			npages = uvmexp.npages;
			uvmpd_tune();
		}

		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg) {
			uvmexp.inactarg = uvmexp.freetarg + 1;
		}

		/* Reclaim pages from the buffer cache if possible. */
		size = 0;
		if (pma != NULL)
			size += pma->pm_size >> PAGE_SHIFT;
		if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg)
			size += uvmexp.freetarg - (uvmexp.free -
			    BUFPAGES_DEFICIT);
		if (size == 0)
			size = 16; /* XXX */
		uvm_unlock_pageq();
		(void) bufbackoff(&constraint, size * 2);
#if NDRM > 0
		drmbackoff(size * 2);
#endif
		uvm_lock_pageq();

		/*
		 * scan if needed
		 */
		if (pma != NULL ||
		    ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
		    ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
			uvmpd_scan(pma);
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		uvm_lock_fpageq();
		if (uvmexp.free > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
		}

		if (pma != NULL) {
			/*
			 * XXX If UVM_PMA_FREED isn't set, no pages
			 * were freed.  Should we set UVM_PMA_FAIL in
			 * that case?
			 */
			pma->pm_flags &= ~UVM_PMA_BUSY;
			if (pma->pm_flags & UVM_PMA_FREED) {
				pma->pm_flags &= ~UVM_PMA_LINKED;
				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
				    pmq);
				wakeup(pma);
			}
		}
		uvm_unlock_fpageq();

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */
		uvm_unlock_pageq();

		sched_pause(yield);
	}
	/*NOTREACHED*/
}
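
/*
 * Illustrative sketch (not part of this file) of the waiting side of the
 * pma handshake driven by the loop above: a constrained allocator links a
 * uvm_pmalloc onto uvm.pmr_control.allocs, wakes the daemon, and sleeps
 * until the daemon clears UVM_PMA_LINKED and does wakeup(pma).  This is a
 * simplified approximation of what the pmemrange allocator does; the
 * function name is hypothetical and failure handling is omitted.
 */
#if 0	/* example only, not built */
void
uvm_pma_wait_example(struct uvm_pmalloc *pma)
{
	uvm_lock_fpageq();
	pma->pm_flags |= UVM_PMA_LINKED;
	TAILQ_INSERT_TAIL(&uvm.pmr_control.allocs, pma, pmq);
	wakeup(&uvm.pagedaemon);
	while (pma->pm_flags & (UVM_PMA_LINKED | UVM_PMA_BUSY))
		msleep_nsec(pma, &uvm.fpageqlock, PVM, "pmawait", INFSLP);
	uvm_unlock_fpageq();
}
#endif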

/*
 * uvm_aiodone_daemon:  main loop for the aiodone daemon.
 */
void
uvm_aiodone_daemon(void *arg)
{
	int s, free;
	struct buf *bp, *nbp;

	uvm.aiodoned_proc = curproc;

	for (;;) {
		/*
		 * Check for done aio structures. If we've got structures to
		 * process, do so. Otherwise sleep while avoiding races.
		 */
		mtx_enter(&uvm.aiodoned_lock);
		while ((bp = TAILQ_FIRST(&uvm.aio_done)) == NULL)
			msleep_nsec(&uvm.aiodoned, &uvm.aiodoned_lock,
			    PVM, "aiodoned", INFSLP);
		/* Take the list for ourselves. */
		TAILQ_INIT(&uvm.aio_done);
		mtx_leave(&uvm.aiodoned_lock);

		/* process each i/o that's done. */
		free = uvmexp.free;
		while (bp != NULL) {
			if (bp->b_flags & B_PDAEMON) {
				uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT;
			}
			nbp = TAILQ_NEXT(bp, b_freelist);
			s = splbio();	/* b_iodone must be called at splbio */
			(*bp->b_iodone)(bp);
			splx(s);
			bp = nbp;

			sched_pause(yield);
		}
		uvm_lock_fpageq();
		wakeup(free <= uvmexp.reserve_kernel ? &uvm.pagedaemon :
		    &uvmexp.free);
		uvm_unlock_fpageq();
	}
}
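
/*
 * Illustrative sketch (not part of this file): the producer that feeds
 * the loop above queues a finished async-I/O buffer on uvm.aio_done
 * under uvm.aiodoned_lock and wakes the daemon.  A simplified,
 * hypothetical version:
 */
#if 0	/* example only, not built */
void
uvm_aio_done_example(struct buf *bp)
{
	mtx_enter(&uvm.aiodoned_lock);
	TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist);
	wakeup(&uvm.aiodoned);
	mtx_leave(&uvm.aiodoned_lock);
}
#endif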

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => return the locked rwlock on success.  otherwise, return NULL.
 */
struct rwlock *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	struct rwlock *slock;

	if (uobj != NULL) {
		slock = uobj->vmobjlock;
	} else {
		struct vm_anon *anon = pg->uanon;

		KASSERT(anon != NULL);
		slock = anon->an_lock;
	}

	if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
		return NULL;
	}

	return slock;
}


/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 */
void
uvmpd_dropswap(struct vm_page *pg)
{
	struct vm_anon *anon = pg->uanon;

	if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
	} else if (pg->pg_flags & PQ_AOBJ) {
		uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
	}
}

/*
 * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we stop scanning early once we have met our free target
 */
void
uvmpd_scan_inactive(struct uvm_pmalloc *pma, struct pglist *pglst)
{
	int free, result;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[SWCLUSTPAGES], **ppsp;
	int npages;
	struct vm_page *swpps[SWCLUSTPAGES];	/* XXX: see below */
	struct rwlock *slock;
	int swnpages, swcpages;			/* XXX: see below */
	int swslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vaddr_t start;
	int dirtyreacts;

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */
	swslot = 0;
	swnpages = swcpages = 0;
	dirtyreacts = 0;
	p = NULL;

	/* Start with the first page on the list that fits in pma's range */
	if (pma != NULL) {
		paddr_t paddr;

		TAILQ_FOREACH(p, pglst, pageq) {
			paddr = atop(VM_PAGE_TO_PHYS(p));
			if (paddr >= pma->pm_constraint.ucr_low &&
			    paddr < pma->pm_constraint.ucr_high)
				break;
		}
	}

	if (p == NULL) {
		p = TAILQ_FIRST(pglst);
		pma = NULL;
	}

	for (; p != NULL || swslot != 0; p = nextpg) {
		/*
		 * note that p can be NULL only if we have traversed the whole
		 * list and need to do one final swap-backed clustered pageout.
		 */
		uobj = NULL;
		anon = NULL;
		if (p) {
			/*
			 * see if we've met our target
			 */
			free = uvmexp.free - BUFPAGES_DEFICIT;
			if (((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
			    (free + uvmexp.paging >= uvmexp.freetarg << 2)) ||
			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
				if (swslot == 0) {
					/* exit now if no swap-i/o pending */
					break;
				}

				/* set p to null to signal final swap i/o */
				p = NULL;
				nextpg = NULL;
			}
		}
		if (p) {	/* if (we have a new page to consider) */
			/*
			 * we are below target and have a new page to consider.
			 */
			uvmexp.pdscans++;
			nextpg = TAILQ_NEXT(p, pageq);

			anon = p->uanon;
			uobj = p->uobject;

			/*
			 * first we attempt to lock the object that this page
			 * belongs to.  if our attempt fails we skip on to
			 * the next page (no harm done).  it is important to
			 * "try" locking the object as we are locking in the
			 * wrong order (pageq -> object) and we don't want to
			 * deadlock.
			 */
			slock = uvmpd_trylockowner(p);
			if (slock == NULL) {
				continue;
			}

			/*
			 * move referenced pages back to active queue
			 * and skip to next page.
			 */
			if (pmap_is_referenced(p)) {
				uvm_pageactivate(p);
				rw_exit(slock);
				uvmexp.pdreact++;
				continue;
			}

			if (p->pg_flags & PG_BUSY) {
				rw_exit(slock);
				uvmexp.pdbusy++;
				continue;
			}

			/* does the page belong to an object? */
			if (uobj != NULL) {
				uvmexp.pdobscan++;
			} else {
				KASSERT(anon != NULL);
				uvmexp.pdanscan++;
			}

			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
				uvmexp.pdfreed++;

				if (anon) {

					/*
					 * an anonymous page can only be clean
					 * if it has backing store assigned.
					 */

					KASSERT(anon->an_swslot != 0);

					/* remove from object */
					anon->an_page = NULL;
				}
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */
			if ((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
			    (free + uvmexp.paging > uvmexp.freetarg << 2)) {
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, but we can't page it out:
			 * swap is full and every slot is taken by a page
			 * that lives only in swap, so none can be freed.
			 * reactivate this page so that we eventually cycle
			 * all pages thru the inactive queue.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
				dirtyreacts++;
				uvm_pageactivate(p);
				rw_exit(slock);
				continue;
			}

			/*
			 * if the page is swap-backed and dirty and swap space
			 * is full, free any swap allocated to the page
			 * so that other pages can be paged out.
			 */
			KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
			if ((p->pg_flags & PQ_SWAPBACKED) &&
			    uvmexp.swpginuse == uvmexp.swpages) {
				uvmpd_dropswap(p);
			}

			/*
			 * the page we are looking at is dirty.  we must
			 * clean it before it can be freed.  to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.  we write protect all the mappings
			 * of the page so that no one touches it while it is
			 * in I/O.
			 */

			swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
			atomic_setbits_int(&p->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(p, "scan_inactive");
			pmap_page_protect(p, PROT_READ);
			uvmexp.pgswapout++;

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */
			if (swap_backed) {
				/* free old swap slot (if any) */
				uvmpd_dropswap(p);

				/* start new cluster (if necessary) */
				if (swslot == 0) {
					swnpages = SWCLUSTPAGES;
					swslot = uvm_swap_alloc(&swnpages,
					    TRUE);
					if (swslot == 0) {
						/* no swap?  give up! */
						atomic_clearbits_int(
						    &p->pg_flags,
						    PG_BUSY);
						UVM_PAGE_OWN(p, NULL);
						rw_exit(slock);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/* add block to cluster */
				swpps[swcpages] = p;
				if (anon)
					anon->an_swslot = swslot + swcpages;
				else
					uao_set_swslot(uobj,
					    p->offset >> PAGE_SHIFT,
					    swslot + swcpages);
				swcpages++;
			}
		} else {
			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;
		}

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case swnpages == swcpages) or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */
		if (swap_backed) {
			if (p) {	/* if we just added a page to cluster */
				rw_exit(slock);

				/* cluster not full yet? */
				if (swcpages < swnpages)
					continue;
			}

			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vaddr_t) swslot;

			/* if this is final pageout we could have a few
			 * extra swap blocks */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}
		} else {
			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			/* not looked at because PGO_ALLPAGES is set */
			start = 0;
		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the object's
		 * "make put cluster" function to build a cluster on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
		 * it to free the cluster pages for us on a successful I/O (it
		 * always does this for un-successful I/O requests).  this
		 * allows us to do clustered pageout without having to deal
		 * with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN: locked: page queues
		 * OUT: locked:
		 *     !locked: pageqs
		 */

		uvmexp.pdpageouts++;
		result = uvm_pager_put(swap_backed ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */

		if (swap_backed)
			swslot = 0;		/* done with this cluster */

		/*
		 * first, we check for VM_PAGER_PEND which means that the
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us.  in this case we move on to the
		 * next page.
		 *
		 * there is a very remote chance that the pending async i/o can
		 * finish _before_ we get here.  if that happens, our page "p"
		 * may no longer be on the inactive queue.  so we verify this
		 * when determining the next page (starting over at the head if
		 * we've lost our inactive page).
		 */

		if (result == VM_PAGER_PEND) {
			uvmexp.paging += npages;
			uvm_lock_pageq();
			uvmexp.pdpending++;
			if (p) {
				if (p->pg_flags & PQ_INACTIVE)
					nextpg = TAILQ_NEXT(p, pageq);
				else
					nextpg = TAILQ_FIRST(pglst);
			} else {
				nextpg = NULL;
			}
			continue;
		}

		/* clean up "p" if we have one */
		if (p) {
			/*
			 * the I/O request to "p" is done and uvm_pager_put
			 * has freed any cluster pages it may have allocated
			 * during I/O.  all that is left for us to do is
			 * clean up page "p" (which is still PG_BUSY).
			 *
			 * our result could be one of the following:
			 *   VM_PAGER_OK: successful pageout
			 *
			 *   VM_PAGER_AGAIN: tmp resource shortage, we skip
			 *     to next page
			 *   VM_PAGER_{FAIL,ERROR,BAD}: an error.  we
			 *     "reactivate" page to get it out of the way (it
			 *     will eventually drift back into the inactive
			 *     queue for a retry).
			 *   VM_PAGER_UNLOCK: should never see this as it is
			 *     only valid for "get" operations
			 */

			/* relock p's object: page queues not locked yet, so
			 * no need for "try" */

			/* !swap_backed case: already locked... */
			if (swap_backed) {
				rw_enter(slock, RW_WRITE);
			}

#ifdef DIAGNOSTIC
			if (result == VM_PAGER_UNLOCK)
				panic("pagedaemon: pageout returned "
				    "invalid 'unlock' code");
#endif

			/* handle PG_WANTED now */
			if (p->pg_flags & PG_WANTED)
				wakeup(p);

			atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(p, NULL);

			/* released during I/O? Can only happen for anons */
			if (p->pg_flags & PG_RELEASED) {
				KASSERT(anon != NULL);
				/*
				 * remove page so we can get nextpg,
				 * also zero out anon so we don't use
				 * it after the free.
				 */
				anon->an_page = NULL;
				p->uanon = NULL;

				rw_exit(anon->an_lock);
				uvm_anfree(anon);	/* kills anon */
				pmap_page_protect(p, PROT_NONE);
				anon = NULL;
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				/* free released page */
				uvm_pagefree(p);
			} else {	/* page was not released during I/O */
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				if (result != VM_PAGER_OK) {
					/* pageout was a failure... */
					if (result != VM_PAGER_AGAIN)
						uvm_pageactivate(p);
					pmap_clear_reference(p);
					/* XXXCDC: if (swap_backed) FREE p's
					 * swap block? */
				} else {
					/* pageout was a success... */
					pmap_clear_reference(p);
					pmap_clear_modify(p);
					atomic_setbits_int(&p->pg_flags,
					    PG_CLEAN);
				}
			}

			/*
			 * drop object lock (if there is an object left).  do
			 * a safety check of nextpg to make sure it is on the
			 * inactive queue (it should be since PG_BUSY pages on
			 * the inactive queue can't be re-queued [note: not
			 * true for active queue]).
			 */
			rw_exit(slock);

			if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
				nextpg = TAILQ_FIRST(pglst);	/* reload! */
			}
		} else {
			/*
			 * if p is null in this loop, make sure it stays null
			 * in the next loop.
			 */
			nextpg = NULL;

			/*
			 * lock page queues here just so they're always locked
			 * at the end of the loop.
			 */
			uvm_lock_pageq();
		}
	}
}
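
/*
 * Distilled sketch (illustrative only) of the swap-cluster lifecycle
 * implemented above: allocate up to SWCLUSTPAGES slots once, hand out
 * consecutive slots as pages are gathered, return the unused tail, then
 * pass the cluster to the pager.
 */
#if 0	/* example only, not built */
	int swnpages, swcpages, swslot;

	swnpages = SWCLUSTPAGES;
	swslot = uvm_swap_alloc(&swnpages, TRUE); /* may shrink swnpages */
	swcpages = 0;
	/* ... each gathered page is assigned slot swslot + swcpages++ ... */
	if (swcpages < swnpages)	/* give the unused tail back */
		uvm_swap_free(swslot + swcpages, swnpages - swcpages);
	/* ... uvm_pager_put(..., (vaddr_t)swslot, 0); then swslot = 0 ... */
#endif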

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

void
uvmpd_scan(struct uvm_pmalloc *pma)
{
	int free, inactive_shortage, swap_shortage, pages_freed;
	struct vm_page *p, *nextpg;
	struct rwlock *slock;

	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);

	uvmexp.pdrevs++;		/* counter */

	/*
	 * get current "free" page count
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;

#ifdef __HAVE_PMAP_COLLECT
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (free < uvmexp.freetarg) {
		uvmexp.pdswout++;
		uvm_unlock_pageq();
		uvm_swapout_threads();
		uvm_lock_pageq();
	}
#endif

	/*
	 * now we want to work on meeting our targets.  first we work on our
	 * free target by converting inactive pages into free pages.  then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */

	pages_freed = uvmexp.pdfreed;
	(void) uvmpd_scan_inactive(pma, &uvm.page_inactive);
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */
	inactive_shortage = uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;
	swap_shortage = 0;
	if (free < uvmexp.freetarg &&
	    uvmexp.swpginuse == uvmexp.swpages &&
	    !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - free;
	}

	for (p = TAILQ_FIRST(&uvm.page_active);
	    p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	    p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);
		if (p->pg_flags & PG_BUSY) {
			continue;
		}

		/*
		 * lock the page's owner.
		 */
		slock = uvmpd_trylockowner(p);
		if (slock == NULL) {
			continue;
		}

		/*
		 * skip this page if it's busy.
		 */
		if ((p->pg_flags & PG_BUSY) != 0) {
			rw_exit(slock);
			continue;
		}

		/*
		 * if there's a shortage of swap, free any swap allocated
		 * to this page so that other pages can be paged out.
		 */
		if (swap_shortage > 0) {
			if ((p->pg_flags & PQ_ANON) && p->uanon->an_swslot) {
				uvm_swap_free(p->uanon->an_swslot, 1);
				p->uanon->an_swslot = 0;
				atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
				swap_shortage--;
			}
			if (p->pg_flags & PQ_AOBJ) {
				int slot = uao_set_swslot(p->uobject,
				    p->offset >> PAGE_SHIFT, 0);
				if (slot) {
					uvm_swap_free(slot, 1);
					atomic_clearbits_int(&p->pg_flags,
					    PG_CLEAN);
					swap_shortage--;
				}
			}
		}

		/*
		 * deactivate this page if there's a shortage of
		 * inactive pages.
		 */
		if (inactive_shortage > 0) {
			pmap_page_protect(p, PROT_NONE);
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
		}

		/*
		 * we're done with this page.
		 */
		rw_exit(slock);
	}
}
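
/*
 * Worked example of the shortage math above (illustrative numbers,
 * reusing the hypothetical freetarg = 11650 from the tuning example):
 * with free = 11000, no pages freed by the inactive scan, every swap
 * slot in use, and swap not yet full of swap-only pages,
 * swap_shortage = 11650 - 11000 = 650, so the active-queue loop strips
 * swap slots from up to 650 pages; independently, up to
 * inactive_shortage = inactarg - inactive - BUFPAGES_INACT pages are
 * deactivated.
 */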

#ifdef HIBERNATE

/*
 * uvmpd_drop: drop clean pages from list
 */
void
uvmpd_drop(struct pglist *pglst)
{
	struct vm_page *p, *nextpg;

	for (p = TAILQ_FIRST(pglst); p != NULL; p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);

		if (p->pg_flags & PQ_ANON || p->uobject == NULL)
			continue;

		if (p->pg_flags & PG_BUSY)
			continue;

		if (p->pg_flags & PG_CLEAN) {
			struct uvm_object *uobj = p->uobject;

			rw_enter(uobj->vmobjlock, RW_WRITE);
			uvm_lock_pageq();
			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
			}
			uvm_unlock_pageq();
			rw_exit(uobj->vmobjlock);
		}
	}
}

void
uvmpd_hibernate(void)
{
	uvmpd_drop(&uvm.page_inactive);
	uvmpd_drop(&uvm.page_active);
}

#endif