sys/uvm/uvm_pager.c
/*	$OpenBSD: uvm_pager.c,v 1.89 2022/08/19 05:53:19 mpi Exp $	*/
/*	$NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp
 */

/*
 * uvm_pager.c: generic functions used to assist the pagers.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>

const struct uvm_pagerops *uvmpagerops[] = {
        &aobj_pager,
        &uvm_deviceops,
        &uvm_vnodeops,
};
/*
 * the pager map: provides KVA for I/O
 *
 * Each uvm_pseg has room for MAX_PAGER_SEGS pager I/O segments of
 * MAXBSIZE bytes each.
 *
 * The set of initialized uvm_pseg instances is dynamic: entries of the
 * fixed psegs[] array are set up on demand, so at most PSEG_NUMSEGS
 * instances can exist.
 *
 * psegs[0/1] always exist (so that the pager can always map in pages).
 * Elements 0 and 1 of psegs[0/1] are always reserved for the pagedaemon.
 *
 * Any other pseg is automatically created when no space is available
 * and automatically destroyed when it is no longer in use.
 */
#define MAX_PAGER_SEGS	16
#define PSEG_NUMSEGS	(PAGER_MAP_SIZE / MAX_PAGER_SEGS / MAXBSIZE)
struct uvm_pseg {
        /* Start of virtual space; 0 if not inited. */
        vaddr_t	start;
        /* Bitmap of the segments in use in this pseg. */
        int	use;
};
struct mutex uvm_pseg_lck;
struct uvm_pseg psegs[PSEG_NUMSEGS];

#define UVM_PSEG_FULL(pseg)	((pseg)->use == (1 << MAX_PAGER_SEGS) - 1)
#define UVM_PSEG_EMPTY(pseg)	((pseg)->use == 0)
#define UVM_PSEG_INUSE(pseg,id)	(((pseg)->use & (1 << (id))) != 0)
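
/*
 * Worked example of the macros above (illustrative sketch, not part of
 * the original source): with MAX_PAGER_SEGS == 16, each initialized pseg
 * covers 16 * MAXBSIZE bytes of KVA and tracks its 16 MAXBSIZE-sized
 * windows in the "use" bitmap.  Window "id" of a pseg corresponds to the
 * address
 *
 *	vaddr_t va = pseg->start + id * MAXBSIZE;
 *
 * and uvm_pseg_release() recovers the window from an address with the
 * inverse calculation
 *
 *	int id = (va - pseg->start) / MAXBSIZE;
 *
 * UVM_PSEG_FULL() compares "use" against (1 << 16) - 1, the mask with
 * all 16 window bits set.
 */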

void	uvm_pseg_init(struct uvm_pseg *);
vaddr_t	uvm_pseg_get(int);
void	uvm_pseg_release(vaddr_t);

/*
 * uvm_pager_init: init pagers (at boot time)
 */
void
uvm_pager_init(void)
{
        int lcv;

        /* init pager map */
        uvm_pseg_init(&psegs[0]);
        uvm_pseg_init(&psegs[1]);
        mtx_init(&uvm_pseg_lck, IPL_VM);

        /* init ASYNC I/O queue */
        TAILQ_INIT(&uvm.aio_done);

        /* call pager init functions */
        for (lcv = 0 ; lcv < sizeof(uvmpagerops)/sizeof(struct uvm_pagerops *);
            lcv++) {
                if (uvmpagerops[lcv]->pgo_init)
                        uvmpagerops[lcv]->pgo_init();
        }
}

/*
 * Initialize a uvm_pseg.
 *
 * May fail, in which case seg->start == 0.
 *
 * Caller locks uvm_pseg_lck.
 */
void
uvm_pseg_init(struct uvm_pseg *pseg)
{
        KASSERT(pseg->start == 0);
        KASSERT(pseg->use == 0);
        pseg->start = (vaddr_t)km_alloc(MAX_PAGER_SEGS * MAXBSIZE,
            &kv_any, &kp_none, &kd_trylock);
}

/*
 * Acquire a pager map segment.
 *
 * Returns a vaddr for paging. 0 on failure.
 *
 * Caller does not lock.
 */
vaddr_t
uvm_pseg_get(int flags)
{
        int i;
        struct uvm_pseg *pseg;

        /*
         * XXX Prevent lock ordering issue in uvm_unmap_detach().  A real
         * fix would be to move the KERNEL_LOCK() out of uvm_unmap_detach().
         *
         *  witness_checkorder() at witness_checkorder+0xba0
         *  __mp_lock() at __mp_lock+0x5f
         *  uvm_unmap_detach() at uvm_unmap_detach+0xc5
         *  uvm_map() at uvm_map+0x857
         *  uvm_km_valloc_try() at uvm_km_valloc_try+0x65
         *  uvm_pseg_get() at uvm_pseg_get+0x6f
         *  uvm_pagermapin() at uvm_pagermapin+0x45
         *  uvn_io() at uvn_io+0xcf
         *  uvn_get() at uvn_get+0x156
         *  uvm_fault_lower() at uvm_fault_lower+0x28a
         *  uvm_fault() at uvm_fault+0x1b3
         *  upageflttrap() at upageflttrap+0x62
         */
        KERNEL_LOCK();
        mtx_enter(&uvm_pseg_lck);

pager_seg_restart:
        /* Find first pseg that has room. */
        for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) {
                if (UVM_PSEG_FULL(pseg))
                        continue;

                if (pseg->start == 0) {
                        /* Need initialization. */
                        uvm_pseg_init(pseg);
                        if (pseg->start == 0)
                                goto pager_seg_fail;
                }

                /* Keep indexes 0,1 reserved for pagedaemon. */
                if ((pseg == &psegs[0] || pseg == &psegs[1]) &&
                    (curproc != uvm.pagedaemon_proc))
                        i = 2;
                else
                        i = 0;

                for (; i < MAX_PAGER_SEGS; i++) {
                        if (!UVM_PSEG_INUSE(pseg, i)) {
                                pseg->use |= 1 << i;
                                mtx_leave(&uvm_pseg_lck);
                                KERNEL_UNLOCK();
                                return pseg->start + i * MAXBSIZE;
                        }
                }
        }

pager_seg_fail:
        if ((flags & UVMPAGER_MAPIN_WAITOK) != 0) {
                msleep_nsec(&psegs, &uvm_pseg_lck, PVM, "pagerseg", INFSLP);
                goto pager_seg_restart;
        }

        mtx_leave(&uvm_pseg_lck);
        KERNEL_UNLOCK();
        return 0;
}

/*
 * Release a pager map segment.
 *
 * Caller does not lock.
 *
 * Deallocates pseg if it is no longer in use.
 */
void
uvm_pseg_release(vaddr_t segaddr)
{
        int id;
        struct uvm_pseg *pseg;
        vaddr_t va = 0;

        mtx_enter(&uvm_pseg_lck);
        for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) {
                if (pseg->start <= segaddr &&
                    segaddr < pseg->start + MAX_PAGER_SEGS * MAXBSIZE)
                        break;
        }
        KASSERT(pseg != &psegs[PSEG_NUMSEGS]);

        id = (segaddr - pseg->start) / MAXBSIZE;
        KASSERT(id >= 0 && id < MAX_PAGER_SEGS);

        /* test for no remainder */
        KDASSERT(segaddr == pseg->start + id * MAXBSIZE);

        KASSERT(UVM_PSEG_INUSE(pseg, id));

        pseg->use &= ~(1 << id);
        wakeup(&psegs);

        if ((pseg != &psegs[0] && pseg != &psegs[1]) && UVM_PSEG_EMPTY(pseg)) {
                va = pseg->start;
                pseg->start = 0;
        }

        mtx_leave(&uvm_pseg_lck);

        if (va) {
                km_free((void *)va, MAX_PAGER_SEGS * MAXBSIZE,
                    &kv_any, &kp_none);
        }
}

/*
 * uvm_pagermapin: map pages into KVA for I/O that needs mappings
 *
 * We reserve a block of KVA from the pager segments with uvm_pseg_get()
 * and then use pmap_enter() to put the mappings in by hand.
 */
vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{
        vaddr_t kva, cva;
        vm_prot_t prot;
        vsize_t size;
        struct vm_page *pp;

#if defined(__HAVE_PMAP_DIRECT)
        /*
         * Use direct mappings for single page, unless there is a risk
         * of aliasing.
         */
        if (npages == 1 && PMAP_PREFER_ALIGN() == 0) {
                KASSERT(pps[0]);
                KASSERT(pps[0]->pg_flags & PG_BUSY);
                return pmap_map_direct(pps[0]);
        }
#endif

        /*
         * UVMPAGER_MAPIN_READ means the I/O reads from backing store
         * into the pages, so the kernel mapping must be writable.
         */
        prot = PROT_READ;
        if (flags & UVMPAGER_MAPIN_READ)
                prot |= PROT_WRITE;
        size = ptoa(npages);

        KASSERT(size <= MAXBSIZE);

        kva = uvm_pseg_get(flags);
        if (kva == 0)
                return 0;

        for (cva = kva ; size != 0 ; size -= PAGE_SIZE, cva += PAGE_SIZE) {
                pp = *pps++;
                KASSERT(pp);
                KASSERT(pp->pg_flags & PG_BUSY);
                /* Allow pmap_enter to fail. */
                if (pmap_enter(pmap_kernel(), cva, VM_PAGE_TO_PHYS(pp),
                    prot, PMAP_WIRED | PMAP_CANFAIL | prot) != 0) {
                        pmap_remove(pmap_kernel(), kva, cva);
                        pmap_update(pmap_kernel());
                        uvm_pseg_release(kva);
                        return 0;
                }
        }
        pmap_update(pmap_kernel());
        return kva;
}

/*
 * uvm_pagermapout: remove KVA mapping
 *
 * We remove the page mappings by hand and then release the pager segment.
 */
void
uvm_pagermapout(vaddr_t kva, int npages)
{
#if defined(__HAVE_PMAP_DIRECT)
        /*
         * Use direct mappings for single page, unless there is a risk
         * of aliasing.
         */
        if (npages == 1 && PMAP_PREFER_ALIGN() == 0) {
                pmap_unmap_direct(kva);
                return;
        }
#endif

        pmap_remove(pmap_kernel(), kva, kva + ((vsize_t)npages << PAGE_SHIFT));
        pmap_update(pmap_kernel());
        uvm_pseg_release(kva);
}
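
/*
 * Illustrative sketch of how the pair above is used (hypothetical
 * caller, not part of the original source); pps[] holds npages busy
 * pages and ptoa(npages) may not exceed MAXBSIZE:
 *
 *	vaddr_t kva;
 *
 *	kva = uvm_pagermapin(pps, npages, UVMPAGER_MAPIN_WAITOK);
 *	if (kva == 0)
 *		return VM_PAGER_AGAIN;	(no KVA or pmap resources)
 *	...do the I/O on [kva, kva + ptoa(npages))...
 *	uvm_pagermapout(kva, npages);
 *
 * For async I/O the unmap is deferred: uvm_aio_aiodone() below calls
 * uvm_pagermapout() once the transfer has completed.
 */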

/*
 * uvm_mk_pcluster
 *
 * generic "make 'pager put' cluster" function.  a pager can either
 * [1] set pgo_mk_pcluster to NULL (never cluster), [2] set it to this
 * generic function, or [3] set it to a pager-specific function.
 *
 * => caller must lock object _and_ pagequeues (since we need to look
 *    at active vs. inactive bits, etc.)
 * => caller must make center page busy and write-protect it
 * => we mark all cluster pages busy for the caller
 * => the caller must unbusy all pages (and check wanted/released
 *    status if it drops the object lock)
 * => flags:
 *	PGO_ALLPAGES:	all pages in object are valid targets
 *	!PGO_ALLPAGES:	use "lo" and "hi" to limit range of cluster
 *	PGO_DOACTCLUST:	include active pages in cluster.
 *	PGO_FREE:	set the PG_RELEASED bits on the cluster so they'll be
 *		freed in async io (caller must clean on error).
 *	NOTE: the caller should clear PG_CLEANCHK bits if PGO_DOACTCLUST.
 *	      PG_CLEANCHK is only a hint, but clearing will help reduce
 *	      the number of calls we make to the pmap layer.
 */
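
/*
 * Illustrative sketch of option [2] ("example_pagerops", "example_cluster"
 * and "example_put" are hypothetical, not part of the original source):
 * a pager opts in to generic clustering by pointing its hook at this
 * function, alongside a pgo_cluster function that proposes the cluster
 * boundaries:
 *
 *	const struct uvm_pagerops example_pagerops = {
 *		.pgo_cluster = example_cluster,
 *		.pgo_mk_pcluster = uvm_mk_pcluster,
 *		.pgo_put = example_put,
 *	};
 *
 * With a NULL hook (option [1]), uvm_pager_put() simply writes the
 * single page it was handed.
 */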

struct vm_page **
uvm_mk_pcluster(struct uvm_object *uobj, struct vm_page **pps, int *npages,
    struct vm_page *center, int flags, voff_t mlo, voff_t mhi)
{
        struct vm_page **ppsp, *pclust;
        voff_t lo, hi, curoff;
        int center_idx, forward, incr;

        /*
         * center page should already be busy and write protected.  XXX:
         * suppose page is wired?  if we lock, then a process could
         * fault/block on it.  if we don't lock, a process could write the
         * pages in the middle of an I/O.  (consider an msync()).  let's
         * lock it for now (better to delay than corrupt data?).
         */
        /* get cluster boundaries, check sanity, and apply our limits as well.*/
        uobj->pgops->pgo_cluster(uobj, center->offset, &lo, &hi);
        if ((flags & PGO_ALLPAGES) == 0) {
                if (lo < mlo)
                        lo = mlo;
                if (hi > mhi)
                        hi = mhi;
        }
        if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */
                pps[0] = center;
                *npages = 1;
                return pps;
        }

        /* now determine the center and attempt to cluster around the edges */
        center_idx = (center->offset - lo) >> PAGE_SHIFT;
        pps[center_idx] = center;	/* plug in the center page */
        ppsp = &pps[center_idx];
        *npages = 1;

        /*
         * attempt to cluster around the left [backward], and then
         * the right side [forward].
         *
         * note that for inactive pages (pages that have been deactivated)
         * there are no valid mappings and PG_CLEAN should be up to date.
         * [i.e. there is no need to query the pmap with pmap_is_modified
         * since there are no mappings].
         */
        for (forward = 0 ; forward <= 1 ; forward++) {
                incr = forward ? PAGE_SIZE : -PAGE_SIZE;
                curoff = center->offset + incr;
                for ( ;(forward == 0 && curoff >= lo) ||
                    (forward && curoff < hi);
                    curoff += incr) {

                        pclust = uvm_pagelookup(uobj, curoff); /* lookup page */
                        if (pclust == NULL) {
                                break;	/* no page */
                        }
                        /* handle active pages */
                        /* NOTE: inactive pages don't have pmap mappings */
                        if ((pclust->pg_flags & PQ_INACTIVE) == 0) {
                                if ((flags & PGO_DOACTCLUST) == 0) {
                                        /* don't want mapped pages at all */
                                        break;
                                }

                                /* make sure "clean" bit is sync'd */
                                if ((pclust->pg_flags & PG_CLEANCHK) == 0) {
                                        if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY))
                                            == PG_CLEAN &&
                                            pmap_is_modified(pclust))
                                                atomic_clearbits_int(
                                                    &pclust->pg_flags,
                                                    PG_CLEAN);
                                        /* now checked */
                                        atomic_setbits_int(&pclust->pg_flags,
                                            PG_CLEANCHK);
                                }
                        }

                        /* is the page available for cleaning, and does it
                           need it? */
                        if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) != 0) {
                                break;	/* page is already clean or is busy */
                        }

                        /* yes!  enroll the page in our array */
                        atomic_setbits_int(&pclust->pg_flags, PG_BUSY);
                        UVM_PAGE_OWN(pclust, "uvm_mk_pcluster");

                        /*
                         * If we want to free after io is done, and we're
                         * async, set the released flag
                         */
                        if ((flags & (PGO_FREE|PGO_SYNCIO)) == PGO_FREE)
                                atomic_setbits_int(&pclust->pg_flags,
                                    PG_RELEASED);

                        /* XXX: protect wired page?  see above comment. */
                        pmap_page_protect(pclust, PROT_READ);
                        if (!forward) {
                                ppsp--;	/* back up one page */
                                *ppsp = pclust;
                        } else {
                                /* move forward one page */
                                ppsp[*npages] = pclust;
                        }
                        (*npages)++;
                }
        }

        /*
         * done!  return the cluster array to the caller!!!
         */
        return ppsp;
}

/*
 * uvm_pager_put: high level pageout routine
 *
 * we want to pageout page "pg" to backing store, clustering if
 * possible.
 *
 * => page queues must be locked by caller
 * => if page is not swap-backed, then "uobj" points to the object
 *	backing it.
 * => if page is swap-backed, then "uobj" should be NULL.
 * => "pg" should be PG_BUSY (by caller), and !PG_CLEAN.
 *    for swap-backed memory, "pg" can be NULL if there is no page
 *    of interest [sometimes the case for the pagedaemon]
 * => "ppsp_ptr" should point to an array of npages vm_page pointers
 *	for possible cluster building
 * => flags (first two for non-swap-backed pages)
 *	PGO_ALLPAGES: all pages in uobj are valid targets
 *	PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets
 *	PGO_SYNCIO: do SYNC I/O (no async)
 *	PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O
 *	PGO_FREE: tell the aio daemon to free pages in the async case.
 * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range
 *		  if (!uobj) start is the (daddr_t) of the starting swapblk
 * => return state:
 *	1. we return the VM_PAGER status code of the pageout
 *	2. we return with the page queues unlocked
 *	3. on errors we always drop the cluster.  thus, if we return
 *	   !PEND, !OK, then the caller only has to worry about
 *	   un-busying the main page (not the cluster pages).
 *	4. on success, if !PGO_PDFREECLUST, we return the cluster
 *	   with all pages busy (caller must un-busy and check
 *	   wanted/released flags).
 */
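
/*
 * Illustrative sketch of a swap-backed call from the pagedaemon
 * (hypothetical values, not part of the original source); the cluster
 * has already been assembled in pps[] and "swblk" names the swap slots
 * backing it:
 *
 *	struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp = pps;
 *	int result, npages;
 *
 *	...build the cluster in pps[] and set npages...
 *	uvm_lock_pageq();
 *	result = uvm_pager_put(NULL, pg, &ppsp, &npages,
 *	    PGO_ALLPAGES | PGO_PDFREECLUST, swblk, 0);
 *
 * On return the page queues are unlocked; unless "result" is
 * VM_PAGER_OK or VM_PAGER_PEND, only "pg" is left for the caller
 * to un-busy.
 */
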
int
uvm_pager_put(struct uvm_object *uobj, struct vm_page *pg,
    struct vm_page ***ppsp_ptr, int *npages, int flags,
    voff_t start, voff_t stop)
{
        int result;
        daddr_t swblk;
        struct vm_page **ppsp = *ppsp_ptr;

        /*
         * note that uobj is null  if we are doing a swap-backed pageout.
         * note that uobj is !null if we are doing normal object pageout.
         * note that the page queues must be locked to cluster.
         */
        if (uobj) {	/* if !swap-backed */
                /*
                 * attempt to build a cluster for pageout using its
                 * make-put-cluster function (if it has one).
                 */
                if (uobj->pgops->pgo_mk_pcluster) {
                        ppsp = uobj->pgops->pgo_mk_pcluster(uobj, ppsp,
                            npages, pg, flags, start, stop);
                        *ppsp_ptr = ppsp;  /* update caller's pointer */
                } else {
                        ppsp[0] = pg;
                        *npages = 1;
                }

                swblk = 0;	/* XXX: keep gcc happy */
        } else {
                /*
                 * for swap-backed pageout, the caller (the pagedaemon) has
                 * already built the cluster for us.  the starting swap
                 * block we are writing to has been passed in as "start."
                 * "pg" could be NULL if there is no page we are especially
                 * interested in (in which case the whole cluster gets dropped
                 * in the event of an error or a sync "done").
                 */
                swblk = start;
                /* ppsp and npages should be ok */
        }

        /* now that we've clustered we can unlock the page queues */
        uvm_unlock_pageq();

        /*
         * now attempt the I/O.  if we have a failure and we are
         * clustered, we will drop the cluster and try again.
         */
ReTry:
        if (uobj) {
                result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags);
        } else {
                /* XXX daddr_t -> int */
                result = uvm_swap_put(swblk, ppsp, *npages, flags);
        }

        /*
         * we have attempted the I/O.
         *
         * if the I/O was a success then:
         *	if !PGO_PDFREECLUST, we return the cluster to the
         *		caller (who must un-busy all pages)
         *	else we un-busy cluster pages for the pagedaemon
         *
         * if I/O is pending (async i/o) then we return the pending code.
         * [in this case the async i/o done function must clean up when
         *  i/o is done...]
         */
        if (result == VM_PAGER_PEND || result == VM_PAGER_OK) {
                if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) {
                        /* drop cluster */
                        if (*npages > 1 || pg == NULL)
                                uvm_pager_dropcluster(uobj, pg, ppsp, npages,
                                    PGO_PDFREECLUST);
                }
                return (result);
        }

        /*
         * a pager error occurred (even after dropping the cluster, if there
         * was one).  give up!  the caller only has one page ("pg")
         * to worry about.
         */
        if (*npages > 1 || pg == NULL) {
                uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP);

                /*
                 * for failed swap-backed pageouts with a "pg",
                 * we need to reset pg's swslot to either:
                 * "swblk" (for transient errors, so we can retry),
                 * or 0 (for hard errors).
                 */
                if (uobj == NULL && pg != NULL) {
                        /* XXX daddr_t -> int */
                        int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0;
                        if (pg->pg_flags & PQ_ANON) {
                                rw_enter(pg->uanon->an_lock, RW_WRITE);
                                pg->uanon->an_swslot = nswblk;
                                rw_exit(pg->uanon->an_lock);
                        } else {
                                rw_enter(pg->uobject->vmobjlock, RW_WRITE);
                                uao_set_swslot(pg->uobject,
                                    pg->offset >> PAGE_SHIFT,
                                    nswblk);
                                rw_exit(pg->uobject->vmobjlock);
                        }
                }
                if (result == VM_PAGER_AGAIN) {
                        /*
                         * for transient failures, free all the swslots that
                         * we're not going to retry with.
                         */
                        if (uobj == NULL) {
                                if (pg) {
                                        /* XXX daddr_t -> int */
                                        uvm_swap_free(swblk + 1, *npages - 1);
                                } else {
                                        /* XXX daddr_t -> int */
                                        uvm_swap_free(swblk, *npages);
                                }
                        }
                        if (pg) {
                                ppsp[0] = pg;
                                *npages = 1;
                                goto ReTry;
                        }
                } else if (uobj == NULL) {
                        /*
                         * for hard errors on swap-backed pageouts,
                         * mark the swslots as bad.  note that we do not
                         * free swslots that we mark bad.
                         */
                        /* XXX daddr_t -> int */
                        uvm_swap_markbad(swblk, *npages);
                }
        }
        return result;
}

/*
 * uvm_pager_dropcluster: drop a cluster we have built (because we
 * got an error, or, if PGO_PDFREECLUST we are un-busying the
 * cluster pages on behalf of the pagedaemon).
 *
 * => uobj, if non-null, is a non-swap-backed object
 * => page queues are not locked
 * => pg is our page of interest (the one we clustered around, can be null)
 * => ppsp/npages is our current cluster
 * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster
 *	pages on behalf of the pagedaemon.
 *	PGO_REALLOCSWAP: drop previously allocated swap slots for
 *		clustered swap-backed pages (except for "pg" if !NULL)
 *		"swblk" is the start of swap alloc (e.g. for ppsp[0])
 *		[only meaningful if swap-backed (uobj == NULL)]
 */
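
/*
 * Illustrative note (editor's sketch grounded in uvm_pager_put() above):
 * both modes appear there.  After a successful pagedaemon pageout the
 * cluster pages are un-busied and marked clean,
 *
 *	uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_PDFREECLUST);
 *
 * while after a failed pageout the swap slots of the cluster pages
 * (except "pg"'s) are zapped so they can be reallocated on retry:
 *
 *	uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP);
 */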

void
uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg,
    struct vm_page **ppsp, int *npages, int flags)
{
        int lcv;

        KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));

        /* drop all pages but "pg" */
        for (lcv = 0 ; lcv < *npages ; lcv++) {
                /* skip "pg" or empty slot */
                if (ppsp[lcv] == pg || ppsp[lcv] == NULL)
                        continue;

                /*
                 * Note that PQ_ANON bit can't change as long as we are holding
                 * the PG_BUSY bit (so there is no need to lock the page
                 * queues to test it).
                 */
                if (!uobj) {
                        if (ppsp[lcv]->pg_flags & PQ_ANON) {
                                rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE);
                                if (flags & PGO_REALLOCSWAP)
                                        /* zap swap block */
                                        ppsp[lcv]->uanon->an_swslot = 0;
                        } else {
                                rw_enter(ppsp[lcv]->uobject->vmobjlock,
                                    RW_WRITE);
                                if (flags & PGO_REALLOCSWAP)
                                        uao_set_swslot(ppsp[lcv]->uobject,
                                            ppsp[lcv]->offset >> PAGE_SHIFT, 0);
                        }
                }

                /* did someone want the page while we had it busy-locked? */
                if (ppsp[lcv]->pg_flags & PG_WANTED) {
                        wakeup(ppsp[lcv]);
                }

                /* if page was released, release it.  otherwise un-busy it */
                if (ppsp[lcv]->pg_flags & PG_RELEASED &&
                    ppsp[lcv]->pg_flags & PQ_ANON) {
                        /* kills anon and frees pg */
                        uvm_anon_release(ppsp[lcv]->uanon);
                        continue;
                } else {
                        /*
                         * if we were planning on async io then we would
                         * have PG_RELEASED set, clear that with the others.
                         */
                        atomic_clearbits_int(&ppsp[lcv]->pg_flags,
                            PG_BUSY|PG_WANTED|PG_FAKE|PG_RELEASED);
                        UVM_PAGE_OWN(ppsp[lcv], NULL);
                }

                /*
                 * if we are operating on behalf of the pagedaemon and we
                 * had a successful pageout update the page!
                 */
                if (flags & PGO_PDFREECLUST) {
                        pmap_clear_reference(ppsp[lcv]);
                        pmap_clear_modify(ppsp[lcv]);
                        atomic_setbits_int(&ppsp[lcv]->pg_flags, PG_CLEAN);
                }

                /* if anonymous cluster, unlock object and move on */
                if (!uobj) {
                        if (ppsp[lcv]->pg_flags & PQ_ANON)
                                rw_exit(ppsp[lcv]->uanon->an_lock);
                        else
                                rw_exit(ppsp[lcv]->uobject->vmobjlock);
                }
        }
}

/*
 * interrupt-context iodone handler for single-buf i/os
 * or the top-level buf of a nested-buf i/o.
 *
 * => must be at splbio().
 */

void
uvm_aio_biodone(struct buf *bp)
{
        splassert(IPL_BIO);

        /* reset b_iodone for when this is a single-buf i/o. */
        bp->b_iodone = uvm_aio_aiodone;

        mtx_enter(&uvm.aiodoned_lock);
        TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist);
        wakeup(&uvm.aiodoned);
        mtx_leave(&uvm.aiodoned_lock);
}
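
/*
 * Illustrative sketch of how a buf reaches the handler above
 * (hypothetical fragment, not part of the original source): the pager
 * sets the iodone hook on the async buf before starting the I/O,
 *
 *	bp->b_iodone = uvm_aio_biodone;
 *	VOP_STRATEGY(bp->b_vp, bp);	(or the device strategy routine
 *					 for swap I/O)
 *
 * so that biodone() calls uvm_aio_biodone() at IPL_BIO, which hands the
 * buf to the aiodone daemon; the daemon then runs uvm_aio_aiodone()
 * below in thread context.
 */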

void
uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, boolean_t write,
    int error)
{
        struct vm_page *pg;
        struct uvm_object *uobj;
        boolean_t swap;
        int i;

        uobj = NULL;

        for (i = 0; i < npages; i++) {
                pg = pgs[i];

                if (i == 0) {
                        swap = (pg->pg_flags & PQ_SWAPBACKED) != 0;
                        if (!swap) {
                                uobj = pg->uobject;
                                rw_enter(uobj->vmobjlock, RW_WRITE);
                        }
                }
                KASSERT(swap || pg->uobject == uobj);

                /*
                 * if this is a read and we got an error, mark the pages
                 * PG_RELEASED so that uvm_page_unbusy() will free them.
                 */
                if (!write && error) {
                        atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
                        continue;
                }
                KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0);

                /*
                 * if this is a read and the page is PG_FAKE,
                 * or this was a write that did not fail with ENOMEM,
                 * mark the page PG_CLEAN and not PG_FAKE.
                 */
                if ((pgs[i]->pg_flags & PG_FAKE) || (write && error != ENOMEM)) {
                        pmap_clear_reference(pgs[i]);
                        pmap_clear_modify(pgs[i]);
                        atomic_setbits_int(&pgs[i]->pg_flags, PG_CLEAN);
                        atomic_clearbits_int(&pgs[i]->pg_flags, PG_FAKE);
                }
        }
        uvm_page_unbusy(pgs, npages);
        if (!swap) {
                rw_exit(uobj->vmobjlock);
        }
}

/*
 * uvm_aio_aiodone: do iodone processing for async i/os.
 * this should be called in thread context, not interrupt context.
 */
void
uvm_aio_aiodone(struct buf *bp)
{
        int npages = bp->b_bufsize >> PAGE_SHIFT;
        struct vm_page *pgs[MAXPHYS >> PAGE_SHIFT];
        int i, error;
        boolean_t write;

        KASSERT(npages <= MAXPHYS >> PAGE_SHIFT);
        splassert(IPL_BIO);

        error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0;
        write = (bp->b_flags & B_READ) == 0;

        for (i = 0; i < npages; i++)
                pgs[i] = uvm_atopg((vaddr_t)bp->b_data +
                    ((vsize_t)i << PAGE_SHIFT));
        uvm_pagermapout((vaddr_t)bp->b_data, npages);
#ifdef UVM_SWAP_ENCRYPT
        /*
         * XXX - assumes that we only get ASYNC writes. used to be above.
         */
        if (pgs[0]->pg_flags & PQ_ENCRYPT) {
                uvm_swap_freepages(pgs, npages);
                goto freed;
        }
#endif /* UVM_SWAP_ENCRYPT */

        uvm_aio_aiodone_pages(pgs, npages, write, error);

#ifdef UVM_SWAP_ENCRYPT
freed:
#endif
        pool_put(&bufpool, bp);
}