FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_fault.c
1 /* $NetBSD: uvm_fault.c,v 1.231 2022/10/26 23:27:32 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp
28 */
29
30 /*
31 * uvm_fault.c: fault handler
32 */
33
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.231 2022/10/26 23:27:32 riastradh Exp $");
36
37 #include "opt_uvmhist.h"
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/atomic.h>
42 #include <sys/kernel.h>
43 #include <sys/mman.h>
44
45 #include <uvm/uvm.h>
46 #include <uvm/uvm_pdpolicy.h>
47
48 /*
49 *
50 * a word on page faults:
51 *
52 * types of page faults we handle:
53 *
54 * CASE 1: upper layer faults                   CASE 2: lower layer faults
55 *
56 *    CASE 1A         CASE 1B                  CASE 2A        CASE 2B
57 *    read/write1     write>1                  read/write   +-cow_write/zero
58 *         |             |                         |        |
59 *      +--|--+       +--|--+     +-----+       +  |  +     | +-----+
60 * amap |  V  |       |  ---------> new |          |        | |  ^  |
61 *      +-----+       +-----+     +-----+       +  |  +     | +--|--+
62 *                                                 |        |    |
63 *      +-----+       +-----+                   +--|--+     | +--|--+
64 * uobj | d/c |       | d/c |                   |  V  |     +----+  |
65 *      +-----+       +-----+                   +-----+       +-----+
66 *
67 * d/c = don't care
68 *
69 * case [0]: layerless fault
70 * no amap or uobj is present. this is an error.
71 *
72 * case [1]: upper layer fault [anon active]
73 * 1A: [read] or [write with anon->an_ref == 1]
74 * I/O takes place in upper level anon and uobj is not touched.
75 * 1B: [write with anon->an_ref > 1]
76 * new anon is alloc'd and data is copied off ["COW"]
77 *
78 * case [2]: lower layer fault [uobj]
79 * 2A: [read on non-NULL uobj] or [write to non-copy_on_write area]
80 * I/O takes place directly in object.
81 * 2B: [write to copy_on_write] or [read on NULL uobj]
82 * data is "promoted" from uobj to a new anon.
83 * if uobj is null, then we zero fill.
84 *
85 * we follow the standard UVM locking protocol ordering:
86 *
87 * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ)
88 * we hold a PG_BUSY page if we unlock for I/O
89 *
90 *
91 * the code is structured as follows:
92 *
93 * - init the "IN" params in the ufi structure
94 * ReFault: (ERESTART returned to the loop in uvm_fault_internal)
95 * - do lookups [locks maps], check protection, handle needs_copy
96 * - check for case 0 fault (error)
97 * - establish "range" of fault
98 * - if we have an amap lock it and extract the anons
99 * - if sequential advice deactivate pages behind us
100 * - at the same time check pmap for unmapped areas and anon for pages
101 * that we could map in (and map them in if found)
102 * - check object for resident pages that we could map in
103 * - if (case 2) goto Case2
104 * - >>> handle case 1
105 * - ensure source anon is resident in RAM
106 * - if case 1B alloc new anon and copy from source
107 * - map the correct page in
108 * Case2:
109 * - >>> handle case 2
110 * - ensure source page is resident (if uobj)
111 * - if case 2B alloc new anon and copy from source (could be zero
112 * fill if uobj == NULL)
113 * - map the correct page in
114 * - done!
115 *
116 * note on paging:
117 * if we have to do I/O we place a PG_BUSY page in the correct object,
118 * unlock everything, and do the I/O. when I/O is done we must reverify
119 * the state of the world before assuming that our data structures are
120 * valid. [because mappings could change while the map is unlocked]
121 *
122 * alternative 1: unbusy the page in question and restart the page fault
123 * from the top (ReFault). this is easy but does not take advantage
124 * of the information that we already have from our previous lookup,
125 * although it is possible that the "hints" in the vm_map will help here.
126 *
127 * alternative 2: the system already keeps track of a "version" number of
128 * a map. [i.e. every time you write-lock a map (e.g. to change a
129 * mapping) you bump the version number up by one...] so, we can save
130 * the version number of the map before we release the lock and start I/O.
131 * then when I/O is done we can relock and check the version numbers
132 * to see if anything changed. this might save us something over alternative 1
133 * because we don't have to unbusy the page and may need fewer compares.
134 *
135 * alternative 3: put in backpointers or a way to "hold" part of a map
136 * in place while I/O is in progress. this could be complex to
137 * implement (especially with structures like amap that can be referenced
138 * by multiple map entries, and figuring out what should wait could be
139 * complex as well...).
140 *
141 * we use alternative 2. given that we are multi-threaded now we may want
142 * to reconsider the choice.
143 */
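/*
 * e.g., a minimal sketch (an assumption about typical callers, not code
 * from this file): machine-dependent trap handlers usually reach this
 * handler through the uvm_fault() wrapper macro from uvm_extern.h and
 * retry the faulting instruction on success, roughly:
 *
 *	error = uvm_fault(map, trunc_page(va), ftype);
 *	if (error == 0)
 *		return;			-- resolved, resume the instruction
 *	-- otherwise deliver SIGSEGV/SIGBUS to the LWP based on "error"
 */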
144
145 /*
146 * local data structures
147 */
148
149 struct uvm_advice {
150 int advice;
151 int nback;
152 int nforw;
153 };
154
155 /*
156 * page range array:
157 * note: index in array must match "advice" value
158 * XXX: borrowed numbers from freebsd. do they work well for us?
159 */
160
161 static const struct uvm_advice uvmadvice[] = {
162 { UVM_ADV_NORMAL, 3, 4 },
163 { UVM_ADV_RANDOM, 0, 0 },
164 { UVM_ADV_SEQUENTIAL, 8, 7},
165 };
166
167 #define UVM_MAXRANGE 16 /* must be MAX() of nback+nforw+1 */
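/*
 * for example, a fault taken with UVM_ADV_NORMAL advice looks at up to
 * 3 pages behind and 4 pages ahead of the faulting address (npages == 8,
 * centeridx == 3, before clipping to the map entry), while
 * UVM_ADV_SEQUENTIAL gives the 8 + 7 + 1 == 16 maximum that UVM_MAXRANGE
 * must cover.
 */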
168
169 /*
170 * private prototypes
171 */
172
173 /*
174 * inline functions
175 */
176
177 /*
178 * uvmfault_anonflush: try and deactivate pages in specified anons
179 *
180 * => does not have to deactivate page if it is busy
181 */
182
183 static inline void
184 uvmfault_anonflush(struct vm_anon **anons, int n)
185 {
186 int lcv;
187 struct vm_page *pg;
188
189 for (lcv = 0; lcv < n; lcv++) {
190 if (anons[lcv] == NULL)
191 continue;
192 KASSERT(rw_lock_held(anons[lcv]->an_lock));
193 pg = anons[lcv]->an_page;
194 if (pg && (pg->flags & PG_BUSY) == 0) {
195 uvm_pagelock(pg);
196 uvm_pagedeactivate(pg);
197 uvm_pageunlock(pg);
198 }
199 }
200 }
201
202 /*
203 * normal functions
204 */
205
206 /*
207 * uvmfault_amapcopy: clear "needs_copy" in a map.
208 *
209 * => called with VM data structures unlocked (usually, see below)
210 * => we get a write lock on the maps and clear needs_copy for a VA
211 * => if we are out of RAM we sleep (waiting for more)
212 */
213
214 static void
215 uvmfault_amapcopy(struct uvm_faultinfo *ufi)
216 {
217 for (;;) {
218
219 /*
220 * no mapping? give up.
221 */
222
223 if (uvmfault_lookup(ufi, true) == false)
224 return;
225
226 /*
227 * copy if needed.
228 */
229
230 if (UVM_ET_ISNEEDSCOPY(ufi->entry))
231 amap_copy(ufi->map, ufi->entry, AMAP_COPY_NOWAIT,
232 ufi->orig_rvaddr, ufi->orig_rvaddr + 1);
233
234 /*
235 * didn't work? must be out of RAM. unlock and sleep.
236 */
237
238 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
239 uvmfault_unlockmaps(ufi, true);
240 uvm_wait("fltamapcopy");
241 continue;
242 }
243
244 /*
245 * got it! unlock and return.
246 */
247
248 uvmfault_unlockmaps(ufi, true);
249 return;
250 }
251 /*NOTREACHED*/
252 }
253
254 /*
255 * uvmfault_anonget: get data in an anon into a non-busy, non-released
256 * page in that anon.
257 *
258 * => Map, amap and thus anon should be locked by caller.
259 * => If we fail, we unlock everything and error is returned.
260 * => If we are successful, return with everything still locked.
261 * => We do not move the page on the queues [gets moved later]. If we
262 * allocate a new page [we_own], it gets put on the queues. Either way,
263 * the result is that the page is on the queues at return time
264 * => For pages which are on loan from a uvm_object (and thus are not owned
265 * by the anon): if successful, return with the owning object locked.
266 * The caller must unlock this object when it unlocks everything else.
267 */
268
269 int
270 uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
271 struct vm_anon *anon)
272 {
273 struct vm_page *pg;
274 krw_t lock_type;
275 int error;
276
277 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
278 KASSERT(rw_lock_held(anon->an_lock));
279 KASSERT(anon->an_lock == amap->am_lock);
280
281 /* Increment the counters. */
282 cpu_count(CPU_COUNT_FLTANGET, 1);
283 if (anon->an_page) {
284 curlwp->l_ru.ru_minflt++;
285 } else {
286 curlwp->l_ru.ru_majflt++;
287 }
288 error = 0;
289
290 /*
291 * Loop until we get the anon data, or fail.
292 */
293
294 for (;;) {
295 bool we_own, locked;
296 /*
297 * Note: 'we_own' will become true if we set PG_BUSY on a page.
298 */
299 we_own = false;
300 pg = anon->an_page;
301
302 /*
303 * If there is a resident page and it is loaned, then anon
304 * may not own it. Call out to uvm_anon_lockloanpg() to
305 * identify and lock the real owner of the page.
306 */
307
308 if (pg && pg->loan_count)
309 pg = uvm_anon_lockloanpg(anon);
310
311 /*
312 * Is page resident? Make sure it is not busy/released.
313 */
314
315 lock_type = rw_lock_op(anon->an_lock);
316 if (pg) {
317
318 /*
319 * at this point, if the page has a uobject [meaning
320 * we have it on loan], then that uobject is locked
321 * by us! if the page is busy, we drop all the
322 * locks (including uobject) and try again.
323 */
324
325 if ((pg->flags & PG_BUSY) == 0) {
326 UVMHIST_LOG(maphist, "<- OK",0,0,0,0);
327 return 0;
328 }
329 cpu_count(CPU_COUNT_FLTPGWAIT, 1);
330
331 /*
332 * The last unlock must be an atomic unlock and wait
333 * on the owner of the page.
334 */
335
336 if (pg->uobject) {
337 /* Owner of page is UVM object. */
338 uvmfault_unlockall(ufi, amap, NULL);
339 UVMHIST_LOG(maphist, " unlock+wait on uobj",0,
340 0,0,0);
341 uvm_pagewait(pg, pg->uobject->vmobjlock, "anonget1");
342 } else {
343 /* Owner of page is anon. */
344 uvmfault_unlockall(ufi, NULL, NULL);
345 UVMHIST_LOG(maphist, " unlock+wait on anon",0,
346 0,0,0);
347 uvm_pagewait(pg, anon->an_lock, "anonget2");
348 }
349 } else {
350 #if defined(VMSWAP)
351 /*
352 * No page, therefore allocate one. A write lock is
353 * required for this. If the caller didn't supply
354 * one, fail now and have them retry.
355 */
356
357 if (lock_type == RW_READER) {
358 return ENOLCK;
359 }
360 pg = uvm_pagealloc(NULL,
361 ufi != NULL ? ufi->orig_rvaddr : 0,
362 anon, ufi != NULL ? UVM_FLAG_COLORMATCH : 0);
363 if (pg == NULL) {
364 /* Out of memory. Wait a little. */
365 uvmfault_unlockall(ufi, amap, NULL);
366 cpu_count(CPU_COUNT_FLTNORAM, 1);
367 UVMHIST_LOG(maphist, " noram -- UVM_WAIT",0,
368 0,0,0);
369 if (!uvm_reclaimable()) {
370 return ENOMEM;
371 }
372 uvm_wait("flt_noram1");
373 } else {
374 /* PG_BUSY bit is set. */
375 we_own = true;
376 uvmfault_unlockall(ufi, amap, NULL);
377
378 /*
379 * Pass a PG_BUSY+PG_FAKE clean page into
380 * the uvm_swap_get() function with all data
381 * structures unlocked. Note that it is OK
382 * to read an_swslot here, because we hold
383 * PG_BUSY on the page.
384 */
385 cpu_count(CPU_COUNT_PAGEINS, 1);
386 error = uvm_swap_get(pg, anon->an_swslot,
387 PGO_SYNCIO);
388
389 /*
390 * We clean up after the I/O below in the
391 * 'we_own' case.
392 */
393 }
394 #else
395 panic("%s: no page", __func__);
396 #endif /* defined(VMSWAP) */
397 }
398
399 /*
400 * Re-lock the map and anon.
401 */
402
403 locked = uvmfault_relock(ufi);
404 if (locked || we_own) {
405 rw_enter(anon->an_lock, lock_type);
406 }
407
408 /*
409 * If we own the page (i.e. we set PG_BUSY), then we need
410 * to clean up after the I/O. There are three cases to
411 * consider:
412 *
413 * 1) Page was released during I/O: free anon and ReFault.
414 * 2) I/O not OK. Free the page and cause the fault to fail.
415 * 3) I/O OK! Activate the page and sync with the non-we_own
416 * case (i.e. drop anon lock if not locked).
417 */
418
419 if (we_own) {
420 KASSERT(lock_type == RW_WRITER);
421 #if defined(VMSWAP)
422 if (error) {
423
424 /*
425 * Remove the swap slot from the anon and
426 * mark the anon as having no real slot.
427 * Do not free the swap slot, thus preventing
428 * it from being used again.
429 */
430
431 if (anon->an_swslot > 0) {
432 uvm_swap_markbad(anon->an_swslot, 1);
433 }
434 anon->an_swslot = SWSLOT_BAD;
435
436 if ((pg->flags & PG_RELEASED) != 0) {
437 goto released;
438 }
439
440 /*
441 * Note: page was never !PG_BUSY, so it
442 * cannot be mapped and thus no need to
443 * pmap_page_protect() it.
444 */
445
446 uvm_pagefree(pg);
447
448 if (locked) {
449 uvmfault_unlockall(ufi, NULL, NULL);
450 }
451 rw_exit(anon->an_lock);
452 UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0);
453 return error;
454 }
455
456 if ((pg->flags & PG_RELEASED) != 0) {
457 released:
458 KASSERT(anon->an_ref == 0);
459
460 /*
461 * Released while we had unlocked amap.
462 */
463
464 if (locked) {
465 uvmfault_unlockall(ufi, NULL, NULL);
466 }
467 uvm_anon_release(anon);
468
469 if (error) {
470 UVMHIST_LOG(maphist,
471 "<- ERROR/RELEASED", 0,0,0,0);
472 return error;
473 }
474
475 UVMHIST_LOG(maphist, "<- RELEASED", 0,0,0,0);
476 return ERESTART;
477 }
478
479 /*
480 * We have successfully read the page, activate it.
481 */
482
483 uvm_pagelock(pg);
484 uvm_pageactivate(pg);
485 uvm_pagewakeup(pg);
486 uvm_pageunlock(pg);
487 pg->flags &= ~(PG_BUSY|PG_FAKE);
488 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_UNKNOWN);
489 UVM_PAGE_OWN(pg, NULL);
490 #else
491 panic("%s: we_own", __func__);
492 #endif /* defined(VMSWAP) */
493 }
494
495 /*
496 * We were not able to re-lock the map - restart the fault.
497 */
498
499 if (!locked) {
500 if (we_own) {
501 rw_exit(anon->an_lock);
502 }
503 UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
504 return ERESTART;
505 }
506
507 /*
508 * Verify that no one has touched the amap and moved
509 * the anon on us.
510 */
511
512 if (ufi != NULL && amap_lookup(&ufi->entry->aref,
513 ufi->orig_rvaddr - ufi->entry->start) != anon) {
514
515 uvmfault_unlockall(ufi, amap, NULL);
516 UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
517 return ERESTART;
518 }
519
520 /*
521 * Retry..
522 */
523
524 cpu_count(CPU_COUNT_FLTANRETRY, 1);
525 continue;
526 }
527 /*NOTREACHED*/
528 }
529
530 /*
531 * uvmfault_promote: promote data to a new anon. used for 1B and 2B.
532 *
533 * 1. allocate an anon and a page.
534 * 2. fill its contents.
535 * 3. put it into amap.
536 *
537 * => if we fail (result != 0) we unlock everything.
538 * => on success, return a new locked anon via 'nanon'.
539 * (*nanon)->an_page will be a resident, locked, dirty page.
540 * => it is the caller's responsibility to put the promoted nanon->an_page
541 * on the page queue.
542 */
543
544 static int
545 uvmfault_promote(struct uvm_faultinfo *ufi,
546 struct vm_anon *oanon,
547 struct vm_page *uobjpage,
548 struct vm_anon **nanon, /* OUT: allocated anon */
549 struct vm_anon **spare)
550 {
551 struct vm_amap *amap = ufi->entry->aref.ar_amap;
552 struct uvm_object *uobj;
553 struct vm_anon *anon;
554 struct vm_page *pg;
555 struct vm_page *opg;
556 int error;
557 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
558
559 if (oanon) {
560 /* anon COW */
561 opg = oanon->an_page;
562 KASSERT(opg != NULL);
563 KASSERT(opg->uobject == NULL || opg->loan_count > 0);
564 } else if (uobjpage != PGO_DONTCARE) {
565 /* object-backed COW */
566 opg = uobjpage;
567 KASSERT(rw_lock_held(opg->uobject->vmobjlock));
568 } else {
569 /* ZFOD */
570 opg = NULL;
571 }
572 if (opg != NULL) {
573 uobj = opg->uobject;
574 } else {
575 uobj = NULL;
576 }
577
578 KASSERT(amap != NULL);
579 KASSERT(uobjpage != NULL);
580 KASSERT(rw_write_held(amap->am_lock));
581 KASSERT(oanon == NULL || amap->am_lock == oanon->an_lock);
582 KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock));
583
584 if (*spare != NULL) {
585 anon = *spare;
586 *spare = NULL;
587 } else {
588 anon = uvm_analloc();
589 }
590 if (anon) {
591
592 /*
593 * The new anon is locked.
594 *
595 * if opg == NULL, we want a zero'd, dirty page,
596 * so have uvm_pagealloc() do that for us.
597 */
598
599 KASSERT(anon->an_lock == NULL);
600 anon->an_lock = amap->am_lock;
601 pg = uvm_pagealloc(NULL, ufi->orig_rvaddr, anon,
602 UVM_FLAG_COLORMATCH | (opg == NULL ? UVM_PGA_ZERO : 0));
603 if (pg == NULL) {
604 anon->an_lock = NULL;
605 }
606 } else {
607 pg = NULL;
608 }
609
610 /*
611 * out of memory resources?
612 */
613
614 if (pg == NULL) {
615 /* save anon for the next try. */
616 if (anon != NULL) {
617 *spare = anon;
618 }
619
620 /* unlock and fail ... */
621 uvmfault_unlockall(ufi, amap, uobj);
622 if (!uvm_reclaimable()) {
623 UVMHIST_LOG(maphist, "out of VM", 0,0,0,0);
624 cpu_count(CPU_COUNT_FLTNOANON, 1);
625 error = ENOMEM;
626 goto done;
627 }
628
629 UVMHIST_LOG(maphist, "out of RAM, waiting for more", 0,0,0,0);
630 cpu_count(CPU_COUNT_FLTNORAM, 1);
631 uvm_wait("flt_noram5");
632 error = ERESTART;
633 goto done;
634 }
635
636 /* copy page [pg now dirty] */
637 if (opg) {
638 uvm_pagecopy(opg, pg);
639 }
640 KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY);
641
642 amap_add(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start, anon,
643 oanon != NULL);
644
645 /*
646 * from this point on am_lock won't be dropped until the page is
647 * entered, so it's safe to unbusy the page up front.
648 *
649 * uvm_fault_{upper,lower}_done will activate or enqueue the page.
650 */
651
652 pg = anon->an_page;
653 pg->flags &= ~(PG_BUSY|PG_FAKE);
654 UVM_PAGE_OWN(pg, NULL);
655
656 *nanon = anon;
657 error = 0;
658 done:
659 return error;
660 }
661
662 /*
663 * Update statistics after fault resolution.
664 * - maxrss
665 */
666 void
667 uvmfault_update_stats(struct uvm_faultinfo *ufi)
668 {
669 struct vm_map *map;
670 struct vmspace *vm;
671 struct proc *p;
672 vsize_t res;
673
674 map = ufi->orig_map;
675
676 p = curproc;
677 KASSERT(p != NULL);
678 vm = p->p_vmspace;
679
680 if (&vm->vm_map != map)
681 return;
682
683 res = pmap_resident_count(map->pmap);
684 if (vm->vm_rssmax < res)
685 vm->vm_rssmax = res;
686 }
687
688 /*
689 * F A U L T - m a i n e n t r y p o i n t
690 */
691
692 /*
693 * uvm_fault: page fault handler
694 *
695 * => called from MD code to resolve a page fault
696 * => VM data structures usually should be unlocked. however, it is
697 * possible to call here with the main map locked if the caller
698 * gets a write lock, sets it recursive, and then calls us (c.f.
699 * uvm_map_pageable). this should be avoided because it keeps
700 * the map locked during I/O.
701 * => MUST NEVER BE CALLED IN INTERRUPT CONTEXT
702 */
703
704 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \
705 ~VM_PROT_WRITE : VM_PROT_ALL)
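/*
 * for example, for a copy-on-write entry MASK(entry) == ~VM_PROT_WRITE, so
 * the pgo_get call in uvm_fault_lower_lookup() never asks the pager for
 * write access; a write fault on such an entry is instead serviced by
 * promoting the data into a new anon.
 */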
706
707 /* fault_flag values passed from uvm_fault_wire to uvm_fault_internal */
708 #define UVM_FAULT_WIRE (1 << 0)
709 #define UVM_FAULT_MAXPROT (1 << 1)
710
711 struct uvm_faultctx {
712
713 /*
714 * the following members are set up by uvm_fault_check() and
715 * read-only after that.
716 *
717 * note that narrow is used by uvm_fault_check() to change
718 * the behaviour after ERESTART.
719 *
720 * most of them might change after ERESTART if the underlying
721 * map entry has been changed behind us. an exception is
722 * wire_paging, which never changes.
723 */
724 vm_prot_t access_type;
725 vaddr_t startva;
726 int npages;
727 int centeridx;
728 bool narrow; /* work on a single requested page only */
729 bool wire_mapping; /* request a PMAP_WIRED mapping
730 (UVM_FAULT_WIRE or VM_MAPENT_ISWIRED) */
731 bool wire_paging; /* request uvm_pagewire
732 (true for UVM_FAULT_WIRE) */
733 bool cow_now; /* VM_PROT_WRITE is actually requested
734 (ie. should break COW and page loaning) */
735
736 /*
737 * enter_prot is set up by uvm_fault_check() and clamped
738 * (ie. drop the VM_PROT_WRITE bit) in various places in case
739 * of !cow_now.
740 */
741 vm_prot_t enter_prot; /* prot at which we want to enter pages in */
742
743 /*
744 * the following member is for uvmfault_promote() and ERESTART.
745 */
746 struct vm_anon *anon_spare;
747
748 /*
749 * the following is actually a uvm_fault_lower() internal.
750 * it's here merely for debugging.
751 * (or due to the mechanical separation of the function?)
752 */
753 bool promote;
754
755 /*
756 * type of lock to acquire on objects in both layers.
757 */
758 krw_t lower_lock_type;
759 krw_t upper_lock_type;
760 };
761
762 static inline int uvm_fault_check(
763 struct uvm_faultinfo *, struct uvm_faultctx *,
764 struct vm_anon ***, bool);
765
766 static int uvm_fault_upper(
767 struct uvm_faultinfo *, struct uvm_faultctx *,
768 struct vm_anon **);
769 static inline int uvm_fault_upper_lookup(
770 struct uvm_faultinfo *, const struct uvm_faultctx *,
771 struct vm_anon **, struct vm_page **);
772 static inline void uvm_fault_upper_neighbor(
773 struct uvm_faultinfo *, const struct uvm_faultctx *,
774 vaddr_t, struct vm_page *, bool);
775 static inline int uvm_fault_upper_loan(
776 struct uvm_faultinfo *, struct uvm_faultctx *,
777 struct vm_anon *, struct uvm_object **);
778 static inline int uvm_fault_upper_promote(
779 struct uvm_faultinfo *, struct uvm_faultctx *,
780 struct uvm_object *, struct vm_anon *);
781 static inline int uvm_fault_upper_direct(
782 struct uvm_faultinfo *, struct uvm_faultctx *,
783 struct uvm_object *, struct vm_anon *);
784 static int uvm_fault_upper_enter(
785 struct uvm_faultinfo *, const struct uvm_faultctx *,
786 struct uvm_object *, struct vm_anon *,
787 struct vm_page *, struct vm_anon *);
788 static inline void uvm_fault_upper_done(
789 struct uvm_faultinfo *, const struct uvm_faultctx *,
790 struct vm_anon *, struct vm_page *);
791
792 static int uvm_fault_lower(
793 struct uvm_faultinfo *, struct uvm_faultctx *,
794 struct vm_page **);
795 static inline void uvm_fault_lower_lookup(
796 struct uvm_faultinfo *, const struct uvm_faultctx *,
797 struct vm_page **);
798 static inline void uvm_fault_lower_neighbor(
799 struct uvm_faultinfo *, const struct uvm_faultctx *,
800 vaddr_t, struct vm_page *);
801 static inline int uvm_fault_lower_io(
802 struct uvm_faultinfo *, struct uvm_faultctx *,
803 struct uvm_object **, struct vm_page **);
804 static inline int uvm_fault_lower_direct(
805 struct uvm_faultinfo *, struct uvm_faultctx *,
806 struct uvm_object *, struct vm_page *);
807 static inline int uvm_fault_lower_direct_loan(
808 struct uvm_faultinfo *, struct uvm_faultctx *,
809 struct uvm_object *, struct vm_page **,
810 struct vm_page **);
811 static inline int uvm_fault_lower_promote(
812 struct uvm_faultinfo *, struct uvm_faultctx *,
813 struct uvm_object *, struct vm_page *);
814 static int uvm_fault_lower_enter(
815 struct uvm_faultinfo *, const struct uvm_faultctx *,
816 struct uvm_object *,
817 struct vm_anon *, struct vm_page *);
818 static inline void uvm_fault_lower_done(
819 struct uvm_faultinfo *, const struct uvm_faultctx *,
820 struct uvm_object *, struct vm_page *);
821
822 int
823 uvm_fault_internal(struct vm_map *orig_map, vaddr_t vaddr,
824 vm_prot_t access_type, int fault_flag)
825 {
826 struct uvm_faultinfo ufi;
827 struct uvm_faultctx flt = {
828 .access_type = access_type,
829
830 /* don't look for neighborhood pages on "wire" fault */
831 .narrow = (fault_flag & UVM_FAULT_WIRE) != 0,
832
833 /* "wire" fault causes wiring of both mapping and paging */
834 .wire_mapping = (fault_flag & UVM_FAULT_WIRE) != 0,
835 .wire_paging = (fault_flag & UVM_FAULT_WIRE) != 0,
836
837 /*
838 * default lock type to acquire on upper & lower layer
839 * objects: reader. this can be upgraded at any point
840 * during the fault from read -> write and uvm_faultctx
841 * changed to match, but is never downgraded write -> read.
842 */
843 #ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
844 .upper_lock_type = RW_WRITER,
845 .lower_lock_type = RW_WRITER,
846 #else
847 .upper_lock_type = RW_READER,
848 .lower_lock_type = RW_READER,
849 #endif
850 };
851 const bool maxprot = (fault_flag & UVM_FAULT_MAXPROT) != 0;
852 struct vm_anon *anons_store[UVM_MAXRANGE], **anons;
853 struct vm_page *pages_store[UVM_MAXRANGE], **pages;
854 int error;
855
856 UVMHIST_FUNC(__func__);
857 UVMHIST_CALLARGS(maphist, "(map=%#jx, vaddr=%#jx, at=%jd, ff=%jd)",
858 (uintptr_t)orig_map, vaddr, access_type, fault_flag);
859
860 /* Don't count anything until user interaction is possible */
861 kpreempt_disable();
862 if (__predict_true(start_init_exec)) {
863 struct cpu_info *ci = curcpu();
864 CPU_COUNT(CPU_COUNT_NFAULT, 1);
865 /* Don't flood RNG subsystem with samples. */
866 if (++(ci->ci_faultrng) == 503) {
867 ci->ci_faultrng = 0;
868 rnd_add_uint32(&curcpu()->ci_data.cpu_uvm->rs,
869 sizeof(vaddr_t) == sizeof(uint32_t) ?
870 (uint32_t)vaddr : sizeof(vaddr_t) ==
871 sizeof(uint64_t) ?
872 (uint32_t)vaddr :
873 (uint32_t)ci->ci_counts[CPU_COUNT_NFAULT]);
874 }
875 }
876 kpreempt_enable();
877
878 /*
879 * init the IN parameters in the ufi
880 */
881
882 ufi.orig_map = orig_map;
883 ufi.orig_rvaddr = trunc_page(vaddr);
884 ufi.orig_size = PAGE_SIZE; /* can't get any smaller than this */
885
886 error = ERESTART;
887 while (error == ERESTART) { /* ReFault: */
888 anons = anons_store;
889 pages = pages_store;
890
891 error = uvm_fault_check(&ufi, &flt, &anons, maxprot);
892 if (error != 0)
893 continue;
894
895 error = uvm_fault_upper_lookup(&ufi, &flt, anons, pages);
896 if (error != 0)
897 continue;
898
899 if (pages[flt.centeridx] == PGO_DONTCARE)
900 error = uvm_fault_upper(&ufi, &flt, anons);
901 else {
902 struct uvm_object * const uobj =
903 ufi.entry->object.uvm_obj;
904
905 if (uobj && uobj->pgops->pgo_fault != NULL) {
906 /*
907 * invoke "special" fault routine.
908 */
909 rw_enter(uobj->vmobjlock, RW_WRITER);
910 /* locked: maps(read), amap(if there), uobj */
911 error = uobj->pgops->pgo_fault(&ufi,
912 flt.startva, pages, flt.npages,
913 flt.centeridx, flt.access_type,
914 PGO_LOCKED|PGO_SYNCIO);
915
916 /*
917 * locked: nothing, pgo_fault has unlocked
918 * everything
919 */
920
921 /*
922 * object fault routine responsible for
923 * pmap_update().
924 */
925
926 /*
927 * Wake up the pagedaemon if the fault method
928 * failed for lack of memory but some can be
929 * reclaimed.
930 */
931 if (error == ENOMEM && uvm_reclaimable()) {
932 uvm_wait("pgo_fault");
933 error = ERESTART;
934 }
935 } else {
936 error = uvm_fault_lower(&ufi, &flt, pages);
937 }
938 }
939 }
940
941 if (flt.anon_spare != NULL) {
942 flt.anon_spare->an_ref--;
943 KASSERT(flt.anon_spare->an_ref == 0);
944 KASSERT(flt.anon_spare->an_lock == NULL);
945 uvm_anfree(flt.anon_spare);
946 }
947 return error;
948 }
949
950 /*
951 * uvm_fault_check: check prot, handle needs-copy, etc.
952 *
953 * 1. lookup entry.
954 * 2. check protection.
955 * 3. adjust fault condition (mainly for simulated fault).
956 * 4. handle needs-copy (lazy amap copy).
957 * 5. establish range of interest for neighbor fault (aka pre-fault).
958 * 6. look up anons (if amap exists).
959 * 7. flush pages (if MADV_SEQUENTIAL)
960 *
961 * => called with nothing locked.
962 * => if we fail (result != 0) we unlock everything.
963 * => initialize/adjust many members of flt.
964 */
965
966 static int
967 uvm_fault_check(
968 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
969 struct vm_anon ***ranons, bool maxprot)
970 {
971 struct vm_amap *amap;
972 struct uvm_object *uobj;
973 vm_prot_t check_prot;
974 int nback, nforw;
975 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
976
977 /*
978 * lookup and lock the maps
979 */
980
981 if (uvmfault_lookup(ufi, false) == false) {
982 UVMHIST_LOG(maphist, "<- no mapping @ %#jx", ufi->orig_rvaddr,
983 0,0,0);
984 return EFAULT;
985 }
986 /* locked: maps(read) */
987
988 #ifdef DIAGNOSTIC
989 if ((ufi->map->flags & VM_MAP_PAGEABLE) == 0) {
990 printf("Page fault on non-pageable map:\n");
991 printf("ufi->map = %p\n", ufi->map);
992 printf("ufi->orig_map = %p\n", ufi->orig_map);
993 printf("ufi->orig_rvaddr = %#lx\n", (u_long) ufi->orig_rvaddr);
994 panic("uvm_fault: (ufi->map->flags & VM_MAP_PAGEABLE) == 0");
995 }
996 #endif
997
998 /*
999 * check protection
1000 */
1001
1002 check_prot = maxprot ?
1003 ufi->entry->max_protection : ufi->entry->protection;
1004 if ((check_prot & flt->access_type) != flt->access_type) {
1005 UVMHIST_LOG(maphist,
1006 "<- protection failure (prot=%#jx, access=%#jx)",
1007 ufi->entry->protection, flt->access_type, 0, 0);
1008 uvmfault_unlockmaps(ufi, false);
1009 return EFAULT;
1010 }
1011
1012 /*
1013 * "enter_prot" is the protection we want to enter the page in at.
1014 * for certain pages (e.g. copy-on-write pages) this protection can
1015 * be more strict than ufi->entry->protection. "wired" means either
1016 * the entry is wired or we are fault-wiring the pg.
1017 */
1018
1019 flt->enter_prot = ufi->entry->protection;
1020 if (VM_MAPENT_ISWIRED(ufi->entry)) {
1021 flt->wire_mapping = true;
1022 flt->wire_paging = true;
1023 flt->narrow = true;
1024 }
1025
1026 if (flt->wire_mapping) {
1027 flt->access_type = flt->enter_prot; /* full access for wired */
1028 flt->cow_now = (check_prot & VM_PROT_WRITE) != 0;
1029 } else {
1030 flt->cow_now = (flt->access_type & VM_PROT_WRITE) != 0;
1031 }
1032
1033 if (flt->wire_paging) {
1034 /* wiring pages requires a write lock. */
1035 flt->upper_lock_type = RW_WRITER;
1036 flt->lower_lock_type = RW_WRITER;
1037 }
1038
1039 flt->promote = false;
1040
1041 /*
1042 * handle "needs_copy" case. if we need to copy the amap we will
1043 * have to drop our readlock and relock it with a write lock. (we
1044 * need a write lock to change anything in a map entry [e.g.
1045 * needs_copy]).
1046 */
1047
1048 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
1049 if (flt->cow_now || (ufi->entry->object.uvm_obj == NULL)) {
1050 KASSERT(!maxprot);
1051 /* need to clear */
1052 UVMHIST_LOG(maphist,
1053 " need to clear needs_copy and refault",0,0,0,0);
1054 uvmfault_unlockmaps(ufi, false);
1055 uvmfault_amapcopy(ufi);
1056 cpu_count(CPU_COUNT_FLTAMCOPY, 1);
1057 return ERESTART;
1058
1059 } else {
1060
1061 /*
1062 * ensure that we pmap_enter page R/O since
1063 * needs_copy is still true
1064 */
1065
1066 flt->enter_prot &= ~VM_PROT_WRITE;
1067 }
1068 }
1069
1070 /*
1071 * identify the players
1072 */
1073
1074 amap = ufi->entry->aref.ar_amap; /* upper layer */
1075 uobj = ufi->entry->object.uvm_obj; /* lower layer */
1076
1077 /*
1078 * check for a case 0 fault. if nothing backing the entry then
1079 * error now.
1080 */
1081
1082 if (amap == NULL && uobj == NULL) {
1083 uvmfault_unlockmaps(ufi, false);
1084 UVMHIST_LOG(maphist,"<- no backing store, no overlay",0,0,0,0);
1085 return EFAULT;
1086 }
1087
1088 /*
1089 * for a case 2B fault waste no time on adjacent pages because
1090 * they are likely already entered.
1091 */
1092
1093 if (uobj != NULL && amap != NULL &&
1094 (flt->access_type & VM_PROT_WRITE) != 0) {
1095 /* wide fault (!narrow) */
1096 flt->narrow = true;
1097 }
1098
1099 /*
1100 * establish range of interest based on advice from mapper
1101 * and then clip to fit map entry. note that we only want
1102 * to do this the first time through the fault. if we
1103 * ReFault we will disable this by setting "narrow" to true.
1104 */
1105
1106 if (flt->narrow == false) {
1107
1108 /* wide fault (!narrow) */
1109 KASSERT(uvmadvice[ufi->entry->advice].advice ==
1110 ufi->entry->advice);
1111 nback = MIN(uvmadvice[ufi->entry->advice].nback,
1112 (ufi->orig_rvaddr - ufi->entry->start) >> PAGE_SHIFT);
1113 flt->startva = ufi->orig_rvaddr - (nback << PAGE_SHIFT);
1114 /*
1115 * note: "-1" because we don't want to count the
1116 * faulting page as forw
1117 */
1118 nforw = MIN(uvmadvice[ufi->entry->advice].nforw,
1119 ((ufi->entry->end - ufi->orig_rvaddr) >>
1120 PAGE_SHIFT) - 1);
1121 flt->npages = nback + nforw + 1;
1122 flt->centeridx = nback;
1123
1124 flt->narrow = true; /* ensure only once per-fault */
1125
1126 } else {
1127
1128 /* narrow fault! */
1129 nback = nforw = 0;
1130 flt->startva = ufi->orig_rvaddr;
1131 flt->npages = 1;
1132 flt->centeridx = 0;
1133
1134 }
1135 /* offset from entry's start to pgs' start */
1136 const voff_t eoff = flt->startva - ufi->entry->start;
1137
1138 /* locked: maps(read) */
1139 UVMHIST_LOG(maphist, " narrow=%jd, back=%jd, forw=%jd, startva=%#jx",
1140 flt->narrow, nback, nforw, flt->startva);
1141 UVMHIST_LOG(maphist, " entry=%#jx, amap=%#jx, obj=%#jx",
1142 (uintptr_t)ufi->entry, (uintptr_t)amap, (uintptr_t)uobj, 0);
1143
1144 /*
1145 * guess at the most suitable lock types to acquire.
1146 * if we've got an amap then lock it and extract current anons.
1147 */
1148
1149 if (amap) {
1150 if ((amap_flags(amap) & AMAP_SHARED) == 0) {
1151 /*
1152 * the amap isn't shared. get a writer lock to
1153 * avoid the cost of upgrading the lock later if
1154 * needed.
1155 *
1156 * XXX nice for PostgreSQL, but consider threads.
1157 */
1158 flt->upper_lock_type = RW_WRITER;
1159 } else if ((flt->access_type & VM_PROT_WRITE) != 0) {
1160 /*
1161 * assume we're about to COW.
1162 */
1163 flt->upper_lock_type = RW_WRITER;
1164 }
1165 amap_lock(amap, flt->upper_lock_type);
1166 amap_lookups(&ufi->entry->aref, eoff, *ranons, flt->npages);
1167 } else {
1168 if ((flt->access_type & VM_PROT_WRITE) != 0) {
1169 /*
1170 * we are about to dirty the object and that
1171 * requires a write lock.
1172 */
1173 flt->lower_lock_type = RW_WRITER;
1174 }
1175 *ranons = NULL; /* to be safe */
1176 }
1177
1178 /* locked: maps(read), amap(if there) */
1179 KASSERT(amap == NULL ||
1180 rw_lock_op(amap->am_lock) == flt->upper_lock_type);
1181
1182 /*
1183 * for MADV_SEQUENTIAL mappings we want to deactivate the back pages
1184 * now and then forget about them (for the rest of the fault).
1185 */
1186
1187 if (ufi->entry->advice == MADV_SEQUENTIAL && nback != 0) {
1188
1189 UVMHIST_LOG(maphist, " MADV_SEQUENTIAL: flushing backpages",
1190 0,0,0,0);
1191 /* flush back-page anons? */
1192 if (amap)
1193 uvmfault_anonflush(*ranons, nback);
1194
1195 /*
1196 * flush object? change lock type to RW_WRITER, to avoid
1197 * excessive competition between read/write locks if many
1198 * threads are doing "sequential access".
1199 */
1200 if (uobj) {
1201 voff_t uoff;
1202
1203 flt->lower_lock_type = RW_WRITER;
1204 uoff = ufi->entry->offset + eoff;
1205 rw_enter(uobj->vmobjlock, RW_WRITER);
1206 (void) (uobj->pgops->pgo_put)(uobj, uoff, uoff +
1207 (nback << PAGE_SHIFT), PGO_DEACTIVATE);
1208 }
1209
1210 /* now forget about the backpages */
1211 if (amap)
1212 *ranons += nback;
1213 flt->startva += (nback << PAGE_SHIFT);
1214 flt->npages -= nback;
1215 flt->centeridx = 0;
1216 }
1217 /*
1218 * => startva is fixed
1219 * => npages is fixed
1220 */
1221 KASSERT(flt->startva <= ufi->orig_rvaddr);
1222 KASSERT(ufi->orig_rvaddr + ufi->orig_size <=
1223 flt->startva + (flt->npages << PAGE_SHIFT));
1224 return 0;
1225 }
1226
1227 /*
1228 * uvm_fault_upper_upgrade: upgrade upper lock, reader -> writer
1229 */
1230
1231 static inline int
1232 uvm_fault_upper_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
1233 struct vm_amap *amap, struct uvm_object *uobj)
1234 {
1235 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1236
1237 KASSERT(amap != NULL);
1238 KASSERT(flt->upper_lock_type == rw_lock_op(amap->am_lock));
1239
1240 /*
1241 * fast path.
1242 */
1243
1244 if (__predict_true(flt->upper_lock_type == RW_WRITER)) {
1245 return 0;
1246 }
1247
1248 /*
1249 * otherwise try for the upgrade. if we don't get it, unlock
1250 * everything, restart the fault and next time around get a writer
1251 * lock.
1252 */
1253
1254 flt->upper_lock_type = RW_WRITER;
1255 if (__predict_false(!rw_tryupgrade(amap->am_lock))) {
1256 uvmfault_unlockall(ufi, amap, uobj);
1257 cpu_count(CPU_COUNT_FLTNOUP, 1);
1258 UVMHIST_LOG(maphist, " !upgrade upper", 0, 0,0,0);
1259 return ERESTART;
1260 }
1261 cpu_count(CPU_COUNT_FLTUP, 1);
1262 KASSERT(flt->upper_lock_type == rw_lock_op(amap->am_lock));
1263 return 0;
1264 }
1265
1266 /*
1267 * uvm_fault_upper_lookup: look up existing h/w mapping and amap.
1268 *
1269 * iterate range of interest:
1270 * 1. check if h/w mapping exists. if yes, we don't care
1271 * 2. check if anon exists. if not, page is lower.
1272 * 3. if anon exists, enter h/w mapping for neighbors.
1273 *
1274 * => called with amap locked (if exists).
1275 */
1276
1277 static int
1278 uvm_fault_upper_lookup(
1279 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
1280 struct vm_anon **anons, struct vm_page **pages)
1281 {
1282 struct vm_amap *amap = ufi->entry->aref.ar_amap;
1283 int lcv;
1284 vaddr_t currva;
1285 bool shadowed __unused;
1286 bool entered;
1287 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1288
1289 /* locked: maps(read), amap(if there) */
1290 KASSERT(amap == NULL ||
1291 rw_lock_op(amap->am_lock) == flt->upper_lock_type);
1292
1293 /*
1294 * map in the backpages and frontpages we found in the amap in hopes
1295 * of preventing future faults. we also init the pages[] array as
1296 * we go.
1297 */
1298
1299 currva = flt->startva;
1300 shadowed = false;
1301 entered = false;
1302 for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) {
1303 /*
1304 * unmapped or center page. check if any anon at this level.
1305 */
1306 if (amap == NULL || anons[lcv] == NULL) {
1307 pages[lcv] = NULL;
1308 continue;
1309 }
1310
1311 /*
1312 * check for present page and map if possible.
1313 */
1314
1315 pages[lcv] = PGO_DONTCARE;
1316 if (lcv == flt->centeridx) { /* save center for later! */
1317 shadowed = true;
1318 continue;
1319 }
1320
1321 struct vm_anon *anon = anons[lcv];
1322 struct vm_page *pg = anon->an_page;
1323
1324 KASSERT(anon->an_lock == amap->am_lock);
1325
1326 /*
1327 * ignore loaned and busy pages.
1328 * don't play with VAs that are already mapped.
1329 */
1330
1331 if (pg && pg->loan_count == 0 && (pg->flags & PG_BUSY) == 0 &&
1332 !pmap_extract(ufi->orig_map->pmap, currva, NULL)) {
1333 uvm_fault_upper_neighbor(ufi, flt, currva,
1334 pg, anon->an_ref > 1);
1335 entered = true;
1336 }
1337 }
1338 if (entered) {
1339 pmap_update(ufi->orig_map->pmap);
1340 }
1341
1342 /* locked: maps(read), amap(if there) */
1343 KASSERT(amap == NULL ||
1344 rw_lock_op(amap->am_lock) == flt->upper_lock_type);
1345 /* (shadowed == true) if there is an anon at the faulting address */
1346 UVMHIST_LOG(maphist, " shadowed=%jd, will_get=%jd", shadowed,
1347 (ufi->entry->object.uvm_obj && shadowed != false),0,0);
1348
1349 return 0;
1350 }
1351
1352 /*
1353 * uvm_fault_upper_neighbor: enter single upper neighbor page.
1354 *
1355 * => called with amap and anon locked.
1356 */
1357
1358 static void
1359 uvm_fault_upper_neighbor(
1360 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
1361 vaddr_t currva, struct vm_page *pg, bool readonly)
1362 {
1363 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1364
1365 /* locked: amap, anon */
1366
1367 KASSERT(pg->uobject == NULL);
1368 KASSERT(pg->uanon != NULL);
1369 KASSERT(rw_lock_op(pg->uanon->an_lock) == flt->upper_lock_type);
1370 KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN);
1371
1372 /*
1373 * there wasn't a direct fault on the page, so avoid the cost of
1374 * activating it.
1375 */
1376
1377 if (!uvmpdpol_pageisqueued_p(pg) && pg->wire_count == 0) {
1378 uvm_pagelock(pg);
1379 uvm_pageenqueue(pg);
1380 uvm_pageunlock(pg);
1381 }
1382
1383 UVMHIST_LOG(maphist,
1384 " MAPPING: n anon: pm=%#jx, va=%#jx, pg=%#jx",
1385 (uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
1386 cpu_count(CPU_COUNT_FLTNAMAP, 1);
1387
1388 /*
1389 * Since this page isn't the page that's actually faulting,
1390 * ignore pmap_enter() failures; it's not critical that we
1391 * enter these right now.
1392 */
1393
1394 (void) pmap_enter(ufi->orig_map->pmap, currva,
1395 VM_PAGE_TO_PHYS(pg),
1396 readonly ? (flt->enter_prot & ~VM_PROT_WRITE) :
1397 flt->enter_prot,
1398 PMAP_CANFAIL | (flt->wire_mapping ? PMAP_WIRED : 0));
1399 }
1400
1401 /*
1402 * uvm_fault_upper: handle upper fault.
1403 *
1404 * 1. acquire anon lock.
1405 * 2. get anon. let uvmfault_anonget do the dirty work.
1406 * 3. handle loan.
1407 * 4. dispatch direct or promote handlers.
1408 */
1409
1410 static int
1411 uvm_fault_upper(
1412 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
1413 struct vm_anon **anons)
1414 {
1415 struct vm_amap * const amap = ufi->entry->aref.ar_amap;
1416 struct vm_anon * const anon = anons[flt->centeridx];
1417 struct uvm_object *uobj;
1418 int error;
1419 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1420
1421 /* locked: maps(read), amap, anon */
1422 KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type);
1423 KASSERT(anon->an_lock == amap->am_lock);
1424
1425 /*
1426 * handle case 1: fault on an anon in our amap
1427 */
1428
1429 UVMHIST_LOG(maphist, " case 1 fault: anon=%#jx",
1430 (uintptr_t)anon, 0, 0, 0);
1431
1432 /*
1433 * no matter if we have case 1A or case 1B we are going to need to
1434 * have the anon's memory resident. ensure that now.
1435 */
1436
1437 /*
1438 * let uvmfault_anonget do the dirty work.
1439 * if it fails (!OK) it will unlock everything for us.
1440 * if it succeeds, locks are still valid and locked.
1441 * also, if it is OK, then the anon's page is on the queues.
1442 * if the page is on loan from a uvm_object, then anonget will
1443 * lock that object for us if it does not fail.
1444 */
1445 retry:
1446 error = uvmfault_anonget(ufi, amap, anon);
1447 switch (error) {
1448 case 0:
1449 break;
1450
1451 case ERESTART:
1452 return ERESTART;
1453
1454 case EAGAIN:
1455 kpause("fltagain1", false, hz/2, NULL);
1456 return ERESTART;
1457
1458 case ENOLCK:
1459 /* it needs a write lock: retry */
1460 error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL);
1461 if (error != 0) {
1462 return error;
1463 }
1464 KASSERT(rw_write_held(amap->am_lock));
1465 goto retry;
1466
1467 default:
1468 return error;
1469 }
1470
1471 /*
1472 * uobj is non null if the page is on loan from an object (i.e. uobj)
1473 */
1474
1475 uobj = anon->an_page->uobject; /* locked by anonget if !NULL */
1476
1477 /* locked: maps(read), amap, anon, uobj(if one) */
1478 KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type);
1479 KASSERT(anon->an_lock == amap->am_lock);
1480 KASSERT(uobj == NULL ||
1481 rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
1482
1483 /*
1484 * special handling for loaned pages
1485 */
1486
1487 if (anon->an_page->loan_count) {
1488 error = uvm_fault_upper_loan(ufi, flt, anon, &uobj);
1489 if (error != 0)
1490 return error;
1491 }
1492
1493 /*
1494 * if we are case 1B then we will need to allocate a new blank
1495 * anon to transfer the data into. note that we have a lock
1496 * on anon, so no one can busy or release the page until we are done.
1497 * also note that the ref count can't drop to zero here because
1498 * it is > 1 and we are only dropping one ref.
1499 *
1500 * in the (hopefully very rare) case that we are out of RAM we
1501 * will unlock, wait for more RAM, and refault.
1502 *
1503 * if we are out of anon VM we kill the process (XXX: could wait?).
1504 */
1505
1506 if (flt->cow_now && anon->an_ref > 1) {
1507 flt->promote = true;
1508 error = uvm_fault_upper_promote(ufi, flt, uobj, anon);
1509 } else {
1510 error = uvm_fault_upper_direct(ufi, flt, uobj, anon);
1511 }
1512 return error;
1513 }
1514
1515 /*
1516 * uvm_fault_upper_loan: handle loaned upper page.
1517 *
1518 * 1. if not cow'ing now, simply adjust flt->enter_prot.
1519 * 2. if cow'ing now, and if ref count is 1, break loan.
1520 */
1521
1522 static int
1523 uvm_fault_upper_loan(
1524 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
1525 struct vm_anon *anon, struct uvm_object **ruobj)
1526 {
1527 struct vm_amap * const amap = ufi->entry->aref.ar_amap;
1528 int error = 0;
1529 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1530
1531 if (!flt->cow_now) {
1532
1533 /*
1534 * for read faults on loaned pages we just cap the
1535 * protection at read-only.
1536 */
1537
1538 flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE;
1539
1540 } else {
1541 /*
1542 * note that we can't allow writes into a loaned page!
1543 *
1544 * if we have a write fault on a loaned page in an
1545 * anon then we need to look at the anon's ref count.
1546 * if it is greater than one then we are going to do
1547 * a normal copy-on-write fault into a new anon (this
1548 * is not a problem). however, if the reference count
1549 * is one (a case where we would normally allow a
1550 * write directly to the page) then we need to kill
1551 * the loan before we continue.
1552 */
1553
1554 /* >1 case is already ok */
1555 if (anon->an_ref == 1) {
1556 /* breaking loan requires a write lock. */
1557 error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL);
1558 if (error != 0) {
1559 return error;
1560 }
1561 KASSERT(rw_write_held(amap->am_lock));
1562
1563 error = uvm_loanbreak_anon(anon, *ruobj);
1564 if (error != 0) {
1565 uvmfault_unlockall(ufi, amap, *ruobj);
1566 uvm_wait("flt_noram2");
1567 return ERESTART;
1568 }
1569 /* if we were a loan receiver uobj is gone */
1570 if (*ruobj)
1571 *ruobj = NULL;
1572 }
1573 }
1574 return error;
1575 }
1576
1577 /*
1578 * uvm_fault_upper_promote: promote upper page.
1579 *
1580 * 1. call uvmfault_promote.
1581 * 2. enqueue page.
1582 * 3. deref.
1583 * 4. pass page to uvm_fault_upper_enter.
1584 */
1585
1586 static int
1587 uvm_fault_upper_promote(
1588 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
1589 struct uvm_object *uobj, struct vm_anon *anon)
1590 {
1591 struct vm_amap * const amap = ufi->entry->aref.ar_amap;
1592 struct vm_anon * const oanon = anon;
1593 struct vm_page *pg;
1594 int error;
1595 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1596
1597 UVMHIST_LOG(maphist, " case 1B: COW fault",0,0,0,0);
1598 cpu_count(CPU_COUNT_FLT_ACOW, 1);
1599
1600 /* promoting requires a write lock. */
1601 error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL);
1602 if (error != 0) {
1603 return error;
1604 }
1605 KASSERT(rw_write_held(amap->am_lock));
1606
1607 error = uvmfault_promote(ufi, oanon, PGO_DONTCARE, &anon,
1608 &flt->anon_spare);
1609 switch (error) {
1610 case 0:
1611 break;
1612 case ERESTART:
1613 return ERESTART;
1614 default:
1615 return error;
1616 }
1617 pg = anon->an_page;
1618
1619 KASSERT(anon->an_lock == oanon->an_lock);
1620 KASSERT((pg->flags & (PG_BUSY | PG_FAKE)) == 0);
1621
1622 /* deref: can not drop to zero here by defn! */
1623 KASSERT(oanon->an_ref > 1);
1624 oanon->an_ref--;
1625
1626 /*
1627 * note: oanon is still locked, as is the new anon. we
1628 * need to check for this later when we unlock oanon; if
1629 * oanon != anon, we'll have to unlock anon, too.
1630 */
1631
1632 return uvm_fault_upper_enter(ufi, flt, uobj, anon, pg, oanon);
1633 }
1634
1635 /*
1636 * uvm_fault_upper_direct: handle direct fault.
1637 */
1638
1639 static int
1640 uvm_fault_upper_direct(
1641 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
1642 struct uvm_object *uobj, struct vm_anon *anon)
1643 {
1644 struct vm_anon * const oanon = anon;
1645 struct vm_page *pg;
1646 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1647
1648 cpu_count(CPU_COUNT_FLT_ANON, 1);
1649 pg = anon->an_page;
1650 if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */
1651 flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE;
1652
1653 return uvm_fault_upper_enter(ufi, flt, uobj, anon, pg, oanon);
1654 }
1655
1656 /*
1657 * uvm_fault_upper_enter: enter h/w mapping of upper page.
1658 */
1659
1660 static int
1661 uvm_fault_upper_enter(
1662 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
1663 struct uvm_object *uobj, struct vm_anon *anon, struct vm_page *pg,
1664 struct vm_anon *oanon)
1665 {
1666 struct pmap *pmap = ufi->orig_map->pmap;
1667 vaddr_t va = ufi->orig_rvaddr;
1668 struct vm_amap * const amap = ufi->entry->aref.ar_amap;
1669 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1670
1671 /* locked: maps(read), amap, oanon, anon(if different from oanon) */
1672 KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type);
1673 KASSERT(anon->an_lock == amap->am_lock);
1674 KASSERT(oanon->an_lock == amap->am_lock);
1675 KASSERT(uobj == NULL ||
1676 rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
1677 KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN);
1678
1679 /*
1680 * now map the page in.
1681 */
1682
1683 UVMHIST_LOG(maphist,
1684 " MAPPING: anon: pm=%#jx, va=%#jx, pg=%#jx, promote=%jd",
1685 (uintptr_t)pmap, va, (uintptr_t)pg, flt->promote);
1686 if (pmap_enter(pmap, va, VM_PAGE_TO_PHYS(pg),
1687 flt->enter_prot, flt->access_type | PMAP_CANFAIL |
1688 (flt->wire_mapping ? PMAP_WIRED : 0)) != 0) {
1689
1690 /*
1691 * If pmap_enter() fails, it must not leave behind an existing
1692 * pmap entry. In particular, a now-stale entry for a different
1693 * page would leave the pmap inconsistent with the vm_map.
1694 * This is not to imply that pmap_enter() should remove an
1695 * existing mapping in such a situation (since that could create
1696 * different problems, eg. if the existing mapping is wired),
1697 * but rather that the pmap should be designed such that it
1698 * never needs to fail when the new mapping is replacing an
1699 * existing mapping and the new page has no existing mappings.
1700 *
1701 * XXX This can't be asserted safely any more because many
1702 * LWPs and/or many processes could simultaneously fault on
1703 * the same VA and some might succeed.
1704 */
1705
1706 /* KASSERT(!pmap_extract(pmap, va, NULL)); */
1707
1708 /*
1709 * ensure that the page is queued in the case that
1710 * we just promoted.
1711 */
1712
1713 uvm_pagelock(pg);
1714 uvm_pageenqueue(pg);
1715 uvm_pageunlock(pg);
1716
1717 /*
1718 * No need to undo what we did; we can simply think of
1719 * this as the pmap throwing away the mapping information.
1720 *
1721 * We do, however, have to go through the ReFault path,
1722 * as the map may change while we're asleep.
1723 */
1724
1725 uvmfault_unlockall(ufi, amap, uobj);
1726 if (!uvm_reclaimable()) {
1727 UVMHIST_LOG(maphist,
1728 "<- failed. out of VM",0,0,0,0);
1729 /* XXX instrumentation */
1730 return ENOMEM;
1731 }
1732 /* XXX instrumentation */
1733 uvm_wait("flt_pmfail1");
1734 return ERESTART;
1735 }
1736
1737 uvm_fault_upper_done(ufi, flt, anon, pg);
1738
1739 /*
1740 * done case 1! finish up by unlocking everything and returning success
1741 */
1742
1743 pmap_update(pmap);
1744 uvmfault_unlockall(ufi, amap, uobj);
1745 return 0;
1746 }
1747
1748 /*
1749 * uvm_fault_upper_done: queue upper center page.
1750 */
1751
1752 static void
1753 uvm_fault_upper_done(
1754 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
1755 struct vm_anon *anon, struct vm_page *pg)
1756 {
1757 const bool wire_paging = flt->wire_paging;
1758
1759 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1760
1761 /*
1762 * ... update the page queues.
1763 */
1764
1765 if (wire_paging) {
1766 uvm_pagelock(pg);
1767 uvm_pagewire(pg);
1768 uvm_pageunlock(pg);
1769
1770 /*
1771 * since the now-wired page cannot be paged out,
1772 * release its swap resources for others to use.
1773 * and since an anon with no swap cannot be clean,
1774 * mark it dirty now.
1775 */
1776
1777 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
1778 uvm_anon_dropswap(anon);
1779 } else if (uvmpdpol_pageactivate_p(pg)) {
1780 /*
1781 * avoid re-activating the page unless needed,
1782 * to avoid false sharing on multiprocessor.
1783 */
1784
1785 uvm_pagelock(pg);
1786 uvm_pageactivate(pg);
1787 uvm_pageunlock(pg);
1788 }
1789 }
1790
1791 /*
1792 * uvm_fault_lower_upgrade: upgrade lower lock, reader -> writer
1793 */
1794
1795 static inline int
1796 uvm_fault_lower_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
1797 struct vm_amap *amap, struct uvm_object *uobj, struct vm_page *uobjpage)
1798 {
1799
1800 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1801
1802 KASSERT(uobj != NULL);
1803 KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock));
1804
1805 /*
1806 * fast path.
1807 */
1808
1809 if (__predict_true(flt->lower_lock_type == RW_WRITER)) {
1810 return 0;
1811 }
1812
1813 /*
1814 * otherwise try for the upgrade. if we don't get it, unlock
1815 * everything, restart the fault and next time around get a writer
1816 * lock.
1817 */
1818
1819 flt->lower_lock_type = RW_WRITER;
1820 if (__predict_false(!rw_tryupgrade(uobj->vmobjlock))) {
1821 uvmfault_unlockall(ufi, amap, uobj);
1822 cpu_count(CPU_COUNT_FLTNOUP, 1);
1823 UVMHIST_LOG(maphist, " !upgrade lower", 0, 0,0,0);
1824 return ERESTART;
1825 }
1826 cpu_count(CPU_COUNT_FLTUP, 1);
1827 KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock));
1828 return 0;
1829 }
1830
1831 /*
1832 * uvm_fault_lower: handle lower fault.
1833 *
1834 * 1. check uobj
1835 * 1.1. if null, ZFOD.
1836 * 1.2. if not null, look up unmapped neighbor pages.
1837 * 2. for center page, check if promote.
1838 * 2.1. ZFOD always needs promotion.
1839 * 2.2. other uobjs, when entry is marked COW (usually MAP_PRIVATE vnode).
1840 * 3. if uobj is not ZFOD and page is not found, do i/o.
1841 * 4. dispatch either direct / promote fault.
1842 */
1843
1844 static int
1845 uvm_fault_lower(
1846 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
1847 struct vm_page **pages)
1848 {
1849 struct vm_amap *amap __diagused = ufi->entry->aref.ar_amap;
1850 struct uvm_object *uobj = ufi->entry->object.uvm_obj;
1851 struct vm_page *uobjpage;
1852 int error;
1853 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1854
1855 /*
1856 * now, if the desired page is not shadowed by the amap and we have
1857 * a backing object that does not have a special fault routine, then
1858 * we ask (with pgo_get) the object for resident pages that we care
1859 * about and attempt to map them in. we do not let pgo_get block
1860 * (PGO_LOCKED).
1861 */
1862
1863 if (uobj == NULL) {
1864 /* zero fill; don't care neighbor pages */
1865 uobjpage = NULL;
1866 } else {
1867 uvm_fault_lower_lookup(ufi, flt, pages);
1868 uobjpage = pages[flt->centeridx];
1869 }
1870
1871 /*
1872 * note that at this point we are done with any front or back pages.
1873 * we are now going to focus on the center page (i.e. the one we've
1874 * faulted on). if we have faulted on the upper (anon) layer
1875 * [i.e. case 1], then the anon we want is anons[centeridx] (we have
1876 * not touched it yet). if we have faulted on the bottom (uobj)
1877 * layer [i.e. case 2] and the page was both present and available,
1878 * then we've got a pointer to it as "uobjpage" and we've already
1879 * made it BUSY.
1880 */
1881
1882 /*
1883 * locked:
1884 * maps(read), amap(if there), uobj(if !null), uobjpage(if !null)
1885 */
1886 KASSERT(amap == NULL ||
1887 rw_lock_op(amap->am_lock) == flt->upper_lock_type);
1888 KASSERT(uobj == NULL ||
1889 rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
1890
1891 /*
1892 * note that uobjpage can not be PGO_DONTCARE at this point. we now
1893 * set uobjpage to PGO_DONTCARE if we are doing a zero fill. if we
1894 * have a backing object, check and see if we are going to promote
1895 * the data up to an anon during the fault.
1896 */
1897
1898 if (uobj == NULL) {
1899 uobjpage = PGO_DONTCARE;
1900 flt->promote = true; /* always need anon here */
1901 } else {
1902 KASSERT(uobjpage != PGO_DONTCARE);
1903 flt->promote = flt->cow_now && UVM_ET_ISCOPYONWRITE(ufi->entry);
1904 }
1905 UVMHIST_LOG(maphist, " case 2 fault: promote=%jd, zfill=%jd",
1906 flt->promote, (uobj == NULL), 0,0);
1907
1908 /*
1909 * if uobjpage is not null then we do not need to do I/O to get the
1910 * uobjpage.
1911 *
1912 * if uobjpage is null, then we need to unlock and ask the pager to
1913 * get the data for us. once we have the data, we need to reverify
1914 * the state of the world. we are currently not holding any resources.
1915 */
1916
1917 if (uobjpage) {
1918 /* update rusage counters */
1919 curlwp->l_ru.ru_minflt++;
1920 } else {
1921 error = uvm_fault_lower_io(ufi, flt, &uobj, &uobjpage);
1922 if (error != 0)
1923 return error;
1924 }
1925
1926 /*
1927 * locked:
1928 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj)
1929 */
1930 KASSERT(amap == NULL ||
1931 rw_lock_op(amap->am_lock) == flt->upper_lock_type);
1932 KASSERT(uobj == NULL ||
1933 rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
1934
1935 /*
1936 * notes:
1937 * - at this point uobjpage can not be NULL
1938 * - at this point uobjpage can not be PG_RELEASED (since we checked
1939 * for it above)
1940 * - at this point uobjpage could be waited on (handle later)
1941 * - uobjpage can be from a different object if tmpfs (vnode vs UAO)
1942 */
1943
1944 KASSERT(uobjpage != NULL);
1945 KASSERT(uobj == NULL ||
1946 uobjpage->uobject->vmobjlock == uobj->vmobjlock);
1947 KASSERT(uobj == NULL || !UVM_OBJ_IS_CLEAN(uobjpage->uobject) ||
1948 uvm_pagegetdirty(uobjpage) == UVM_PAGE_STATUS_CLEAN);
1949
1950 if (!flt->promote) {
1951 error = uvm_fault_lower_direct(ufi, flt, uobj, uobjpage);
1952 } else {
1953 error = uvm_fault_lower_promote(ufi, flt, uobj, uobjpage);
1954 }
1955 return error;
1956 }
1957
1958 /*
1959 * uvm_fault_lower_lookup: look up on-memory uobj pages.
1960 *
1961 * 1. get on-memory pages.
1962 * 2. if failed, give up (get only center page later).
1963 * 3. if succeeded, enter h/w mapping of neighbor pages.
1964 */
1965
1966 static void
1967 uvm_fault_lower_lookup(
1968 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
1969 struct vm_page **pages)
1970 {
1971 struct uvm_object *uobj = ufi->entry->object.uvm_obj;
1972 int lcv, gotpages;
1973 vaddr_t currva;
1974 bool entered;
1975 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1976
1977 rw_enter(uobj->vmobjlock, flt->lower_lock_type);
1978
1979 /*
1980 * Locked: maps(read), amap(if there), uobj
1981 */
1982
1983 cpu_count(CPU_COUNT_FLTLGET, 1);
1984 gotpages = flt->npages;
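	/*
	 * note on MASK(ufi->entry) below: the macro (defined earlier in
	 * this file) is expected to strip VM_PROT_WRITE for copy-on-write
	 * entries, so the access type passed to pgo_get never asks for
	 * write access to a page that would first have to be promoted.
	 */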
1985 (void) uobj->pgops->pgo_get(uobj,
1986 ufi->entry->offset + flt->startva - ufi->entry->start,
1987 pages, &gotpages, flt->centeridx,
1988 flt->access_type & MASK(ufi->entry), ufi->entry->advice,
1989 PGO_LOCKED);
1990
1991 KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
1992
1993 /*
1994 * check for pages to map, if we got any
1995 */
1996
1997 if (gotpages == 0) {
1998 pages[flt->centeridx] = NULL;
1999 return;
2000 }
2001
2002 entered = false;
2003 currva = flt->startva;
2004 for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) {
2005 struct vm_page *curpg;
2006
2007 curpg = pages[lcv];
2008 if (curpg == NULL || curpg == PGO_DONTCARE) {
2009 continue;
2010 }
2011
2012 /*
2013 * in the case of tmpfs, the pages might be from a different
2014 * uvm_object. just make sure that they have the same lock.
2015 */
2016
2017 KASSERT(curpg->uobject->vmobjlock == uobj->vmobjlock);
2018 KASSERT((curpg->flags & PG_BUSY) == 0);
2019
2020 /*
2021 * leave the centre page for later. don't screw with
2022 * existing mappings (needless & expensive).
2023 */
2024
2025 if (lcv == flt->centeridx) {
2026 UVMHIST_LOG(maphist, " got uobjpage (%#jx) "
2027 "with locked get", (uintptr_t)curpg, 0, 0, 0);
2028 } else if (!pmap_extract(ufi->orig_map->pmap, currva, NULL)) {
2029 uvm_fault_lower_neighbor(ufi, flt, currva, curpg);
2030 entered = true;
2031 }
2032 }
2033 if (entered) {
2034 pmap_update(ufi->orig_map->pmap);
2035 }
2036 }
2037
2038 /*
2039 * uvm_fault_lower_neighbor: enter h/w mapping of lower neighbor page.
2040 */
2041
2042 static void
2043 uvm_fault_lower_neighbor(
2044 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
2045 vaddr_t currva, struct vm_page *pg)
2046 {
2047 const bool readonly = uvm_pagereadonly_p(pg) || pg->loan_count > 0;
2048 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
2049
2050 /* locked: maps(read), amap(if there), uobj */
2051
2052 /*
2053 * calling pgo_get with PGO_LOCKED returns us pages which
2054 * are neither busy nor released, so we don't need to check
2055 * for this. we can just directly enter the pages.
2056 *
2057 * there wasn't a direct fault on the page, so avoid the cost of
2058 * activating it.
2059 */
2060
2061 if (!uvmpdpol_pageisqueued_p(pg) && pg->wire_count == 0) {
2062 uvm_pagelock(pg);
2063 uvm_pageenqueue(pg);
2064 uvm_pageunlock(pg);
2065 }
2066
2067 UVMHIST_LOG(maphist,
2068 " MAPPING: n obj: pm=%#jx, va=%#jx, pg=%#jx",
2069 (uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
2070 cpu_count(CPU_COUNT_FLTNOMAP, 1);
2071
2072 /*
2073 * Since this page isn't the page that's actually faulting,
2074 * ignore pmap_enter() failures; it's not critical that we
2075 * enter these right now.
2076 * NOTE: page can't be waited on or PG_RELEASED because we've
2077 * held the lock the whole time we've had the handle.
2078 */
2079 KASSERT((pg->flags & PG_PAGEOUT) == 0);
2080 KASSERT((pg->flags & PG_RELEASED) == 0);
2081 KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) ||
2082 uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN);
2083 KASSERT((pg->flags & PG_BUSY) == 0);
2084 KASSERT(rw_lock_op(pg->uobject->vmobjlock) == flt->lower_lock_type);
2085
2086 const vm_prot_t mapprot =
2087 readonly ? (flt->enter_prot & ~VM_PROT_WRITE) :
2088 flt->enter_prot & MASK(ufi->entry);
2089 const u_int mapflags =
2090 PMAP_CANFAIL | (flt->wire_mapping ? (mapprot | PMAP_WIRED) : 0);
2091 (void) pmap_enter(ufi->orig_map->pmap, currva,
2092 VM_PAGE_TO_PHYS(pg), mapprot, mapflags);
2093 }
2094
2095 /*
2096 * uvm_fault_lower_io: get lower page from backing store.
2097 *
2098 * 1. unlock everything, because i/o will block.
2099 * 2. call pgo_get.
2100 * 3. if failed, recover.
2101 * 4. if succeeded, relock everything and verify things.
2102 */
2103
2104 static int
2105 uvm_fault_lower_io(
2106 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
2107 struct uvm_object **ruobj, struct vm_page **ruobjpage)
2108 {
2109 struct vm_amap * const amap = ufi->entry->aref.ar_amap;
2110 struct uvm_object *uobj = *ruobj;
2111 struct vm_page *pg;
2112 bool locked;
2113 int gotpages;
2114 int error;
2115 voff_t uoff;
2116 vm_prot_t access_type;
2117 int advice;
2118 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
2119
2120 /* update rusage counters */
2121 curlwp->l_ru.ru_majflt++;
2122
2123 /* grab everything we need from the entry before we unlock */
2124 uoff = (ufi->orig_rvaddr - ufi->entry->start) + ufi->entry->offset;
2125 access_type = flt->access_type & MASK(ufi->entry);
2126 advice = ufi->entry->advice;
2127
2128 /* Locked: maps(read), amap(if there), uobj */
2129 KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
2130
2131 /* Upgrade to a write lock if needed. */
2132 error = uvm_fault_lower_upgrade(ufi, flt, amap, uobj, NULL);
2133 if (error != 0) {
2134 return error;
2135 }
2136 uvmfault_unlockall(ufi, amap, NULL);
2137
2138 /* Locked: uobj(write) */
2139 KASSERT(rw_write_held(uobj->vmobjlock));
2140
2141 cpu_count(CPU_COUNT_FLTGET, 1);
2142 gotpages = 1;
2143 pg = NULL;
2144 error = uobj->pgops->pgo_get(uobj, uoff, &pg, &gotpages,
2145 0, access_type, advice, PGO_SYNCIO);
2146 /* locked: pg(if no error) */
2147
2148 /*
2149 * recover from I/O
2150 */
2151
2152 if (error) {
2153 if (error == EAGAIN) {
2154 UVMHIST_LOG(maphist,
2155 " pgo_get says TRY AGAIN!",0,0,0,0);
2156 kpause("fltagain2", false, hz/2, NULL);
2157 return ERESTART;
2158 }
2159
2160 #if 0
2161 KASSERT(error != ERESTART);
2162 #else
2163 /* XXXUEBS don't re-fault? */
2164 if (error == ERESTART)
2165 error = EIO;
2166 #endif
2167
2168 UVMHIST_LOG(maphist, "<- pgo_get failed (code %jd)",
2169 error, 0,0,0);
2170 return error;
2171 }
2172
2173 /*
2174 * re-verify the state of the world by first trying to relock
2175 * the maps. always relock the object.
2176 */
2177
2178 locked = uvmfault_relock(ufi);
2179 if (locked && amap)
2180 amap_lock(amap, flt->upper_lock_type);
2181
2182 /* might be changed */
2183 uobj = pg->uobject;
2184
2185 rw_enter(uobj->vmobjlock, flt->lower_lock_type);
2186 KASSERT((pg->flags & PG_BUSY) != 0);
2187 KASSERT(flt->lower_lock_type == RW_WRITER);
2188
2189 uvm_pagelock(pg);
2190 uvm_pageactivate(pg);
2191 uvm_pageunlock(pg);
2192
2193 /* locked(locked): maps(read), amap(if !null), uobj, pg */
2194 /* locked(!locked): uobj, pg */
2195
2196 /*
2197  * verify that the page has not been released and re-verify
2198 * that amap slot is still free. if there is a problem,
2199 * we unlock and clean up.
2200 */
2201
2202 if ((pg->flags & PG_RELEASED) != 0 ||
2203 (locked && amap && amap_lookup(&ufi->entry->aref,
2204 ufi->orig_rvaddr - ufi->entry->start))) {
2205 if (locked)
2206 uvmfault_unlockall(ufi, amap, NULL);
2207 locked = false;
2208 }
2209
2210 /*
2211 * unbusy/release the page.
2212 */
2213
2214 if ((pg->flags & PG_RELEASED) == 0) {
2215 pg->flags &= ~PG_BUSY;
2216 uvm_pagelock(pg);
2217 uvm_pagewakeup(pg);
2218 uvm_pageunlock(pg);
2219 UVM_PAGE_OWN(pg, NULL);
2220 } else {
2221 cpu_count(CPU_COUNT_FLTPGRELE, 1);
2222 uvm_pagefree(pg);
2223 }
2224
2225 /*
2226 * didn't get the lock? retry.
2227 */
2228
2229 if (locked == false) {
2230 UVMHIST_LOG(maphist,
2231 " wasn't able to relock after fault: retry",
2232 0,0,0,0);
2233 rw_exit(uobj->vmobjlock);
2234 return ERESTART;
2235 }
2236
2237 /*
2238  * we have the data in pg.  we are holding the object lock (so the page
2239 * can't be released on us).
2240 */
2241
2242 /* locked: maps(read), amap(if !null), uobj */
2243
2244 *ruobj = uobj;
2245 *ruobjpage = pg;
2246 return 0;
2247 }
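
/*
 * A minimal sketch of how the ERESTART returns above are consumed
 * (assuming only what is visible in this file): every ERESTART path
 * drops its locks before returning, and the retry loop in
 * uvm_fault_internal() simply redoes the lookups and calls back down:
 *
 *	do {
 *		... redo map/amap lookups, re-take locks ...
 *		error = uvm_fault_lower(&ufi, &flt, pages);
 *	} while (error == ERESTART);
 */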
2248
2249 /*
2250 * uvm_fault_lower_direct: fault lower center page
2251 *
2252 * 1. adjust flt->enter_prot.
2253 * 2. if page is loaned, resolve.
2254 */
2255
2256 int
2257 uvm_fault_lower_direct(
2258 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
2259 struct uvm_object *uobj, struct vm_page *uobjpage)
2260 {
2261 struct vm_page *pg;
2262 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
2263
2264 /*
2265  * we are not promoting.  if the mapping is COW, ensure that we
2266 * don't give more access than we should (e.g. when doing a read
2267 * fault on a COPYONWRITE mapping we want to map the COW page in
2268 * R/O even though the entry protection could be R/W).
2269 *
2270 * set "pg" to the page we want to map in (uobjpage, usually)
2271 */
2272
2273 cpu_count(CPU_COUNT_FLT_OBJ, 1);
2274 if (UVM_ET_ISCOPYONWRITE(ufi->entry) ||
2275 UVM_OBJ_NEEDS_WRITEFAULT(uobjpage->uobject))
2276 flt->enter_prot &= ~VM_PROT_WRITE;
2277 pg = uobjpage; /* map in the actual object */
2278
2279 KASSERT(uobjpage != PGO_DONTCARE);
2280
2281 /*
2282 * we are faulting directly on the page. be careful
2283 * about writing to loaned pages...
2284 */
2285
2286 if (uobjpage->loan_count) {
2287 uvm_fault_lower_direct_loan(ufi, flt, uobj, &pg, &uobjpage);
2288 }
2289 KASSERT(pg == uobjpage);
2290 KASSERT((pg->flags & PG_BUSY) == 0);
2291 return uvm_fault_lower_enter(ufi, flt, uobj, NULL, pg);
2292 }
2293
2294 /*
2295 * uvm_fault_lower_direct_loan: resolve loaned page.
2296 *
2297 * 1. if not cow'ing, adjust flt->enter_prot.
2298 * 2. if cow'ing, break loan.
2299 */
2300
2301 static int
2302 uvm_fault_lower_direct_loan(
2303 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
2304 struct uvm_object *uobj, struct vm_page **rpg,
2305 struct vm_page **ruobjpage)
2306 {
2307 struct vm_amap * const amap = ufi->entry->aref.ar_amap;
2308 struct vm_page *pg;
2309 struct vm_page *uobjpage = *ruobjpage;
2310 int error;
2311 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
2312
2313 if (!flt->cow_now) {
2314 /* read fault: cap the protection at readonly */
2315 /* cap! */
2316 flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE;
2317 } else {
2318 /*
2319 * write fault: must break the loan here. to do this
2320 * we need a write lock on the object.
2321 */
2322
2323 error = uvm_fault_lower_upgrade(ufi, flt, amap, uobj, uobjpage);
2324 if (error != 0) {
2325 return error;
2326 }
2327 KASSERT(rw_write_held(uobj->vmobjlock));
2328
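		/*
		 * note (hedged summary, see uvm_loanbreak() for the exact
		 * contract): it is expected to replace the loaned page with
		 * a private copy in the object and return the new page, or
		 * NULL when no memory is available.
		 */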
2329 pg = uvm_loanbreak(uobjpage);
2330 if (pg == NULL) {
2331
2332 uvmfault_unlockall(ufi, amap, uobj);
2333 UVMHIST_LOG(maphist,
2334 " out of RAM breaking loan, waiting",
2335 0,0,0,0);
2336 cpu_count(CPU_COUNT_FLTNORAM, 1);
2337 uvm_wait("flt_noram4");
2338 return ERESTART;
2339 }
2340 *rpg = pg;
2341 *ruobjpage = pg;
2342
2343 /*
2344 * drop ownership of page while still holding object lock,
2345 * which won't be dropped until the page is entered.
2346 */
2347
2348 uvm_pagelock(pg);
2349 uvm_pagewakeup(pg);
2350 uvm_pageunlock(pg);
2351 pg->flags &= ~PG_BUSY;
2352 UVM_PAGE_OWN(pg, NULL);
2353 }
2354 return 0;
2355 }
2356
2357 /*
2358 * uvm_fault_lower_promote: promote lower page.
2359 *
2360 * 1. call uvmfault_promote.
2361 * 2. fill in data.
2362 * 3. if not ZFOD, dispose old page.
2363 */
2364
2365 int
2366 uvm_fault_lower_promote(
2367 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
2368 struct uvm_object *uobj, struct vm_page *uobjpage)
2369 {
2370 struct vm_amap * const amap = ufi->entry->aref.ar_amap;
2371 struct vm_anon *anon;
2372 struct vm_page *pg;
2373 int error;
2374 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
2375
2376 KASSERT(amap != NULL);
2377
2378 /* promoting requires a write lock. */
2379 error = uvm_fault_upper_upgrade(ufi, flt, amap, uobj);
2380 if (error != 0) {
2381 return error;
2382 }
2383 KASSERT(rw_write_held(amap->am_lock));
2384 KASSERT(uobj == NULL ||
2385 rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
2386
2387 /*
2388 * If we are going to promote the data to an anon we
2389 * allocate a blank anon here and plug it into our amap.
2390 */
2391 error = uvmfault_promote(ufi, NULL, uobjpage, &anon, &flt->anon_spare);
2392 switch (error) {
2393 case 0:
2394 break;
2395 case ERESTART:
2396 return ERESTART;
2397 default:
2398 return error;
2399 }
2400
2401 pg = anon->an_page;
2402
2403 /*
2404 * Fill in the data.
2405 */
2406
2407 if (uobjpage != PGO_DONTCARE) {
2408 cpu_count(CPU_COUNT_FLT_PRCOPY, 1);
2409
2410 /*
2411 * promote to shared amap? make sure all sharing
2412 * procs see it
2413 */
2414
2415 if ((amap_flags(amap) & AMAP_SHARED) != 0) {
2416 pmap_page_protect(uobjpage, VM_PROT_NONE);
2417 /*
2418 * XXX: PAGE MIGHT BE WIRED!
2419 */
2420 }
2421
2422 UVMHIST_LOG(maphist,
2423 " promote uobjpage %#jx to anon/page %#jx/%#jx",
2424 (uintptr_t)uobjpage, (uintptr_t)anon, (uintptr_t)pg, 0);
2425
2426 } else {
2427 cpu_count(CPU_COUNT_FLT_PRZERO, 1);
2428
2429 /*
2430 * Page is zero'd and marked dirty by
2431 * uvmfault_promote().
2432 */
2433
2434 UVMHIST_LOG(maphist," zero fill anon/page %#jx/%#jx",
2435 (uintptr_t)anon, (uintptr_t)pg, 0, 0);
2436 }
2437
2438 return uvm_fault_lower_enter(ufi, flt, uobj, anon, pg);
2439 }
2440
2441 /*
2442 * uvm_fault_lower_enter: enter h/w mapping of lower page or anon page promoted
2443 * from the lower page.
2444 */
2445
2446 int
2447 uvm_fault_lower_enter(
2448 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
2449 struct uvm_object *uobj,
2450 struct vm_anon *anon, struct vm_page *pg)
2451 {
2452 struct vm_amap * const amap = ufi->entry->aref.ar_amap;
2453 const bool readonly = uvm_pagereadonly_p(pg);
2454 int error;
2455 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
2456
2457 /*
2458 * Locked:
2459 *
2460 * maps(read), amap(if !null), uobj(if !null),
2461 * anon(if !null), pg(if anon), unlock_uobj(if !null)
2462 *
2463 * anon must be write locked (promotion). uobj can be either.
2464 *
2465 * Note: pg is either the uobjpage or the new page in the new anon.
2466 */
2467
2468 KASSERT(amap == NULL ||
2469 rw_lock_op(amap->am_lock) == flt->upper_lock_type);
2470 KASSERT(uobj == NULL ||
2471 rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
2472 KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
2473
2474 /*
2475 * note that pg can't be PG_RELEASED or PG_BUSY since we did
2476 * not drop the object lock since the last time we checked.
2477 */
2478
2479 KASSERT((pg->flags & PG_RELEASED) == 0);
2480 KASSERT((pg->flags & PG_BUSY) == 0);
2481
2482 /*
2483 * all resources are present. we can now map it in and free our
2484 * resources.
2485 */
2486
2487 UVMHIST_LOG(maphist,
2488 " MAPPING: case2: pm=%#jx, va=%#jx, pg=%#jx, promote=%jd",
2489 (uintptr_t)ufi->orig_map->pmap, ufi->orig_rvaddr,
2490 (uintptr_t)pg, flt->promote);
2491 KASSERTMSG((flt->access_type & VM_PROT_WRITE) == 0 || !readonly,
2492 "promote=%u cow_now=%u access_type=%x enter_prot=%x cow=%u "
2493 "entry=%p map=%p orig_rvaddr=%p pg=%p",
2494 flt->promote, flt->cow_now, flt->access_type, flt->enter_prot,
2495 UVM_ET_ISCOPYONWRITE(ufi->entry), ufi->entry, ufi->orig_map,
2496 (void *)ufi->orig_rvaddr, pg);
2497 KASSERT((flt->access_type & VM_PROT_WRITE) == 0 || !readonly);
2498 if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
2499 VM_PAGE_TO_PHYS(pg),
2500 readonly ? flt->enter_prot & ~VM_PROT_WRITE : flt->enter_prot,
2501 flt->access_type | PMAP_CANFAIL |
2502 (flt->wire_mapping ? PMAP_WIRED : 0)) != 0) {
2503
2504 /*
2505 * No need to undo what we did; we can simply think of
2506 * this as the pmap throwing away the mapping information.
2507 *
2508 * We do, however, have to go through the ReFault path,
2509 * as the map may change while we're asleep.
2510 */
2511
2512 /*
2513 * ensure that the page is queued in the case that
2514 * we just promoted the page.
2515 */
2516
2517 if (anon != NULL) {
2518 uvm_pagelock(pg);
2519 uvm_pageenqueue(pg);
2520 uvm_pagewakeup(pg);
2521 uvm_pageunlock(pg);
2522 }
2523
2524 uvmfault_unlockall(ufi, amap, uobj);
2525 if (!uvm_reclaimable()) {
2526 UVMHIST_LOG(maphist,
2527 "<- failed. out of VM",0,0,0,0);
2528 /* XXX instrumentation */
2529 error = ENOMEM;
2530 return error;
2531 }
2532 /* XXX instrumentation */
2533 uvm_wait("flt_pmfail2");
2534 return ERESTART;
2535 }
2536
2537 uvm_fault_lower_done(ufi, flt, uobj, pg);
2538 pmap_update(ufi->orig_map->pmap);
2539 uvmfault_unlockall(ufi, amap, uobj);
2540
2541 UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0);
2542 return 0;
2543 }
2544
2545 /*
2546 * uvm_fault_lower_done: queue lower center page.
2547 */
2548
2549 void
2550 uvm_fault_lower_done(
2551 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
2552 struct uvm_object *uobj, struct vm_page *pg)
2553 {
2554
2555 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
2556
2557 if (flt->wire_paging) {
2558 uvm_pagelock(pg);
2559 uvm_pagewire(pg);
2560 uvm_pageunlock(pg);
2561 if (pg->flags & PG_AOBJ) {
2562
2563 /*
2564 * since the now-wired page cannot be paged out,
2565 * release its swap resources for others to use.
2566 * since an aobj page with no swap cannot be clean,
2567 * mark it dirty now.
2568 *
2569 * use pg->uobject here. if the page is from a
2570 * tmpfs vnode, the pages are backed by its UAO and
2571 * not the vnode.
2572 */
2573
2574 KASSERT(uobj != NULL);
2575 KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
2576 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
2577 uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
2578 }
2579 } else if (uvmpdpol_pageactivate_p(pg)) {
2580 /*
2581 * avoid re-activating the page unless needed,
2582 * to avoid false sharing on multiprocessor.
2583 */
2584
2585 uvm_pagelock(pg);
2586 uvm_pageactivate(pg);
2587 uvm_pageunlock(pg);
2588 }
2589 }
2590
2591
2592 /*
2593 * uvm_fault_wire: wire down a range of virtual addresses in a map.
2594 *
2595 * => map may be read-locked by caller, but MUST NOT be write-locked.
2596 * => if map is read-locked, any operations which may cause map to
2597 * be write-locked in uvm_fault() must be taken care of by
2598 * the caller. See uvm_map_pageable().
2599 */
2600
2601 int
2602 uvm_fault_wire(struct vm_map *map, vaddr_t start, vaddr_t end,
2603 vm_prot_t access_type, int maxprot)
2604 {
2605 vaddr_t va;
2606 int error;
2607
2608 /*
2609 * now fault it in a page at a time. if the fault fails then we have
2610 * to undo what we have done. note that in uvm_fault VM_PROT_NONE
2611 * is replaced with the max protection if fault_type is VM_FAULT_WIRE.
2612 */
2613
2614 /*
2615 * XXX work around overflowing a vaddr_t. this prevents us from
2616 * wiring the last page in the address space, though.
2617 */
2618 if (start > end) {
2619 return EFAULT;
2620 }
2621
2622 for (va = start; va < end; va += PAGE_SIZE) {
2623 error = uvm_fault_internal(map, va, access_type,
2624 (maxprot ? UVM_FAULT_MAXPROT : 0) | UVM_FAULT_WIRE);
2625 if (error) {
2626 if (va != start) {
2627 uvm_fault_unwire(map, start, va);
2628 }
2629 return error;
2630 }
2631 }
2632 return 0;
2633 }
2634
2635 /*
2636 * uvm_fault_unwire(): unwire range of virtual space.
2637 */
2638
2639 void
2640 uvm_fault_unwire(struct vm_map *map, vaddr_t start, vaddr_t end)
2641 {
2642 vm_map_lock_read(map);
2643 uvm_fault_unwire_locked(map, start, end);
2644 vm_map_unlock_read(map);
2645 }
2646
2647 /*
2648 * uvm_fault_unwire_locked(): the guts of uvm_fault_unwire().
2649 *
2650 * => map must be at least read-locked.
2651 */
2652
2653 void
2654 uvm_fault_unwire_locked(struct vm_map *map, vaddr_t start, vaddr_t end)
2655 {
2656 struct vm_map_entry *entry, *oentry;
2657 pmap_t pmap = vm_map_pmap(map);
2658 vaddr_t va;
2659 paddr_t pa;
2660 struct vm_page *pg;
2661
2662 /*
2663 * we assume that the area we are unwiring has actually been wired
2664 * in the first place. this means that we should be able to extract
2665 * the PAs from the pmap. we also lock out the page daemon so that
2666 * we can call uvm_pageunwire.
2667 */
2668
2669 /*
2670 * find the beginning map entry for the region.
2671 */
2672
2673 KASSERT(start >= vm_map_min(map) && end <= vm_map_max(map));
2674 if (uvm_map_lookup_entry(map, start, &entry) == false)
2675 panic("uvm_fault_unwire_locked: address not in map");
2676
2677 oentry = NULL;
2678 for (va = start; va < end; va += PAGE_SIZE) {
2679
2680 /*
2681 * find the map entry for the current address.
2682 */
2683
2684 KASSERT(va >= entry->start);
2685 while (va >= entry->end) {
2686 KASSERT(entry->next != &map->header &&
2687 entry->next->start <= entry->end);
2688 entry = entry->next;
2689 }
2690
2691 /*
2692 * lock it.
2693 */
2694
2695 if (entry != oentry) {
2696 if (oentry != NULL) {
2697 uvm_map_unlock_entry(oentry);
2698 }
2699 uvm_map_lock_entry(entry, RW_WRITER);
2700 oentry = entry;
2701 }
2702
2703 /*
2704 * if the entry is no longer wired, tell the pmap.
2705 */
2706
2707 if (!pmap_extract(pmap, va, &pa))
2708 continue;
2709
2710 if (VM_MAPENT_ISWIRED(entry) == 0)
2711 pmap_unwire(pmap, va);
2712
2713 pg = PHYS_TO_VM_PAGE(pa);
2714 if (pg) {
2715 uvm_pagelock(pg);
2716 uvm_pageunwire(pg);
2717 uvm_pageunlock(pg);
2718 }
2719 }
2720
2721 if (oentry != NULL) {
2722 uvm_map_unlock_entry(entry);
2723 }
2724 }
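
/*
 * A minimal, hypothetical caller-side example (the function name is made
 * up and is not part of this file) showing the wire/unwire pairing that
 * uvm_map_pageable()-style callers follow, using only the two interfaces
 * defined above.
 */
static int
example_wire_then_unwire(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	int error;

	/* fault the range in and wire it: read access, normal protection */
	error = uvm_fault_wire(map, start, end, VM_PROT_READ, 0);
	if (error)
		return error;	/* uvm_fault_wire already unwired any partial work */

	/* ... the range is now resident and wired ... */

	/* drop the wiring again (uvm_fault_unwire takes the map read lock) */
	uvm_fault_unwire(map, start, end);
	return 0;
}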