FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_amap.c
1 /* $NetBSD: uvm_amap.c,v 1.126 2021/03/13 15:29:55 skrll Exp $ */
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * uvm_amap.c: amap operations
30 */
31
32 /*
33 * this file contains functions that perform operations on amaps. see
34 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.126 2021/03/13 15:29:55 skrll Exp $");
39
40 #include "opt_uvmhist.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/kmem.h>
46 #include <sys/pool.h>
47 #include <sys/atomic.h>
48
49 #include <uvm/uvm.h>
50 #include <uvm/uvm_swap.h>
51
52 /*
  53  * cache for allocation of vm_amap structures.  note that in order to
54 * avoid an endless loop, the amap cache's allocator cannot allocate
55 * memory from an amap (it currently goes through the kernel uobj, so
56 * we are ok).
57 */
58 static struct pool_cache uvm_amap_cache;
59 static kmutex_t amap_list_lock __cacheline_aligned;
60 static LIST_HEAD(, vm_amap) amap_list;
61
62 /*
63 * local functions
64 */
65
66 static int
67 amap_roundup_slots(int slots)
68 {
69
70 return kmem_roundup_size(slots * sizeof(int)) / sizeof(int);
71 }
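
/*
 * Illustrative note (not part of the upstream source): amap_roundup_slots()
 * converts a requested slot count into the number of slots that fit in the
 * kmem allocation actually handed back, so none of the allocated space is
 * wasted.  Assuming, purely for the sake of example, that kmem_roundup_size()
 * rounds 5 * sizeof(int) == 20 bytes up to a 32-byte bucket:
 *
 *	slots = 5                    -> 20 bytes requested
 *	kmem_roundup_size(20) == 32  -> assumed bucket size
 *	32 / sizeof(int) == 8        -> the amap is sized for 8 slots
 *
 * The exact bucket sizes are an allocator detail; only the "round up, then
 * use everything we got" pattern matters here.
 */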
72
73 #ifdef UVM_AMAP_PPREF
74 /*
75 * what is ppref? ppref is an _optional_ amap feature which is used
76 * to keep track of reference counts on a per-page basis. it is enabled
77 * when UVM_AMAP_PPREF is defined.
78 *
79 * when enabled, an array of ints is allocated for the pprefs. this
80 * array is allocated only when a partial reference is added to the
81 * map (either by unmapping part of the amap, or gaining a reference
82 * to only a part of an amap). if the allocation of the array fails
83 * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
84 * that we tried to do ppref's but couldn't alloc the array so just
85 * give up (after all, this is an optional feature!).
86 *
87 * the array is divided into page sized "chunks." for chunks of length 1,
88 * the chunk reference count plus one is stored in that chunk's slot.
89 * for chunks of length > 1 the first slot contains (the reference count
90 * plus one) * -1. [the negative value indicates that the length is
91 * greater than one.] the second slot of the chunk contains the length
92 * of the chunk. here is an example:
93 *
94 * actual REFS: 2 2 2 2 3 1 1 0 0 0 4 4 0 1 1 1
95 * ppref: -3 4 x x 4 -2 2 -1 3 x -5 2 1 -2 3 x
96 * <----------><-><----><-------><----><-><------->
97 * (x = don't care)
98 *
  99  * this allows one int to contain the ref count for the whole
 100  * chunk.  note that the "plus one" part is needed because a reference
 101  * count of zero is neither positive nor negative (we need a way to tell
102 * if we've got one zero or a bunch of them).
103 *
104 * here are some in-line functions to help us.
105 */
106
107 /*
108 * pp_getreflen: get the reference and length for a specific offset
109 *
110 * => ppref's amap must be locked
111 */
112 static inline void
113 pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
114 {
115
116 if (ppref[offset] > 0) { /* chunk size must be 1 */
117 *refp = ppref[offset] - 1; /* don't forget to adjust */
118 *lenp = 1;
119 } else {
120 *refp = (ppref[offset] * -1) - 1;
121 *lenp = ppref[offset+1];
122 }
123 }
124
125 /*
126 * pp_setreflen: set the reference and length for a specific offset
127 *
128 * => ppref's amap must be locked
129 */
130 static inline void
131 pp_setreflen(int *ppref, int offset, int ref, int len)
132 {
133 if (len == 0)
134 return;
135 if (len == 1) {
136 ppref[offset] = ref + 1;
137 } else {
138 ppref[offset] = (ref + 1) * -1;
139 ppref[offset+1] = len;
140 }
141 }
142 #endif /* UVM_AMAP_PPREF */
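
/*
 * A minimal round-trip sketch of the encoding handled by the two helpers
 * above (illustrative only, not part of the upstream source).  Store a
 * chunk of length 4 with reference count 2 at offset 0, followed by a
 * length-1 chunk with reference count 3:
 *
 *	int ppref[5], ref, len;
 *
 *	pp_setreflen(ppref, 0, 2, 4);		// ppref[0] = -3, ppref[1] = 4
 *	pp_setreflen(ppref, 4, 3, 1);		// ppref[4] = 4
 *
 *	pp_getreflen(ppref, 0, &ref, &len);	// ref == 2, len == 4
 *	pp_getreflen(ppref, 4, &ref, &len);	// ref == 3, len == 1
 *
 * as in the "actual REFS"/"ppref" table above: a positive entry encodes
 * (ref + 1) for a length-1 chunk, a negative entry encodes -(ref + 1)
 * with the chunk length stored in the following slot.
 */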
143
144 /*
145 * amap_alloc1: allocate an amap, but do not initialise the overlay.
146 *
147 * => Note: lock is not set.
148 */
149 static struct vm_amap *
150 amap_alloc1(int slots, int padslots, int flags)
151 {
152 const bool nowait = (flags & UVM_FLAG_NOWAIT) != 0;
153 const km_flag_t kmflags = nowait ? KM_NOSLEEP : KM_SLEEP;
154 struct vm_amap *amap;
155 krwlock_t *newlock, *oldlock;
156 int totalslots;
157
158 amap = pool_cache_get(&uvm_amap_cache, nowait ? PR_NOWAIT : PR_WAITOK);
159 if (amap == NULL) {
160 return NULL;
161 }
162 KASSERT(amap->am_lock != NULL);
163 KASSERT(amap->am_nused == 0);
164
165 /* Try to privatize the lock if currently shared. */
166 if (rw_obj_refcnt(amap->am_lock) > 1) {
167 newlock = rw_obj_tryalloc();
168 if (newlock != NULL) {
169 oldlock = amap->am_lock;
170 mutex_enter(&amap_list_lock);
171 amap->am_lock = newlock;
172 mutex_exit(&amap_list_lock);
173 rw_obj_free(oldlock);
174 }
175 }
176
177 totalslots = amap_roundup_slots(slots + padslots);
178 amap->am_ref = 1;
179 amap->am_flags = 0;
180 #ifdef UVM_AMAP_PPREF
181 amap->am_ppref = NULL;
182 #endif
183 amap->am_maxslot = totalslots;
184 amap->am_nslot = slots;
185
186 /*
187 * Note: since allocations are likely big, we expect to reduce the
188 * memory fragmentation by allocating them in separate blocks.
189 */
190 amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
191 if (amap->am_slots == NULL)
192 goto fail1;
193
194 amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
195 if (amap->am_bckptr == NULL)
196 goto fail2;
197
198 amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
199 kmflags);
200 if (amap->am_anon == NULL)
201 goto fail3;
202
203 return amap;
204
205 fail3:
206 kmem_free(amap->am_bckptr, totalslots * sizeof(int));
207 fail2:
208 kmem_free(amap->am_slots, totalslots * sizeof(int));
209 fail1:
210 pool_cache_put(&uvm_amap_cache, amap);
211
212 /*
213 * XXX hack to tell the pagedaemon how many pages we need,
214 * since we can need more than it would normally free.
215 */
216 if (nowait) {
217 extern u_int uvm_extrapages;
218 atomic_add_int(&uvm_extrapages,
219 ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
220 totalslots) >> PAGE_SHIFT);
221 }
222 return NULL;
223 }
224
225 /*
226 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
227 *
228 * => caller should ensure sz is a multiple of PAGE_SIZE
229 * => reference count to new amap is set to one
230 * => new amap is returned unlocked
231 */
232
233 struct vm_amap *
234 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
235 {
236 struct vm_amap *amap;
237 int slots, padslots;
238 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
239
240 AMAP_B2SLOT(slots, sz);
241 AMAP_B2SLOT(padslots, padsz);
242
243 amap = amap_alloc1(slots, padslots, waitf);
244 if (amap) {
245 memset(amap->am_anon, 0,
246 amap->am_maxslot * sizeof(struct vm_anon *));
247 }
248
249 UVMHIST_LOG(maphist,"<- done, amap = %#jx, sz=%jd", (uintptr_t)amap,
250 sz, 0, 0);
251 return(amap);
252 }
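
/*
 * Hedged usage sketch (not taken from a real caller; "npages" and the error
 * handling are hypothetical): a typical in-kernel user converts a page count
 * to bytes and attaches the result to a vm_aref.
 *
 *	struct vm_aref aref;
 *	vsize_t npages = 16;
 *
 *	aref.ar_pageoff = 0;
 *	aref.ar_amap = amap_alloc(npages << PAGE_SHIFT, 0, UVM_FLAG_NOWAIT);
 *	if (aref.ar_amap == NULL)
 *		return ENOMEM;	// only possible with UVM_FLAG_NOWAIT
 *
 * The new amap comes back unlocked with am_ref == 1, as noted above.
 */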
253
254 /*
255 * amap_ctor: pool_cache constructor for new amaps
256 *
257 * => carefully synchronize with amap_swap_off()
258 */
259 static int
260 amap_ctor(void *arg, void *obj, int flags)
261 {
262 struct vm_amap *amap = obj;
263
264 if ((flags & PR_NOWAIT) != 0) {
265 amap->am_lock = rw_obj_tryalloc();
266 if (amap->am_lock == NULL) {
267 return ENOMEM;
268 }
269 } else {
270 amap->am_lock = rw_obj_alloc();
271 }
272 amap->am_nused = 0;
273 amap->am_flags = 0;
274
275 mutex_enter(&amap_list_lock);
276 LIST_INSERT_HEAD(&amap_list, amap, am_list);
277 mutex_exit(&amap_list_lock);
278 return 0;
279 }
280
281 /*
 282  * amap_dtor: pool_cache destructor for amaps
283 *
284 * => carefully synchronize with amap_swap_off()
285 */
286 static void
287 amap_dtor(void *arg, void *obj)
288 {
289 struct vm_amap *amap = obj;
290
291 KASSERT(amap->am_nused == 0);
292
293 mutex_enter(&amap_list_lock);
294 LIST_REMOVE(amap, am_list);
295 mutex_exit(&amap_list_lock);
296 rw_obj_free(amap->am_lock);
297 }
298
299 /*
300 * uvm_amap_init: initialize the amap system.
301 */
302 void
303 uvm_amap_init(void)
304 {
305
306 mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE);
307
308 pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap), 0, 0,
309 PR_LARGECACHE, "amappl", NULL, IPL_NONE, amap_ctor, amap_dtor,
310 NULL);
311 }
312
313 /*
314 * amap_free: free an amap
315 *
316 * => the amap must be unlocked
317 * => the amap should have a zero reference count and be empty
318 */
319 void
320 amap_free(struct vm_amap *amap)
321 {
322 int slots;
323
324 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
325
326 KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
327 KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
328 slots = amap->am_maxslot;
329 kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
330 kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
331 kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
332 #ifdef UVM_AMAP_PPREF
333 if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
334 kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
335 #endif
336 pool_cache_put(&uvm_amap_cache, amap);
337 UVMHIST_LOG(maphist,"<- done, freed amap = %#jx", (uintptr_t)amap,
338 0, 0, 0);
339 }
340
341 /*
342 * amap_extend: extend the size of an amap (if needed)
343 *
344 * => called from uvm_map when we want to extend an amap to cover
345 * a new mapping (rather than allocate a new one)
346 * => amap should be unlocked (we will lock it)
347 * => to safely extend an amap it should have a reference count of
348 * one (thus it can't be shared)
349 */
350 int
351 amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
352 {
353 struct vm_amap *amap = entry->aref.ar_amap;
354 int slotoff = entry->aref.ar_pageoff;
355 int slotmapped, slotadd, slotneed, slotadded, slotalloc;
356 int slotadj, slotarea, slotendoff;
357 int oldnslots;
358 #ifdef UVM_AMAP_PPREF
359 int *newppref, *oldppref;
360 #endif
361 int i, *newsl, *newbck, *oldsl, *oldbck;
362 struct vm_anon **newover, **oldover;
363 const km_flag_t kmflags =
364 (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
365
366 UVMHIST_FUNC(__func__);
367 UVMHIST_CALLARGS(maphist, " (entry=%#jx, addsize=%#jx, flags=%#jx)",
368 (uintptr_t)entry, addsize, flags, 0);
369
370 /*
371 * first, determine how many slots we need in the amap. don't
372 * forget that ar_pageoff could be non-zero: this means that
373 * there are some unused slots before us in the amap.
374 */
375
376 amap_lock(amap, RW_WRITER);
377 KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
378 AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
379 AMAP_B2SLOT(slotadd, addsize); /* slots to add */
380 if (flags & AMAP_EXTEND_FORWARDS) {
381 slotneed = slotoff + slotmapped + slotadd;
382 slotadj = 0;
383 slotarea = 0;
384 } else {
385 slotneed = slotadd + slotmapped;
386 slotadj = slotadd - slotoff;
387 slotarea = amap->am_maxslot - slotmapped;
388 }
389
390 /*
391 * Because this amap only has 1 ref, we know that there is
392 * only one vm_map_entry pointing to it, and the one entry is
393 * using slots between slotoff and slotoff + slotmapped. If
394 * we have been using ppref then we know that only slots in
395 * the one map entry's range can have anons, since ppref
396 * allowed us to free any anons outside that range as other map
397 * entries which used this amap were removed. But without ppref,
398 * we couldn't know which slots were still needed by other map
399 * entries, so we couldn't free any anons as we removed map
400 * entries, and so any slot from 0 to am_nslot can have an
401 * anon. But now that we know there is only one map entry
402 * left and we know its range, we can free up any anons
403 * outside that range. This is necessary because the rest of
404 * this function assumes that there are no anons in the amap
405 * outside of the one map entry's range.
406 */
407
408 slotendoff = slotoff + slotmapped;
409 if (amap->am_ppref == PPREF_NONE) {
410 amap_wiperange(amap, 0, slotoff);
411 amap_wiperange(amap, slotendoff, amap->am_nslot - slotendoff);
412 }
413 for (i = 0; i < slotoff; i++) {
414 KASSERT(amap->am_anon[i] == NULL);
415 }
 416 	for (i = slotendoff; i < amap->am_nslot; i++) {
417 KASSERT(amap->am_anon[i] == NULL);
418 }
419
420 /*
421 * case 1: we already have enough slots in the map and thus
422 * only need to bump the reference counts on the slots we are
423 * adding.
424 */
425
426 if (flags & AMAP_EXTEND_FORWARDS) {
427 if (amap->am_nslot >= slotneed) {
428 #ifdef UVM_AMAP_PPREF
429 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
430 amap_pp_adjref(amap, slotoff + slotmapped,
431 slotadd, 1);
432 }
433 #endif
434 amap_unlock(amap);
435 UVMHIST_LOG(maphist,
436 "<- done (case 1f), amap = %#jx, sltneed=%jd",
437 (uintptr_t)amap, slotneed, 0, 0);
438 return 0;
439 }
440 } else {
441 if (slotadj <= 0) {
442 slotoff -= slotadd;
443 entry->aref.ar_pageoff = slotoff;
444 #ifdef UVM_AMAP_PPREF
445 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
446 amap_pp_adjref(amap, slotoff, slotadd, 1);
447 }
448 #endif
449 amap_unlock(amap);
450 UVMHIST_LOG(maphist,
451 "<- done (case 1b), amap = %#jx, sltneed=%jd",
452 (uintptr_t)amap, slotneed, 0, 0);
453 return 0;
454 }
455 }
456
457 /*
458 * case 2: we pre-allocated slots for use and we just need to
 459  * bump nslot up to account for these slots.
460 */
461
462 if (amap->am_maxslot >= slotneed) {
463 if (flags & AMAP_EXTEND_FORWARDS) {
464 #ifdef UVM_AMAP_PPREF
465 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
466 if ((slotoff + slotmapped) < amap->am_nslot)
467 amap_pp_adjref(amap,
468 slotoff + slotmapped,
469 (amap->am_nslot -
470 (slotoff + slotmapped)), 1);
471 pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
472 slotneed - amap->am_nslot);
473 }
474 #endif
475 amap->am_nslot = slotneed;
476 amap_unlock(amap);
477
478 /*
479 * no need to zero am_anon since that was done at
480 * alloc time and we never shrink an allocation.
481 */
482
483 UVMHIST_LOG(maphist,"<- done (case 2f), amap = %#jx, "
484 "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
485 return 0;
486 } else {
487 #ifdef UVM_AMAP_PPREF
488 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
489 /*
490 * Slide up the ref counts on the pages that
491 * are actually in use.
492 */
493 memmove(amap->am_ppref + slotarea,
494 amap->am_ppref + slotoff,
495 slotmapped * sizeof(int));
496 /*
497 * Mark the (adjusted) gap at the front as
498 * referenced/not referenced.
499 */
500 pp_setreflen(amap->am_ppref,
501 0, 0, slotarea - slotadd);
502 pp_setreflen(amap->am_ppref,
503 slotarea - slotadd, 1, slotadd);
504 }
505 #endif
506
507 /*
508 * Slide the anon pointers up and clear out
509 * the space we just made.
510 */
511 memmove(amap->am_anon + slotarea,
512 amap->am_anon + slotoff,
513 slotmapped * sizeof(struct vm_anon*));
514 memset(amap->am_anon + slotoff, 0,
515 (slotarea - slotoff) * sizeof(struct vm_anon *));
516
517 /*
518 * Slide the backpointers up, but don't bother
519 * wiping out the old slots.
520 */
521 memmove(amap->am_bckptr + slotarea,
522 amap->am_bckptr + slotoff,
523 slotmapped * sizeof(int));
524
525 /*
526 * Adjust all the useful active slot numbers.
527 */
528 for (i = 0; i < amap->am_nused; i++)
529 amap->am_slots[i] += (slotarea - slotoff);
530
531 /*
532 * We just filled all the empty space in the
533 * front of the amap by activating a few new
534 * slots.
535 */
536 amap->am_nslot = amap->am_maxslot;
537 entry->aref.ar_pageoff = slotarea - slotadd;
538 amap_unlock(amap);
539
540 UVMHIST_LOG(maphist,"<- done (case 2b), amap = %#jx, "
541 "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
542 return 0;
543 }
544 }
545
546 /*
547 * Case 3: we need to allocate a new amap and copy all the amap
548 * data over from old amap to the new one. Drop the lock before
549 * performing allocation.
550 *
551 * Note: since allocations are likely big, we expect to reduce the
552 * memory fragmentation by allocating them in separate blocks.
553 */
554
555 amap_unlock(amap);
556
557 if (slotneed >= UVM_AMAP_LARGE) {
558 return E2BIG;
559 }
560
561 slotalloc = amap_roundup_slots(slotneed);
562 #ifdef UVM_AMAP_PPREF
563 newppref = NULL;
564 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
 565 		/* Will be handled later if this fails. */
566 newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
567 }
568 #endif
569 newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
570 newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
571 newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
572 if (newsl == NULL || newbck == NULL || newover == NULL) {
573 #ifdef UVM_AMAP_PPREF
574 if (newppref != NULL) {
575 kmem_free(newppref, slotalloc * sizeof(*newppref));
576 }
577 #endif
578 if (newsl != NULL) {
579 kmem_free(newsl, slotalloc * sizeof(*newsl));
580 }
581 if (newbck != NULL) {
582 kmem_free(newbck, slotalloc * sizeof(*newbck));
583 }
584 if (newover != NULL) {
585 kmem_free(newover, slotalloc * sizeof(*newover));
586 }
587 return ENOMEM;
588 }
589 amap_lock(amap, RW_WRITER);
590 KASSERT(amap->am_maxslot < slotneed);
591
592 /*
 593 	 * Copy everything over to the newly allocated areas.
594 */
595
596 slotadded = slotalloc - amap->am_nslot;
597 if (!(flags & AMAP_EXTEND_FORWARDS))
598 slotarea = slotalloc - slotmapped;
599
600 /* do am_slots */
601 oldsl = amap->am_slots;
602 if (flags & AMAP_EXTEND_FORWARDS)
603 memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
604 else
605 for (i = 0; i < amap->am_nused; i++)
606 newsl[i] = oldsl[i] + slotarea - slotoff;
607 amap->am_slots = newsl;
608
609 /* do am_anon */
610 oldover = amap->am_anon;
611 if (flags & AMAP_EXTEND_FORWARDS) {
612 memcpy(newover, oldover,
613 sizeof(struct vm_anon *) * amap->am_nslot);
614 memset(newover + amap->am_nslot, 0,
615 sizeof(struct vm_anon *) * slotadded);
616 } else {
617 memcpy(newover + slotarea, oldover + slotoff,
618 sizeof(struct vm_anon *) * slotmapped);
619 memset(newover, 0,
620 sizeof(struct vm_anon *) * slotarea);
621 }
622 amap->am_anon = newover;
623
624 /* do am_bckptr */
625 oldbck = amap->am_bckptr;
626 if (flags & AMAP_EXTEND_FORWARDS)
627 memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
628 else
629 memcpy(newbck + slotarea, oldbck + slotoff,
630 sizeof(int) * slotmapped);
631 amap->am_bckptr = newbck;
632
633 #ifdef UVM_AMAP_PPREF
634 /* do ppref */
635 oldppref = amap->am_ppref;
636 if (newppref) {
637 if (flags & AMAP_EXTEND_FORWARDS) {
638 memcpy(newppref, oldppref,
639 sizeof(int) * amap->am_nslot);
640 memset(newppref + amap->am_nslot, 0,
641 sizeof(int) * slotadded);
642 } else {
643 memcpy(newppref + slotarea, oldppref + slotoff,
644 sizeof(int) * slotmapped);
645 }
646 amap->am_ppref = newppref;
647 if ((flags & AMAP_EXTEND_FORWARDS) &&
648 (slotoff + slotmapped) < amap->am_nslot)
649 amap_pp_adjref(amap, slotoff + slotmapped,
650 (amap->am_nslot - (slotoff + slotmapped)), 1);
651 if (flags & AMAP_EXTEND_FORWARDS)
652 pp_setreflen(newppref, amap->am_nslot, 1,
653 slotneed - amap->am_nslot);
654 else {
655 pp_setreflen(newppref, 0, 0,
656 slotalloc - slotneed);
657 pp_setreflen(newppref, slotalloc - slotneed, 1,
658 slotneed - slotmapped);
659 }
660 } else {
661 if (amap->am_ppref)
662 amap->am_ppref = PPREF_NONE;
663 }
664 #endif
665
666 /* update master values */
667 if (flags & AMAP_EXTEND_FORWARDS)
668 amap->am_nslot = slotneed;
669 else {
670 entry->aref.ar_pageoff = slotarea - slotadd;
671 amap->am_nslot = slotalloc;
672 }
673 oldnslots = amap->am_maxslot;
674 amap->am_maxslot = slotalloc;
675 amap_unlock(amap);
676
677 kmem_free(oldsl, oldnslots * sizeof(*oldsl));
678 kmem_free(oldbck, oldnslots * sizeof(*oldbck));
679 kmem_free(oldover, oldnslots * sizeof(*oldover));
680 #ifdef UVM_AMAP_PPREF
681 if (oldppref && oldppref != PPREF_NONE)
682 kmem_free(oldppref, oldnslots * sizeof(*oldppref));
683 #endif
684 UVMHIST_LOG(maphist,"<- done (case 3), amap = %#jx, slotneed=%jd",
685 (uintptr_t)amap, slotneed, 0, 0);
686 return 0;
687 }
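
/*
 * A small worked example of how the three cases above are selected
 * (illustrative numbers, not from the source): take an amap with
 * am_nslot == 4 and am_maxslot == 8, mapped by one entry with
 * ar_pageoff == 0 over all 4 slots, being extended forwards by 2 pages:
 *
 *	slotneed = slotoff + slotmapped + slotadd = 0 + 4 + 2 = 6
 *
 *	case 1: am_nslot (4)   >= slotneed (6)?  no
 *	case 2: am_maxslot (8) >= slotneed (6)?  yes -> am_nslot = 6,
 *	        no new allocation needed
 *
 * Extending the same amap by 8 pages instead gives slotneed == 12, which
 * falls through to case 3 and reallocates the three arrays at
 * amap_roundup_slots(12) slots (or fails with E2BIG once slotneed reaches
 * UVM_AMAP_LARGE).
 */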
688
689 /*
690 * amap_share_protect: change protection of anons in a shared amap
691 *
692 * for shared amaps, given the current data structure layout, it is
693 * not possible for us to directly locate all maps referencing the
694 * shared anon (to change the protection). in order to protect data
695 * in shared maps we use pmap_page_protect(). [this is useful for IPC
696 * mechanisms like map entry passing that may want to write-protect
697 * all mappings of a shared amap.] we traverse am_anon or am_slots
698 * depending on the current state of the amap.
699 *
700 * => entry's map and amap must be locked by the caller
701 */
702 void
703 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
704 {
705 struct vm_amap *amap = entry->aref.ar_amap;
706 u_int slots, lcv, slot, stop;
707 struct vm_anon *anon;
708
709 KASSERT(rw_write_held(amap->am_lock));
710
711 AMAP_B2SLOT(slots, (entry->end - entry->start));
712 stop = entry->aref.ar_pageoff + slots;
713
714 if (slots < amap->am_nused) {
715 /*
716 * Cheaper to traverse am_anon.
717 */
718 for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
719 anon = amap->am_anon[lcv];
720 if (anon == NULL) {
721 continue;
722 }
723 if (anon->an_page) {
724 pmap_page_protect(anon->an_page, prot);
725 }
726 }
727 return;
728 }
729
730 /*
731 * Cheaper to traverse am_slots.
732 */
733 for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
734 slot = amap->am_slots[lcv];
735 if (slot < entry->aref.ar_pageoff || slot >= stop) {
736 continue;
737 }
738 anon = amap->am_anon[slot];
739 if (anon->an_page) {
740 pmap_page_protect(anon->an_page, prot);
741 }
742 }
743 }
744
745 /*
 746  * amap_wipeout: wipe out all anons in an amap; then free the amap!
747 *
748 * => Called from amap_unref(), when reference count drops to zero.
749 * => amap must be locked.
750 */
751
752 void
753 amap_wipeout(struct vm_amap *amap)
754 {
755 u_int lcv;
756
757 UVMHIST_FUNC(__func__);
758 UVMHIST_CALLARGS(maphist,"(amap=%#jx)", (uintptr_t)amap, 0,0,0);
759
760 KASSERT(rw_write_held(amap->am_lock));
761 KASSERT(amap->am_ref == 0);
762
763 if (__predict_false(amap->am_flags & AMAP_SWAPOFF)) {
764 /*
765 * Note: amap_swap_off() will call us again.
766 */
767 amap_unlock(amap);
768 return;
769 }
770
771 for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
772 struct vm_anon *anon;
773 u_int slot;
774
775 slot = amap->am_slots[lcv];
776 anon = amap->am_anon[slot];
777 KASSERT(anon != NULL && anon->an_ref != 0);
778
779 KASSERT(anon->an_lock == amap->am_lock);
780 UVMHIST_LOG(maphist," processing anon %#jx, ref=%jd",
781 (uintptr_t)anon, anon->an_ref, 0, 0);
782
783 /*
784 * Drop the reference.
785 */
786
787 if (__predict_true(--anon->an_ref == 0)) {
788 uvm_anfree(anon);
789 }
790 if (__predict_false((lcv & 31) == 31)) {
791 preempt_point();
792 }
793 }
794
795 /*
796 * Finally, destroy the amap.
797 */
798
799 amap->am_nused = 0;
800 amap_unlock(amap);
801 amap_free(amap);
802 UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
803 }
804
805 /*
806 * amap_copy: ensure that a map entry's "needs_copy" flag is false
807 * by copying the amap if necessary.
808 *
809 * => an entry with a null amap pointer will get a new (blank) one.
810 * => the map that the map entry belongs to must be locked by caller.
811 * => the amap currently attached to "entry" (if any) must be unlocked.
812 * => if canchunk is true, then we may clip the entry into a chunk
813 * => "startva" and "endva" are used only if canchunk is true. they are
814 * used to limit chunking (e.g. if you have a large space that you
815 * know you are going to need to allocate amaps for, there is no point
816 * in allowing that to be chunked)
817 */
818
819 void
820 amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
821 vaddr_t startva, vaddr_t endva)
822 {
823 const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
824 struct vm_amap *amap, *srcamap;
825 u_int slots, lcv;
826 krwlock_t *oldlock;
827 vsize_t len;
828
829 UVMHIST_FUNC(__func__);
830 UVMHIST_CALLARGS(maphist, " (map=%#jx, entry=%#jx, flags=%#jx)",
831 (uintptr_t)map, (uintptr_t)entry, flags, -2);
832
833 KASSERT(map != kernel_map); /* we use nointr pool */
834
835 srcamap = entry->aref.ar_amap;
836 len = entry->end - entry->start;
837
838 /*
839 * Is there an amap to copy? If not, create one.
840 */
841
842 if (srcamap == NULL) {
843 const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;
844
845 /*
846 * Check to see if we have a large amap that we can
847 * chunk. We align startva/endva to chunk-sized
848 * boundaries and then clip to them.
849 */
850
851 if (canchunk && atop(len) >= UVM_AMAP_LARGE) {
852 vsize_t chunksize;
853
854 /* Convert slots to bytes. */
855 chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
856 startva = (startva / chunksize) * chunksize;
857 endva = roundup(endva, chunksize);
858 UVMHIST_LOG(maphist,
859 " chunk amap ==> clip %#jx->%#jx to %#jx->%#jx",
860 entry->start, entry->end, startva, endva);
861 UVM_MAP_CLIP_START(map, entry, startva);
862
863 /* Watch out for endva wrap-around! */
864 if (endva >= startva) {
865 UVM_MAP_CLIP_END(map, entry, endva);
866 }
867 }
868
869 if ((flags & AMAP_COPY_NOMERGE) == 0 &&
870 uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
871 return;
872 }
873
874 UVMHIST_LOG(maphist, "<- done [creating new amap %#jx->%#jx]",
875 entry->start, entry->end, 0, 0);
876
877 /*
878 * Allocate an initialised amap and install it.
879 * Note: we must update the length after clipping.
880 */
881 len = entry->end - entry->start;
882 entry->aref.ar_pageoff = 0;
883 entry->aref.ar_amap = amap_alloc(len, 0, waitf);
884 if (entry->aref.ar_amap != NULL) {
885 entry->etype &= ~UVM_ET_NEEDSCOPY;
886 }
887 return;
888 }
889
890 /*
 891 	 * First, check to see if we are the only map entry referencing
 892 	 * the amap we currently have.  If so, then just take it over instead
 893 	 * of copying it.  Note that we are reading am_ref without the lock held,
 894 	 * as the value can only be one if we have the only reference
 895 	 * to the amap (via our locked map).  If the value is greater than
 896 	 * one, then allocate an amap and re-check the value.
897 */
898
899 if (srcamap->am_ref == 1) {
900 entry->etype &= ~UVM_ET_NEEDSCOPY;
901 UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
902 0, 0, 0, 0);
903 return;
904 }
905
906 UVMHIST_LOG(maphist," amap=%#jx, ref=%jd, must copy it",
907 (uintptr_t)srcamap, srcamap->am_ref, 0, 0);
908
909 /*
910 * Allocate a new amap (note: not initialised, etc).
911 */
912
913 AMAP_B2SLOT(slots, len);
914 amap = amap_alloc1(slots, 0, waitf);
915 if (amap == NULL) {
916 UVMHIST_LOG(maphist, " amap_alloc1 failed", 0,0,0,0);
917 return;
918 }
919
920 /*
921 * Make the new amap share the source amap's lock, and then lock
922 * both. We must do this before we set am_nused != 0, otherwise
923 * amap_swap_off() can become interested in the amap.
924 */
925
926 oldlock = amap->am_lock;
927 mutex_enter(&amap_list_lock);
928 amap->am_lock = srcamap->am_lock;
929 mutex_exit(&amap_list_lock);
930 rw_obj_hold(amap->am_lock);
931 rw_obj_free(oldlock);
932
933 amap_lock(srcamap, RW_WRITER);
934
935 /*
936 * Re-check the reference count with the lock held. If it has
937 * dropped to one - we can take over the existing map.
938 */
939
940 if (srcamap->am_ref == 1) {
941 /* Just take over the existing amap. */
942 entry->etype &= ~UVM_ET_NEEDSCOPY;
943 amap_unlock(srcamap);
944 /* Destroy the new (unused) amap. */
945 amap->am_ref--;
946 amap_free(amap);
947 return;
948 }
949
950 /*
951 * Copy the slots. Zero the padded part.
952 */
953
954 UVMHIST_LOG(maphist, " copying amap now",0, 0, 0, 0);
955 for (lcv = 0 ; lcv < slots; lcv++) {
956 amap->am_anon[lcv] =
957 srcamap->am_anon[entry->aref.ar_pageoff + lcv];
958 if (amap->am_anon[lcv] == NULL)
959 continue;
960 KASSERT(amap->am_anon[lcv]->an_lock == srcamap->am_lock);
961 KASSERT(amap->am_anon[lcv]->an_ref > 0);
962 KASSERT(amap->am_nused < amap->am_maxslot);
963 amap->am_anon[lcv]->an_ref++;
964 amap->am_bckptr[lcv] = amap->am_nused;
965 amap->am_slots[amap->am_nused] = lcv;
966 amap->am_nused++;
967 }
968 memset(&amap->am_anon[lcv], 0,
969 (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
970
971 /*
972 * Drop our reference to the old amap (srcamap) and unlock.
 973 	 * Since the reference count on srcamap is greater than one
 974 	 * (we checked above), it cannot drop to zero while it is locked.
975 */
976
977 srcamap->am_ref--;
978 KASSERT(srcamap->am_ref > 0);
979
980 if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) {
981 srcamap->am_flags &= ~AMAP_SHARED;
982 }
983 #ifdef UVM_AMAP_PPREF
984 if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
985 amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
986 len >> PAGE_SHIFT, -1);
987 }
988 #endif
989
990 amap_unlock(srcamap);
991
992 /*
993 * Install new amap.
994 */
995
996 entry->aref.ar_pageoff = 0;
997 entry->aref.ar_amap = amap;
998 entry->etype &= ~UVM_ET_NEEDSCOPY;
999 UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
1000 }
1001
1002 /*
1003 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
1004 *
1005 * called during fork(2) when the parent process has a wired map
1006 * entry. in that case we want to avoid write-protecting pages
1007 * in the parent's map (e.g. like what you'd do for a COW page)
1008 * so we resolve the COW here.
1009 *
1010 * => assume parent's entry was wired, thus all pages are resident.
1011 * => assume pages that are loaned out (loan_count) are already mapped
1012 * read-only in all maps, and thus no need for us to worry about them
1013 * => assume both parent and child vm_map's are locked
1014 * => caller passes child's map/entry in to us
1015 * => if we run out of memory we will unlock the amap and sleep _with_ the
1016 * parent and child vm_map's locked(!). we have to do this since
1017 * we are in the middle of a fork(2) and we can't let the parent
1018  *    map change until we are done copying all the map entries.
1019 * => XXXCDC: out of memory should cause fork to fail, but there is
1020 * currently no easy way to do this (needs fix)
1021 */
1022
1023 void
1024 amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
1025 {
1026 struct vm_amap *amap = entry->aref.ar_amap;
1027 struct vm_anon *anon, *nanon;
1028 struct vm_page *pg, *npg;
1029 u_int lcv, slot;
1030
1031 /*
1032 * note that if we unlock the amap then we must ReStart the "lcv" for
1033 	 * loop because some other process could reorder the anons in the
1034 * am_anon[] array on us while the lock is dropped.
1035 */
1036
1037 ReStart:
1038 amap_lock(amap, RW_WRITER);
1039 for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
1040 slot = amap->am_slots[lcv];
1041 anon = amap->am_anon[slot];
1042 KASSERT(anon->an_lock == amap->am_lock);
1043
1044 /*
1045 * If anon has only one reference - we must have already
1046 * copied it. This can happen if we needed to sleep waiting
1047 * for memory in a previous run through this loop. The new
1048 		 * page might even have been paged out, since it is not wired.
1049 */
1050
1051 if (anon->an_ref == 1) {
1052 KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
1053 continue;
1054 }
1055
1056 /*
1057 * The old page must be resident since the parent is wired.
1058 */
1059
1060 pg = anon->an_page;
1061 KASSERT(pg != NULL);
1062 KASSERT(pg->wire_count > 0);
1063
1064 /*
1065 * If the page is loaned then it must already be mapped
1066 * read-only and we don't need to copy it.
1067 */
1068
1069 if (pg->loan_count != 0) {
1070 continue;
1071 }
1072 KASSERT(pg->uanon == anon && pg->uobject == NULL);
1073
1074 /*
1075 * If the page is busy, then we have to unlock, wait for
1076 * it and then restart.
1077 */
1078
1079 if (pg->flags & PG_BUSY) {
1080 uvm_pagewait(pg, amap->am_lock, "cownow");
1081 goto ReStart;
1082 }
1083
1084 /*
1085 * Perform a copy-on-write.
1086 * First - get a new anon and a page.
1087 */
1088
1089 nanon = uvm_analloc();
1090 if (nanon) {
1091 nanon->an_lock = amap->am_lock;
1092 npg = uvm_pagealloc(NULL, 0, nanon, 0);
1093 } else {
1094 npg = NULL;
1095 }
1096 if (nanon == NULL || npg == NULL) {
1097 amap_unlock(amap);
1098 if (nanon) {
1099 nanon->an_lock = NULL;
1100 nanon->an_ref--;
1101 KASSERT(nanon->an_ref == 0);
1102 uvm_anfree(nanon);
1103 }
1104 uvm_wait("cownowpage");
1105 goto ReStart;
1106 }
1107
1108 /*
1109 * Copy the data and replace anon with the new one.
1110 		 * Also, set up its lock (it shares the amap's lock).
1111 */
1112
1113 uvm_pagecopy(pg, npg);
1114 anon->an_ref--;
1115 KASSERT(anon->an_ref > 0);
1116 amap->am_anon[slot] = nanon;
1117
1118 /*
1119 * Drop PG_BUSY on new page. Since its owner was write
1120 * locked all this time - it cannot be PG_RELEASED or
1121 * waited on.
1122 */
1123 uvm_pagelock(npg);
1124 uvm_pageactivate(npg);
1125 uvm_pageunlock(npg);
1126 npg->flags &= ~(PG_BUSY|PG_FAKE);
1127 UVM_PAGE_OWN(npg, NULL);
1128 }
1129 amap_unlock(amap);
1130 }
1131
1132 /*
1133 * amap_splitref: split a single reference into two separate references
1134 *
1135 * => called from uvm_map's clip routines
1136 * => origref's map should be locked
1137 * => origref->ar_amap should be unlocked (we will lock)
1138 */
1139 void
1140 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
1141 {
1142 struct vm_amap *amap = origref->ar_amap;
1143 u_int leftslots;
1144
1145 KASSERT(splitref->ar_amap == origref->ar_amap);
1146 AMAP_B2SLOT(leftslots, offset);
1147 KASSERT(leftslots != 0);
1148
1149 amap_lock(amap, RW_WRITER);
1150 KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);
1151
1152 #ifdef UVM_AMAP_PPREF
1153 /* Establish ppref before we add a duplicate reference to the amap. */
1154 if (amap->am_ppref == NULL) {
1155 amap_pp_establish(amap, origref->ar_pageoff);
1156 }
1157 #endif
1158 /* Note: not a share reference. */
1159 amap->am_ref++;
1160 splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1161 amap_unlock(amap);
1162 }
1163
1164 #ifdef UVM_AMAP_PPREF
1165
1166 /*
1167 * amap_pp_establish: add a ppref array to an amap, if possible.
1168 *
1169 * => amap should be locked by caller.
1170 */
1171 void
1172 amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
1173 {
1174 const size_t sz = amap->am_maxslot * sizeof(*amap->am_ppref);
1175
1176 KASSERT(rw_write_held(amap->am_lock));
1177
1178 amap->am_ppref = kmem_zalloc(sz, KM_NOSLEEP);
1179 if (amap->am_ppref == NULL) {
1180 /* Failure - just do not use ppref. */
1181 amap->am_ppref = PPREF_NONE;
1182 return;
1183 }
1184 pp_setreflen(amap->am_ppref, 0, 0, offset);
1185 pp_setreflen(amap->am_ppref, offset, amap->am_ref,
1186 amap->am_nslot - offset);
1187 }
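
/*
 * Worked example (illustrative numbers): for an amap with am_nslot == 8
 * and am_ref == 2, establishing ppref for an aref that starts at slot 3
 * performs
 *
 *	pp_setreflen(ppref, 0, 0, 3);	// ppref[0] = -1, ppref[1] = 3
 *	pp_setreflen(ppref, 3, 2, 5);	// ppref[3] = -3, ppref[4] = 5
 *
 * i.e. slots [0, 3) carry zero references (they precede the mapping) and
 * slots [3, 8) carry the current amap-wide reference count.
 */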
1188
1189 /*
1190 * amap_pp_adjref: adjust reference count to a part of an amap using the
1191 * per-page reference count array.
1192 *
1193 * => caller must check that ppref != PPREF_NONE before calling.
1194 * => map and amap must be locked.
1195 */
1196 void
1197 amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
1198 {
1199 int stopslot, *ppref, lcv, prevlcv;
1200 int ref, len, prevref, prevlen;
1201
1202 KASSERT(rw_write_held(amap->am_lock));
1203
1204 stopslot = curslot + slotlen;
1205 ppref = amap->am_ppref;
1206 prevlcv = 0;
1207
1208 /*
1209 * Advance to the correct place in the array, fragment if needed.
1210 */
1211
1212 for (lcv = 0 ; lcv < curslot ; lcv += len) {
1213 pp_getreflen(ppref, lcv, &ref, &len);
1214 if (lcv + len > curslot) { /* goes past start? */
1215 pp_setreflen(ppref, lcv, ref, curslot - lcv);
1216 pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
1217 len = curslot - lcv; /* new length of entry @ lcv */
1218 }
1219 prevlcv = lcv;
1220 }
1221 if (lcv == 0) {
1222 /*
1223 * Ensure that the "prevref == ref" test below always
1224 * fails, since we are starting from the beginning of
1225 * the ppref array; that is, there is no previous chunk.
1226 */
1227 prevref = -1;
1228 prevlen = 0;
1229 } else {
1230 pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
1231 }
1232
1233 /*
1234 * Now adjust reference counts in range. Merge the first
1235 * changed entry with the last unchanged entry if possible.
1236 */
1237 KASSERT(lcv == curslot);
1238 for (/* lcv already set */; lcv < stopslot ; lcv += len) {
1239 pp_getreflen(ppref, lcv, &ref, &len);
1240 if (lcv + len > stopslot) { /* goes past end? */
1241 pp_setreflen(ppref, lcv, ref, stopslot - lcv);
1242 pp_setreflen(ppref, stopslot, ref,
1243 len - (stopslot - lcv));
1244 len = stopslot - lcv;
1245 }
1246 ref += adjval;
1247 KASSERT(ref >= 0);
1248 KASSERT(ref <= amap->am_ref);
1249 if (lcv == prevlcv + prevlen && ref == prevref) {
1250 pp_setreflen(ppref, prevlcv, ref, prevlen + len);
1251 } else {
1252 pp_setreflen(ppref, lcv, ref, len);
1253 }
1254 if (ref == 0) {
1255 amap_wiperange(amap, lcv, len);
1256 }
1257 }
1258 }
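
/*
 * Worked example (illustrative numbers): starting from one chunk that
 * covers all 8 slots with reference count 2,
 *
 *	actual REFS:  2  2  2  2  2  2  2  2
 *	ppref:       -3  8  x  x  x  x  x  x
 *
 * the call amap_pp_adjref(amap, 2, 4, -1) first fragments the chunk around
 * the [2, 6) range and then drops that range's count, leaving
 *
 *	actual REFS:  2  2  1  1  1  1  2  2
 *	ppref:       -3  2 -2  4  x  x -3  2
 *
 * Had the count in [2, 6) dropped to zero, amap_wiperange() would also
 * have been called on that range.
 */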
1259
1260 /*
1261 * amap_wiperange: wipe out a range of an amap.
1262 * Note: different from amap_wipeout because the amap is kept intact.
1263 *
1264 * => Both map and amap must be locked by caller.
1265 */
1266 void
1267 amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
1268 {
1269 u_int lcv, stop, slotend;
1270 bool byanon;
1271
1272 KASSERT(rw_write_held(amap->am_lock));
1273
1274 /*
1275 * We can either traverse the amap by am_anon or by am_slots.
1276 * Determine which way is less expensive.
1277 */
1278
1279 if (slots < amap->am_nused) {
1280 byanon = true;
1281 lcv = slotoff;
1282 stop = slotoff + slots;
1283 slotend = 0;
1284 } else {
1285 byanon = false;
1286 lcv = 0;
1287 stop = amap->am_nused;
1288 slotend = slotoff + slots;
1289 }
1290
1291 while (lcv < stop) {
1292 struct vm_anon *anon;
1293 u_int curslot, ptr, last;
1294
1295 if (byanon) {
1296 curslot = lcv++; /* lcv advances here */
1297 if (amap->am_anon[curslot] == NULL)
1298 continue;
1299 } else {
1300 curslot = amap->am_slots[lcv];
1301 if (curslot < slotoff || curslot >= slotend) {
1302 lcv++; /* lcv advances here */
1303 continue;
1304 }
1305 stop--; /* drop stop, since anon will be removed */
1306 }
1307 anon = amap->am_anon[curslot];
1308 KASSERT(anon->an_lock == amap->am_lock);
1309
1310 /*
1311 * Remove anon from the amap.
1312 */
1313
1314 amap->am_anon[curslot] = NULL;
1315 ptr = amap->am_bckptr[curslot];
1316 last = amap->am_nused - 1;
1317 if (ptr != last) {
1318 amap->am_slots[ptr] = amap->am_slots[last];
1319 amap->am_bckptr[amap->am_slots[ptr]] = ptr;
1320 }
1321 amap->am_nused--;
1322
1323 /*
1324 * Drop its reference count.
1325 */
1326
1327 KASSERT(anon->an_lock == amap->am_lock);
1328 if (--anon->an_ref == 0) {
1329 uvm_anfree(anon);
1330 }
1331 }
1332 }
1333
1334 #endif
1335
1336 #if defined(VMSWAP)
1337
1338 /*
1339 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
1340 *
1341 * => called with swap_syscall_lock held.
1342 * => note that we don't always traverse all anons.
1343 * eg. amaps being wiped out, released anons.
1344 * => return true if failed.
1345 */
1346
1347 bool
1348 amap_swap_off(int startslot, int endslot)
1349 {
1350 struct vm_amap *am;
1351 struct vm_amap *am_next;
1352 struct vm_amap marker_prev;
1353 struct vm_amap marker_next;
1354 bool rv = false;
1355
1356 #if defined(DIAGNOSTIC)
1357 memset(&marker_prev, 0, sizeof(marker_prev));
1358 memset(&marker_next, 0, sizeof(marker_next));
1359 #endif /* defined(DIAGNOSTIC) */
1360
1361 mutex_enter(&amap_list_lock);
1362 for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
1363 int i;
1364
1365 LIST_INSERT_BEFORE(am, &marker_prev, am_list);
1366 LIST_INSERT_AFTER(am, &marker_next, am_list);
1367
1368 /* amap_list_lock prevents the lock pointer from changing. */
1369 if (!amap_lock_try(am, RW_WRITER)) {
1370 (void)kpause("amapswpo", false, 1, &amap_list_lock);
1371 am_next = LIST_NEXT(&marker_prev, am_list);
1372 if (am_next == &marker_next) {
1373 am_next = LIST_NEXT(am_next, am_list);
1374 } else {
1375 KASSERT(LIST_NEXT(am_next, am_list) ==
1376 &marker_next);
1377 }
1378 LIST_REMOVE(&marker_prev, am_list);
1379 LIST_REMOVE(&marker_next, am_list);
1380 continue;
1381 }
1382
1383 mutex_exit(&amap_list_lock);
1384
1385 /* If am_nused == 0, the amap could be free - careful. */
1386 for (i = 0; i < am->am_nused; i++) {
1387 int slot;
1388 int swslot;
1389 struct vm_anon *anon;
1390
1391 slot = am->am_slots[i];
1392 anon = am->am_anon[slot];
1393 KASSERT(anon->an_lock == am->am_lock);
1394
1395 swslot = anon->an_swslot;
1396 if (swslot < startslot || endslot <= swslot) {
1397 continue;
1398 }
1399
1400 am->am_flags |= AMAP_SWAPOFF;
1401
1402 rv = uvm_anon_pagein(am, anon);
1403 amap_lock(am, RW_WRITER);
1404
1405 am->am_flags &= ~AMAP_SWAPOFF;
1406 if (amap_refs(am) == 0) {
1407 amap_wipeout(am);
1408 am = NULL;
1409 break;
1410 }
1411 if (rv) {
1412 break;
1413 }
1414 i = 0;
1415 }
1416
1417 if (am) {
1418 amap_unlock(am);
1419 }
1420
1421 mutex_enter(&amap_list_lock);
1422 KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
1423 LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
1424 &marker_next);
1425 am_next = LIST_NEXT(&marker_next, am_list);
1426 LIST_REMOVE(&marker_prev, am_list);
1427 LIST_REMOVE(&marker_next, am_list);
1428 }
1429 mutex_exit(&amap_list_lock);
1430
1431 return rv;
1432 }
1433
1434 #endif /* defined(VMSWAP) */
1435
1436 /*
1437 * amap_lookup: look up a page in an amap.
1438 *
1439 * => amap should be locked by caller.
1440 */
1441 struct vm_anon *
1442 amap_lookup(struct vm_aref *aref, vaddr_t offset)
1443 {
1444 struct vm_amap *amap = aref->ar_amap;
1445 struct vm_anon *an;
1446 u_int slot;
1447
1448 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1449 KASSERT(rw_lock_held(amap->am_lock));
1450
1451 AMAP_B2SLOT(slot, offset);
1452 slot += aref->ar_pageoff;
1453 an = amap->am_anon[slot];
1454
1455 UVMHIST_LOG(maphist,
1456 "<- done (amap=%#jx, offset=%#jx, result=%#jx)",
1457 (uintptr_t)amap, offset, (uintptr_t)an, 0);
1458
1459 KASSERT(slot < amap->am_nslot);
1460 KASSERT(an == NULL || an->an_ref != 0);
1461 KASSERT(an == NULL || an->an_lock == amap->am_lock);
1462 return an;
1463 }
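
/*
 * Hedged usage sketch (not from a real caller; "entry" and "va" are
 * hypothetical): lookups are keyed by the byte offset from the start of
 * the map entry, which AMAP_B2SLOT() converts to a slot index before
 * ar_pageoff is added.
 *
 *	struct vm_anon *an;
 *
 *	amap_lock(entry->aref.ar_amap, RW_READER);
 *	an = amap_lookup(&entry->aref, va - entry->start);
 *	...
 *	amap_unlock(entry->aref.ar_amap);
 *
 * A NULL result simply means that no anon occupies that slot yet.
 */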
1464
1465 /*
1466 * amap_lookups: look up a range of pages in an amap.
1467 *
1468 * => amap should be locked by caller.
1469 */
1470 void
1471 amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
1472 int npages)
1473 {
1474 struct vm_amap *amap = aref->ar_amap;
1475 u_int slot;
1476
1477 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1478 KASSERT(rw_lock_held(amap->am_lock));
1479
1480 AMAP_B2SLOT(slot, offset);
1481 slot += aref->ar_pageoff;
1482
1483 UVMHIST_LOG(maphist, " slot=%u, npages=%d, nslot=%d",
1484 slot, npages, amap->am_nslot, 0);
1485
1486 KASSERT((slot + (npages - 1)) < amap->am_nslot);
1487 memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
1488
1489 #if defined(DIAGNOSTIC)
1490 for (int i = 0; i < npages; i++) {
1491 struct vm_anon * const an = anons[i];
1492 if (an == NULL) {
1493 continue;
1494 }
1495 KASSERT(an->an_ref != 0);
1496 KASSERT(an->an_lock == amap->am_lock);
1497 }
1498 #endif
1499 UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
1500 }
1501
1502 /*
1503 * amap_add: add (or replace) a page to an amap.
1504 *
1505 * => amap should be locked by caller.
1506 * => anon must have the lock associated with this amap.
1507 */
1508 void
1509 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
1510 bool replace)
1511 {
1512 struct vm_amap *amap = aref->ar_amap;
1513 u_int slot;
1514
1515 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1516 KASSERT(rw_write_held(amap->am_lock));
1517 KASSERT(anon->an_lock == amap->am_lock);
1518
1519 AMAP_B2SLOT(slot, offset);
1520 slot += aref->ar_pageoff;
1521 KASSERT(slot < amap->am_nslot);
1522
1523 if (replace) {
1524 struct vm_anon *oanon = amap->am_anon[slot];
1525
1526 KASSERT(oanon != NULL);
1527 if (oanon->an_page && (amap->am_flags & AMAP_SHARED) != 0) {
1528 pmap_page_protect(oanon->an_page, VM_PROT_NONE);
1529 /*
1530 * XXX: suppose page is supposed to be wired somewhere?
1531 */
1532 }
1533 } else {
1534 KASSERT(amap->am_anon[slot] == NULL);
1535 KASSERT(amap->am_nused < amap->am_maxslot);
1536 amap->am_bckptr[slot] = amap->am_nused;
1537 amap->am_slots[amap->am_nused] = slot;
1538 amap->am_nused++;
1539 }
1540 amap->am_anon[slot] = anon;
1541 UVMHIST_LOG(maphist,
1542 "<- done (amap=%#jx, offset=%#x, anon=%#jx, rep=%d)",
1543 (uintptr_t)amap, offset, (uintptr_t)anon, replace);
1544 }
1545
1546 /*
1547 * amap_unadd: remove a page from an amap.
1548 *
1549 * => amap should be locked by caller.
1550 */
1551 void
1552 amap_unadd(struct vm_aref *aref, vaddr_t offset)
1553 {
1554 struct vm_amap *amap = aref->ar_amap;
1555 u_int slot, ptr, last;
1556
1557 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1558 KASSERT(rw_write_held(amap->am_lock));
1559
1560 AMAP_B2SLOT(slot, offset);
1561 slot += aref->ar_pageoff;
1562 KASSERT(slot < amap->am_nslot);
1563 KASSERT(amap->am_anon[slot] != NULL);
1564 KASSERT(amap->am_anon[slot]->an_lock == amap->am_lock);
1565
1566 amap->am_anon[slot] = NULL;
1567 ptr = amap->am_bckptr[slot];
1568
1569 last = amap->am_nused - 1;
1570 if (ptr != last) {
1571 /* Move the last entry to keep the slots contiguous. */
1572 amap->am_slots[ptr] = amap->am_slots[last];
1573 amap->am_bckptr[amap->am_slots[ptr]] = ptr;
1574 }
1575 amap->am_nused--;
1576 UVMHIST_LOG(maphist, "<- done (amap=%#jx, slot=%#jx)",
1577 (uintptr_t)amap, slot,0, 0);
1578 }
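
/*
 * The bookkeeping used by amap_add() and amap_unadd() above maintains the
 * invariant (stated here for illustration; it is not spelled out in the
 * original source) that for every 0 <= i < am_nused:
 *
 *	am_bckptr[am_slots[i]] == i   and   am_anon[am_slots[i]] != NULL
 *
 * so am_slots[] is a dense list of the occupied slots, and removal is O(1)
 * by swapping with the last entry:
 *
 *	ptr = am_bckptr[slot];
 *	last = am_nused - 1;
 *	if (ptr != last) {
 *		am_slots[ptr] = am_slots[last];	// move last entry down
 *		am_bckptr[am_slots[ptr]] = ptr;	// repair its back pointer
 *	}
 *	am_nused--;
 *
 * This is why amap_wipeout() and amap_swap_off() can walk am_slots[] and
 * touch only slots that actually contain an anon.
 */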
1579
1580 /*
1581 * amap_adjref_anons: adjust the reference count(s) on amap and its anons.
1582 */
1583 static void
1584 amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len,
1585 int refv, bool all)
1586 {
1587
1588 #ifdef UVM_AMAP_PPREF
1589 KASSERT(rw_write_held(amap->am_lock));
1590
1591 /*
1592 * We must establish the ppref array before changing am_ref
1593 * so that the ppref values match the current amap refcount.
1594 */
1595
1596 if (amap->am_ppref == NULL) {
1597 amap_pp_establish(amap, offset);
1598 }
1599 #endif
1600
1601 amap->am_ref += refv;
1602
1603 #ifdef UVM_AMAP_PPREF
1604 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1605 amap_pp_adjref(amap, offset, len, refv);
1606 }
1607 #endif
1608 amap_unlock(amap);
1609 }
1610
1611 /*
1612 * amap_ref: gain a reference to an amap.
1613 *
1614 * => amap must not be locked (we will lock).
1615 * => "offset" and "len" are in units of pages.
1616 * => Called at fork time to gain the child's reference.
1617 */
1618 void
1619 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
1620 {
1621 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1622
1623 amap_lock(amap, RW_WRITER);
1624 if (flags & AMAP_SHARED) {
1625 amap->am_flags |= AMAP_SHARED;
1626 }
1627 amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0);
1628
1629 UVMHIST_LOG(maphist,"<- done! amap=%#jx", (uintptr_t)amap, 0, 0, 0);
1630 }
1631
1632 /*
1633 * amap_unref: remove a reference to an amap.
1634 *
1635 * => All pmap-level references to this amap must be already removed.
1636 * => Called from uvm_unmap_detach(); entry is already removed from the map.
1637 * => We will lock amap, so it must be unlocked.
1638 */
1639 void
1640 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
1641 {
1642 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1643
1644 amap_lock(amap, RW_WRITER);
1645
1646 UVMHIST_LOG(maphist," amap=%#jx refs=%d, nused=%d",
1647 (uintptr_t)amap, amap->am_ref, amap->am_nused, 0);
1648 KASSERT(amap->am_ref > 0);
1649
1650 if (amap->am_ref == 1) {
1651
1652 /*
1653 		 * If this is the last reference - wipe out and destroy the amap.
1654 */
1655 amap->am_ref--;
1656 amap_wipeout(amap);
1657 UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
1658 return;
1659 }
1660
1661 /*
1662 * Otherwise, drop the reference count(s) on anons.
1663 */
1664
1665 if (amap->am_ref == 2 && (amap->am_flags & AMAP_SHARED) != 0) {
1666 amap->am_flags &= ~AMAP_SHARED;
1667 }
1668 amap_adjref_anons(amap, offset, len, -1, all);
1669
1670 UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1671 }