FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_aobj.c
/*	$OpenBSD: uvm_aobj.c,v 1.107 2022/08/29 02:58:13 jsg Exp $	*/
/*	$NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $	*/

/*
 * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
 * Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
 */
/*
 * uvm_aobj.c: anonymous memory uvm_object pager
 *
 * author: Chuck Silvers <chuq@chuq.com>
 * started: Jan-1998
 *
 * - design mostly from Chuck Cranor
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/stdint.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
/*
 * An anonymous UVM object (aobj) manages anonymous memory. In addition to
 * keeping the list of resident pages, it may also keep a list of allocated
 * swap blocks. Depending on the size of the object, this list is either
 * stored in an array (small objects) or in a hash table (large objects).
 */

/*
 * Note: for hash tables, we break the address space of the aobj into blocks
 * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two.
 */
#define	UAO_SWHASH_CLUSTER_SHIFT	4
#define	UAO_SWHASH_CLUSTER_SIZE		(1 << UAO_SWHASH_CLUSTER_SHIFT)

/* Get the "tag" for this page index. */
#define	UAO_SWHASH_ELT_TAG(idx)		((idx) >> UAO_SWHASH_CLUSTER_SHIFT)
#define	UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \
    ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1))

/* Given an ELT and a page index, find the swap slot. */
#define	UAO_SWHASH_ELT_PAGESLOT(elt, idx) \
    ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)])

/* Given an ELT, return its pageidx base. */
#define	UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \
    ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT)

/* The hash function. */
#define	UAO_SWHASH_HASH(aobj, idx) \
    (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \
    & (aobj)->u_swhashmask)])

/*
 * The threshold which determines whether we will use an array or a
 * hash table to store the list of allocated swap blocks.
 */
#define	UAO_SWHASH_THRESHOLD		(UAO_SWHASH_CLUSTER_SIZE * 4)
#define	UAO_USES_SWHASH(aobj) \
    ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)

/* The number of buckets in a hash, with an upper bound. */
#define	UAO_SWHASH_MAXBUCKETS		256
#define	UAO_SWHASH_BUCKETS(pages) \
    (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))

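/*
 * Worked example (editor's sketch, not part of the original source):
 * with UAO_SWHASH_CLUSTER_SHIFT == 4, page index 0x123 belongs to the
 * 16-page cluster whose tag is 0x123 >> 4 == 0x12 and occupies slot
 * index 0x123 & 0xf == 0x3 within that cluster's entry:
 *
 *	struct uao_swhash_elt *elt;
 *	int slot;
 *
 *	elt = uao_find_swhash_elt(aobj, 0x123, FALSE);
 *	if (elt != NULL)
 *		slot = UAO_SWHASH_ELT_PAGESLOT(elt, 0x123); (elt->slots[3])
 *
 * UAO_SWHASH_HASH() picks the bucket by masking the cluster number
 * (0x12) with u_swhashmask, so all 16 pages of a cluster share a
 * single uao_swhash_elt.
 */
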
/*
 * uao_swhash_elt: when a hash table is being used, this structure defines
 * the format of an entry in the bucket list.
 */
struct uao_swhash_elt {
	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
	voff_t tag;				/* our 'tag' */
	int count;				/* our number of active slots */
	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots */
};

/*
 * uao_swhash: the swap hash table structure
 */
LIST_HEAD(uao_swhash, uao_swhash_elt);

/*
 * uao_swhash_elt_pool: pool of uao_swhash_elt structures
 */
struct pool uao_swhash_elt_pool;

/*
 * uvm_aobj: the actual anon-backed uvm_object
 *
 * => the uvm_object is at the top of the structure, this allows
 *    (struct uvm_aobj *) == (struct uvm_object *)
 * => only one of u_swslots and u_swhash is used in any given aobj
 */
struct uvm_aobj {
	struct uvm_object u_obj; /* has: pgops, memt, #pages, #refs */
	int u_pages;		 /* number of pages in entire object */
	int u_flags;		 /* the flags (see uvm_aobj.h) */
	/*
	 * Either an array or hashtable (array of bucket heads) of
	 * offset -> swapslot mappings for the aobj.
	 */
#define u_swslots	u_swap.slot_array
#define u_swhash	u_swap.slot_hash
	union swslots {
		int			*slot_array;
		struct uao_swhash	*slot_hash;
	} u_swap;
	u_long u_swhashmask;		/* mask for hashtable */
	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
};

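/*
 * Sizing example (editor's note, derived from the constants above):
 * the threshold is UAO_SWHASH_CLUSTER_SIZE * 4 == 64 pages.  An aobj
 * of 64 pages or fewer stores its swap slots in the flat array
 *
 *	aobj->u_swslots[pageidx]
 *
 * while a 65-page aobj switches to the hash, with
 * UAO_SWHASH_BUCKETS(65) == min(65 >> 4, 256) == 4 buckets.  Only one
 * of the two union members is ever valid for a given object.
 */
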
struct pool uvm_aobj_pool;

static struct uao_swhash_elt	*uao_find_swhash_elt(struct uvm_aobj *, int,
				     boolean_t);
static int	uao_find_swslot(struct uvm_object *, int);
static boolean_t	uao_flush(struct uvm_object *, voff_t,
			    voff_t, int);
static void	uao_free(struct uvm_aobj *);
static int	uao_get(struct uvm_object *, voff_t,
		    vm_page_t *, int *, int, vm_prot_t,
		    int, int);
static boolean_t	uao_pagein(struct uvm_aobj *, int, int);
static boolean_t	uao_pagein_page(struct uvm_aobj *, int);

void	uao_dropswap_range(struct uvm_object *, voff_t, voff_t);
void	uao_shrink_flush(struct uvm_object *, int, int);
int	uao_shrink_hash(struct uvm_object *, int);
int	uao_shrink_array(struct uvm_object *, int);
int	uao_shrink_convert(struct uvm_object *, int);

int	uao_grow_hash(struct uvm_object *, int);
int	uao_grow_array(struct uvm_object *, int);
int	uao_grow_convert(struct uvm_object *, int);

/*
 * aobj_pager
 *
 * note that some functions (e.g. put) are handled elsewhere
 */
const struct uvm_pagerops aobj_pager = {
	.pgo_reference = uao_reference,
	.pgo_detach = uao_detach,
	.pgo_flush = uao_flush,
	.pgo_get = uao_get,
};

/*
 * uao_list: global list of active aobjs, locked by uao_list_lock
 *
 * Lock ordering: generally the locking order is object lock, then list lock.
 * In the case of swap off we have to iterate over the list, and thus the
 * ordering is reversed. In that case we must use trylocking to prevent
 * deadlock.
 */
static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list);
static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);


/*
 * functions
 */
/*
 * hash table/array related functions
 */
/*
 * uao_find_swhash_elt: find (or create) a hash table entry for a page
 * offset.
 */
static struct uao_swhash_elt *
uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
{
	struct uao_swhash *swhash;
	struct uao_swhash_elt *elt;
	voff_t page_tag;

	swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
	page_tag = UAO_SWHASH_ELT_TAG(pageidx);	 /* tag to search for */

	/*
	 * now search the bucket for the requested tag
	 */
	LIST_FOREACH(elt, swhash, list) {
		if (elt->tag == page_tag)
			return elt;
	}

	if (!create)
		return NULL;

	/*
	 * allocate a new entry for the bucket and init/insert it in
	 */
	elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT | PR_ZERO);
	/*
	 * XXX We cannot sleep here as the hash table might disappear
	 * from under our feet.  And we run the risk of deadlocking
	 * the pagedaemon.  In fact this code will only be called by
	 * the pagedaemon and allocation will only fail if we
	 * exhausted the pagedaemon reserve.  In that case we're
	 * doomed anyway, so panic.
	 */
	if (elt == NULL)
		panic("%s: can't allocate entry", __func__);
	LIST_INSERT_HEAD(swhash, elt, list);
	elt->tag = page_tag;

	return elt;
}

/*
 * uao_find_swslot: find the swap slot number for an aobj/pageidx
 */
static inline int
uao_find_swslot(struct uvm_object *uobj, int pageidx)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we never return a slot
	 */
	if (aobj->u_flags & UAO_FLAG_NOSWAP)
		return 0;

	/*
	 * if hashing, look in hash table.
	 */
	if (UAO_USES_SWHASH(aobj)) {
		struct uao_swhash_elt *elt =
		    uao_find_swhash_elt(aobj, pageidx, FALSE);

		if (elt)
			return UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
		else
			return 0;
	}

	/*
	 * otherwise, look in the array
	 */
	return aobj->u_swslots[pageidx];
}

/*
 * uao_set_swslot: set the swap slot for a page in an aobj.
 *
 * => setting a slot to zero frees the slot
 * => object must be locked by caller
 * => we return the old slot number, or -1 if we failed to allocate
 *    memory to record the new slot number
 */
int
uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int oldslot;

	KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we can't set a slot
	 */
	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
		if (slot == 0)
			return 0;		/* a clear is ok */

		/* but a set is not */
		printf("uao_set_swslot: uobj = %p\n", uobj);
		panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
	}

	/*
	 * are we using a hash table?  if so, add it in the hash.
	 */
	if (UAO_USES_SWHASH(aobj)) {
		/*
		 * Avoid allocating an entry just to free it again if
		 * the page had no swap slot in the first place, and
		 * we are freeing.
		 */
		struct uao_swhash_elt *elt =
		    uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE);
		if (elt == NULL) {
			KASSERT(slot == 0);
			return 0;
		}

		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;

		/*
		 * now adjust the elt's reference counter and free it if we've
		 * dropped it to zero.
		 */
		if (slot) {
			if (oldslot == 0)
				elt->count++;
		} else {
			if (oldslot)
				elt->count--;

			if (elt->count == 0) {
				LIST_REMOVE(elt, list);
				pool_put(&uao_swhash_elt_pool, elt);
			}
		}
	} else {
		/* we are using an array */
		oldslot = aobj->u_swslots[pageidx];
		aobj->u_swslots[pageidx] = slot;
	}
	return oldslot;
}
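
/*
 * Usage sketch (editor's addition, not in the original source): the
 * swap pager records a freshly allocated slot for a page, and later
 * releases the bookkeeping by storing zero; uao_dropswap() below is
 * the canonical wrapper for the latter:
 *
 *	oldslot = uao_set_swslot(uobj, pageidx, newslot);
 *	...
 *	oldslot = uao_set_swslot(uobj, pageidx, 0);
 *	if (oldslot)
 *		uvm_swap_free(oldslot, 1);
 */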
/*
 * end of hash/array functions
 */

/*
 * uao_free: free all resources held by an aobj, and then free the aobj
 *
 * => the aobj should be dead
 */
static void
uao_free(struct uvm_aobj *aobj)
{
	struct uvm_object *uobj = &aobj->u_obj;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));
	uao_dropswap_range(uobj, 0, 0);
	rw_exit(uobj->vmobjlock);

	if (UAO_USES_SWHASH(aobj)) {
		/*
		 * free the hash table itself.
		 */
		hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
	} else {
		free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
	}

	/*
	 * finally free the aobj itself
	 */
	uvm_obj_destroy(uobj);
	pool_put(&uvm_aobj_pool, aobj);
}

/*
 * pager functions
 */

#ifdef TMPFS
/*
 * Shrink an aobj to a given number of pages. The procedure is always the
 * same: assess the necessity of data structure conversion (hash to array),
 * secure resources, flush pages and drop swap slots.
 */

void
uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg)
{
	KASSERT(startpg < endpg);
	KASSERT(uobj->uo_refs == 1);
	uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT,
	    (voff_t)endpg << PAGE_SHIFT, PGO_FREE);
	uao_dropswap_range(uobj, startpg, endpg);
}

int
uao_shrink_hash(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash *new_swhash;
	struct uao_swhash_elt *elt;
	unsigned long new_hashmask;
	int i;

	KASSERT(UAO_USES_SWHASH(aobj));

	/*
	 * If the size of the hash table doesn't change, all we need to do is
	 * to adjust the page count.
	 */
	if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
		uao_shrink_flush(uobj, pages, aobj->u_pages);
		aobj->u_pages = pages;
		return 0;
	}

	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL, &new_hashmask);
	if (new_swhash == NULL)
		return ENOMEM;

	uao_shrink_flush(uobj, pages, aobj->u_pages);

	/*
	 * Even though the hash table size is changing, the hash of the buckets
	 * we are interested in copying should not change.
	 */
	for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
		while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
			elt = LIST_FIRST(&aobj->u_swhash[i]);
			LIST_REMOVE(elt, list);
			LIST_INSERT_HEAD(&new_swhash[i], elt, list);
		}
	}

	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);

	aobj->u_swhash = new_swhash;
	aobj->u_pages = pages;
	aobj->u_swhashmask = new_hashmask;

	return 0;
}

int
uao_shrink_convert(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash_elt *elt;
	int i, *new_swslots;

	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (new_swslots == NULL)
		return ENOMEM;

	uao_shrink_flush(uobj, pages, aobj->u_pages);

	/* Convert swap slots from hash to array. */
	for (i = 0; i < pages; i++) {
		elt = uao_find_swhash_elt(aobj, i, FALSE);
		if (elt != NULL) {
			new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i);
			if (new_swslots[i] != 0)
				elt->count--;
			if (elt->count == 0) {
				LIST_REMOVE(elt, list);
				pool_put(&uao_swhash_elt_pool, elt);
			}
		}
	}

	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);

	aobj->u_swslots = new_swslots;
	aobj->u_pages = pages;

	return 0;
}

int
uao_shrink_array(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int i, *new_swslots;

	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (new_swslots == NULL)
		return ENOMEM;

	uao_shrink_flush(uobj, pages, aobj->u_pages);

	for (i = 0; i < pages; i++)
		new_swslots[i] = aobj->u_swslots[i];

	free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));

	aobj->u_swslots = new_swslots;
	aobj->u_pages = pages;

	return 0;
}

int
uao_shrink(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	KASSERT(pages < aobj->u_pages);

	/*
	 * Distinguish between three possible cases:
	 * 1. aobj uses hash and must be converted to array.
	 * 2. aobj uses array and array size needs to be adjusted.
	 * 3. aobj uses hash and hash size needs to be adjusted.
	 */
	if (pages > UAO_SWHASH_THRESHOLD)
		return uao_shrink_hash(uobj, pages);	/* case 3 */
	else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
		return uao_shrink_convert(uobj, pages);	/* case 1 */
	else
		return uao_shrink_array(uobj, pages);	/* case 2 */
}
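
/*
 * Example of the dispatch above (editor's note): with the default
 * constants, UAO_SWHASH_THRESHOLD is 64 pages.  Shrinking a 100-page
 * aobj to 80 pages stays in the hash (case 3); shrinking it to 32
 * pages converts hash to array (case 1); shrinking a 48-page aobj to
 * 32 pages just reallocates the array (case 2).
 */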

/*
 * Grow an aobj to a given number of pages. Right now we only adjust the swap
 * slots. We could additionally handle page allocation directly, so that they
 * don't happen through uvm_fault(). That would allow us to use another
 * mechanism for the swap slots other than malloc(). The caller of these
 * functions must therefore not allow faults to happen in case of a growth
 * error.
 */
int
uao_grow_array(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int i, *new_swslots;

	KASSERT(aobj->u_pages <= UAO_SWHASH_THRESHOLD);

	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (new_swslots == NULL)
		return ENOMEM;

	for (i = 0; i < aobj->u_pages; i++)
		new_swslots[i] = aobj->u_swslots[i];

	free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));

	aobj->u_swslots = new_swslots;
	aobj->u_pages = pages;

	return 0;
}

int
uao_grow_hash(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash *new_swhash;
	struct uao_swhash_elt *elt;
	unsigned long new_hashmask;
	int i;

	KASSERT(pages > UAO_SWHASH_THRESHOLD);

	/*
	 * If the size of the hash table doesn't change, all we need to do is
	 * to adjust the page count.
	 */
	if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
		aobj->u_pages = pages;
		return 0;
	}

	KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages));

	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL, &new_hashmask);
	if (new_swhash == NULL)
		return ENOMEM;

	for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
		while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
			elt = LIST_FIRST(&aobj->u_swhash[i]);
			LIST_REMOVE(elt, list);
			LIST_INSERT_HEAD(&new_swhash[i], elt, list);
		}
	}

	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);

	aobj->u_swhash = new_swhash;
	aobj->u_pages = pages;
	aobj->u_swhashmask = new_hashmask;

	return 0;
}

int
uao_grow_convert(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash *new_swhash;
	struct uao_swhash_elt *elt;
	unsigned long new_hashmask;
	int i, *old_swslots;

	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL, &new_hashmask);
	if (new_swhash == NULL)
		return ENOMEM;

	/* Set these now, so we can use uao_find_swhash_elt(). */
	old_swslots = aobj->u_swslots;
	aobj->u_swhash = new_swhash;
	aobj->u_swhashmask = new_hashmask;

	for (i = 0; i < aobj->u_pages; i++) {
		if (old_swslots[i] != 0) {
			elt = uao_find_swhash_elt(aobj, i, TRUE);
			elt->count++;
			UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i];
		}
	}

	free(old_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
	aobj->u_pages = pages;

	return 0;
}

int
uao_grow(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	KASSERT(pages > aobj->u_pages);

	/*
	 * Distinguish between three possible cases:
	 * 1. aobj uses hash and hash size needs to be adjusted.
	 * 2. aobj uses array and array size needs to be adjusted.
	 * 3. aobj uses array and must be converted to hash.
	 */
	if (pages <= UAO_SWHASH_THRESHOLD)
		return uao_grow_array(uobj, pages);	/* case 2 */
	else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
		return uao_grow_hash(uobj, pages);	/* case 1 */
	else
		return uao_grow_convert(uobj, pages);	/* case 3 */
}
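
/*
 * Usage sketch (editor's addition): a tmpfs-style caller growing a
 * file's backing aobj must check for failure before exposing the new
 * size, since a fault on the grown range would index past the old
 * swap-slot bookkeeping:
 *
 *	error = uao_grow(uobj, newpages);
 *	if (error)
 *		return error;	(old size remains in effect)
 */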
#endif /* TMPFS */

/*
 * uao_create: create an aobj of the given size and return its uvm_object.
 *
 * => for normal use, flags are zero or UAO_FLAG_CANFAIL.
 * => for the kernel object, the flags are:
 *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
 *	UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
 */
struct uvm_object *
uao_create(vsize_t size, int flags)
{
	static struct uvm_aobj kernel_object_store;
	static struct rwlock bootstrap_kernel_object_lock;
	static int kobj_alloced = 0;
	int pages = round_page(size) >> PAGE_SHIFT;
	struct uvm_aobj *aobj;
	int refs;

	/*
	 * Allocate a new aobj, unless kernel object is requested.
	 */
	if (flags & UAO_FLAG_KERNOBJ) {
		KASSERT(!kobj_alloced);
		aobj = &kernel_object_store;
		aobj->u_pages = pages;
		aobj->u_flags = UAO_FLAG_NOSWAP;
		refs = UVM_OBJ_KERN;
		kobj_alloced = UAO_FLAG_KERNOBJ;
	} else if (flags & UAO_FLAG_KERNSWAP) {
		KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
		aobj = &kernel_object_store;
		kobj_alloced = UAO_FLAG_KERNSWAP;
	} else {
		aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
		aobj->u_pages = pages;
		aobj->u_flags = 0;
		refs = 1;
	}

	/*
	 * allocate hash/array if necessary
	 */
	if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) {
		int mflags;

		if (flags)
			mflags = M_NOWAIT;
		else
			mflags = M_WAITOK;

		/* allocate hash table or array depending on object size */
		if (UAO_USES_SWHASH(aobj)) {
			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages),
			    M_UVMAOBJ, mflags, &aobj->u_swhashmask);
			if (aobj->u_swhash == NULL) {
				if (flags & UAO_FLAG_CANFAIL) {
					pool_put(&uvm_aobj_pool, aobj);
					return NULL;
				}
				panic("uao_create: hashinit swhash failed");
			}
		} else {
			aobj->u_swslots = mallocarray(pages, sizeof(int),
			    M_UVMAOBJ, mflags|M_ZERO);
			if (aobj->u_swslots == NULL) {
				if (flags & UAO_FLAG_CANFAIL) {
					pool_put(&uvm_aobj_pool, aobj);
					return NULL;
				}
				panic("uao_create: malloc swslots failed");
			}
		}

		if (flags & UAO_FLAG_KERNSWAP) {
			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
			return &aobj->u_obj;
			/* done! */
		}
	}

	/*
	 * Initialise UVM object.
	 */
	uvm_obj_init(&aobj->u_obj, &aobj_pager, refs);
	if (flags & UAO_FLAG_KERNOBJ) {
		/* Use a temporary static lock for kernel_object. */
		rw_init(&bootstrap_kernel_object_lock, "kobjlk");
		uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock);
	}

	/*
	 * now that aobj is ready, add it to the global list
	 */
	mtx_enter(&uao_list_lock);
	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
	mtx_leave(&uao_list_lock);

	return &aobj->u_obj;
}
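
/*
 * Usage sketch (editor's addition): a typical anonymous-memory
 * consumer allocates an aobj and maps it into a map; the reference
 * returned by uao_create() is donated to the mapping, and dropped
 * again with uao_detach() if the map entry cannot be set up.  The
 * uvm_map() arguments here are illustrative only:
 *
 *	struct uvm_object *uobj;
 *
 *	uobj = uao_create(size, UAO_FLAG_CANFAIL);
 *	if (uobj == NULL)
 *		return ENOMEM;
 *	if (uvm_map(map, &va, size, uobj, 0, 0, mapflags)) {
 *		uao_detach(uobj);
 *		return ENOMEM;
 *	}
 */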


/*
 * uao_init: set up aobj pager subsystem
 *
 * => called at boot time from uvm_pager_init()
 */
void
uao_init(void)
{
	/*
	 * NOTE: Pages for this pool must not come from a pageable
	 * kernel map!
	 */
	pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0,
	    IPL_NONE, PR_WAITOK, "uaoeltpl", NULL);
	pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0,
	    IPL_NONE, PR_WAITOK, "aobjpl", NULL);
}

/*
 * uao_reference: hold a reference to an anonymous UVM object.
 */
void
uao_reference(struct uvm_object *uobj)
{
	/* Kernel object is persistent. */
	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
		return;

	atomic_inc_int(&uobj->uo_refs);
}


/*
 * uao_detach: drop a reference to an anonymous UVM object.
 */
void
uao_detach(struct uvm_object *uobj)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct vm_page *pg;

	/*
	 * Detaching from kernel_object is a NOP.
	 */
	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
		return;

	/*
	 * Drop the reference.  If it was the last one, destroy the object.
	 */
	if (atomic_dec_int_nv(&uobj->uo_refs) > 0) {
		return;
	}

	/*
	 * Remove the aobj from the global list.
	 */
	mtx_enter(&uao_list_lock);
	LIST_REMOVE(aobj, u_list);
	mtx_leave(&uao_list_lock);

	/*
	 * Free all the pages left in the aobj.  For each page, when the
	 * page is no longer busy (and thus after any disk I/O that it is
	 * involved in is complete), release any swap resources and free
	 * the page itself.
	 */
	rw_enter(uobj->vmobjlock, RW_WRITE);
	while ((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) {
		pmap_page_protect(pg, PROT_NONE);
		if (pg->pg_flags & PG_BUSY) {
			uvm_pagewait(pg, uobj->vmobjlock, "uao_det");
			rw_enter(uobj->vmobjlock, RW_WRITE);
			continue;
		}
		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
		uvm_lock_pageq();
		uvm_pagefree(pg);
		uvm_unlock_pageq();
	}

	/*
	 * Finally, free the anonymous UVM object itself.
	 */
	uao_free(aobj);
}

/*
 * uao_flush: flush pages out of a uvm object
 *
 * => if PGO_CLEANIT is not set, then we will not block.
 * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
 *	for flushing.
 * => NOTE: we are allowed to lock the page queues, so the caller
 *	must not be holding the lock on them [e.g. pagedaemon had
 *	better not call us with the queues locked]
 * => we return TRUE unless we encountered some sort of I/O error
 *	XXXJRT currently never happens, as we never directly initiate
 *	XXXJRT I/O
 */
boolean_t
uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct vm_page *pg;
	voff_t curoff;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));

	if (flags & PGO_ALLPAGES) {
		start = 0;
		stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
	} else {
		start = trunc_page(start);
		stop = round_page(stop);
		if (stop > ((voff_t)aobj->u_pages << PAGE_SHIFT)) {
			printf("uao_flush: strange, got an out of range "
			    "flush (fixed)\n");
			stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
		}
	}

	/*
	 * Don't need to do any work here if we're not freeing
	 * or deactivating pages.
	 */
	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
		return TRUE;
	}

	curoff = start;
	for (;;) {
		if (curoff < stop) {
			pg = uvm_pagelookup(uobj, curoff);
			curoff += PAGE_SIZE;
			if (pg == NULL)
				continue;
		} else {
			break;
		}

		/* Make sure page is unbusy, else wait for it. */
		if (pg->pg_flags & PG_BUSY) {
			uvm_pagewait(pg, uobj->vmobjlock, "uaoflsh");
			rw_enter(uobj->vmobjlock, RW_WRITE);
			curoff -= PAGE_SIZE;
			continue;
		}

		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
		/*
		 * XXX In these first 3 cases, we always just
		 * XXX deactivate the page.  We may want to
		 * XXX handle the different cases more specifically
		 * XXX in the future.
		 */
		case PGO_CLEANIT|PGO_FREE:
			/* FALLTHROUGH */
		case PGO_CLEANIT|PGO_DEACTIVATE:
			/* FALLTHROUGH */
		case PGO_DEACTIVATE:
 deactivate_it:
			if (pg->wire_count != 0)
				continue;

			uvm_lock_pageq();
			pmap_page_protect(pg, PROT_NONE);
			uvm_pagedeactivate(pg);
			uvm_unlock_pageq();

			continue;
		case PGO_FREE:
			/*
			 * If there are multiple references to
			 * the object, just deactivate the page.
			 */
			if (uobj->uo_refs > 1)
				goto deactivate_it;

			/* XXX skip the page if it's wired */
			if (pg->wire_count != 0)
				continue;

			/*
			 * free the swap slot and the page.
			 */
			pmap_page_protect(pg, PROT_NONE);

			/*
			 * freeing swapslot here is not strictly necessary.
			 * however, leaving it here doesn't save much
			 * because we need to update swap accounting anyway.
			 */
			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
			uvm_lock_pageq();
			uvm_pagefree(pg);
			uvm_unlock_pageq();

			continue;
		default:
			panic("uao_flush: weird flags");
		}
	}

	return TRUE;
}
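
/*
 * Example (editor's addition): dropping the resident pages for a
 * page range, as the TMPFS shrink path above does, looks like:
 *
 *	rw_enter(uobj->vmobjlock, RW_WRITE);
 *	uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT,
 *	    (voff_t)endpg << PAGE_SHIFT, PGO_FREE);
 *	rw_exit(uobj->vmobjlock);
 *
 * With PGO_DEACTIVATE instead of PGO_FREE the pages merely move to
 * the inactive queue, making them cheap to reclaim but still valid.
 */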

/*
 * uao_get: fetch me a page
 *
 * we have three cases:
 * 1: page is resident     -> just return the page.
 * 2: page is zero-fill    -> allocate a new page and zero it.
 * 3: page is swapped out  -> fetch the page from swap.
 *
 * case 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot.
 * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
 * then we will need to return VM_PAGER_UNLOCK.
 *
 * => flags: PGO_ALLPAGES: get all of the pages
 *	     PGO_LOCKED: fault data structures are locked
 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
 * => NOTE: caller must check for released pages!!
 */
static int
uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
    int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	voff_t current_offset;
	vm_page_t ptmp;
	int lcv, gotpages, maxpages, swslot, rv, pageidx;
	boolean_t done;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));

	/*
	 * get number of pages
	 */
	maxpages = *npagesp;

	if (flags & PGO_LOCKED) {
		/*
		 * step 1a: get pages that are already resident.  only do
		 * this if the data structures are locked (i.e. the first
		 * time through).
		 */

		done = TRUE;	/* be optimistic */
		gotpages = 0;	/* # of pages we got so far */

		for (lcv = 0, current_offset = offset ; lcv < maxpages ;
		    lcv++, current_offset += PAGE_SIZE) {
			/* do we care about this page?  if not, skip it */
			if (pps[lcv] == PGO_DONTCARE)
				continue;

			ptmp = uvm_pagelookup(uobj, current_offset);

			/*
			 * if page is new, attempt to allocate the page,
			 * zero-fill'd.
			 */
			if (ptmp == NULL && uao_find_swslot(uobj,
			    current_offset >> PAGE_SHIFT) == 0) {
				ptmp = uvm_pagealloc(uobj, current_offset,
				    NULL, UVM_PGA_ZERO);
				if (ptmp) {
					/* new page */
					atomic_clearbits_int(&ptmp->pg_flags,
					    PG_BUSY|PG_FAKE);
					atomic_setbits_int(&ptmp->pg_flags,
					    PQ_AOBJ);
					UVM_PAGE_OWN(ptmp, NULL);
				}
			}

			/*
			 * to be useful must get a non-busy page
			 */
			if (ptmp == NULL ||
			    (ptmp->pg_flags & PG_BUSY) != 0) {
				if (lcv == centeridx ||
				    (flags & PGO_ALLPAGES) != 0)
					/* need to do a wait or I/O! */
					done = FALSE;
				continue;
			}

			/*
			 * useful page: plug it in our result array
			 */
			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(ptmp, "uao_get1");
			pps[lcv] = ptmp;
			gotpages++;
		}

		/*
		 * step 1b: now we've either done everything needed or we
		 * need to unlock and do some waiting or I/O.
		 */
		*npagesp = gotpages;
		if (done)
			/* bingo! */
			return VM_PAGER_OK;
		else
			/* EEK!  Need to unlock and I/O */
			return VM_PAGER_UNLOCK;
	}

	/*
	 * step 2: get non-resident or busy pages.
	 * data structures are unlocked.
	 */
	for (lcv = 0, current_offset = offset ; lcv < maxpages ;
	    lcv++, current_offset += PAGE_SIZE) {
		/*
		 * - skip over pages we've already gotten or don't want
		 * - skip over pages we don't _have_ to get
		 */
		if (pps[lcv] != NULL ||
		    (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
			continue;

		pageidx = current_offset >> PAGE_SHIFT;

		/*
		 * we have yet to locate the current page (pps[lcv]).  we
		 * first look for a page that is already at the current offset.
		 * if we find a page, we check to see if it is busy or
		 * released.  if that is the case, then we sleep on the page
		 * until it is no longer busy or released and repeat the lookup.
		 * if the page we found is neither busy nor released, then we
		 * busy it (so we own it) and plug it into pps[lcv].  this
		 * 'break's the following while loop and indicates we are
		 * ready to move on to the next page in the "lcv" loop above.
		 *
		 * if we exit the while loop with pps[lcv] still set to NULL,
		 * then it means that we allocated a new busy/fake/clean page
		 * ptmp in the object and we need to do I/O to fill in the data.
		 */

		/* top of "pps" while loop */
		while (pps[lcv] == NULL) {
			/* look for a resident page */
			ptmp = uvm_pagelookup(uobj, current_offset);

			/* not resident?  allocate one now (if we can) */
			if (ptmp == NULL) {

				ptmp = uvm_pagealloc(uobj, current_offset,
				    NULL, 0);

				/* out of RAM? */
				if (ptmp == NULL) {
					rw_exit(uobj->vmobjlock);
					uvm_wait("uao_getpage");
					rw_enter(uobj->vmobjlock, RW_WRITE);
					/* goto top of pps while loop */
					continue;
				}

				/*
				 * safe with PQ's unlocked: because we just
				 * alloc'd the page
				 */
				atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ);

				/*
				 * got new page ready for I/O.  break pps while
				 * loop.  pps[lcv] is still NULL.
				 */
				break;
			}

			/* page is there, see if we need to wait on it */
			if ((ptmp->pg_flags & PG_BUSY) != 0) {
				uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get");
				rw_enter(uobj->vmobjlock, RW_WRITE);
				continue;	/* goto top of pps while loop */
			}

			/*
			 * if we get here then the page is resident and
			 * unbusy.  we busy it now (so we own it).
			 */
			/* we own it, caller must un-busy */
			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(ptmp, "uao_get2");
			pps[lcv] = ptmp;
		}

		/*
		 * if we own the valid page at the correct offset, pps[lcv]
		 * will point to it.  nothing more to do except go to the
		 * next page.
		 */
		if (pps[lcv])
			continue;		/* next lcv */

		/*
		 * we have a "fake/busy/clean" page that we just allocated.
		 * do the needed "i/o", either reading from swap or zeroing.
		 */
		swslot = uao_find_swslot(uobj, pageidx);

		/* just zero the page if there's nothing in swap. */
		if (swslot == 0) {
			/* page hasn't existed before, just zero it. */
			uvm_pagezero(ptmp);
		} else {
			/*
			 * page in the swapped-out page.
			 * unlock object for i/o, relock when done.
			 */

			rw_exit(uobj->vmobjlock);
			rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
			rw_enter(uobj->vmobjlock, RW_WRITE);

			/*
			 * I/O done.  check for errors.
			 */
			if (rv != VM_PAGER_OK) {
				/*
				 * remove the swap slot from the aobj
				 * and mark the aobj as having no real slot.
				 * don't free the swap slot, thus preventing
				 * it from being used again.
				 */
				swslot = uao_set_swslot(&aobj->u_obj, pageidx,
				    SWSLOT_BAD);
				uvm_swap_markbad(swslot, 1);

				if (ptmp->pg_flags & PG_WANTED)
					wakeup(ptmp);
				atomic_clearbits_int(&ptmp->pg_flags,
				    PG_WANTED|PG_BUSY);
				UVM_PAGE_OWN(ptmp, NULL);
				uvm_lock_pageq();
				uvm_pagefree(ptmp);
				uvm_unlock_pageq();
				rw_exit(uobj->vmobjlock);

				return rv;
			}
		}

		/*
		 * we got the page!  clear the fake flag (indicates valid
		 * data now in page) and plug into our result array.  note
		 * that page is still busy.
		 *
		 * it is the caller's job to:
		 * => check if the page is released
		 * => unbusy the page
		 * => activate the page
		 */
		atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE);
		pmap_clear_modify(ptmp);		/* ... and clean */
		pps[lcv] = ptmp;

	}	/* lcv loop */

	rw_exit(uobj->vmobjlock);
	return VM_PAGER_OK;
}
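
/*
 * Caller's view (editor's sketch, not from the original source): the
 * fault path first tries the cheap locked lookup and falls back to
 * the blocking path on VM_PAGER_UNLOCK, mirroring the two steps
 * above:
 *
 *	npages = 1;
 *	rv = uao_get(uobj, off, &pg, &npages, 0, PROT_READ, advice,
 *	    PGO_LOCKED);
 *	if (rv == VM_PAGER_UNLOCK) {
 *		(re-take the object lock, then)
 *		rv = uao_get(uobj, off, &pg, &npages, 0, PROT_READ,
 *		    advice, 0);
 *	}
 *
 * On VM_PAGER_OK the returned page is busy and owned by the caller,
 * which must un-busy and activate it.
 */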

/*
 * uao_dropswap: release any swap resources from this aobj page.
 *
 * => aobj must be locked or have a reference count of 0.
 */
int
uao_dropswap(struct uvm_object *uobj, int pageidx)
{
	int slot;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	slot = uao_set_swslot(uobj, pageidx, 0);
	if (slot) {
		uvm_swap_free(slot, 1);
	}
	return slot;
}

/*
 * page in every page in every aobj that is paged-out to a range of swslots.
 *
 * => aobj must be locked and is returned locked.
 * => returns TRUE if pagein was aborted due to lack of memory.
 */
boolean_t
uao_swap_off(int startslot, int endslot)
{
	struct uvm_aobj *aobj;

	/*
	 * Walk the list of all anonymous UVM objects.  Grab the first.
	 */
	mtx_enter(&uao_list_lock);
	if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
		mtx_leave(&uao_list_lock);
		return FALSE;
	}
	uao_reference(&aobj->u_obj);

	do {
		struct uvm_aobj *nextaobj;
		boolean_t rv;

		/*
		 * Prefetch the next object and immediately hold a reference
		 * on it, so neither the current nor the next entry could
		 * disappear while we are iterating.
		 */
		if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
			uao_reference(&nextaobj->u_obj);
		}
		mtx_leave(&uao_list_lock);

		/*
		 * Page in all pages in the swap slot range.
		 */
		rw_enter(aobj->u_obj.vmobjlock, RW_WRITE);
		rv = uao_pagein(aobj, startslot, endslot);
		rw_exit(aobj->u_obj.vmobjlock);

		/* Drop the reference of the current object. */
		uao_detach(&aobj->u_obj);
		if (rv) {
			if (nextaobj) {
				uao_detach(&nextaobj->u_obj);
			}
			return rv;
		}

		aobj = nextaobj;
		mtx_enter(&uao_list_lock);
	} while (aobj);

	/*
	 * done with traversal, unlock the list
	 */
	mtx_leave(&uao_list_lock);
	return FALSE;
}

/*
 * page in any pages from aobj in the given range.
 *
 * => returns TRUE if pagein was aborted due to lack of memory.
 */
static boolean_t
uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
{
	boolean_t rv;

	if (UAO_USES_SWHASH(aobj)) {
		struct uao_swhash_elt *elt;
		int bucket;

restart:
		for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
			for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
			    elt != NULL;
			    elt = LIST_NEXT(elt, list)) {
				int i;

				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
					int slot = elt->slots[i];

					/*
					 * if the slot isn't in range, skip it.
					 */
					if (slot < startslot ||
					    slot >= endslot) {
						continue;
					}

					/*
					 * process the page, then start over
					 * on this object since the swhash
					 * elt may have been freed.
					 */
					rv = uao_pagein_page(aobj,
					    UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
					if (rv) {
						return rv;
					}
					goto restart;
				}
			}
		}
	} else {
		int i;

		for (i = 0; i < aobj->u_pages; i++) {
			int slot = aobj->u_swslots[i];

			/*
			 * if the slot isn't in range, skip it
			 */
			if (slot < startslot || slot >= endslot) {
				continue;
			}

			/*
			 * process the page.
			 */
			rv = uao_pagein_page(aobj, i);
			if (rv) {
				return rv;
			}
		}
	}

	return FALSE;
}

/*
 * uao_pagein_page: page in a single page from an anonymous UVM object.
 *
 * => Returns TRUE if pagein was aborted due to lack of memory.
 */
static boolean_t
uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
{
	struct uvm_object *uobj = &aobj->u_obj;
	struct vm_page *pg;
	int rv, slot, npages;

	pg = NULL;
	npages = 1;

	KASSERT(rw_write_held(uobj->vmobjlock));
	rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT,
	    &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);

	/*
	 * relock and finish up.
	 */
	rw_enter(uobj->vmobjlock, RW_WRITE);
	switch (rv) {
	case VM_PAGER_OK:
		break;

	case VM_PAGER_ERROR:
	case VM_PAGER_REFAULT:
		/*
		 * nothing more to do on errors.
		 * VM_PAGER_REFAULT can only mean that the anon was freed,
		 * so again there's nothing to do.
		 */
		return FALSE;
	}

	/*
	 * ok, we've got the page now.
	 * mark it as dirty, clear its swslot and un-busy it.
	 */
	slot = uao_set_swslot(&aobj->u_obj, pageidx, 0);
	uvm_swap_free(slot, 1);
	atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE);
	UVM_PAGE_OWN(pg, NULL);

	/*
	 * deactivate the page (to put it on a page queue).
	 */
	pmap_clear_reference(pg);
	uvm_lock_pageq();
	uvm_pagedeactivate(pg);
	uvm_unlock_pageq();

	return FALSE;
}

/*
 * uao_dropswap_range: drop swapslots in the range.
 *
 * => aobj must be locked and is returned locked.
 * => start is inclusive.  end is exclusive.
 */
void
uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int swpgonlydelta = 0;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));

	if (end == 0) {
		end = INT64_MAX;
	}

	if (UAO_USES_SWHASH(aobj)) {
		int i, hashbuckets = aobj->u_swhashmask + 1;
		voff_t taghi;
		voff_t taglo;

		taglo = UAO_SWHASH_ELT_TAG(start);
		taghi = UAO_SWHASH_ELT_TAG(end);

		for (i = 0; i < hashbuckets; i++) {
			struct uao_swhash_elt *elt, *next;

			for (elt = LIST_FIRST(&aobj->u_swhash[i]);
			    elt != NULL;
			    elt = next) {
				int startidx, endidx;
				int j;

				next = LIST_NEXT(elt, list);

				if (elt->tag < taglo || taghi < elt->tag) {
					continue;
				}

				if (elt->tag == taglo) {
					startidx =
					    UAO_SWHASH_ELT_PAGESLOT_IDX(start);
				} else {
					startidx = 0;
				}

				if (elt->tag == taghi) {
					endidx =
					    UAO_SWHASH_ELT_PAGESLOT_IDX(end);
				} else {
					endidx = UAO_SWHASH_CLUSTER_SIZE;
				}

				for (j = startidx; j < endidx; j++) {
					int slot = elt->slots[j];

					KASSERT(uvm_pagelookup(&aobj->u_obj,
					    (voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
					    + j) << PAGE_SHIFT) == NULL);

					if (slot > 0) {
						uvm_swap_free(slot, 1);
						swpgonlydelta++;
						KASSERT(elt->count > 0);
						elt->slots[j] = 0;
						elt->count--;
					}
				}

				if (elt->count == 0) {
					LIST_REMOVE(elt, list);
					pool_put(&uao_swhash_elt_pool, elt);
				}
			}
		}
	} else {
		int i;

		if (aobj->u_pages < end) {
			end = aobj->u_pages;
		}
		for (i = start; i < end; i++) {
			int slot = aobj->u_swslots[i];

			if (slot > 0) {
				uvm_swap_free(slot, 1);
				swpgonlydelta++;
			}
		}
	}

	/*
	 * adjust the counter of pages only in swap for all
	 * the swap slots we've freed.
	 */
	if (swpgonlydelta > 0) {
		KASSERT(uvmexp.swpgonly >= swpgonlydelta);
		atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
	}
}
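
/*
 * Usage note (editor's addition): passing end == 0 means "to the end
 * of the object" (end is widened to INT64_MAX above); uao_free()
 * relies on this to drop every slot before tearing the object down:
 *
 *	uao_dropswap_range(uobj, 0, 0);
 */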