FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_phys.c
1 /*-
2 * Copyright (c) 2002-2006 Rice University
3 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Alan L. Cox,
7 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
28 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD: releng/9.1/sys/vm/vm_phys.c 236924 2012-06-11 21:19:59Z kib $");
34
35 #include "opt_ddb.h"
36 #include "opt_vm.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/lock.h>
41 #include <sys/kernel.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/queue.h>
45 #include <sys/sbuf.h>
46 #include <sys/sysctl.h>
47 #include <sys/vmmeter.h>
48 #include <sys/vnode.h>
49
50 #include <ddb/ddb.h>
51
52 #include <vm/vm.h>
53 #include <vm/vm_param.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_phys.h>
58 #include <vm/vm_reserv.h>
59
/*
 * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
 * domain.  These extra lists are stored at the end of the regular
 * free lists starting with VM_NFREELIST.
 */
#define VM_RAW_NFREELIST	(VM_NFREELIST + VM_NDOMAIN - 1)

/*
 * One queue of free page blocks per buddy order, plus its length.
 */
struct vm_freelist {
	struct pglist pl;	/* queue of free blocks of one order */
	int lcnt;		/* number of blocks on "pl" */
};

/*
 * A contiguous range of physical memory managed by this allocator.
 */
struct vm_phys_seg {
	vm_paddr_t start;	/* first physical address (inclusive) */
	vm_paddr_t end;		/* last physical address (exclusive) */
	vm_page_t first_page;	/* vm_page for the page at "start" */
	int domain;		/* memory domain of this range */
	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};

/* Optional platform-supplied memory/domain affinity table; NULL if none. */
struct mem_affinity *mem_affinity;

/* All physical memory segments; the first vm_phys_nsegs entries are valid. */
static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];

static int vm_phys_nsegs;

/* Registration table for fictitious page ranges. */
#define VM_PHYS_FICTITIOUS_NSEGS	8
static struct vm_phys_fictitious_seg {
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
} vm_phys_fictitious_segs[VM_PHYS_FICTITIOUS_NSEGS];
static struct mtx vm_phys_fictitious_reg_mtx;
MALLOC_DEFINE(M_FICT_PAGES, "", "");

/* The buddy free queues: [free list][pool][order]. */
static struct vm_freelist
    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
/* Per-domain search order over the free queues; built by vm_phys_init(). */
static struct vm_freelist
(*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];

/* Number of free lists in use; may grow while segments are created. */
static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

/* Count of pages zeroed by vm_phys_zero_pages_idle(). */
static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#if VM_NDOMAIN > 1
static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
#endif

static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);
126
127 /*
128 * Outputs the state of the physical memory allocator, specifically,
129 * the amount of physical memory in each free list.
130 */
131 static int
132 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
133 {
134 struct sbuf sbuf;
135 struct vm_freelist *fl;
136 int error, flind, oind, pind;
137
138 error = sysctl_wire_old_buffer(req, 0);
139 if (error != 0)
140 return (error);
141 sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
142 for (flind = 0; flind < vm_nfreelists; flind++) {
143 sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
144 "\n ORDER (SIZE) | NUMBER"
145 "\n ", flind);
146 for (pind = 0; pind < VM_NFREEPOOL; pind++)
147 sbuf_printf(&sbuf, " | POOL %d", pind);
148 sbuf_printf(&sbuf, "\n-- ");
149 for (pind = 0; pind < VM_NFREEPOOL; pind++)
150 sbuf_printf(&sbuf, "-- -- ");
151 sbuf_printf(&sbuf, "--\n");
152 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
153 sbuf_printf(&sbuf, " %2d (%6dK)", oind,
154 1 << (PAGE_SHIFT - 10 + oind));
155 for (pind = 0; pind < VM_NFREEPOOL; pind++) {
156 fl = vm_phys_free_queues[flind][pind];
157 sbuf_printf(&sbuf, " | %6d", fl[oind].lcnt);
158 }
159 sbuf_printf(&sbuf, "\n");
160 }
161 }
162 error = sbuf_finish(&sbuf);
163 sbuf_delete(&sbuf);
164 return (error);
165 }
166
167 /*
168 * Outputs the set of physical memory segments.
169 */
170 static int
171 sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
172 {
173 struct sbuf sbuf;
174 struct vm_phys_seg *seg;
175 int error, segind;
176
177 error = sysctl_wire_old_buffer(req, 0);
178 if (error != 0)
179 return (error);
180 sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
181 for (segind = 0; segind < vm_phys_nsegs; segind++) {
182 sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
183 seg = &vm_phys_segs[segind];
184 sbuf_printf(&sbuf, "start: %#jx\n",
185 (uintmax_t)seg->start);
186 sbuf_printf(&sbuf, "end: %#jx\n",
187 (uintmax_t)seg->end);
188 sbuf_printf(&sbuf, "domain: %d\n", seg->domain);
189 sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
190 }
191 error = sbuf_finish(&sbuf);
192 sbuf_delete(&sbuf);
193 return (error);
194 }
195
196 #if VM_NDOMAIN > 1
197 /*
198 * Outputs the set of free list lookup lists.
199 */
200 static int
201 sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
202 {
203 struct sbuf sbuf;
204 int domain, error, flind, ndomains;
205
206 error = sysctl_wire_old_buffer(req, 0);
207 if (error != 0)
208 return (error);
209 sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
210 ndomains = vm_nfreelists - VM_NFREELIST + 1;
211 for (domain = 0; domain < ndomains; domain++) {
212 sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
213 for (flind = 0; flind < vm_nfreelists; flind++)
214 sbuf_printf(&sbuf, " [%d]:\t%p\n", flind,
215 vm_phys_lookup_lists[domain][flind]);
216 }
217 error = sbuf_finish(&sbuf);
218 sbuf_delete(&sbuf);
219 return (error);
220 }
221 #endif
222
/*
 * Create a physical memory segment covering [start, end) on free list
 * "flind" for memory domain "domain".
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
	int segind;

	/*
	 * In a sparse layout each segment's vm_page structures follow
	 * those of the segments created before it, so sum the existing
	 * segments' page counts to find this segment's offset into
	 * vm_page_array.
	 */
	pages = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		pages += atop(seg->end - seg->start);
	}
#endif
	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
	seg->first_page = &vm_page_array[pages];
#else
	/* Dense layout: the vm_page is directly computable from "start". */
	seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
#if VM_NDOMAIN > 1
	/*
	 * Redirect the default free list of a non-zero domain to one of
	 * the extra per-domain lists stored after VM_NFREELIST, growing
	 * vm_nfreelists when that domain's list is used for the first
	 * time.
	 */
	if (flind == VM_FREELIST_DEFAULT && domain != 0) {
		flind = VM_NFREELIST + (domain - 1);
		if (flind >= vm_nfreelists)
			vm_nfreelists = flind + 1;
	}
#endif
	seg->free_queues = &vm_phys_free_queues[flind];
}
260
261 static void
262 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
263 {
264 int i;
265
266 if (mem_affinity == NULL) {
267 _vm_phys_create_seg(start, end, flind, 0);
268 return;
269 }
270
271 for (i = 0;; i++) {
272 if (mem_affinity[i].end == 0)
273 panic("Reached end of affinity info");
274 if (mem_affinity[i].end <= start)
275 continue;
276 if (mem_affinity[i].start > start)
277 panic("No affinity info for start %jx",
278 (uintmax_t)start);
279 if (mem_affinity[i].end >= end) {
280 _vm_phys_create_seg(start, end, flind,
281 mem_affinity[i].domain);
282 break;
283 }
284 _vm_phys_create_seg(start, mem_affinity[i].end, flind,
285 mem_affinity[i].domain);
286 start = mem_affinity[i].end;
287 }
288 }
289
/*
 * Initialize the physical memory allocator.
 *
 * Creates a segment for each range in phys_avail[], initializes every
 * free queue to empty, builds the per-domain free list lookup tables,
 * and initializes the fictitious-range registration mutex.
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int flind, i, oind, pind;
#if VM_NDOMAIN > 1
	int ndomains, j;
#endif

	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef VM_FREELIST_ISADMA
		/*
		 * Memory below 16 MB (16777216 bytes) goes on the ISA
		 * DMA list; split ranges straddling that boundary.
		 */
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef VM_FREELIST_HIGHMEM
		/*
		 * Memory above VM_HIGHMEM_ADDRESS goes on the high
		 * memory list; split ranges straddling that boundary.
		 */
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	/* Initialize every free queue to empty. */
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = vm_phys_free_queues[flind][pind];
			for (oind = 0; oind < VM_NFREEORDER; oind++)
				TAILQ_INIT(&fl[oind].pl);
		}
	}
#if VM_NDOMAIN > 1
	/*
	 * Build a free list lookup list for each domain.  All of the
	 * memory domain lists are inserted at the VM_FREELIST_DEFAULT
	 * index in a round-robin order starting with the current
	 * domain.
	 */
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind] =
			    &vm_phys_free_queues[flind];
	for (i = 0; i < ndomains; i++)
		for (j = 0; j < ndomains; j++) {
			/*
			 * Domain i consults its own default list first,
			 * then the other domains' lists in round-robin
			 * order.
			 */
			flind = (i + j) % ndomains;
			if (flind == 0)
				flind = VM_FREELIST_DEFAULT;
			else
				flind += VM_NFREELIST - 1;
			vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
			    &vm_phys_free_queues[flind];
		}
	/* The lists after the default keep their identity mapping. */
	for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
	    flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind + ndomains - 1] =
			    &vm_phys_free_queues[flind];
#else
	/* Single domain: the lookup list is the identity mapping. */
	for (flind = 0; flind < vm_nfreelists; flind++)
		vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
#endif

	mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
}
377
378 /*
379 * Split a contiguous, power of two-sized set of physical pages.
380 */
381 static __inline void
382 vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
383 {
384 vm_page_t m_buddy;
385
386 while (oind > order) {
387 oind--;
388 m_buddy = &m[1 << oind];
389 KASSERT(m_buddy->order == VM_NFREEORDER,
390 ("vm_phys_split_pages: page %p has unexpected order %d",
391 m_buddy, m_buddy->order));
392 m_buddy->order = oind;
393 TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
394 fl[oind].lcnt++;
395 }
396 }
397
/*
 * Initialize a physical page and add it to the free lists.
 *
 * Fills in the vm_page for physical address "pa" and releases it as an
 * order 0 free block, updating the global page and free counts.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;

	cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	m->flags = PG_FREE;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	/* The free queues are protected by the free page queue lock. */
	mtx_lock(&vm_page_queue_free_mtx);
	cnt.v_free_count++;
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}
422
423 /*
424 * Allocate a contiguous, power of two-sized set of physical pages
425 * from the free lists.
426 *
427 * The free page queues must be locked.
428 */
429 vm_page_t
430 vm_phys_alloc_pages(int pool, int order)
431 {
432 vm_page_t m;
433 int flind;
434
435 for (flind = 0; flind < vm_nfreelists; flind++) {
436 m = vm_phys_alloc_freelist_pages(flind, pool, order);
437 if (m != NULL)
438 return (m);
439 }
440 return (NULL);
441 }
442
/*
 * Find and dequeue a free page on the given free list, with the
 * specified pool and order.
 *
 * First searches the requested pool from the requested order upward,
 * splitting a larger block when necessary.  If the pool is empty, the
 * largest suitable block from any other pool on the same list is
 * re-pooled and used instead.  Returns NULL if the list cannot satisfy
 * the request.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int domain, oind, pind;
	vm_page_t m;

	KASSERT(flind < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

#if VM_NDOMAIN > 1
	/* Prefer the current CPU's memory domain. */
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = (*vm_phys_lookup_lists[domain][flind])[pool];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			TAILQ_REMOVE(&fl[oind].pl, m, pageq);
			fl[oind].lcnt--;
			/* VM_NFREEORDER marks the block as allocated. */
			m->order = VM_NFREEORDER;
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = (*vm_phys_lookup_lists[domain][flind])[pind];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				TAILQ_REMOVE(&alt[oind].pl, m, pageq);
				alt[oind].lcnt--;
				m->order = VM_NFREEORDER;
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}
502
/*
 * Allocate physical memory directly from phys_avail[] during bootstrap.
 *
 * "size" is rounded up to a whole number of pages and carved off the
 * start of the first phys_avail[] range that is large enough.  Panics
 * if no range can satisfy the request.
 *
 * NOTE(review): the "alignment" parameter is not used by this body --
 * presumably callers rely on phys_avail[] start addresses already
 * being suitably aligned; confirm before depending on alignment here.
 */
vm_paddr_t
vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment)
{
	vm_paddr_t pa;
	int i;

	size = round_page(size);
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		if (phys_avail[i + 1] - phys_avail[i] < size)
			continue;
		pa = phys_avail[i];
		/* Shrink the range in place to record the allocation. */
		phys_avail[i] += size;
		return (pa);
	}
	panic("vm_phys_bootstrap_alloc");
}
522
523 /*
524 * Find the vm_page corresponding to the given physical address.
525 */
526 vm_page_t
527 vm_phys_paddr_to_vm_page(vm_paddr_t pa)
528 {
529 struct vm_phys_seg *seg;
530 int segind;
531
532 for (segind = 0; segind < vm_phys_nsegs; segind++) {
533 seg = &vm_phys_segs[segind];
534 if (pa >= seg->start && pa < seg->end)
535 return (&seg->first_page[atop(pa - seg->start)]);
536 }
537 return (NULL);
538 }
539
540 vm_page_t
541 vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
542 {
543 struct vm_phys_fictitious_seg *seg;
544 vm_page_t m;
545 int segind;
546
547 m = NULL;
548 for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
549 seg = &vm_phys_fictitious_segs[segind];
550 if (pa >= seg->start && pa < seg->end) {
551 m = &seg->first_page[atop(pa - seg->start)];
552 KASSERT((m->flags & PG_FICTITIOUS) != 0,
553 ("%p not fictitious", m));
554 break;
555 }
556 }
557 return (m);
558 }
559
/*
 * Register [start, end) as a fictitious physical range backed by fake
 * vm_page structures initialized with memory attribute "memattr".
 *
 * In the dense layout, entries of vm_page_array are reused when the
 * whole range falls inside it; otherwise the pages are malloc'ed.
 * Returns 0 on success or EBUSY when all registration slots are taken.
 */
int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long i, page_count;
	int segind;
#ifdef VM_PHYSSEG_DENSE
	long pi;
	boolean_t malloced;
#endif

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	if (pi >= first_page && atop(end) < vm_page_array_size) {
		/* The range is covered by vm_page_array; reuse it. */
		fp = &vm_page_array[pi - first_page];
		malloced = FALSE;
	} else
#endif
	{
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
		malloced = TRUE;
#endif
	}
	/* Initialize every fake page and clear its busy/unmanaged flags. */
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
		pmap_page_init(&fp[i]);
		fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED);
	}
	/* Claim the first unused registration slot, if any. */
	mtx_lock(&vm_phys_fictitious_reg_mtx);
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (seg->start == 0 && seg->end == 0) {
			seg->start = start;
			seg->end = end;
			seg->first_page = fp;
			mtx_unlock(&vm_phys_fictitious_reg_mtx);
			return (0);
		}
	}
	/* No free slot: undo any allocation and fail. */
	mtx_unlock(&vm_phys_fictitious_reg_mtx);
#ifdef VM_PHYSSEG_DENSE
	if (malloced)
#endif
		free(fp, M_FICT_PAGES);
	return (EBUSY);
}
612
/*
 * Unregister the fictitious range [start, end) previously registered
 * by vm_phys_fictitious_reg_range(), freeing its vm_page structures
 * when they were malloc'ed.  Asserts if the range is not registered.
 */
void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	int segind;
#ifdef VM_PHYSSEG_DENSE
	long pi;
#endif

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
#endif

	mtx_lock(&vm_phys_fictitious_reg_mtx);
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (seg->start == start && seg->end == end) {
			/* Clear the slot so it can be reused. */
			seg->start = seg->end = 0;
			fp = seg->first_page;
			seg->first_page = NULL;
			mtx_unlock(&vm_phys_fictitious_reg_mtx);
			/*
			 * Free only when the pages were malloc'ed, i.e.
			 * the range is not covered by vm_page_array
			 * (mirrors the test in the registration path).
			 */
#ifdef VM_PHYSSEG_DENSE
			if (pi < first_page || atop(end) >= vm_page_array_size)
#endif
				free(fp, M_FICT_PAGES);
			return;
		}
	}
	mtx_unlock(&vm_phys_fictitious_reg_mtx);
	KASSERT(0, ("Unregistering not registered fictitious range"));
}
645
646 /*
647 * Find the segment containing the given physical address.
648 */
649 static int
650 vm_phys_paddr_to_segind(vm_paddr_t pa)
651 {
652 struct vm_phys_seg *seg;
653 int segind;
654
655 for (segind = 0; segind < vm_phys_nsegs; segind++) {
656 seg = &vm_phys_segs[segind];
657 if (pa >= seg->start && pa < seg->end)
658 return (segind);
659 }
660 panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" ,
661 (uintmax_t)pa);
662 }
663
/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The block is repeatedly coalesced with its buddy -- the block whose
 * address differs only in bit (PAGE_SHIFT + order) -- for as long as
 * the buddy lies in the same segment and is free at the same order.
 * The resulting block is queued at its final order.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_buddy;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	pa = VM_PAGE_TO_PHYS(m);
	seg = &vm_phys_segs[m->segind];
	while (order < VM_NFREEORDER - 1) {
		/* The buddy's address differs only in one bit. */
		pa_buddy = pa ^ (1 << (PAGE_SHIFT + order));
		if (pa_buddy < seg->start ||
		    pa_buddy >= seg->end)
			break;
		m_buddy = &seg->first_page[atop(pa_buddy - seg->start)];
		/* Coalesce only if the buddy is free at exactly this order. */
		if (m_buddy->order != order)
			break;
		fl = (*seg->free_queues)[m_buddy->pool];
		TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq);
		fl[m_buddy->order].lcnt--;
		m_buddy->order = VM_NFREEORDER;
		if (m_buddy->pool != m->pool)
			vm_phys_set_pool(m->pool, m_buddy, order);
		order++;
		/* "m" becomes the first page of the merged, larger block. */
		pa &= ~((1 << (PAGE_SHIFT + order)) - 1);
		m = &seg->first_page[atop(pa - seg->start)];
	}
	m->order = order;
	fl = (*seg->free_queues)[m->pool];
	TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
	fl[order].lcnt++;
}
711
712 /*
713 * Set the pool for a contiguous, power of two-sized set of physical pages.
714 */
715 void
716 vm_phys_set_pool(int pool, vm_page_t m, int order)
717 {
718 vm_page_t m_tmp;
719
720 for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
721 m_tmp->pool = pool;
722 }
723
/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		/* Align down to the start of the next larger candidate block. */
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	/* "m" is free only if a containing block of at least "order" is. */
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
	fl[order].lcnt--;
	m_set->order = VM_NFREEORDER;
	while (order > 0) {
		order--;
		/* pa_half is the start of the half not containing "m_set". */
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		/* Return the half that does not contain "m". */
		m_tmp->order = order;
		TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
		fl[order].lcnt++;
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}
792
/*
 * Try to zero one physical page.  Used by an idle priority thread.
 *
 * The static variables preserve the scan position (free list, pool,
 * order, and queue pointer) across calls so successive calls resume
 * where the previous one stopped.  Returns TRUE after zeroing a page.
 *
 * NOTE(review): the outer for (;;) exits only by returning TRUE --
 * presumably the caller guarantees a page without PG_ZERO/PG_CACHED
 * exists before calling; verify against the idle-zero caller.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl = vm_phys_free_queues[0][0];
	static int flind, oind, pind;
	vm_page_t m, m_tmp;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					/*
					 * Pull the page off the free lists,
					 * zero it with the free queue lock
					 * dropped, then return it marked
					 * PG_ZERO.
					 */
					vm_phys_unfree_page(m_tmp);
					cnt.v_free_count--;
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					cnt.v_free_count++;
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		/* Advance the scan to the next (order, pool, list) position. */
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[flind][pind];
		}
	}
}
836
/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 *
 * Returns the first page of the run, initialized via
 * vm_page_alloc_init(), or NULL on failure.  Pages past "npages" that
 * were dequeued as part of a larger power-of-two block are returned to
 * the free lists.
 */
vm_page_t
vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
    unsigned long alignment, unsigned long boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	struct vnode *vp;
	vm_paddr_t pa, pa_last, size;
	vm_page_t deferred_vdrop_list, m, m_ret;
	int domain, flind, i, oind, order, pind;

#if VM_NDOMAIN > 1
	/* Prefer the current CPU's memory domain. */
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	deferred_vdrop_list = NULL;
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	mtx_lock(&vm_page_queue_free_mtx);
#if VM_NRESERVLEVEL > 0
retry:
#endif
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = (*vm_phys_lookup_lists[domain][flind])
				    [pind];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
					/*
					 * A free list may contain physical pages
					 * from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation request
					 * larger than the largest block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient number
						 * of subsequent blocks to satisfy
						 * the allocation request are free.
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[atop(pa - seg->start)];
							if (m->order != VM_NFREEORDER - 1)
								break;
						}
						/* If not, continue to the next block. */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within the given range,
					 * satisfy the given alignment, and do not cross the
					 * given boundary.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
#if VM_NRESERVLEVEL > 0
	/* Try once more after reclaiming pages from reservations. */
	if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary))
		goto retry;
#endif
	mtx_unlock(&vm_page_queue_free_mtx);
	return (NULL);
done:
	/* Dequeue every maximal block making up the run. */
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
		fl[m->order].lcnt--;
		m->order = VM_NFREEORDER;
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	for (i = 0; i < npages; i++) {
		m = &m_ret[i];
		vp = vm_page_alloc_init(m);
		if (vp != NULL) {
			/*
			 * Enqueue the vnode for deferred vdrop().
			 *
			 * Unmanaged pages don't use "pageq", so it
			 * can be safely abused to construct a short-
			 * lived queue of vnodes.
			 */
			m->pageq.tqe_prev = (void *)vp;
			m->pageq.tqe_next = deferred_vdrop_list;
			deferred_vdrop_list = m;
		}
	}
	/* Return any excess pages beyond "npages" to the free lists. */
	for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
		m = &m_ret[i];
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_alloc_contig: page %p has unexpected order %d",
		    m, m->order));
		vm_phys_free_pages(m, 0);
	}
	mtx_unlock(&vm_page_queue_free_mtx);
	/* Perform the deferred vdrop() calls without the free queue lock. */
	while (deferred_vdrop_list != NULL) {
		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
	}
	return (m_ret);
}
982
983 #ifdef DDB
984 /*
985 * Show the number of physical pages in each of the free lists.
986 */
987 DB_SHOW_COMMAND(freepages, db_show_freepages)
988 {
989 struct vm_freelist *fl;
990 int flind, oind, pind;
991
992 for (flind = 0; flind < vm_nfreelists; flind++) {
993 db_printf("FREE LIST %d:\n"
994 "\n ORDER (SIZE) | NUMBER"
995 "\n ", flind);
996 for (pind = 0; pind < VM_NFREEPOOL; pind++)
997 db_printf(" | POOL %d", pind);
998 db_printf("\n-- ");
999 for (pind = 0; pind < VM_NFREEPOOL; pind++)
1000 db_printf("-- -- ");
1001 db_printf("--\n");
1002 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
1003 db_printf(" %2.2d (%6.6dK)", oind,
1004 1 << (PAGE_SHIFT - 10 + oind));
1005 for (pind = 0; pind < VM_NFREEPOOL; pind++) {
1006 fl = vm_phys_free_queues[flind][pind];
1007 db_printf(" | %6.6d", fl[oind].lcnt);
1008 }
1009 db_printf("\n");
1010 }
1011 db_printf("\n");
1012 }
1013 }
1014 #endif
Cache object: 0ecead8399f322c5739183475015be92
|