sys/vm/vm_phys.c
/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/9.0/sys/vm/vm_phys.c 226894 2011-10-29 06:13:44Z attilio $");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_reserv.h>

/*
 * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
 * domain.  These extra lists are stored at the end of the regular
 * free lists starting with VM_NFREELIST.
 */
#define	VM_RAW_NFREELIST	(VM_NFREELIST + VM_NDOMAIN - 1)

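/*
 * Editor's illustration (not part of the original file): with, say,
 * VM_NFREELIST == 2 and VM_NDOMAIN == 2, VM_RAW_NFREELIST is 3.  Raw
 * queue index 0 (VM_FREELIST_DEFAULT) holds domain 0's share of the
 * default list, index 1 holds the other named free list, and index 2
 * (VM_NFREELIST + domain - 1) holds domain 1's share of the default
 * list.
 */
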
struct vm_freelist {
	struct pglist pl;
	int lcnt;
};

struct vm_phys_seg {
	vm_paddr_t start;
	vm_paddr_t end;
	vm_page_t first_page;
	int domain;
	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};

struct mem_affinity *mem_affinity;

static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];

static int vm_phys_nsegs;

static struct vm_freelist
    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
static struct vm_freelist
(*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#if VM_NDOMAIN > 1
static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
#endif

static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (flind = 0; flind < vm_nfreelists; flind++) {
		sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "  |  POOL %d", pind);
		sbuf_printf(&sbuf, "\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "-- --      ");
		sbuf_printf(&sbuf, "--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				sbuf_printf(&sbuf, "  |  %6d", fl[oind].lcnt);
			}
			sbuf_printf(&sbuf, "\n");
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

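/*
 * Editor's note (usage illustration, not part of the original file):
 * the handler above implements the read-only string sysctl
 * "vm.phys_free"; "vm.phys_segs" below works the same way.  From
 * userland:
 *
 *	$ sysctl vm.phys_free
 *
 * prints one table per free list, with one row per order (largest
 * first) and one "POOL" column per free pool, each cell showing that
 * queue's lcnt.
 */
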
/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

#if VM_NDOMAIN > 1
/*
 * Outputs the set of free list lookup lists.
 */
static int
sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int domain, error, flind, ndomains;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (domain = 0; domain < ndomains; domain++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
		for (flind = 0; flind < vm_nfreelists; flind++)
			sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
			    vm_phys_lookup_lists[domain][flind]);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
	int segind;

	pages = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		pages += atop(seg->end - seg->start);
	}
#endif
	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
	seg->first_page = &vm_page_array[pages];
#else
	seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
#if VM_NDOMAIN > 1
	if (flind == VM_FREELIST_DEFAULT && domain != 0) {
		flind = VM_NFREELIST + (domain - 1);
		if (flind >= vm_nfreelists)
			vm_nfreelists = flind + 1;
	}
#endif
	seg->free_queues = &vm_phys_free_queues[flind];
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
{
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, flind, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end, flind,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
}

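/*
 * Editor's illustration (not part of the original file, addresses are
 * hypothetical): suppose the affinity table says [0, 0x80000000)
 * belongs to domain 0 and [0x80000000, 0x100000000) to domain 1.  A
 * call
 *
 *	vm_phys_create_seg(0x7c000000, 0x84000000, VM_FREELIST_DEFAULT);
 *
 * is split at the domain boundary into two segments:
 * [0x7c000000, 0x80000000) in domain 0 and [0x80000000, 0x84000000)
 * in domain 1.
 */
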
/*
 * Initialize the physical memory allocator.
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int flind, i, oind, pind;
#if VM_NDOMAIN > 1
	int ndomains, j;
#endif

	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef VM_FREELIST_ISADMA
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef VM_FREELIST_HIGHMEM
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = vm_phys_free_queues[flind][pind];
			for (oind = 0; oind < VM_NFREEORDER; oind++)
				TAILQ_INIT(&fl[oind].pl);
		}
	}
#if VM_NDOMAIN > 1
	/*
	 * Build a free list lookup list for each domain.  All of the
	 * memory domain lists are inserted at the VM_FREELIST_DEFAULT
	 * index in a round-robin order starting with the current
	 * domain.
	 */
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind] =
			    &vm_phys_free_queues[flind];
	for (i = 0; i < ndomains; i++)
		for (j = 0; j < ndomains; j++) {
			flind = (i + j) % ndomains;
			if (flind == 0)
				flind = VM_FREELIST_DEFAULT;
			else
				flind += VM_NFREELIST - 1;
			vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
			    &vm_phys_free_queues[flind];
		}
	for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
	    flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind + ndomains - 1] =
			    &vm_phys_free_queues[flind];
#else
	for (flind = 0; flind < vm_nfreelists; flind++)
		vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
#endif
}

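/*
 * Editor's illustration (not part of the original file): with two
 * domains and VM_FREELIST_DEFAULT == 0, the loops above produce
 * lookup lists that try local memory first:
 *
 *	domain 0: [0] -> default (domain 0), [1] -> default (domain 1)
 *	domain 1: [0] -> default (domain 1), [1] -> default (domain 0)
 *
 * followed by the remaining named free lists, so an allocation walks
 * every queue but prefers pages from the requesting CPU's domain.
 */
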
/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		m_buddy->order = oind;
		TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
		fl[oind].lcnt++;
	}
}

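/*
 * Editor's illustration (not part of the original file): splitting an
 * order-3 block (8 pages) down to a requested order of 0 frees the
 * upper halves as successively smaller buddies:
 *
 *	pages 4-7 go to the order-2 queue,
 *	pages 2-3 go to the order-1 queue,
 *	page  1   goes to the order-0 queue,
 *
 * and page 0 is returned to the caller.
 */
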
/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;

	cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	m->flags = PG_FREE;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	cnt.v_free_count++;
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int flind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		m = vm_phys_alloc_freelist_pages(flind, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

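/*
 * Editor's note (usage illustration, not part of the original file):
 * a typical caller such as vm_page_alloc() takes vm_page_queue_free_mtx
 * and then asks for a single page:
 *
 *	mtx_lock(&vm_page_queue_free_mtx);
 *	m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, 0);
 *	...
 *	mtx_unlock(&vm_page_queue_free_mtx);
 *
 * An order-n request returns 2^n physically contiguous, naturally
 * aligned pages.
 */
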
/*
 * Find and dequeue a free page on the given free list, with the
 * specified pool and order.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int domain, oind, pind;
	vm_page_t m;

	KASSERT(flind < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = (*vm_phys_lookup_lists[domain][flind])[pool];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			TAILQ_REMOVE(&fl[oind].pl, m, pageq);
			fl[oind].lcnt--;
			m->order = VM_NFREEORDER;
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = (*vm_phys_lookup_lists[domain][flind])[pind];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				TAILQ_REMOVE(&alt[oind].pl, m, pageq);
				alt[oind].lcnt--;
				m->order = VM_NFREEORDER;
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}

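/*
 * Editor's note (not part of the original file): the fallback above
 * searches from the largest order down, so a single big block is
 * retyped with vm_phys_set_pool() and then split; later requests for
 * this pool can be met by splitting the remainder instead of raiding
 * the other pools again.
 */
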
/*
 * Allocate physical memory from phys_avail[].
 */
vm_paddr_t
vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment)
{
	vm_paddr_t pa;
	int i;

	size = round_page(size);
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		if (phys_avail[i + 1] - phys_avail[i] < size)
			continue;
		pa = phys_avail[i + 1] - size;
		pa &= ~((vm_paddr_t)alignment - 1);
		if (pa < phys_avail[i])
			continue;
		phys_avail[i + 1] = pa;
		return (pa);
	}
	panic("vm_phys_bootstrap_alloc");
}

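/*
 * Editor's note (illustration of the alignment math; the carve-from-
 * the-top and round-down steps are an editorial fix, since the version
 * being edited ignored the "alignment" argument entirely): for a
 * region [0x10000, 0x7f000) with size == 0x2000 and
 * alignment == 0x4000,
 *
 *	pa = 0x7f000 - 0x2000	== 0x7d000
 *	pa &= ~(0x4000 - 1)	== 0x7c000
 *
 * so the caller gets a 16KB-aligned 8KB chunk and the region is
 * truncated to [0x10000, 0x7c000).
 */
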
/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_buddy;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	pa = VM_PAGE_TO_PHYS(m);
	seg = &vm_phys_segs[m->segind];
	while (order < VM_NFREEORDER - 1) {
		pa_buddy = pa ^ (1 << (PAGE_SHIFT + order));
		if (pa_buddy < seg->start ||
		    pa_buddy >= seg->end)
			break;
		m_buddy = &seg->first_page[atop(pa_buddy - seg->start)];
		if (m_buddy->order != order)
			break;
		fl = (*seg->free_queues)[m_buddy->pool];
		TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq);
		fl[m_buddy->order].lcnt--;
		m_buddy->order = VM_NFREEORDER;
		if (m_buddy->pool != m->pool)
			vm_phys_set_pool(m->pool, m_buddy, order);
		order++;
		pa &= ~((1 << (PAGE_SHIFT + order)) - 1);
		m = &seg->first_page[atop(pa - seg->start)];
	}
	m->order = order;
	fl = (*seg->free_queues)[m->pool];
	TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
	fl[order].lcnt++;
}

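/*
 * Editor's illustration (not part of the original file): the buddy of
 * a block is found by flipping the single address bit that corresponds
 * to the block's size.  For example, with PAGE_SHIFT == 12 and
 * order == 2 (a 16KB block), the block at physical address 0x40000 has
 * its buddy at
 *
 *	0x40000 ^ (1 << (12 + 2)) == 0x40000 ^ 0x4000 == 0x44000
 *
 * and after a merge the combined block's base is recovered by clearing
 * the low bits: 0x44000 & ~((1 << (12 + 3)) - 1) == 0x40000.
 */
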
/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
	fl[order].lcnt--;
	m_set->order = VM_NFREEORDER;
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		m_tmp->order = order;
		TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
		fl[order].lcnt++;
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

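/*
 * Editor's illustration (not part of the original file): to unfree
 * page 5 of a free order-3 block based at page 0, the block is removed
 * whole and then halved toward the target:
 *
 *	pages 0-3 (the half without page 5) return at order 2,
 *	pages 6-7 return at order 1,
 *	page  4   returns at order 0,
 *
 * leaving exactly page 5 withdrawn from the free lists.
 */
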
/*
 * Try to zero one physical page.  Used by an idle priority thread.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl = vm_phys_free_queues[0][0];
	static int flind, oind, pind;
	vm_page_t m, m_tmp;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					vm_phys_unfree_page(m_tmp);
					cnt.v_free_count--;
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					cnt.v_free_count++;
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[flind][pind];
		}
	}
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
    unsigned long alignment, unsigned long boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	struct vnode *vp;
	vm_paddr_t pa, pa_last, size;
	vm_page_t deferred_vdrop_list, m, m_ret;
	int domain, flind, i, oind, order, pind;

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	deferred_vdrop_list = NULL;
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	mtx_lock(&vm_page_queue_free_mtx);
#if VM_NRESERVLEVEL > 0
retry:
#endif
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = (*vm_phys_lookup_lists[domain][flind])
				    [pind];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
					/*
					 * A free list may contain physical pages
					 * from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation request
					 * larger than the largest block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient number
						 * of subsequent blocks to satisfy
						 * the allocation request are free.
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[atop(pa - seg->start)];
							if (m->order != VM_NFREEORDER - 1)
								break;
						}
						/* If not, continue to the next block. */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within the given range,
					 * satisfy the given alignment, and do not cross the
					 * given boundary.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
#if VM_NRESERVLEVEL > 0
	if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary))
		goto retry;
#endif
	mtx_unlock(&vm_page_queue_free_mtx);
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
		fl[m->order].lcnt--;
		m->order = VM_NFREEORDER;
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	for (i = 0; i < npages; i++) {
		m = &m_ret[i];
		vp = vm_page_alloc_init(m);
		if (vp != NULL) {
			/*
			 * Enqueue the vnode for deferred vdrop().
			 *
			 * Unmanaged pages don't use "pageq", so it
			 * can be safely abused to construct a short-
			 * lived queue of vnodes.
			 */
			m->pageq.tqe_prev = (void *)vp;
			m->pageq.tqe_next = deferred_vdrop_list;
			deferred_vdrop_list = m;
		}
	}
	for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
		m = &m_ret[i];
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_alloc_contig: page %p has unexpected order %d",
		    m, m->order));
		vm_phys_free_pages(m, 0);
	}
	mtx_unlock(&vm_page_queue_free_mtx);
	while (deferred_vdrop_list != NULL) {
		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
	}
	return (m_ret);
}

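/*
 * Editor's illustration (not part of the original file): the boundary
 * test above works because pa and pa + size - 1 agree in every bit at
 * or above the boundary bit exactly when the run does not cross a
 * multiple of "boundary".  With boundary == 0x10000 and size == 0x4000:
 *
 *	pa = 0x24000: (0x24000 ^ 0x27fff) & ~0xffff == 0 -> accepted
 *	pa = 0x2c000: (0x2c000 ^ 0x2ffff) & ~0xffff == 0 -> accepted
 *	pa = 0x2e000: (0x2e000 ^ 0x31fff) & ~0xffff != 0 -> crosses 0x30000
 */
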
#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		db_printf("FREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("  |  POOL %d", pind);
		db_printf("\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("-- --      ");
		db_printf("--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			db_printf("  %2.2d (%6.6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				db_printf("  |  %6.6d", fl[oind].lcnt);
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif