FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_contig.c
/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 */

/*-
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/linker_set.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

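/*
 * Attempt to launder a single page so that it can later be reused for a
 * contiguous allocation.  Called and returns with the page queues lock
 * held.  A clean, unheld page has its mappings removed and is moved to
 * the cache; a dirty page backed by a vnode, swap, or default object is
 * written out synchronously.  Returns 0 on success, EAGAIN if the object
 * lock could not be acquired or the object is dead, and EBUSY if the page
 * was busy and the caller should rescan the queue.
 */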
static int
vm_contig_launder_page(vm_page_t m)
{
	vm_object_t object;
	vm_page_t m_tmp;
	struct vnode *vp;
	struct mount *mp;

	object = m->object;
	if (!VM_OBJECT_TRYLOCK(object))
		return (EAGAIN);
	if (vm_page_sleep_if_busy(m, TRUE, "vpctw0")) {
		VM_OBJECT_UNLOCK(object);
		vm_page_lock_queues();
		return (EBUSY);
	}
	vm_page_test_dirty(m);
	if (m->dirty == 0 && m->hold_count == 0)
		pmap_remove_all(m);
	if (m->dirty) {
		if ((object->flags & OBJ_DEAD) != 0) {
			VM_OBJECT_UNLOCK(object);
			return (EAGAIN);
		}
		if (object->type == OBJT_VNODE) {
			vm_page_unlock_queues();
			vp = object->handle;
			vm_object_reference_locked(object);
			VM_OBJECT_UNLOCK(object);
			(void) vn_start_write(vp, &mp, V_WAIT);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
			VM_OBJECT_LOCK(object);
			vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
			VM_OBJECT_UNLOCK(object);
			VOP_UNLOCK(vp, 0, curthread);
			vm_object_deallocate(object);
			vn_finished_write(mp);
			vm_page_lock_queues();
			return (0);
		} else if (object->type == OBJT_SWAP ||
		    object->type == OBJT_DEFAULT) {
			m_tmp = m;
			vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC);
			VM_OBJECT_UNLOCK(object);
			return (0);
		}
	} else if (m->hold_count == 0)
		vm_page_cache(m);
	VM_OBJECT_UNLOCK(object);
	return (0);
}

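/*
 * Walk the given page queue (PQ_INACTIVE or PQ_ACTIVE) looking for a page
 * that can be laundered.  Returns TRUE as soon as one page has been
 * successfully laundered, and FALSE if the end of the queue was reached or
 * a busy page forced the scan to stop.  Called with the page queues lock
 * held.
 */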
static int
vm_contig_launder(int queue)
{
	vm_page_t m, next;
	int error;

	for (m = TAILQ_FIRST(&vm_page_queues[queue].pl); m != NULL; m = next) {
		next = TAILQ_NEXT(m, pageq);

		/* Skip marker pages */
		if ((m->flags & PG_MARKER) != 0)
			continue;

		KASSERT(m->queue == queue,
		    ("vm_contig_launder: page %p's queue is not %d", m, queue));
		error = vm_contig_launder_page(m);
		if (error == 0)
			return (TRUE);
		if (error == EBUSY)
			return (FALSE);
	}
	return (FALSE);
}

/*
 * This interface is for merging with malloc() someday.
 * Even if we never implement compaction so that contiguous allocation
 * works after initialization time, malloc()'s data structures are good
 * for statistics and for allocations of less than a page.
 */
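/*
 * Legacy allocator: scan vm_page_array from the bottom up for a run of
 * pages that is physically contiguous, lies between low and high, and
 * satisfies the alignment and boundary constraints.  Up to three passes
 * are made; on all but the last pass a failed scan launders the inactive
 * and active queues to free more pages before rescanning.  On success the
 * run is mapped into the given kernel map and its address is returned.
 */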
static void *
contigmalloc1(
	unsigned long size,	/* should be size_t here and for malloc() */
	struct malloc_type *type,
	int flags,
	vm_paddr_t low,
	vm_paddr_t high,
	unsigned long alignment,
	unsigned long boundary,
	vm_map_t map)
{
	int i, start;
	vm_paddr_t phys;
	vm_object_t object;
	vm_offset_t addr, tmp_addr;
	int pass, pqtype;
	int inactl, actl, inactmax, actmax;
	vm_page_t pga = vm_page_array;

	size = round_page(size);
	if (size == 0)
		panic("contigmalloc1: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("contigmalloc1: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("contigmalloc1: boundary must be a power of 2");

	start = 0;
	for (pass = 2; pass >= 0; pass--) {
		vm_page_lock_queues();
again0:
		mtx_lock_spin(&vm_page_queue_free_mtx);
again:
		/*
		 * Find first page in array that is free, within range,
		 * aligned, and such that the boundary won't be crossed.
		 */
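		/*
		 * Because alignment and boundary are powers of two,
		 * (phys & (alignment - 1)) == 0 tests alignment directly,
		 * and (phys ^ (phys + size - 1)) & ~(boundary - 1) is zero
		 * only when the first and last byte of the run fall within
		 * the same boundary-sized region, i.e. the run does not
		 * cross a boundary.
		 */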
		for (i = start; i < cnt.v_page_count; i++) {
			phys = VM_PAGE_TO_PHYS(&pga[i]);
			pqtype = pga[i].queue - pga[i].pc;
			if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
			    (phys >= low) && (phys < high) &&
			    ((phys & (alignment - 1)) == 0) &&
			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
				break;
		}

		/*
		 * If the above failed or we will exceed the upper bound, fail.
		 */
		if ((i == cnt.v_page_count) ||
		    ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
			mtx_unlock_spin(&vm_page_queue_free_mtx);
			/*
			 * Instead of racing to empty the inactive/active
			 * queues, give up, even with more left to free,
			 * if we try more than the initial amount of pages.
			 *
			 * There's no point attempting this on the last pass.
			 */
			if (pass > 0) {
				inactl = actl = 0;
				inactmax = vm_page_queues[PQ_INACTIVE].lcnt;
				actmax = vm_page_queues[PQ_ACTIVE].lcnt;
again1:
				if (inactl < inactmax &&
				    vm_contig_launder(PQ_INACTIVE)) {
					inactl++;
					goto again1;
				}
				if (actl < actmax &&
				    vm_contig_launder(PQ_ACTIVE)) {
					actl++;
					goto again1;
				}
			}
			vm_page_unlock_queues();
			continue;
		}
		start = i;

		/*
		 * Check successive pages for contiguous and free.
		 */
		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
			pqtype = pga[i].queue - pga[i].pc;
			if ((VM_PAGE_TO_PHYS(&pga[i]) !=
			    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
			    ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
				start++;
				goto again;
			}
		}
		mtx_unlock_spin(&vm_page_queue_free_mtx);
		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			vm_page_t m = &pga[i];

			if ((m->queue - m->pc) == PQ_CACHE) {
				if (m->hold_count != 0) {
					start++;
					goto again0;
				}
				object = m->object;
				if (!VM_OBJECT_TRYLOCK(object)) {
					start++;
					goto again0;
				}
				if ((m->flags & PG_BUSY) || m->busy != 0) {
					VM_OBJECT_UNLOCK(object);
					start++;
					goto again0;
				}
				vm_page_free(m);
				VM_OBJECT_UNLOCK(object);
			}
		}
		mtx_lock_spin(&vm_page_queue_free_mtx);
		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			pqtype = pga[i].queue - pga[i].pc;
			if (pqtype != PQ_FREE) {
				start++;
				goto again;
			}
		}
		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			vm_page_t m = &pga[i];
			vm_pageq_remove_nowakeup(m);
			m->valid = VM_PAGE_BITS_ALL;
			if (m->flags & PG_ZERO)
				vm_page_zero_count--;
			/* Don't clear the PG_ZERO flag, we'll need it later. */
			m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
			KASSERT(m->dirty == 0,
			    ("contigmalloc1: page %p was dirty", m));
			m->wire_count = 0;
			m->busy = 0;
		}
		mtx_unlock_spin(&vm_page_queue_free_mtx);
		vm_page_unlock_queues();
		/*
		 * We've found a contiguous chunk that meets our requirements.
		 * Allocate kernel VM, unfree and assign the physical pages to
		 * it and return kernel VM pointer.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, vm_map_min(map), size, &addr) !=
		    KERN_SUCCESS) {
			/*
			 * XXX We almost never run out of kernel virtual
			 * space, so we don't make the allocated memory
			 * above available.
			 */
			vm_map_unlock(map);
			return (NULL);
		}
		vm_object_reference(kernel_object);
		vm_map_insert(map, kernel_object, addr - VM_MIN_KERNEL_ADDRESS,
		    addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
		vm_map_unlock(map);

		tmp_addr = addr;
		VM_OBJECT_LOCK(kernel_object);
		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			vm_page_t m = &pga[i];
			vm_page_insert(m, kernel_object,
			    OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
			if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
				pmap_zero_page(m);
			tmp_addr += PAGE_SIZE;
		}
		VM_OBJECT_UNLOCK(kernel_object);
		vm_map_wire(map, addr, addr + size,
		    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);

		return ((void *)addr);
	}
	return (NULL);
}

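/*
 * Return a run of `count' contiguous pages, previously handed out by
 * vm_page_alloc_contig(), to the free page queues.  The locked variant
 * expects the page queues lock to be held by the caller.
 */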
static void
vm_page_release_contigl(vm_page_t m, vm_pindex_t count)
{
	while (count--) {
		vm_page_free_toq(m);
		m++;
	}
}

void
vm_page_release_contig(vm_page_t m, vm_pindex_t count)
{
	vm_page_lock_queues();
	vm_page_release_contigl(m, count);
	vm_page_unlock_queues();
}

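/*
 * Claim a page that is expected to be free: remove it from the free queues
 * without waking the page daemon and mark it unmanaged and fully valid.
 * Returns EAGAIN if the page raced out of PQ_FREE, in which case the
 * caller retries.
 */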
static int
vm_contig_unqueue_free(vm_page_t m)
{
	int error = 0;

	mtx_lock_spin(&vm_page_queue_free_mtx);
	if ((m->queue - m->pc) == PQ_FREE)
		vm_pageq_remove_nowakeup(m);
	else
		error = EAGAIN;
	mtx_unlock_spin(&vm_page_queue_free_mtx);
	if (error)
		return (error);
	m->valid = VM_PAGE_BITS_ALL;
	if (m->flags & PG_ZERO)
		vm_page_zero_count--;
	/* Don't clear the PG_ZERO flag; we'll need it later. */
	m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
	KASSERT(m->dirty == 0,
	    ("contigmalloc2: page %p was dirty", m));
	m->wire_count = 0;
	m->busy = 0;
	return (error);
}

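/*
 * Allocate a run of `npages' physically contiguous pages that lies between
 * `low' and `high', starts on an `alignment' boundary, and does not cross
 * a `boundary' multiple.  The vm_page_array is scanned from the top down.
 * On the first pass only free and cached pages are considered; on the
 * second pass active and inactive pages are laundered as well.  Returns
 * the first page of the run, or NULL if no suitable run could be found.
 */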
vm_page_t
vm_page_alloc_contig(vm_pindex_t npages, vm_paddr_t low, vm_paddr_t high,
    vm_offset_t alignment, vm_offset_t boundary)
{
	vm_object_t object;
	vm_offset_t size;
	vm_paddr_t phys;
	vm_page_t pga = vm_page_array;
	int i, pass, pqtype, start;

	size = npages << PAGE_SHIFT;
	if (size == 0)
		panic("vm_page_alloc_contig: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("vm_page_alloc_contig: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("vm_page_alloc_contig: boundary must be a power of 2");

	for (pass = 0; pass < 2; pass++) {
		if (atop(high) < vm_page_array_size)
			start = atop(high) - npages + 1;
		else
			start = vm_page_array_size - npages + 1;
		vm_page_lock_queues();
retry:
		start--;
		/*
		 * Find last page in array that is free, within range,
		 * aligned, and such that the boundary won't be crossed.
		 */
		for (i = start; i >= 0; i--) {
			phys = VM_PAGE_TO_PHYS(&pga[i]);
			pqtype = pga[i].queue - pga[i].pc;
			if (pass == 0) {
				if (pqtype != PQ_FREE && pqtype != PQ_CACHE)
					continue;
			} else if (pqtype != PQ_FREE && pqtype != PQ_CACHE &&
			    pga[i].queue != PQ_ACTIVE &&
			    pga[i].queue != PQ_INACTIVE)
				continue;
			if (phys >= low && phys + size <= high &&
			    ((phys & (alignment - 1)) == 0) &&
			    ((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)
				break;
		}
		/* There are no candidates at all. */
		if (i < 0) {
			vm_page_unlock_queues();
			continue;
		}
		start = i;
		/*
		 * Check successive pages for contiguous and free.
		 */
		for (i = start + npages - 1; i > start; i--) {
			pqtype = pga[i].queue - pga[i].pc;
			if (VM_PAGE_TO_PHYS(&pga[i]) !=
			    VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE) {
				start = i - npages + 1;
				goto retry;
			}
			if (pass == 0) {
				if (pqtype != PQ_FREE && pqtype != PQ_CACHE) {
					start = i - npages + 1;
					goto retry;
				}
			} else if (pqtype != PQ_FREE && pqtype != PQ_CACHE &&
			    pga[i].queue != PQ_ACTIVE &&
			    pga[i].queue != PQ_INACTIVE) {
				start = i - npages + 1;
				goto retry;
			}
		}
		for (i = start + npages - 1; i >= start; i--) {
			vm_page_t m = &pga[i];

retry_page:
			pqtype = m->queue - m->pc;
			if (pass != 0 && pqtype != PQ_FREE &&
			    pqtype != PQ_CACHE) {
				switch (m->queue) {
				case PQ_ACTIVE:
				case PQ_INACTIVE:
					if (vm_contig_launder_page(m) != 0)
						goto cleanup_freed;
					pqtype = m->queue - m->pc;
					if (pqtype == PQ_FREE ||
					    pqtype == PQ_CACHE)
						break;
				default:
cleanup_freed:
					vm_page_release_contigl(&pga[i + 1],
					    start + npages - 1 - i);
					start = i - npages + 1;
					goto retry;
				}
			}
			if (pqtype == PQ_CACHE) {
				if (m->hold_count != 0) {
					start = i - npages + 1;
					goto retry;
				}
				object = m->object;
				if (!VM_OBJECT_TRYLOCK(object)) {
					start = i - npages + 1;
					goto retry;
				}
				if ((m->flags & PG_BUSY) || m->busy != 0) {
					VM_OBJECT_UNLOCK(object);
					start = i - npages + 1;
					goto retry;
				}
				vm_page_free(m);
				VM_OBJECT_UNLOCK(object);
			}
			/*
			 * There is no good API for freeing a page
			 * directly to PQ_NONE on our behalf, so spin.
			 */
			if (vm_contig_unqueue_free(m) != 0)
				goto retry_page;
		}
		vm_page_unlock_queues();
		/*
		 * We've found a contiguous chunk that meets our requirements.
		 */
		return (&pga[start]);
	}
	return (NULL);
}

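/*
 * Map a run of physically contiguous pages, as returned by
 * vm_page_alloc_contig(), into the kernel map.  The pages are inserted
 * into the kernel object, zeroed if M_ZERO was requested and not already
 * zeroed, and the mapping is wired.  Returns the kernel virtual address,
 * or NULL if no kernel address space is available.
 */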
static void *
contigmalloc2(vm_page_t m, vm_pindex_t npages, int flags)
{
	vm_object_t object = kernel_object;
	vm_map_t map = kernel_map;
	vm_offset_t addr, tmp_addr;
	vm_pindex_t i;

	/*
	 * Allocate kernel VM, unfree and assign the physical pages to
	 * it and return kernel VM pointer.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), npages << PAGE_SHIFT, &addr)
	    != KERN_SUCCESS) {
		vm_map_unlock(map);
		return (NULL);
	}
	vm_object_reference(object);
	vm_map_insert(map, object, addr - VM_MIN_KERNEL_ADDRESS,
	    addr, addr + (npages << PAGE_SHIFT), VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);
	tmp_addr = addr;
	VM_OBJECT_LOCK(object);
	for (i = 0; i < npages; i++) {
		vm_page_insert(&m[i], object,
		    OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
		if ((flags & M_ZERO) && !(m[i].flags & PG_ZERO))
			pmap_zero_page(&m[i]);
		tmp_addr += PAGE_SIZE;
	}
	VM_OBJECT_UNLOCK(object);
	vm_map_wire(map, addr, addr + (npages << PAGE_SHIFT),
	    VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
	return ((void *)addr);
}

static int vm_old_contigmalloc = 0;
SYSCTL_INT(_vm, OID_AUTO, old_contigmalloc,
    CTLFLAG_RW, &vm_old_contigmalloc, 0, "Use the old contigmalloc algorithm");
TUNABLE_INT("vm.old_contigmalloc", &vm_old_contigmalloc);

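/*
 * Allocate `size' bytes of wired, physically contiguous kernel memory that
 * lies between `low' and `high', starts on an `alignment' boundary, and
 * does not cross a `boundary' multiple.  The vm.old_contigmalloc tunable
 * selects between the legacy contigmalloc1() scan and the newer
 * vm_page_alloc_contig()/contigmalloc2() path.  Returns NULL on failure.
 * Memory obtained here must be released with contigfree().
 */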
void *
contigmalloc(
	unsigned long size,	/* should be size_t here and for malloc() */
	struct malloc_type *type,
	int flags,
	vm_paddr_t low,
	vm_paddr_t high,
	unsigned long alignment,
	unsigned long boundary)
{
	void *ret;
	vm_page_t pages;
	vm_pindex_t npgs;

	npgs = round_page(size) >> PAGE_SHIFT;
	mtx_lock(&Giant);
	if (vm_old_contigmalloc) {
		ret = contigmalloc1(size, type, flags, low, high, alignment,
		    boundary, kernel_map);
	} else {
		pages = vm_page_alloc_contig(npgs, low, high,
		    alignment, boundary);
		if (pages == NULL) {
			ret = NULL;
		} else {
			ret = contigmalloc2(pages, npgs, flags);
			if (ret == NULL)
				vm_page_release_contig(pages, npgs);
		}
	}
	mtx_unlock(&Giant);
	malloc_type_allocated(type, ret == NULL ? 0 : npgs << PAGE_SHIFT);
	return (ret);
}

void
contigfree(void *addr, unsigned long size, struct malloc_type *type)
{
	vm_pindex_t npgs;

	npgs = round_page(size) >> PAGE_SHIFT;
	kmem_free(kernel_map, (vm_offset_t)addr, size);
	malloc_type_freed(type, npgs << PAGE_SHIFT);
}
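
/*
 * Illustrative usage sketch (not part of this file): a hypothetical driver
 * might obtain a 64KB DMA buffer below 4GB, page-aligned and not crossing
 * a 64KB boundary, roughly as follows.  The buffer name and sizes are
 * assumptions for the example only.
 *
 *	void *buf;
 *
 *	buf = contigmalloc(64 * 1024, M_DEVBUF, M_NOWAIT | M_ZERO,
 *	    0, 0xfffffffful, PAGE_SIZE, 64 * 1024);
 *	if (buf == NULL)
 *		return (ENOMEM);
 *	...
 *	contigfree(buf, 64 * 1024, M_DEVBUF);
 */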