FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_contig.c
1 /*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91
33 */
34
35 /*-
36 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
37 * All rights reserved.
38 *
39 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
40 *
41 * Permission to use, copy, modify and distribute this software and
42 * its documentation is hereby granted, provided that both the copyright
43 * notice and this permission notice appear in all copies of the
44 * software, derivative works or modified versions, and any portions
45 * thereof, and that both notices appear in supporting documentation.
46 *
47 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
48 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
49 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
50 *
51 * Carnegie Mellon requests users of this software to return to
52 *
53 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
54 * School of Computer Science
55 * Carnegie Mellon University
56 * Pittsburgh PA 15213-3890
57 *
58 * any improvements or extensions that they make and grant Carnegie the
59 * rights to redistribute these changes.
60 */
61
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD: releng/5.4/sys/vm/vm_contig.c 141090 2005-01-31 23:27:04Z imp $");
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/lock.h>
68 #include <sys/malloc.h>
69 #include <sys/mutex.h>
70 #include <sys/proc.h>
71 #include <sys/kernel.h>
72 #include <sys/linker_set.h>
73 #include <sys/sysctl.h>
74 #include <sys/vmmeter.h>
75 #include <sys/vnode.h>
76
77 #include <vm/vm.h>
78 #include <vm/vm_param.h>
79 #include <vm/vm_kern.h>
80 #include <vm/pmap.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_object.h>
83 #include <vm/vm_page.h>
84 #include <vm/vm_pageout.h>
85 #include <vm/vm_pager.h>
86 #include <vm/vm_extern.h>
87
88 static int
89 vm_contig_launder_page(vm_page_t m)
90 {
91 vm_object_t object;
92 vm_page_t m_tmp;
93 struct vnode *vp;
94
95 object = m->object;
96 if (!VM_OBJECT_TRYLOCK(object))
97 return (EAGAIN);
98 if (vm_page_sleep_if_busy(m, TRUE, "vpctw0")) {
99 VM_OBJECT_UNLOCK(object);
100 vm_page_lock_queues();
101 return (EBUSY);
102 }
103 vm_page_test_dirty(m);
104 if (m->dirty == 0 && m->hold_count == 0)
105 pmap_remove_all(m);
106 if (m->dirty) {
107 if (object->type == OBJT_VNODE) {
108 vm_page_unlock_queues();
109 vp = object->handle;
110 VM_OBJECT_UNLOCK(object);
111 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
112 VM_OBJECT_LOCK(object);
113 vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
114 VM_OBJECT_UNLOCK(object);
115 VOP_UNLOCK(vp, 0, curthread);
116 vm_page_lock_queues();
117 return (0);
118 } else if (object->type == OBJT_SWAP ||
119 object->type == OBJT_DEFAULT) {
120 m_tmp = m;
121 vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC);
122 VM_OBJECT_UNLOCK(object);
123 return (0);
124 }
125 } else if (m->hold_count == 0)
126 vm_page_cache(m);
127 VM_OBJECT_UNLOCK(object);
128 return (0);
129 }
130
131 static int
132 vm_contig_launder(int queue)
133 {
134 vm_page_t m, next;
135 int error;
136
137 for (m = TAILQ_FIRST(&vm_page_queues[queue].pl); m != NULL; m = next) {
138 next = TAILQ_NEXT(m, pageq);
139 KASSERT(m->queue == queue,
140 ("vm_contig_launder: page %p's queue is not %d", m, queue));
141 error = vm_contig_launder_page(m);
142 if (error == 0)
143 return (TRUE);
144 if (error == EBUSY)
145 return (FALSE);
146 }
147 return (FALSE);
148 }
149
150 /*
151 * This interface is for merging with malloc() someday.
152 * Even if we never implement compaction so that contiguous allocation
153 * works after initialization time, malloc()'s data structures are good
154 * for statistics and for allocations of less than a page.
155 */
156 static void *
157 contigmalloc1(
158 unsigned long size, /* should be size_t here and for malloc() */
159 struct malloc_type *type,
160 int flags,
161 vm_paddr_t low,
162 vm_paddr_t high,
163 unsigned long alignment,
164 unsigned long boundary,
165 vm_map_t map)
166 {
167 int i, start;
168 vm_paddr_t phys;
169 vm_object_t object;
170 vm_offset_t addr, tmp_addr;
171 int pass, pqtype;
172 int inactl, actl, inactmax, actmax;
173 vm_page_t pga = vm_page_array;
174
175 size = round_page(size);
176 if (size == 0)
177 panic("contigmalloc1: size must not be 0");
178 if ((alignment & (alignment - 1)) != 0)
179 panic("contigmalloc1: alignment must be a power of 2");
180 if ((boundary & (boundary - 1)) != 0)
181 panic("contigmalloc1: boundary must be a power of 2");
182
183 start = 0;
184 for (pass = 2; pass >= 0; pass--) {
185 vm_page_lock_queues();
186 again0:
187 mtx_lock_spin(&vm_page_queue_free_mtx);
188 again:
189 /*
190 * Find first page in array that is free, within range,
191 * aligned, and such that the boundary won't be crossed.
192 */
193 for (i = start; i < cnt.v_page_count; i++) {
194 phys = VM_PAGE_TO_PHYS(&pga[i]);
195 pqtype = pga[i].queue - pga[i].pc;
196 if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
197 (phys >= low) && (phys < high) &&
198 ((phys & (alignment - 1)) == 0) &&
199 (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
200 break;
201 }
202
203 /*
204 * If the above failed or we will exceed the upper bound, fail.
205 */
206 if ((i == cnt.v_page_count) ||
207 ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
208 mtx_unlock_spin(&vm_page_queue_free_mtx);
209 /*
210 * Instead of racing to empty the inactive/active
211 * queues, give up, even with more left to free,
212 * if we try more than the initial amount of pages.
213 *
214 * There's no point attempting this on the last pass.
215 */
216 if (pass > 0) {
217 inactl = actl = 0;
218 inactmax = vm_page_queues[PQ_INACTIVE].lcnt;
219 actmax = vm_page_queues[PQ_ACTIVE].lcnt;
220 again1:
221 if (inactl < inactmax &&
222 vm_contig_launder(PQ_INACTIVE)) {
223 inactl++;
224 goto again1;
225 }
226 if (actl < actmax &&
227 vm_contig_launder(PQ_ACTIVE)) {
228 actl++;
229 goto again1;
230 }
231 }
232 vm_page_unlock_queues();
233 continue;
234 }
235 start = i;
236
237 /*
238 * Check successive pages for contiguous and free.
239 */
240 for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
241 pqtype = pga[i].queue - pga[i].pc;
242 if ((VM_PAGE_TO_PHYS(&pga[i]) !=
243 (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
244 ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
245 start++;
246 goto again;
247 }
248 }
249 mtx_unlock_spin(&vm_page_queue_free_mtx);
250 for (i = start; i < (start + size / PAGE_SIZE); i++) {
251 vm_page_t m = &pga[i];
252
253 if ((m->queue - m->pc) == PQ_CACHE) {
254 if (m->hold_count != 0) {
255 start++;
256 goto again0;
257 }
258 object = m->object;
259 if (!VM_OBJECT_TRYLOCK(object)) {
260 start++;
261 goto again0;
262 }
263 if ((m->flags & PG_BUSY) || m->busy != 0) {
264 VM_OBJECT_UNLOCK(object);
265 start++;
266 goto again0;
267 }
268 vm_page_free(m);
269 VM_OBJECT_UNLOCK(object);
270 }
271 }
272 mtx_lock_spin(&vm_page_queue_free_mtx);
273 for (i = start; i < (start + size / PAGE_SIZE); i++) {
274 pqtype = pga[i].queue - pga[i].pc;
275 if (pqtype != PQ_FREE) {
276 start++;
277 goto again;
278 }
279 }
280 for (i = start; i < (start + size / PAGE_SIZE); i++) {
281 vm_page_t m = &pga[i];
282 vm_pageq_remove_nowakeup(m);
283 m->valid = VM_PAGE_BITS_ALL;
284 if (m->flags & PG_ZERO)
285 vm_page_zero_count--;
286 /* Don't clear the PG_ZERO flag, we'll need it later. */
287 m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
288 KASSERT(m->dirty == 0,
289 ("contigmalloc1: page %p was dirty", m));
290 m->wire_count = 0;
291 m->busy = 0;
292 }
293 mtx_unlock_spin(&vm_page_queue_free_mtx);
294 vm_page_unlock_queues();
295 /*
296 * We've found a contiguous chunk that meets are requirements.
297 * Allocate kernel VM, unfree and assign the physical pages to
298 * it and return kernel VM pointer.
299 */
300 vm_map_lock(map);
301 if (vm_map_findspace(map, vm_map_min(map), size, &addr) !=
302 KERN_SUCCESS) {
303 /*
304 * XXX We almost never run out of kernel virtual
305 * space, so we don't make the allocated memory
306 * above available.
307 */
308 vm_map_unlock(map);
309 return (NULL);
310 }
311 vm_object_reference(kernel_object);
312 vm_map_insert(map, kernel_object, addr - VM_MIN_KERNEL_ADDRESS,
313 addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
314 vm_map_unlock(map);
315
316 tmp_addr = addr;
317 VM_OBJECT_LOCK(kernel_object);
318 for (i = start; i < (start + size / PAGE_SIZE); i++) {
319 vm_page_t m = &pga[i];
320 vm_page_insert(m, kernel_object,
321 OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
322 if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
323 pmap_zero_page(m);
324 tmp_addr += PAGE_SIZE;
325 }
326 VM_OBJECT_UNLOCK(kernel_object);
327 vm_map_wire(map, addr, addr + size,
328 VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
329
330 return ((void *)addr);
331 }
332 return (NULL);
333 }
334
335 static void
336 vm_page_release_contigl(vm_page_t m, vm_pindex_t count)
337 {
338 while (count--) {
339 vm_page_free_toq(m);
340 m++;
341 }
342 }
343
/*
 * vm_page_release_contig:
 *
 *	Release "count" consecutive pages starting at m.  This is the
 *	locking wrapper around vm_page_release_contigl(): it takes the page
 *	queues lock for the duration of the release.
 */
void
vm_page_release_contig(vm_page_t m, vm_pindex_t count)
{
	vm_page_lock_queues();
	vm_page_release_contigl(m, count);
	vm_page_unlock_queues();
}
351
352 static int
353 vm_contig_unqueue_free(vm_page_t m)
354 {
355 int error = 0;
356
357 mtx_lock_spin(&vm_page_queue_free_mtx);
358 if ((m->queue - m->pc) == PQ_FREE)
359 vm_pageq_remove_nowakeup(m);
360 else
361 error = EAGAIN;
362 mtx_unlock_spin(&vm_page_queue_free_mtx);
363 if (error)
364 return (error);
365 m->valid = VM_PAGE_BITS_ALL;
366 if (m->flags & PG_ZERO)
367 vm_page_zero_count--;
368 /* Don't clear the PG_ZERO flag; we'll need it later. */
369 m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
370 KASSERT(m->dirty == 0,
371 ("contigmalloc2: page %p was dirty", m));
372 m->wire_count = 0;
373 m->busy = 0;
374 return (error);
375 }
376
377 vm_page_t
378 vm_page_alloc_contig(vm_pindex_t npages, vm_paddr_t low, vm_paddr_t high,
379 vm_offset_t alignment, vm_offset_t boundary)
380 {
381 vm_object_t object;
382 vm_offset_t size;
383 vm_paddr_t phys;
384 vm_page_t pga = vm_page_array;
385 int i, pass, pqtype, start;
386
387 size = npages << PAGE_SHIFT;
388 if (size == 0)
389 panic("vm_page_alloc_contig: size must not be 0");
390 if ((alignment & (alignment - 1)) != 0)
391 panic("vm_page_alloc_contig: alignment must be a power of 2");
392 if ((boundary & (boundary - 1)) != 0)
393 panic("vm_page_alloc_contig: boundary must be a power of 2");
394
395 for (pass = 0; pass < 2; pass++) {
396 start = vm_page_array_size;
397 vm_page_lock_queues();
398 retry:
399 start--;
400 /*
401 * Find last page in array that is free, within range,
402 * aligned, and such that the boundary won't be crossed.
403 */
404 for (i = start; i >= 0; i--) {
405 phys = VM_PAGE_TO_PHYS(&pga[i]);
406 pqtype = pga[i].queue - pga[i].pc;
407 if (pass == 0) {
408 if (pqtype != PQ_FREE && pqtype != PQ_CACHE)
409 continue;
410 } else if (pqtype != PQ_FREE && pqtype != PQ_CACHE &&
411 pga[i].queue != PQ_ACTIVE &&
412 pga[i].queue != PQ_INACTIVE)
413 continue;
414 if (phys >= low && phys + size <= high &&
415 ((phys & (alignment - 1)) == 0) &&
416 ((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)
417 break;
418 }
419 /* There are no candidates at all. */
420 if (i == -1) {
421 vm_page_unlock_queues();
422 continue;
423 }
424 start = i;
425 /*
426 * Check successive pages for contiguous and free.
427 */
428 for (i = start + 1; i < start + npages; i++) {
429 pqtype = pga[i].queue - pga[i].pc;
430 if (VM_PAGE_TO_PHYS(&pga[i]) !=
431 VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)
432 goto retry;
433 if (pass == 0) {
434 if (pqtype != PQ_FREE && pqtype != PQ_CACHE)
435 goto retry;
436 } else if (pqtype != PQ_FREE && pqtype != PQ_CACHE &&
437 pga[i].queue != PQ_ACTIVE &&
438 pga[i].queue != PQ_INACTIVE)
439 goto retry;
440 }
441 for (i = start; i < start + npages; i++) {
442 vm_page_t m = &pga[i];
443
444 retry_page:
445 pqtype = m->queue - m->pc;
446 if (pass != 0 && pqtype != PQ_FREE &&
447 pqtype != PQ_CACHE) {
448 switch (m->queue) {
449 case PQ_ACTIVE:
450 case PQ_INACTIVE:
451 if (vm_contig_launder_page(m) != 0)
452 goto cleanup_freed;
453 pqtype = m->queue - m->pc;
454 if (pqtype == PQ_FREE ||
455 pqtype == PQ_CACHE)
456 break;
457 default:
458 cleanup_freed:
459 vm_page_release_contigl(&pga[start],
460 i - start);
461 goto retry;
462 }
463 }
464 if (pqtype == PQ_CACHE) {
465 if (m->hold_count != 0)
466 goto retry;
467 object = m->object;
468 if (!VM_OBJECT_TRYLOCK(object))
469 goto retry;
470 if ((m->flags & PG_BUSY) || m->busy != 0) {
471 VM_OBJECT_UNLOCK(object);
472 goto retry;
473 }
474 vm_page_free(m);
475 VM_OBJECT_UNLOCK(object);
476 }
477 /*
478 * There is no good API for freeing a page
479 * directly to PQ_NONE on our behalf, so spin.
480 */
481 if (vm_contig_unqueue_free(m) != 0)
482 goto retry_page;
483 }
484 vm_page_unlock_queues();
485 /*
486 * We've found a contiguous chunk that meets are requirements.
487 */
488 return (&pga[start]);
489 }
490 return (NULL);
491 }
492
493 static void *
494 contigmalloc2(vm_page_t m, vm_pindex_t npages, int flags)
495 {
496 vm_object_t object = kernel_object;
497 vm_map_t map = kernel_map;
498 vm_offset_t addr, tmp_addr;
499 vm_pindex_t i;
500
501 /*
502 * Allocate kernel VM, unfree and assign the physical pages to
503 * it and return kernel VM pointer.
504 */
505 vm_map_lock(map);
506 if (vm_map_findspace(map, vm_map_min(map), npages << PAGE_SHIFT, &addr)
507 != KERN_SUCCESS) {
508 vm_map_unlock(map);
509 return (NULL);
510 }
511 vm_object_reference(object);
512 vm_map_insert(map, object, addr - VM_MIN_KERNEL_ADDRESS,
513 addr, addr + (npages << PAGE_SHIFT), VM_PROT_ALL, VM_PROT_ALL, 0);
514 vm_map_unlock(map);
515 tmp_addr = addr;
516 VM_OBJECT_LOCK(object);
517 for (i = 0; i < npages; i++) {
518 vm_page_insert(&m[i], object,
519 OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
520 if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
521 pmap_zero_page(&m[i]);
522 tmp_addr += PAGE_SIZE;
523 }
524 VM_OBJECT_UNLOCK(object);
525 vm_map_wire(map, addr, addr + (npages << PAGE_SHIFT),
526 VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
527 return ((void *)addr);
528 }
529
/*
 * Selects the allocator used by contigmalloc(): when non-zero,
 * contigmalloc1() is used; otherwise vm_page_alloc_contig() followed by
 * contigmalloc2().  Registered below as the read-write sysctl
 * vm.old_contigmalloc and as a tunable of the same name.
 */
static int vm_old_contigmalloc = 0;
SYSCTL_INT(_vm, OID_AUTO, old_contigmalloc,
    CTLFLAG_RW, &vm_old_contigmalloc, 0, "Use the old contigmalloc algorithm");
TUNABLE_INT("vm.old_contigmalloc", &vm_old_contigmalloc);
534
535 void *
536 contigmalloc(
537 unsigned long size, /* should be size_t here and for malloc() */
538 struct malloc_type *type,
539 int flags,
540 vm_paddr_t low,
541 vm_paddr_t high,
542 unsigned long alignment,
543 unsigned long boundary)
544 {
545 void * ret;
546 vm_page_t pages;
547 vm_pindex_t npgs;
548
549 npgs = round_page(size) >> PAGE_SHIFT;
550 mtx_lock(&Giant);
551 if (vm_old_contigmalloc) {
552 ret = contigmalloc1(size, type, flags, low, high, alignment,
553 boundary, kernel_map);
554 } else {
555 pages = vm_page_alloc_contig(npgs, low, high,
556 alignment, boundary);
557 if (pages == NULL) {
558 ret = NULL;
559 } else {
560 ret = contigmalloc2(pages, npgs, flags);
561 if (ret == NULL)
562 vm_page_release_contig(pages, npgs);
563 }
564
565 }
566 mtx_unlock(&Giant);
567 malloc_type_allocated(type, ret == NULL ? 0 : npgs << PAGE_SHIFT);
568 return (ret);
569 }
570
571 void
572 contigfree(void *addr, unsigned long size, struct malloc_type *type)
573 {
574 vm_pindex_t npgs;
575
576 npgs = round_page(size) >> PAGE_SHIFT;
577 kmem_free(kernel_map, (vm_offset_t)addr, size);
578 malloc_type_freed(type, npgs << PAGE_SHIFT);
579 }
Cache object: 1d7ff84c558fde3d46d0b586bcde6ab0
|