FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_contig.c
1 /*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91
33 */
34
35 /*
36 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
37 * All rights reserved.
38 *
39 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
40 *
41 * Permission to use, copy, modify and distribute this software and
42 * its documentation is hereby granted, provided that both the copyright
43 * notice and this permission notice appear in all copies of the
44 * software, derivative works or modified versions, and any portions
45 * thereof, and that both notices appear in supporting documentation.
46 *
47 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
48 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
49 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
50 *
51 * Carnegie Mellon requests users of this software to return to
52 *
53 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
54 * School of Computer Science
55 * Carnegie Mellon University
56 * Pittsburgh PA 15213-3890
57 *
58 * any improvements or extensions that they make and grant Carnegie the
59 * rights to redistribute these changes.
60 */
61
62 #include <sys/cdefs.h>
63 __FBSDID("$FreeBSD: releng/5.3/sys/vm/vm_contig.c 133185 2004-08-05 21:54:11Z green $");
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/lock.h>
68 #include <sys/malloc.h>
69 #include <sys/mutex.h>
70 #include <sys/proc.h>
71 #include <sys/kernel.h>
72 #include <sys/linker_set.h>
73 #include <sys/sysctl.h>
74 #include <sys/vmmeter.h>
75 #include <sys/vnode.h>
76
77 #include <vm/vm.h>
78 #include <vm/vm_param.h>
79 #include <vm/vm_kern.h>
80 #include <vm/pmap.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_object.h>
83 #include <vm/vm_page.h>
84 #include <vm/vm_pageout.h>
85 #include <vm/vm_pager.h>
86 #include <vm/vm_extern.h>
87
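/*
 * Helper contract, summarized from the body below: vm_contig_launder_page()
 * is entered with the page queues lock held.  It returns EBUSY when it had
 * to sleep on a busy page, EAGAIN when the owning object's lock could not
 * be taken without blocking, and 0 otherwise, typically after flushing a
 * dirty vnode- or swap-backed page or moving a clean, unheld page to the
 * cache queue.
 */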
88 static int
89 vm_contig_launder_page(vm_page_t m)
90 {
91 vm_object_t object;
92 vm_page_t m_tmp;
93 struct vnode *vp;
94
95 if (vm_page_sleep_if_busy(m, TRUE, "vpctw0")) {
96 vm_page_lock_queues();
97 return (EBUSY);
98 }
99 if (!VM_OBJECT_TRYLOCK(m->object))
100 return (EAGAIN);
101 vm_page_test_dirty(m);
102 if (m->dirty == 0 && m->hold_count == 0)
103 pmap_remove_all(m);
104 if (m->dirty) {
105 object = m->object;
106 if (object->type == OBJT_VNODE) {
107 vm_page_unlock_queues();
108 vp = object->handle;
109 VM_OBJECT_UNLOCK(object);
110 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
111 VM_OBJECT_LOCK(object);
112 vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
113 VM_OBJECT_UNLOCK(object);
114 VOP_UNLOCK(vp, 0, curthread);
115 vm_page_lock_queues();
116 return (0);
117 } else if (object->type == OBJT_SWAP ||
118 object->type == OBJT_DEFAULT) {
119 m_tmp = m;
120 vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC);
121 VM_OBJECT_UNLOCK(object);
122 return (0);
123 }
124 } else if (m->hold_count == 0)
125 vm_page_cache(m);
126 VM_OBJECT_UNLOCK(m->object);
127 return (0);
128 }
129
130 static int
131 vm_contig_launder(int queue)
132 {
133 vm_page_t m, next;
134 int error;
135
136 for (m = TAILQ_FIRST(&vm_page_queues[queue].pl); m != NULL; m = next) {
137 next = TAILQ_NEXT(m, pageq);
138 KASSERT(m->queue == queue,
139 ("vm_contig_launder: page %p's queue is not %d", m, queue));
140 error = vm_contig_launder_page(m);
141 if (error == 0)
142 return (TRUE);
143 if (error == EBUSY)
144 return (FALSE);
145 }
146 return (FALSE);
147 }
148
149 /*
150 * This interface is for merging with malloc() someday.
151 * Even if we never implement compaction so that contiguous allocation
152 * works after initialization time, malloc()'s data structures are good
153 * for statistics and for allocations of less than a page.
154 */
155 static void *
156 contigmalloc1(
157 unsigned long size, /* should be size_t here and for malloc() */
158 struct malloc_type *type,
159 int flags,
160 vm_paddr_t low,
161 vm_paddr_t high,
162 unsigned long alignment,
163 unsigned long boundary,
164 vm_map_t map)
165 {
166 int i, start;
167 vm_paddr_t phys;
168 vm_object_t object;
169 vm_offset_t addr, tmp_addr;
170 int pass, pqtype;
171 int inactl, actl, inactmax, actmax;
172 vm_page_t pga = vm_page_array;
173
174 size = round_page(size);
175 if (size == 0)
176 panic("contigmalloc1: size must not be 0");
177 if ((alignment & (alignment - 1)) != 0)
178 panic("contigmalloc1: alignment must be a power of 2");
179 if ((boundary & (boundary - 1)) != 0)
180 panic("contigmalloc1: boundary must be a power of 2");
181
182 start = 0;
183 for (pass = 2; pass >= 0; pass--) {
184 vm_page_lock_queues();
185 again0:
186 mtx_lock_spin(&vm_page_queue_free_mtx);
187 again:
188 /*
189 * Find first page in array that is free, within range,
190 * aligned, and such that the boundary won't be crossed.
191 */
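		/*
		 * A short worked example of the two bit tests in the loop
		 * below (the numbers are illustrative only): with
		 * alignment = 0x1000, "(phys & (alignment - 1)) == 0" holds
		 * for phys = 0x8000 but not for phys = 0x8200.  With
		 * boundary = 0x10000 and size = 0x4000, the masked XOR is 0
		 * for phys = 0x8000, since [0x8000, 0xBFFF] stays inside one
		 * 64KB block, but is 0x10000 for phys = 0xE000, since
		 * [0xE000, 0x11FFF] crosses the boundary at 0x10000.
		 */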
192 for (i = start; i < cnt.v_page_count; i++) {
193 phys = VM_PAGE_TO_PHYS(&pga[i]);
194 pqtype = pga[i].queue - pga[i].pc;
195 if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
196 (phys >= low) && (phys < high) &&
197 ((phys & (alignment - 1)) == 0) &&
198 (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
199 break;
200 }
201
202 /*
203 * If the above failed or we will exceed the upper bound, fail.
204 */
205 if ((i == cnt.v_page_count) ||
206 ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
207 mtx_unlock_spin(&vm_page_queue_free_mtx);
208 /*
209 * Instead of racing to empty the inactive/active
210 * queues, give up, even with more left to free,
211 			 * if we have already tried as many pages as were initially queued.
212 *
213 * There's no point attempting this on the last pass.
214 */
215 if (pass > 0) {
216 inactl = actl = 0;
217 inactmax = vm_page_queues[PQ_INACTIVE].lcnt;
218 actmax = vm_page_queues[PQ_ACTIVE].lcnt;
219 again1:
220 if (inactl < inactmax &&
221 vm_contig_launder(PQ_INACTIVE)) {
222 inactl++;
223 goto again1;
224 }
225 if (actl < actmax &&
226 vm_contig_launder(PQ_ACTIVE)) {
227 actl++;
228 goto again1;
229 }
230 }
231 vm_page_unlock_queues();
232 continue;
233 }
234 start = i;
235
236 /*
237 * Check successive pages for contiguous and free.
238 */
239 for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
240 pqtype = pga[i].queue - pga[i].pc;
241 if ((VM_PAGE_TO_PHYS(&pga[i]) !=
242 (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
243 ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
244 start++;
245 goto again;
246 }
247 }
248 mtx_unlock_spin(&vm_page_queue_free_mtx);
249 for (i = start; i < (start + size / PAGE_SIZE); i++) {
250 vm_page_t m = &pga[i];
251
252 if ((m->queue - m->pc) == PQ_CACHE) {
253 object = m->object;
254 if (!VM_OBJECT_TRYLOCK(object)) {
255 start++;
256 goto again0;
257 }
258 vm_page_busy(m);
259 vm_page_free(m);
260 VM_OBJECT_UNLOCK(object);
261 }
262 }
263 mtx_lock_spin(&vm_page_queue_free_mtx);
264 for (i = start; i < (start + size / PAGE_SIZE); i++) {
265 pqtype = pga[i].queue - pga[i].pc;
266 if (pqtype != PQ_FREE) {
267 start++;
268 goto again;
269 }
270 }
271 for (i = start; i < (start + size / PAGE_SIZE); i++) {
272 vm_page_t m = &pga[i];
273 vm_pageq_remove_nowakeup(m);
274 m->valid = VM_PAGE_BITS_ALL;
275 if (m->flags & PG_ZERO)
276 vm_page_zero_count--;
277 /* Don't clear the PG_ZERO flag, we'll need it later. */
278 m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
279 KASSERT(m->dirty == 0,
280 ("contigmalloc1: page %p was dirty", m));
281 m->wire_count = 0;
282 m->busy = 0;
283 m->object = NULL;
284 }
285 mtx_unlock_spin(&vm_page_queue_free_mtx);
286 vm_page_unlock_queues();
287 /*
288 		 * We've found a contiguous chunk that meets our requirements.
289 * Allocate kernel VM, unfree and assign the physical pages to
290 * it and return kernel VM pointer.
291 */
292 vm_map_lock(map);
293 if (vm_map_findspace(map, vm_map_min(map), size, &addr) !=
294 KERN_SUCCESS) {
295 /*
296 * XXX We almost never run out of kernel virtual
297 * space, so we don't make the allocated memory
298 * above available.
299 */
300 vm_map_unlock(map);
301 return (NULL);
302 }
303 vm_object_reference(kernel_object);
304 vm_map_insert(map, kernel_object, addr - VM_MIN_KERNEL_ADDRESS,
305 addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
306 vm_map_unlock(map);
307
308 tmp_addr = addr;
309 VM_OBJECT_LOCK(kernel_object);
310 for (i = start; i < (start + size / PAGE_SIZE); i++) {
311 vm_page_t m = &pga[i];
312 vm_page_insert(m, kernel_object,
313 OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
314 if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
315 pmap_zero_page(m);
316 tmp_addr += PAGE_SIZE;
317 }
318 VM_OBJECT_UNLOCK(kernel_object);
319 vm_map_wire(map, addr, addr + size,
320 VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
321
322 return ((void *)addr);
323 }
324 return (NULL);
325 }
326
327 static void
328 vm_page_release_contigl(vm_page_t m, vm_pindex_t count)
329 {
330 while (count--) {
331 vm_page_free_toq(m);
332 m++;
333 }
334 }
335
336 void
337 vm_page_release_contig(vm_page_t m, vm_pindex_t count)
338 {
339 vm_page_lock_queues();
340 vm_page_release_contigl(m, count);
341 vm_page_unlock_queues();
342 }
343
344 static int
345 vm_contig_unqueue_free(vm_page_t m)
346 {
347 int error = 0;
348
349 mtx_lock_spin(&vm_page_queue_free_mtx);
350 if ((m->queue - m->pc) == PQ_FREE)
351 vm_pageq_remove_nowakeup(m);
352 else
353 error = EAGAIN;
354 mtx_unlock_spin(&vm_page_queue_free_mtx);
355 if (error)
356 return (error);
357 m->valid = VM_PAGE_BITS_ALL;
358 if (m->flags & PG_ZERO)
359 vm_page_zero_count--;
360 /* Don't clear the PG_ZERO flag; we'll need it later. */
361 m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
362 KASSERT(m->dirty == 0,
363 ("contigmalloc2: page %p was dirty", m));
364 m->wire_count = 0;
365 m->busy = 0;
366 m->object = NULL;
367 return (error);
368 }
369
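/*
 * Allocate a physically contiguous run of npages pages that satisfies the
 * given address range, alignment, and boundary constraints.  On success the
 * pages have already been removed from the page queues and marked
 * PG_UNMANAGED (see vm_contig_unqueue_free() above), and the run is later
 * released with vm_page_release_contig().  Returns the first page of the
 * run, or NULL if no suitable run could be assembled.
 */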
370 vm_page_t
371 vm_page_alloc_contig(vm_pindex_t npages, vm_paddr_t low, vm_paddr_t high,
372 vm_offset_t alignment, vm_offset_t boundary)
373 {
374 vm_object_t object;
375 vm_offset_t size;
376 vm_paddr_t phys;
377 vm_page_t pga = vm_page_array;
378 int i, pass, pqtype, start;
379
380 size = npages << PAGE_SHIFT;
381 if (size == 0)
382 panic("vm_page_alloc_contig: size must not be 0");
383 if ((alignment & (alignment - 1)) != 0)
384 panic("vm_page_alloc_contig: alignment must be a power of 2");
385 if ((boundary & (boundary - 1)) != 0)
386 panic("vm_page_alloc_contig: boundary must be a power of 2");
387
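	/*
	 * Two passes over the page array (see the loop below): pass 0 only
	 * considers pages that are already free or cached, while pass 1 also
	 * considers active and inactive pages and tries to launder them into
	 * a reusable state with vm_contig_launder_page().
	 */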
388 for (pass = 0; pass < 2; pass++) {
389 start = vm_page_array_size;
390 vm_page_lock_queues();
391 retry:
392 start--;
393 /*
394 * Find last page in array that is free, within range,
395 * aligned, and such that the boundary won't be crossed.
396 */
397 for (i = start; i >= 0; i--) {
398 phys = VM_PAGE_TO_PHYS(&pga[i]);
399 pqtype = pga[i].queue - pga[i].pc;
400 if (pass == 0) {
401 if (pqtype != PQ_FREE && pqtype != PQ_CACHE)
402 continue;
403 } else if (pqtype != PQ_FREE && pqtype != PQ_CACHE &&
404 pga[i].queue != PQ_ACTIVE &&
405 pga[i].queue != PQ_INACTIVE)
406 continue;
407 if (phys >= low && phys + size <= high &&
408 ((phys & (alignment - 1)) == 0) &&
409 ((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)
410 break;
411 }
412 /* There are no candidates at all. */
413 if (i == -1) {
414 vm_page_unlock_queues();
415 continue;
416 }
417 start = i;
418 /*
419 * Check successive pages for contiguous and free.
420 */
421 for (i = start + 1; i < start + npages; i++) {
422 pqtype = pga[i].queue - pga[i].pc;
423 if (VM_PAGE_TO_PHYS(&pga[i]) !=
424 VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)
425 goto retry;
426 if (pass == 0) {
427 if (pqtype != PQ_FREE && pqtype != PQ_CACHE)
428 goto retry;
429 } else if (pqtype != PQ_FREE && pqtype != PQ_CACHE &&
430 pga[i].queue != PQ_ACTIVE &&
431 pga[i].queue != PQ_INACTIVE)
432 goto retry;
433 }
434 for (i = start; i < start + npages; i++) {
435 vm_page_t m = &pga[i];
436
437 retry_page:
438 pqtype = m->queue - m->pc;
439 if (pass != 0 && pqtype != PQ_FREE &&
440 pqtype != PQ_CACHE) {
441 switch (m->queue) {
442 case PQ_ACTIVE:
443 case PQ_INACTIVE:
444 if (vm_contig_launder_page(m) != 0)
445 goto cleanup_freed;
446 pqtype = m->queue - m->pc;
447 if (pqtype == PQ_FREE ||
448 pqtype == PQ_CACHE)
449 break;
450 default:
451 cleanup_freed:
452 vm_page_release_contigl(&pga[start],
453 i - start);
454 goto retry;
455 }
456 }
457 if (pqtype == PQ_CACHE) {
458 object = m->object;
459 if (!VM_OBJECT_TRYLOCK(object))
460 goto retry;
461 vm_page_busy(m);
462 vm_page_free(m);
463 VM_OBJECT_UNLOCK(object);
464 }
465 /*
466 * There is no good API for freeing a page
467 * directly to PQ_NONE on our behalf, so spin.
468 */
469 if (vm_contig_unqueue_free(m) != 0)
470 goto retry_page;
471 }
472 vm_page_unlock_queues();
473 /*
474 		 * We've found a contiguous chunk that meets our requirements.
475 */
476 return (&pga[start]);
477 }
478 return (NULL);
479 }
480
481 static void *
482 contigmalloc2(vm_page_t m, vm_pindex_t npages, int flags)
483 {
484 vm_object_t object = kernel_object;
485 vm_map_t map = kernel_map;
486 vm_offset_t addr, tmp_addr;
487 vm_pindex_t i;
488
489 /*
490 * Allocate kernel VM, unfree and assign the physical pages to
491 * it and return kernel VM pointer.
492 */
493 vm_map_lock(map);
494 if (vm_map_findspace(map, vm_map_min(map), npages << PAGE_SHIFT, &addr)
495 != KERN_SUCCESS) {
496 vm_map_unlock(map);
497 return (NULL);
498 }
499 vm_object_reference(object);
500 vm_map_insert(map, object, addr - VM_MIN_KERNEL_ADDRESS,
501 addr, addr + (npages << PAGE_SHIFT), VM_PROT_ALL, VM_PROT_ALL, 0);
502 vm_map_unlock(map);
503 tmp_addr = addr;
504 VM_OBJECT_LOCK(object);
505 for (i = 0; i < npages; i++) {
506 vm_page_insert(&m[i], object,
507 OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
508 		if ((flags & M_ZERO) && !(m[i].flags & PG_ZERO))
509 pmap_zero_page(&m[i]);
510 tmp_addr += PAGE_SIZE;
511 }
512 VM_OBJECT_UNLOCK(object);
513 vm_map_wire(map, addr, addr + (npages << PAGE_SHIFT),
514 VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
515 return ((void *)addr);
516 }
517
518 static int vm_old_contigmalloc = 0;
519 SYSCTL_INT(_vm, OID_AUTO, old_contigmalloc,
520 CTLFLAG_RW, &vm_old_contigmalloc, 0, "Use the old contigmalloc algorithm");
521 TUNABLE_INT("vm.old_contigmalloc", &vm_old_contigmalloc);
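/*
 * The knob above can be changed at runtime or set at boot time; a minimal
 * usage sketch (commands assumed to be run on a matching FreeBSD system):
 *
 *	sysctl vm.old_contigmalloc=1			# revert to contigmalloc1()
 *	echo 'vm.old_contigmalloc="1"' >> /boot/loader.conf	# as a loader tunable
 */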
522
523 void *
524 contigmalloc(
525 unsigned long size, /* should be size_t here and for malloc() */
526 struct malloc_type *type,
527 int flags,
528 vm_paddr_t low,
529 vm_paddr_t high,
530 unsigned long alignment,
531 unsigned long boundary)
532 {
533 	void *ret;
534 vm_page_t pages;
535 vm_pindex_t npgs;
536
537 npgs = round_page(size) >> PAGE_SHIFT;
538 mtx_lock(&Giant);
539 if (vm_old_contigmalloc) {
540 ret = contigmalloc1(size, type, flags, low, high, alignment,
541 boundary, kernel_map);
542 } else {
543 pages = vm_page_alloc_contig(npgs, low, high,
544 alignment, boundary);
545 if (pages == NULL) {
546 ret = NULL;
547 } else {
548 ret = contigmalloc2(pages, npgs, flags);
549 if (ret == NULL)
550 vm_page_release_contig(pages, npgs);
551 }
552
553 }
554 mtx_unlock(&Giant);
555 malloc_type_allocated(type, ret == NULL ? 0 : npgs << PAGE_SHIFT);
556 return (ret);
557 }
558
559 void
560 contigfree(void *addr, unsigned long size, struct malloc_type *type)
561 {
562 vm_pindex_t npgs;
563
564 npgs = round_page(size) >> PAGE_SHIFT;
565 kmem_free(kernel_map, (vm_offset_t)addr, size);
566 malloc_type_freed(type, npgs << PAGE_SHIFT);
567 }
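
/*
 * A minimal, hypothetical usage sketch of the two exported routines above.
 * The 64KB size, the M_DEVBUF type, the 4GB ceiling, and the function names
 * are illustrative only and are not part of this file.
 */
#ifdef CONTIGMALLOC_EXAMPLE
static void *
example_dma_buf_alloc(void)
{

	/* 64KB, physically contiguous, below 4GB, page-aligned, no boundary. */
	return (contigmalloc(64 * 1024, M_DEVBUF, M_WAITOK, 0, 0xffffffffUL,
	    PAGE_SIZE, 0));
}

static void
example_dma_buf_free(void *buf)
{

	contigfree(buf, 64 * 1024, M_DEVBUF);
}
#endif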