FreeBSD/Linux Kernel Cross Reference
sys/kern/zalloc.c
1 /*
2 * Mach Operating System
3 * Copyright (c) 1993-1987 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
19 * School of Computer Science
20 * Carnegie Mellon University
21 * Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie Mellon
24 * the rights to redistribute these changes.
25 */
26 /*
27 * HISTORY
28 * $Log: zalloc.c,v $
29 * Revision 2.21 93/11/17 17:34:10 dbg
30 * Fixed type of null continuation argument to thread_block.
31 * [93/06/03 dbg]
32 *
33 * Add ANSI function prototypes.
34 * [93/05/21 dbg]
35 *
36 * Revision 2.20 93/05/15 18:56:28 mrt
37 * machparam.h -> machspl.h
38 *
39 * Revision 2.19 93/01/14 17:37:34 danner
40 * Fixed casts of assert_wait and thread_wakeup arguments.
41 * [93/01/12 danner]
42 * 64bit cleanup. Proper spl typing.
43 * [92/12/01 af]
44 *
45 * Revision 2.18 92/08/03 17:40:37 jfriedl
46 * removed silly prototypes
47 * [92/08/02 jfriedl]
48 *
49 * Revision 2.17 92/05/21 17:17:28 jfriedl
50 * Added stuff to quiet some gcc warnings.
51 * [92/05/16 jfriedl]
52 *
53 * Revision 2.16 92/02/23 19:49:58 elf
54 * Eliminate keep_wired argument from vm_map_copyin().
55 * [92/02/21 10:13:57 dlb]
56 *
57 * Revision 2.14.7.1 92/02/18 19:07:08 jeffreyh
58 * Increased zone_map_size for 2 servers
59 * [91/08/30 bernadat]
60 *
61 * Revision 2.15 92/01/14 16:45:03 rpd
62 * Changed host_zone_info for CountInOut.
63 * [92/01/14 rpd]
64 *
65 * Revision 2.14 91/05/18 14:34:46 rpd
66 * Added check_simple_locks.
67 * Moved ADD_TO_ZONE, REMOVE_FROM_ZONE here.
68 * Moved extraneous zone GC declarations here.
69 * [91/03/31 rpd]
70 *
71 * Minor cleanup in zget_space.
72 * [91/03/28 rpd]
73 * Changed to use zdata to initialize zalloc_next_space.
74 * [91/03/22 rpd]
75 *
76 * Revision 2.13 91/05/14 16:50:36 mrt
77 * Correcting copyright
78 *
79 * Revision 2.12 91/03/16 14:53:32 rpd
80 * Updated for new kmem_alloc interface.
81 * [91/03/03 rpd]
82 * Added continuation argument to thread_block.
83 * [91/02/16 rpd]
84 *
85 * Revision 2.11 91/02/05 17:31:25 mrt
86 * Changed to new Mach copyright
87 * [91/02/01 16:21:52 mrt]
88 *
89 * Revision 2.10 91/01/08 15:18:28 rpd
90 * Added zalloc_wasted_space.
91 * [91/01/06 rpd]
92 * Removed COLLECT_ZONE_GARBAGE.
93 * [91/01/03 rpd]
94 *
95 * Changed zinit to make zones by default *not* collectable.
96 * [90/12/29 rpd]
97 * Added consider_zone_gc.
98 * [90/11/11 rpd]
99 *
100 * Revision 2.9 90/12/20 16:39:11 jeffreyh
101 * [90/12/19 10:36:55 jeffreyh]
102 *
103 * 10-Dec-90 Jeffrey Heller (jeffreyh) at OSF
104 * Merge in changes from OSF/1 done by jvs@osf
105 * Zone's are now collectable by default,
106 * zchange now takes a collectable argument
107 * include machine/machparam.h for splhigh
108 *
109 * Revision 2.8 90/11/05 14:32:08 rpd
110 * Added zone_check option to zfree.
111 * [90/10/29 rpd]
112 *
113 * Revision 2.7 90/06/19 22:59:49 rpd
114 * Added zi_collectable field to zone_info structure.
115 * [90/06/05 rpd]
116 *
117 * Revision 2.6 90/06/02 14:57:28 rpd
118 * Made zone_ignore_overflow TRUE by default.
119 * When a zone overflows, increase its max_size.
120 * [90/05/11 17:00:24 rpd]
121 *
122 * Added host_zone_info.
123 * [90/03/26 22:28:05 rpd]
124 *
125 * Revision 2.5 90/05/03 15:47:04 dbg
126 * Add host_zone_info.
127 * [90/04/06 dbg]
128 *
129 * Revision 2.4 90/02/22 20:04:23 dbg
130 * Initialize zone_page_table_lock before using it.
131 * [90/02/16 dbg]
132 *
133 * Revision 2.3 89/11/29 14:09:25 af
134 * Nullify zone_page_alloc/init if 'garbage' not here.
135 * [89/10/29 14:23:56 af]
136 *
137 * Could not compile without the 'garbage' thing because a definition was missing.
138 * [89/10/29 09:35:22 af]
139 *
140 * Revision 2.2 89/08/11 17:56:21 rwd
141 * Added collectible zones. Collectible zones allow storage to be
142 * returned to system via kmem_free when pages are no longer used.
143 * This option should only be used when zone memory is virtual
144 * (rather than physical as in a MIPS architecture).
145 * [89/07/22 rfr]
146 *
147 * Revision 2.11 89/05/30 10:38:40 rvb
148 * Make zalloc storage pointers external, so they can be initialized from
149 * the outside.
150 * [89/05/30 08:28:14 rvb]
151 *
152 * Revision 2.10 89/05/11 14:41:30 gm0w
153 * Keep all zones on a list that host_zone_info can traverse.
154 * This fixes a bug in host_zone_info: it would try to lock
155 * uninitialized zones. Fixed zinit to round elem_size up
156 * to a multiple of four. This prevents zget_space from handing
157 * out improperly aligned objects.
158 * [89/05/08 21:34:17 rpd]
159 *
160 * Revision 2.9 89/05/06 15:47:11 rpd
161 * From jsb: Added missing argument to kmem_free in zget_space.
162 *
163 * Revision 2.8 89/05/06 02:57:35 rpd
164 * Added host_zone_info (under MACH_DEBUG).
165 * Fixed zget to increase cur_size when the space comes from zget_space.
166 * Use MACRO_BEGIN/MACRO_END, decl_simple_lock_data where appropriate.
167 * [89/05/06 02:43:29 rpd]
168 *
169 * Revision 2.7 89/04/18 16:43:20 mwyoung
170 * Document zget_space. Eliminate MACH_XP conditional.
171 * [89/03/26 mwyoung]
172 * Make handling of zero allocation size unconditional. Clean up
173 * allocation code.
174 * [89/03/16 mwyoung]
175 *
176 * Revision 2.6 89/03/15 15:04:46 gm0w
177 * Picked up code from rfr to allocate data from non pageable zones
178 * from a single pool.
179 * [89/03/09 mrt]
180 *
181 * Revision 2.5 89/03/09 20:17:50 rpd
182 * More cleanup.
183 *
184 * Revision 2.4 89/02/25 18:11:15 gm0w
185 * Changes for cleanup.
186 *
187 * Revision 2.3 89/01/18 00:50:51 jsb
188 * Vnode support: interpret allocation size of zero in zinit as meaning
189 * PAGE_SIZE.
190 * [89/01/17 20:57:39 jsb]
191 *
192 * Revision 2.2 88/12/19 02:48:41 mwyoung
193 * Fix include file references.
194 * [88/12/19 00:33:03 mwyoung]
195 *
196 * Add and use zone_ignore_overflow.
197 * [88/12/14 mwyoung]
198 *
199 * 8-Jan-88 Rick Rashid (rfr) at Carnegie-Mellon University
200 * Made pageable zones really pageable. Turned spin locks to sleep
201 * locks for pageable zones.
202 *
203 * 30-Dec-87 David Golub (dbg) at Carnegie-Mellon University
204 * Delinted.
205 *
206 * 20-Oct-87 Michael Young (mwyoung) at Carnegie-Mellon University
207 * Allocate zone memory from a separate kernel submap, to avoid
208 * sleeping with the kernel_map locked.
209 *
210 * 1-Oct-87 Michael Young (mwyoung) at Carnegie-Mellon University
211 * Added zchange().
212 *
213 * 30-Sep-87 Richard Sanzi (sanzi) at Carnegie-Mellon University
214 * Deleted the printf() in zinit() which is called when zinit is
215 * creating a pageable zone.
216 *
217 * 12-Sep-87 Avadis Tevanian (avie) at Carnegie-Mellon University
218 * Modified to use list of elements instead of queues. Actually,
219 * this package now uses macros defined in zalloc.h which define
220 * the list semantics.
221 *
222 * 30-Mar-87 Avadis Tevanian (avie) at Carnegie-Mellon University
223 * Update zone's cur_size field when it is crammed (zcram).
224 *
225 * 23-Mar-87 Avadis Tevanian (avie) at Carnegie-Mellon University
226 * Only define zfree if there is no macro version.
227 *
228 * 17-Mar-87 David Golub (dbg) at Carnegie-Mellon University
229 * De-linted.
230 *
231 * 12-Feb-87 Robert Sansom (rds) at Carnegie Mellon University
232 * Added zget - no waiting version of zalloc.
233 *
234 * 22-Jan-87 Michael Young (mwyoung) at Carnegie-Mellon University
235 * De-linted.
236 *
237 * 12-Jan-87 Michael Young (mwyoung) at Carnegie-Mellon University
238 * Eliminated use of the old interlocked queuing package;
239 * random other cleanups.
240 *
241 * 9-Jun-85 Avadis Tevanian (avie) at Carnegie-Mellon University
242 * Created.
243 */
244 /*
245 * File: kern/zalloc.c
246 * Author: Avadis Tevanian, Jr.
247 *
248 * Zone-based memory allocator. A zone is a collection of fixed size
249 * data blocks for which quick allocation/deallocation is possible.
250 */
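/*
 * Usage sketch (hypothetical; "struct foo" and the sizes below are only
 * illustrative, not part of this file).  A client creates a zone once with
 * zinit(element size, max memory, allocation chunk, pageable, name) and
 * then allocates and frees fixed-size elements from it:
 *
 *	zone_t foo_zone;
 *
 *	void foo_bootstrap(void)
 *	{
 *		foo_zone = zinit(sizeof(struct foo),
 *				 256 * sizeof(struct foo),
 *				 PAGE_SIZE, FALSE, "foo structures");
 *	}
 *
 *	struct foo *foo_alloc(void)
 *	{
 *		return (struct foo *) zalloc(foo_zone);
 *	}
 *
 *	void foo_free(struct foo *f)
 *	{
 *		zfree(foo_zone, (vm_offset_t) f);
 *	}
 */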
251
252 #include <kern/assert.h>
253 #include <kern/macro_help.h>
254 #include <kern/memory.h>
255 #include <kern/sched.h> /* sched_tick */
256 #include <kern/sched_prim.h>
257 #include <kern/strings.h>
258 #include <kern/zalloc.h>
259 #include <mach/vm_param.h>
260 #include <vm/vm_kern.h>
261 #include <machine/machspl.h>
262
263 #include <mach_debug.h>
264 #if MACH_DEBUG
265 #include <mach/kern_return.h>
266 #include <mach/machine/vm_types.h>
267 #include <mach_debug/zone_info.h>
268 #include <kern/host.h>
269 #include <vm/vm_map.h>
270 #include <vm/vm_user.h>
271 #include <vm/vm_kern.h>
272 #endif
273
274 #define ADD_TO_ZONE(zone, element) \
275 MACRO_BEGIN \
276 *((vm_offset_t *)(element)) = (zone)->free_elements; \
277 (zone)->free_elements = (vm_offset_t) (element); \
278 (zone)->count--; \
279 MACRO_END
280
281 #define REMOVE_FROM_ZONE(zone, ret, type) \
282 MACRO_BEGIN \
283 (ret) = (type) (zone)->free_elements; \
284 if ((ret) != (type) 0) { \
285 (zone)->count++; \
286 (zone)->free_elements = *((vm_offset_t *)(ret)); \
287 } \
288 MACRO_END
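/*
 * Both macros keep the free list inside the free elements themselves: the
 * first word of a free element holds the (vm_offset_t) link to the next
 * free element, so no separate bookkeeping storage is needed.  This is why
 * zinit (below) rounds elem_size up to a multiple of
 * sizeof(z->free_elements): every element must be able to hold one link
 * while it sits on the free list.
 */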
289
290 /*
291 * Support for garbage collection of unused zone pages:
292 */
293
294 struct zone_page_table_entry {
295 struct zone_page_table_entry *next;
296 short in_free_list;
297 short alloc_count;
298 };
299
300 extern struct zone_page_table_entry * zone_page_table;
301 extern vm_offset_t zone_map_min_address;
302
303 #define lock_zone_page_table() simple_lock(&zone_page_table_lock)
304 #define unlock_zone_page_table() simple_unlock(&zone_page_table_lock)
305
306 #define zone_page(addr) \
307 (&zone_page_table[atop(((vm_offset_t)addr) - zone_map_min_address)])
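/*
 * zone_page(addr) maps an address inside the zone_map to its entry in
 * zone_page_table: the byte offset from zone_map_min_address is converted
 * to a page index with atop().
 */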
308
309
310 void zone_page_alloc(
311 vm_offset_t addr,
312 vm_size_t size); /* forward */
313 void zone_page_dealloc(
314 vm_offset_t addr,
315 vm_size_t size); /* forward */
316 void zone_page_in_use(
317 vm_offset_t addr,
318 vm_size_t size); /* forward */
319 void zone_page_free(
320 vm_offset_t addr,
321 vm_size_t size); /* forward */
322
323 zone_t zone_zone; /* this is the zone containing other zones */
324
325 boolean_t zone_ignore_overflow = TRUE;
326
327 vm_map_t zone_map = VM_MAP_NULL;
328 vm_size_t zone_map_size = 12 * 1024 * 1024;
329
330 /*
331 * The VM system gives us an initial chunk of memory.
332 * It has to be big enough to allocate the zone_zone
333 * and some initial kernel data structures, like kernel maps.
334 * It is advantageous to make it bigger than really necessary,
335 * because this memory is more efficient than normal kernel
336 * virtual memory. (It doesn't have vm_page structures backing it
337 * and it may have other machine-dependent advantages.)
338 * So for best performance, zdata_size should approximate
339 * the amount of memory you expect the zone system to consume.
340 */
341
342 vm_offset_t zdata;
343 vm_size_t zdata_size = 420 * 1024;
344
345 #define lock_zone(zone) \
346 MACRO_BEGIN \
347 if (zone->pageable) { \
348 lock_write(&zone->complex_lock); \
349 } else { \
350 simple_lock(&zone->lock); \
351 } \
352 MACRO_END
353
354 #define unlock_zone(zone) \
355 MACRO_BEGIN \
356 if (zone->pageable) { \
357 lock_done(&zone->complex_lock); \
358 } else { \
359 simple_unlock(&zone->lock); \
360 } \
361 MACRO_END
362
363 #define lock_zone_init(zone) \
364 MACRO_BEGIN \
365 if (zone->pageable) { \
366 lock_init(&zone->complex_lock, TRUE); \
367 } else { \
368 simple_lock_init(&zone->lock); \
369 } \
370 MACRO_END
371
372 vm_offset_t zget_space(vm_offset_t size);
373
374 decl_simple_lock_data(,zget_space_lock)
375 vm_offset_t zalloc_next_space;
376 vm_offset_t zalloc_end_of_space;
377 vm_size_t zalloc_wasted_space;
378
379 /*
380 * Garbage collection map information
381 */
382 decl_simple_lock_data(,zone_page_table_lock)
383 struct zone_page_table_entry * zone_page_table;
384 vm_offset_t zone_map_min_address;
385 vm_offset_t zone_map_max_address;
386 int zone_pages;
387
388 void zone_page_init(
389 vm_offset_t addr,
390 vm_size_t size,
391 int value); /* forward */
392
393 #define ZONE_PAGE_USED 0
394 #define ZONE_PAGE_UNUSED -1
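/*
 * An alloc_count of ZONE_PAGE_UNUSED (-1) marks a page that is not
 * currently backing zone elements; zone_init starts the whole range that
 * way, and zone_add_free_page_list restores it when a page is reclaimed.
 * zone_page_alloc and zone_page_free check for the sentinel and reset
 * their respective counts to 1 rather than incrementing -1.
 */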
395
396
397 /*
398 * Protects first_zone, last_zone, num_zones,
399 * and the next_zone field of zones.
400 */
401 decl_simple_lock_data(,all_zones_lock)
402 zone_t first_zone;
403 zone_t *last_zone;
404 int num_zones;
405
406 /*
407 * zinit initializes a new zone. The zone data structures themselves
408 * are stored in a zone (zone_zone), which zone_bootstrap creates from
409 * the initial zdata region before zone_init runs.
410 */
411 zone_t zinit(
412 vm_size_t size, /* the size of an element */
413 vm_size_t max, /* maximum memory to use */
414 vm_size_t alloc, /* allocation size */
415 boolean_t pageable, /* is this zone pageable? */
416 char *name) /* a name for the zone */
417 {
418 register zone_t z;
419
420 if (zone_zone == ZONE_NULL)
421 z = (zone_t) zget_space(sizeof(struct zone));
422 else
423 z = (zone_t) zalloc(zone_zone);
424 if (z == ZONE_NULL)
425 panic("zinit");
426
427 if (alloc == 0)
428 alloc = PAGE_SIZE;
429
430 if (size == 0)
431 size = sizeof(z->free_elements);
432 /*
433 * Round off all the parameters appropriately.
434 */
435
436 if ((max = round_page(max)) < (alloc = round_page(alloc)))
437 max = alloc;
438
439 z->free_elements = 0;
440 z->cur_size = 0;
441 z->max_size = max;
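	/*
	 * Round elem_size up to a multiple of sizeof(z->free_elements) so
	 * that every element can hold the free-list link used by
	 * ADD_TO_ZONE/REMOVE_FROM_ZONE.  For example, with a 4-byte
	 * vm_offset_t, a requested size of 10 becomes an elem_size of 12.
	 */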
442 z->elem_size = ((size-1) + sizeof(z->free_elements)) -
443 ((size-1) % sizeof(z->free_elements));
444
445 z->alloc_size = alloc;
446 z->pageable = pageable;
447 z->zone_name = name;
448 z->count = 0;
449 z->doing_alloc = FALSE;
450 z->exhaustible = z->sleepable = FALSE;
451 z->collectable = FALSE;
452 z->expandable = TRUE;
453 lock_zone_init(z);
454
455 /*
456 * Add the zone to the all-zones list.
457 */
458
459 z->next_zone = ZONE_NULL;
460 simple_lock(&all_zones_lock);
461 *last_zone = z;
462 last_zone = &z->next_zone;
463 num_zones++;
464 simple_unlock(&all_zones_lock);
465
466 return z;
467 }
468
469 /*
470 * Cram the given memory into the specified zone.
471 */
472 void zcram(
473 register zone_t zone,
474 vm_offset_t newmem,
475 vm_size_t size)
476 {
477 register vm_size_t elem_size;
478
479 if (newmem == (vm_offset_t) 0) {
480 panic("zcram - memory at zero");
481 }
482 elem_size = zone->elem_size;
483
484 lock_zone(zone);
485 while (size >= elem_size) {
486 ADD_TO_ZONE(zone, newmem);
487 zone_page_alloc(newmem, elem_size);
488 zone->count++; /* compensate for ADD_TO_ZONE */
489 size -= elem_size;
490 newmem += elem_size;
491 zone->cur_size += elem_size;
492 }
493 unlock_zone(zone);
494 }
495
496 /*
497 * Contiguous space allocator for non-paged zones. Allocates "size" amount
498 * of memory from zone_map.
499 */
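/*
 * Note that space handed out by zget_space is permanent: there is no
 * corresponding free routine that returns it to the zone_map.  When a
 * freshly wired chunk turns out not to be contiguous with the previous
 * one, the unused tail of the old chunk is abandoned and accounted for in
 * zalloc_wasted_space.
 */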
500
501 vm_offset_t zget_space(
502 vm_offset_t size)
503 {
504 vm_offset_t new_space = 0;
505 vm_offset_t result;
506 vm_size_t space_to_add = 0; /*'=0' to quiet gcc warnings */
507
508 simple_lock(&zget_space_lock);
509 while ((zalloc_next_space + size) > zalloc_end_of_space) {
510 /*
511 * Add at least one page to allocation area.
512 */
513
514 space_to_add = round_page(size);
515
516 if (new_space == 0) {
517 /*
518 * Memory cannot be wired down while holding
519 * any locks that the pageout daemon might
520 * need to free up pages. [Making the zget_space
521 * lock a complex lock does not help in this
522 * regard.]
523 *
524 * Unlock and allocate memory. Because several
525 * threads might try to do this at once, don't
526 * use the memory before checking for available
527 * space again.
528 */
529
530 simple_unlock(&zget_space_lock);
531
532 if (kmem_alloc_wired(zone_map,
533 &new_space, space_to_add)
534 != KERN_SUCCESS)
535 return 0;
536 zone_page_init(new_space, space_to_add,
537 ZONE_PAGE_USED);
538 simple_lock(&zget_space_lock);
539 continue;
540 }
541
542
543 /*
544 * Memory was allocated in a previous iteration.
545 *
546 * Check whether the new region is contiguous
547 * with the old one.
548 */
549
550 if (new_space != zalloc_end_of_space) {
551 /*
552 * Throw away the remainder of the
553 * old space, and start a new one.
554 */
555 zalloc_wasted_space +=
556 zalloc_end_of_space - zalloc_next_space;
557 zalloc_next_space = new_space;
558 }
559
560 zalloc_end_of_space = new_space + space_to_add;
561
562 new_space = 0;
563 }
564 result = zalloc_next_space;
565 zalloc_next_space += size;
566 simple_unlock(&zget_space_lock);
567
568 if (new_space != 0)
569 kmem_free(zone_map, new_space, space_to_add);
570
571 return result;
572 }
573
574
575 /*
576 * Initialize the "zone of zones" which uses fixed memory allocated
577 * earlier in memory initialization. zone_bootstrap is called
578 * before zone_init.
579 */
580 void zone_bootstrap(void)
581 {
582 simple_lock_init(&all_zones_lock);
583 first_zone = ZONE_NULL;
584 last_zone = &first_zone;
585 num_zones = 0;
586
587 simple_lock_init(&zget_space_lock);
588 zalloc_next_space = zdata;
589 zalloc_end_of_space = zdata + zdata_size;
590 zalloc_wasted_space = 0;
591
592 zone_zone = ZONE_NULL;
593 zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
594 sizeof(struct zone), FALSE, "zones");
595 }
596
597 void zone_init(void)
598 {
599 vm_offset_t zone_min;
600 vm_offset_t zone_max;
601
602 vm_size_t zone_table_size;
603
604 zone_map = kmem_suballoc(kernel_map, &zone_min, &zone_max,
605 zone_map_size, FALSE);
606
607 /*
608 * Setup garbage collection information:
609 */
610
611 zone_table_size = atop(zone_max - zone_min) *
612 sizeof(struct zone_page_table_entry);
613 if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
614 zone_table_size) != KERN_SUCCESS)
615 panic("zone_init");
616 zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
617 zone_pages = atop(zone_max - zone_min);
618 zone_map_min_address = zone_min;
619 zone_map_max_address = zone_max;
620 simple_lock_init(&zone_page_table_lock);
621 zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
622 }
623
624
625 /*
626 * zalloc returns an element from the specified zone.
627 */
628 vm_offset_t zalloc(
629 register zone_t zone)
630 {
631 vm_offset_t addr;
632
633 if (zone == ZONE_NULL)
634 panic("zalloc: null zone");
635
636 check_simple_locks();
637
638 lock_zone(zone);
639 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
640 while (addr == 0) {
641 /*
642 * If nothing was there, try to get more
643 */
644 if (zone->doing_alloc) {
645 /*
646 * Someone is allocating memory for this zone.
647 * Wait for it to show up, then try again.
648 */
649 assert_wait((event_t)&zone->doing_alloc, TRUE);
650 /* XXX say wakeup needed */
651 unlock_zone(zone);
652 thread_block(CONTINUE_NULL);
653 lock_zone(zone);
654 }
655 else {
656 if ((zone->cur_size + (zone->pageable ?
657 zone->alloc_size : zone->elem_size)) >
658 zone->max_size) {
659 if (zone->exhaustible)
660 break;
661 /*
662 * Printf calls logwakeup, which calls
663 * select_wakeup, which does a zfree
664 * (which tries to take the select_zone
665 * lock), causing a hang.  Release the
666 * lock now so it can be taken again later.
667 * NOTE: this used to be specific to
668 * the select_zone, but for
669 * cleanliness, we just unlock all
670 * zones before this.
671 */
672 if (zone->expandable) {
673 /*
674 * We're willing to overflow certain
675 * zones, but not without complaining.
676 *
677 * This is best used in conjunction
678 * with the collectable flag. What we
679 * want is an assurance we can get the
680 * memory back, assuming there's no
681 * leak.
682 */
683 zone->max_size += (zone->max_size >> 1);
684 } else if (!zone_ignore_overflow) {
685 unlock_zone(zone);
686 panic("zalloc: zone \"%s\" empty.\n",
687 zone->zone_name);
688 }
689 }
690
691 if (zone->pageable)
692 zone->doing_alloc = TRUE;
693 unlock_zone(zone);
694
695 if (zone->pageable) {
696 if (kmem_alloc_pageable(zone_map, &addr,
697 zone->alloc_size)
698 != KERN_SUCCESS)
699 panic("zalloc");
700 zcram(zone, addr, zone->alloc_size);
701 lock_zone(zone);
702 zone->doing_alloc = FALSE;
703 /* XXX check before doing this */
704 thread_wakeup((event_t)&zone->doing_alloc);
705
706 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
707 } else if (zone->collectable) {
708 if (kmem_alloc_wired(zone_map,
709 &addr, zone->alloc_size)
710 != KERN_SUCCESS)
711 panic("zalloc");
712 zone_page_init(addr, zone->alloc_size,
713 ZONE_PAGE_USED);
714 zcram(zone, addr, zone->alloc_size);
715 lock_zone(zone);
716 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
717 } else {
718 addr = zget_space(zone->elem_size);
719 if (addr == 0)
720 panic("zalloc");
721
722 lock_zone(zone);
723 zone->count++;
724 zone->cur_size += zone->elem_size;
725 unlock_zone(zone);
726 zone_page_alloc(addr, zone->elem_size);
727 return addr;
728 }
729 }
730 }
731
732 unlock_zone(zone);
733 return addr;
734 }
735
736
737 /*
738 * zget returns an element from the specified zone
739 * or immediately returns zero if the zone has no free elements.
740 *
741 * This form should be used when you cannot block (for example, when
742 * processing an interrupt).
743 */
744 vm_offset_t zget(
745 register zone_t zone)
746 {
747 register vm_offset_t addr;
748
749 if (zone == ZONE_NULL)
750 panic("zget: null zone");
751
752 lock_zone(zone);
753 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
754 unlock_zone(zone);
755
756 return addr;
757 }
758
759 boolean_t zone_check = FALSE;
760
761 void zfree(
762 register zone_t zone,
763 vm_offset_t elem)
764 {
765 lock_zone(zone);
766 if (zone_check) {
767 vm_offset_t this;
768
769 /* check the zone's consistency */
770
771 for (this = zone->free_elements;
772 this != 0;
773 this = * (vm_offset_t *) this)
774 if (this == elem)
775 panic("zfree");
776 }
777 ADD_TO_ZONE(zone, elem);
778 unlock_zone(zone);
779 }
780
781 void zcollectable(
782 zone_t zone)
783 {
784 zone->collectable = TRUE;
785 }
786
787 void zchange(
788 zone_t zone,
789 boolean_t pageable,
790 boolean_t sleepable,
791 boolean_t exhaustible,
792 boolean_t collectable)
793 {
794 zone->pageable = pageable;
795 zone->sleepable = sleepable;
796 zone->exhaustible = exhaustible;
797 zone->collectable = collectable;
798 lock_zone_init(zone);
799 }
800
801 /*
802 * Zone garbage collection subroutines
803 *
804 * These routines have in common the modification of entries in the
805 * zone_page_table. The latter contains one entry for every page
806 * in the zone_map.
807 *
808 * For each page table entry in the given range:
809 *
810 * zone_page_in_use - decrements in_free_list
811 * zone_page_free - increments in_free_list
812 * zone_page_init - initializes in_free_list and alloc_count
813 * zone_page_alloc - increments alloc_count
814 * zone_page_dealloc - decrements alloc_count
815 * zone_add_free_page_list - adds the page to the free list
816 *
817 * Two counts are maintained for each page, the in_free_list count and
818 * alloc_count. The alloc_count is how many zone elements have been
819 * allocated from a page. (Note that the page could contain elements
820 * that span page boundaries. The count includes these elements so
821 * one element may be counted in two pages.) In_free_list is a count
822 * of how many zone elements are currently free. If in_free_list is
823 * equal to alloc_count then the page is eligible for garbage
824 * collection.
825 *
826 * Alloc_count and in_free_list are initialized to the correct values
827 * for a particular zone when a page is zcram'ed into a zone. Subsequent
828 * gets and frees of zone elements will call zone_page_in_use and
829 * zone_page_free, which modify the in_free_list count. When the zone
830 * garbage collector runs, it walks through a zone's free element list,
831 * remove the elements that reside on collectable pages, and use
832 * zone_add_free_page_list to create a list of pages to be collected.
833 */
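/*
 * Concrete example (illustrative numbers): a 4096-byte page crammed into a
 * collectable zone with 1024-byte elements ends up with alloc_count == 4.
 * When zone_gc runs, it calls zone_page_free for every free element it
 * finds on the zone's free list; if all four elements of that page are
 * free, in_free_list reaches alloc_count, the elements are unlinked from
 * the free list, and the page is handed to zone_add_free_page_list and
 * ultimately returned to the zone_map with kmem_free.
 */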
834
835 void zone_page_in_use(
836 vm_offset_t addr,
837 vm_size_t size)
838 {
839 int i, j;
840 if ((addr < zone_map_min_address) ||
841 (addr+size > zone_map_max_address)) return;
842 i = atop(addr-zone_map_min_address);
843 j = atop((addr+size-1) - zone_map_min_address);
844 lock_zone_page_table();
845 for (; i <= j; i++) {
846 zone_page_table[i].in_free_list--;
847 }
848 unlock_zone_page_table();
849 }
850
851 void zone_page_free(
852 vm_offset_t addr,
853 vm_size_t size)
854 {
855 int i, j;
856 if ((addr < zone_map_min_address) ||
857 (addr+size > zone_map_max_address)) return;
858 i = atop(addr-zone_map_min_address);
859 j = atop((addr+size-1) - zone_map_min_address);
860 lock_zone_page_table();
861 for (; i <= j; i++) {
862 /* Set in_free_list to (ZONE_PAGE_USED + 1) if
863 * it was previously set to ZONE_PAGE_UNUSED.
864 */
865 if (zone_page_table[i].in_free_list == ZONE_PAGE_UNUSED) {
866 zone_page_table[i].in_free_list = 1;
867 } else {
868 zone_page_table[i].in_free_list++;
869 }
870 }
871 unlock_zone_page_table();
872 }
873
874 void zone_page_init(
875 vm_offset_t addr,
876 vm_size_t size,
877 int value)
878 {
879 int i, j;
880 if ((addr < zone_map_min_address) ||
881 (addr+size > zone_map_max_address)) return;
882 i = atop(addr-zone_map_min_address);
883 j = atop((addr+size-1) - zone_map_min_address);
884 lock_zone_page_table();
885 for (; i <= j; i++) {
886 zone_page_table[i].alloc_count = value;
887 zone_page_table[i].in_free_list = 0;
888 }
889 unlock_zone_page_table();
890 }
891
892 void zone_page_alloc(
893 vm_offset_t addr,
894 vm_size_t size)
895 {
896 int i, j;
897 if ((addr < zone_map_min_address) ||
898 (addr+size > zone_map_max_address)) return;
899 i = atop(addr-zone_map_min_address);
900 j = atop((addr+size-1) - zone_map_min_address);
901 lock_zone_page_table();
902 for (; i <= j; i++) {
903 /* Set alloc_count to (ZONE_PAGE_USED + 1) if
904 * it was previously set to ZONE_PAGE_UNUSED.
905 */
906 if (zone_page_table[i].alloc_count == ZONE_PAGE_UNUSED) {
907 zone_page_table[i].alloc_count = 1;
908 } else {
909 zone_page_table[i].alloc_count++;
910 }
911 }
912 unlock_zone_page_table();
913 }
914
915 void zone_page_dealloc(
916 vm_offset_t addr,
917 vm_size_t size)
918 {
919 int i, j;
920 if ((addr < zone_map_min_address) ||
921 (addr+size > zone_map_max_address)) return;
922 i = atop(addr-zone_map_min_address);
923 j = atop((addr+size-1) - zone_map_min_address);
924 lock_zone_page_table();
925 for (; i <= j; i++) {
926 zone_page_table[i].alloc_count--;
927 }
928 unlock_zone_page_table();
929 }
930
931 void
932 zone_add_free_page_list(
933 struct zone_page_table_entry **free_list,
934 vm_offset_t addr,
935 vm_size_t size)
936 {
937 int i, j;
938 if ((addr < zone_map_min_address) ||
939 (addr+size > zone_map_max_address)) return;
940 i = atop(addr-zone_map_min_address);
941 j = atop((addr+size-1) - zone_map_min_address);
942 lock_zone_page_table();
943 for (; i <= j; i++) {
944 if (zone_page_table[i].alloc_count == 0) {
945 zone_page_table[i].next = *free_list;
946 *free_list = &zone_page_table[i];
947 zone_page_table[i].alloc_count = ZONE_PAGE_UNUSED;
948 zone_page_table[i].in_free_list = 0;
949 }
950 }
951 unlock_zone_page_table();
952 }
953
954
955 /* This is used for walking through a zone's free element list.
956 */
957 struct zone_free_entry {
958 struct zone_free_entry * next;
959 };
960
961
962 /* Zone garbage collection
963 *
964 * zone_gc will walk through all the free elements in all the
965 * zones that are marked collectable looking for reclaimable
966 * pages. zone_gc is called by consider_zone_gc when the system
967 * begins to run out of memory.
968 */
969 void
970 zone_gc(void)
971 {
972 int max_zones;
973 zone_t z;
974 int i;
975 register spl_t s;
976 struct zone_page_table_entry *freep;
977 struct zone_page_table_entry *zone_free_page_list;
978
979 simple_lock(&all_zones_lock);
980 max_zones = num_zones;
981 z = first_zone;
982 simple_unlock(&all_zones_lock);
983
984 zone_free_page_list = (struct zone_page_table_entry *) 0;
985
986 for (i = 0; i < max_zones; i++) {
987 struct zone_free_entry * last;
988 struct zone_free_entry * elt;
989 assert(z != ZONE_NULL);
990 /* run this at splhigh so that interrupt routines that use zones
991 cannot interrupt while their zone is locked */
992 s = splhigh();
993 lock_zone(z);
994
995 if (!z->pageable && z->collectable) {
996
997 /* Count the free elements in each page. This loop
998 * requires that all in_free_list entries are zero.
999 */
1000 elt = (struct zone_free_entry *)(z->free_elements);
1001 while ((elt != (struct zone_free_entry *)0)) {
1002 zone_page_free((vm_offset_t)elt, z->elem_size);
1003 elt = elt->next;
1004 }
1005
1006 /* Now determine which elements should be removed
1007 * from the free list and, after all the elements
1008 * on a page have been removed, add the element's
1009 * page to a list of pages to be freed.
1010 */
1011 elt = (struct zone_free_entry *)(z->free_elements);
1012 last = elt;
1013 while ((elt != (struct zone_free_entry *)0)) {
1014 if (((vm_offset_t)elt>=zone_map_min_address)&&
1015 ((vm_offset_t)elt<=zone_map_max_address)&&
1016 (zone_page(elt)->in_free_list ==
1017 zone_page(elt)->alloc_count)) {
1018
1019 z->cur_size -= z->elem_size;
1020 zone_page_in_use((vm_offset_t)elt, z->elem_size);
1021 zone_page_dealloc((vm_offset_t)elt, z->elem_size);
1022 if (zone_page(elt)->alloc_count == 0 ||
1023 zone_page(elt+(z->elem_size-1))->alloc_count==0) {
1024 zone_add_free_page_list(
1025 &zone_free_page_list,
1026 (vm_offset_t)elt, z->elem_size);
1027 }
1028
1029
1030 if (elt == last) {
1031 elt = elt->next;
1032 z->free_elements =(vm_offset_t)elt;
1033 last = elt;
1034 } else {
1035 last->next = elt->next;
1036 elt = elt->next;
1037 }
1038 } else {
1039 /* This element is not eligible for collection
1040 * so clear in_free_list in preparation for a
1041 * subsequent garbage collection pass.
1042 */
1043 if (((vm_offset_t)elt>=zone_map_min_address)&&
1044 ((vm_offset_t)elt<=zone_map_max_address)) {
1045 zone_page(elt)->in_free_list = 0;
1046 }
1047 last = elt;
1048 elt = elt->next;
1049 }
1050 }
1051 }
1052 unlock_zone(z);
1053 splx(s);
1054 simple_lock(&all_zones_lock);
1055 z = z->next_zone;
1056 simple_unlock(&all_zones_lock);
1057 }
1058
1059 for (freep = zone_free_page_list; freep != 0; freep = freep->next) {
1060 vm_offset_t free_addr;
1061
1062 free_addr = zone_map_min_address +
1063 PAGE_SIZE * (freep - zone_page_table);
1064 kmem_free(zone_map, free_addr, PAGE_SIZE);
1065 }
1066 }
1067
1068 boolean_t zone_gc_allowed = TRUE;
1069 unsigned zone_gc_last_tick = 0;
1070 unsigned zone_gc_max_rate = 0; /* in ticks */
1071
1072 /*
1073 * consider_zone_gc:
1074 *
1075 * Called by the pageout daemon when the system needs more free pages.
1076 */
1077
1078 void
1079 consider_zone_gc(void)
1080 {
1081 /*
1082 * By default, don't attempt zone GC more frequently
1083 * than once a minute.
1084 */
1085
1086 if (zone_gc_max_rate == 0)
1087 zone_gc_max_rate = 60;
1088
1089 if (zone_gc_allowed &&
1090 (sched_tick > (zone_gc_last_tick + zone_gc_max_rate))) {
1091 zone_gc_last_tick = sched_tick;
1092 zone_gc();
1093 }
1094 }
1095
1096 #if MACH_DEBUG
1097 kern_return_t host_zone_info(
1098 host_t host,
1099 zone_name_array_t *namesp,
1100 unsigned int *namesCntp,
1101 zone_info_array_t *infop,
1102 unsigned int *infoCntp)
1103 {
1104 zone_name_t *names;
1105 vm_offset_t names_addr;
1106 vm_size_t names_size = 0; /*'=0' to quiet gcc warnings */
1107 zone_info_t *info;
1108 vm_offset_t info_addr;
1109 vm_size_t info_size = 0; /*'=0' to quiet gcc warnings */
1110 unsigned int max_zones, i;
1111 zone_t z;
1112 kern_return_t kr;
1113
1114 if (host == HOST_NULL)
1115 return KERN_INVALID_HOST;
1116
1117 /*
1118 * We assume that zones aren't freed once allocated.
1119 * We won't pick up any zones that are allocated later.
1120 */
1121
1122 simple_lock(&all_zones_lock);
1123 max_zones = num_zones;
1124 z = first_zone;
1125 simple_unlock(&all_zones_lock);
1126
1127 if (max_zones <= *namesCntp) {
1128 /* use in-line memory */
1129
1130 names = *namesp;
1131 } else {
1132 names_size = round_page(max_zones * sizeof *names);
1133 kr = kmem_alloc_pageable(ipc_kernel_map,
1134 &names_addr, names_size);
1135 if (kr != KERN_SUCCESS)
1136 return kr;
1137
1138 names = (zone_name_t *) names_addr;
1139 }
1140
1141 if (max_zones <= *infoCntp) {
1142 /* use in-line memory */
1143
1144 info = *infop;
1145 } else {
1146 info_size = round_page(max_zones * sizeof *info);
1147 kr = kmem_alloc_pageable(ipc_kernel_map,
1148 &info_addr, info_size);
1149 if (kr != KERN_SUCCESS) {
1150 if (names != *namesp)
1151 kmem_free(ipc_kernel_map,
1152 names_addr, names_size);
1153 return kr;
1154 }
1155
1156 info = (zone_info_t *) info_addr;
1157 }
1158
1159 for (i = 0; i < max_zones; i++) {
1160 zone_name_t *zn = &names[i];
1161 zone_info_t *zi = &info[i];
1162 struct zone zcopy;
1163
1164 assert(z != ZONE_NULL);
1165
1166 lock_zone(z);
1167 zcopy = *z;
1168 unlock_zone(z);
1169
1170 simple_lock(&all_zones_lock);
1171 z = z->next_zone;
1172 simple_unlock(&all_zones_lock);
1173
1174 /* assuming here the name data is static */
1175 (void) strncpy(zn->zn_name, zcopy.zone_name,
1176 sizeof zn->zn_name);
1177
1178 zi->zi_count = zcopy.count;
1179 zi->zi_cur_size = zcopy.cur_size;
1180 zi->zi_max_size = zcopy.max_size;
1181 zi->zi_elem_size = zcopy.elem_size;
1182 zi->zi_alloc_size = zcopy.alloc_size;
1183 zi->zi_pageable = zcopy.pageable;
1184 zi->zi_sleepable = zcopy.sleepable;
1185 zi->zi_exhaustible = zcopy.exhaustible;
1186 zi->zi_collectable = zcopy.collectable;
1187 }
1188
1189 if (names != *namesp) {
1190 vm_size_t used;
1191 vm_map_copy_t copy;
1192
1193 used = max_zones * sizeof *names;
1194
1195 if (used != names_size)
1196 bzero((char *) (names_addr + used), names_size - used);
1197
1198 kr = vm_map_copyin(ipc_kernel_map, names_addr, names_size,
1199 TRUE, &copy);
1200 assert(kr == KERN_SUCCESS);
1201
1202 *namesp = (zone_name_t *) copy;
1203 }
1204 *namesCntp = max_zones;
1205
1206 if (info != *infop) {
1207 vm_size_t used;
1208 vm_map_copy_t copy;
1209
1210 used = max_zones * sizeof *info;
1211
1212 if (used != info_size)
1213 bzero((char *) (info_addr + used), info_size - used);
1214
1215 kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size,
1216 TRUE, &copy);
1217 assert(kr == KERN_SUCCESS);
1218
1219 *infop = (zone_info_t *) copy;
1220 }
1221 *infoCntp = max_zones;
1222
1223 return KERN_SUCCESS;
1224 }
1225 #endif /* MACH_DEBUG */