FreeBSD/Linux Kernel Cross Reference
sys/kern/zalloc.c
1 /*
2 * Mach Operating System
3 * Copyright (c) 1993,1991,1990,1989,1988,1987 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
19 * School of Computer Science
20 * Carnegie Mellon University
21 * Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie Mellon
24 * the rights to redistribute these changes.
25 */
26 /*
27 * HISTORY
28 * $Log: zalloc.c,v $
29 * Revision 2.20 93/05/15 18:56:28 mrt
30 * machparam.h -> machspl.h
31 *
32 * Revision 2.19 93/01/14 17:37:34 danner
33 * Fixed casts of assert_wait and thread_wakeup arguments.
34 * [93/01/12 danner]
35 * 64bit cleanup. Proper spl typing.
36 * [92/12/01 af]
37 *
38 * Revision 2.18 92/08/03 17:40:37 jfriedl
39 * removed silly prototypes
40 * [92/08/02 jfriedl]
41 *
42 * Revision 2.17 92/05/21 17:17:28 jfriedl
43 * Added stuff to quiet some gcc warnings.
44 * [92/05/16 jfriedl]
45 *
46 * Revision 2.16 92/02/23 19:49:58 elf
47 * Eliminate keep_wired argument from vm_map_copyin().
48 * [92/02/21 10:13:57 dlb]
49 *
50 * Revision 2.14.7.1 92/02/18 19:07:08 jeffreyh
51 * Increased zone_map_size for 2 servers
52 * [91/08/30 bernadat]
53 *
54 * Revision 2.15 92/01/14 16:45:03 rpd
55 * Changed host_zone_info for CountInOut.
56 * [92/01/14 rpd]
57 *
58 * Revision 2.14 91/05/18 14:34:46 rpd
59 * Added check_simple_locks.
60 * Moved ADD_TO_ZONE, REMOVE_FROM_ZONE here.
61 * Moved extraneous zone GC declarations here.
62 * [91/03/31 rpd]
63 *
64 * Minor cleanup in zget_space.
65 * [91/03/28 rpd]
66 * Changed to use zdata to initialize zalloc_next_space.
67 * [91/03/22 rpd]
68 *
69 * Revision 2.13 91/05/14 16:50:36 mrt
70 * Correcting copyright
71 *
72 * Revision 2.12 91/03/16 14:53:32 rpd
73 * Updated for new kmem_alloc interface.
74 * [91/03/03 rpd]
75 * Added continuation argument to thread_block.
76 * [91/02/16 rpd]
77 *
78 * Revision 2.11 91/02/05 17:31:25 mrt
79 * Changed to new Mach copyright
80 * [91/02/01 16:21:52 mrt]
81 *
82 * Revision 2.10 91/01/08 15:18:28 rpd
83 * Added zalloc_wasted_space.
84 * [91/01/06 rpd]
85 * Removed COLLECT_ZONE_GARBAGE.
86 * [91/01/03 rpd]
87 *
88 * Changed zinit to make zones by default *not* collectable.
89 * [90/12/29 rpd]
90 * Added consider_zone_gc.
91 * [90/11/11 rpd]
92 *
93 * Revision 2.9 90/12/20 16:39:11 jeffreyh
94 * [90/12/19 10:36:55 jeffreyh]
95 *
96 * 10-Dec-90 Jeffrey Heller (jeffreyh) at OSF
97 * Merge in changes from OSF/1 done by jvs@osf
98 * Zone's are now collectable by default,
99 * zchange now takes a collectable argument
100 * include machine/machparam.h for splhigh
101 *
102 * Revision 2.8 90/11/05 14:32:08 rpd
103 * Added zone_check option to zfree.
104 * [90/10/29 rpd]
105 *
106 * Revision 2.7 90/06/19 22:59:49 rpd
107 * Added zi_collectable field to zone_info structure.
108 * [90/06/05 rpd]
109 *
110 * Revision 2.6 90/06/02 14:57:28 rpd
111 * Made zone_ignore_overflow TRUE by default.
112 * When a zone overflows, increase its max_size.
113 * [90/05/11 17:00:24 rpd]
114 *
115 * Added host_zone_info.
116 * [90/03/26 22:28:05 rpd]
117 *
118 * Revision 2.5 90/05/03 15:47:04 dbg
119 * Add host_zone_info.
120 * [90/04/06 dbg]
121 *
122 * Revision 2.4 90/02/22 20:04:23 dbg
123 * Initialize zone_page_table_lock before using it.
124 * [90/02/16 dbg]
125 *
126 * Revision 2.3 89/11/29 14:09:25 af
127 * Nullify zone_page_alloc/init if 'garbage' not here.
128 * [89/10/29 14:23:56 af]
129 *
 130  * 	Could not compile without the 'garbage' thing because a definition was missing.
131 * [89/10/29 09:35:22 af]
132 *
133 * Revision 2.2 89/08/11 17:56:21 rwd
134 * Added collectible zones. Collectible zones allow storage to be
135 * returned to system via kmem_free when pages are no longer used.
136 * This option should only be used when zone memory is virtual
137 * (rather than physical as in a MIPS architecture).
138 * [89/07/22 rfr]
139 *
140 * Revision 2.11 89/05/30 10:38:40 rvb
141 * Make zalloc storage pointers external, so they can be initialized from
142 * the outside.
143 * [89/05/30 08:28:14 rvb]
144 *
145 * Revision 2.10 89/05/11 14:41:30 gm0w
146 * Keep all zones on a list that host_zone_info can traverse.
147 * This fixes a bug in host_zone_info: it would try to lock
148 * uninitialized zones. Fixed zinit to round elem_size up
149 * to a multiple of four. This prevents zget_space from handing
150 * out improperly aligned objects.
151 * [89/05/08 21:34:17 rpd]
152 *
153 * Revision 2.9 89/05/06 15:47:11 rpd
154 * From jsb: Added missing argument to kmem_free in zget_space.
155 *
156 * Revision 2.8 89/05/06 02:57:35 rpd
157 * Added host_zone_info (under MACH_DEBUG).
158 * Fixed zget to increase cur_size when the space comes from zget_space.
159 * Use MACRO_BEGIN/MACRO_END, decl_simple_lock_data where appropriate.
160 * [89/05/06 02:43:29 rpd]
161 *
162 * Revision 2.7 89/04/18 16:43:20 mwyoung
163 * Document zget_space. Eliminate MACH_XP conditional.
164 * [89/03/26 mwyoung]
165 * Make handling of zero allocation size unconditional. Clean up
166 * allocation code.
167 * [89/03/16 mwyoung]
168 *
169 * Revision 2.6 89/03/15 15:04:46 gm0w
170 * Picked up code from rfr to allocate data from non pageable zones
171 * from a single pool.
172 * [89/03/09 mrt]
173 *
174 * Revision 2.5 89/03/09 20:17:50 rpd
175 * More cleanup.
176 *
177 * Revision 2.4 89/02/25 18:11:15 gm0w
178 * Changes for cleanup.
179 *
180 * Revision 2.3 89/01/18 00:50:51 jsb
181 * Vnode support: interpret allocation size of zero in zinit as meaning
182 * PAGE_SIZE.
183 * [89/01/17 20:57:39 jsb]
184 *
185 * Revision 2.2 88/12/19 02:48:41 mwyoung
186 * Fix include file references.
187 * [88/12/19 00:33:03 mwyoung]
188 *
189 * Add and use zone_ignore_overflow.
190 * [88/12/14 mwyoung]
191 *
192 * 8-Jan-88 Rick Rashid (rfr) at Carnegie-Mellon University
193 * Made pageable zones really pageable. Turned spin locks to sleep
194 * locks for pageable zones.
195 *
196 * 30-Dec-87 David Golub (dbg) at Carnegie-Mellon University
197 * Delinted.
198 *
199 * 20-Oct-87 Michael Young (mwyoung) at Carnegie-Mellon University
200 * Allocate zone memory from a separate kernel submap, to avoid
201 * sleeping with the kernel_map locked.
202 *
203 * 1-Oct-87 Michael Young (mwyoung) at Carnegie-Mellon University
204 * Added zchange().
205 *
206 * 30-Sep-87 Richard Sanzi (sanzi) at Carnegie-Mellon University
207 * Deleted the printf() in zinit() which is called when zinit is
208 * creating a pageable zone.
209 *
210 * 12-Sep-87 Avadis Tevanian (avie) at Carnegie-Mellon University
211 * Modified to use list of elements instead of queues. Actually,
212 * this package now uses macros defined in zalloc.h which define
213 * the list semantics.
214 *
215 * 30-Mar-87 Avadis Tevanian (avie) at Carnegie-Mellon University
216 * Update zone's cur_size field when it is crammed (zcram).
217 *
218 * 23-Mar-87 Avadis Tevanian (avie) at Carnegie-Mellon University
219 * Only define zfree if there is no macro version.
220 *
221 * 17-Mar-87 David Golub (dbg) at Carnegie-Mellon University
222 * De-linted.
223 *
224 * 12-Feb-87 Robert Sansom (rds) at Carnegie Mellon University
225 * Added zget - no waiting version of zalloc.
226 *
227 * 22-Jan-87 Michael Young (mwyoung) at Carnegie-Mellon University
228 * De-linted.
229 *
230 * 12-Jan-87 Michael Young (mwyoung) at Carnegie-Mellon University
231 * Eliminated use of the old interlocked queuing package;
232 * random other cleanups.
233 *
234 * 9-Jun-85 Avadis Tevanian (avie) at Carnegie-Mellon University
235 * Created.
236 */
237 /*
238 * File: kern/zalloc.c
239 * Author: Avadis Tevanian, Jr.
240 *
241 * Zone-based memory allocator. A zone is a collection of fixed size
242 * data blocks for which quick allocation/deallocation is possible.
243 */
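/*
 * A minimal usage sketch (illustrative only; "struct foo" and "foo_zone"
 * are hypothetical names, not part of this file).  A subsystem first
 * creates a zone, then allocates and frees fixed-size elements from it:
 *
 *	zone_t foo_zone;
 *	struct foo *f;
 *
 *	foo_zone = zinit((vm_size_t) sizeof(struct foo),
 *			 (vm_size_t) (1024 * sizeof(struct foo)),
 *			 PAGE_SIZE, FALSE, "foo structures");
 *
 *	f = (struct foo *) zalloc(foo_zone);
 *	... use f ...
 *	zfree(foo_zone, (vm_offset_t) f);
 */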
244
245 #include <kern/macro_help.h>
246 #include <kern/sched.h>
247 #include <kern/time_out.h>
248 #include <kern/zalloc.h>
249 #include <mach/vm_param.h>
250 #include <vm/vm_kern.h>
251 #include <machine/machspl.h>
252
253 #include <mach_debug.h>
254 #if MACH_DEBUG
255 #include <mach/kern_return.h>
256 #include <mach/machine/vm_types.h>
257 #include <mach_debug/zone_info.h>
258 #include <kern/host.h>
259 #include <vm/vm_map.h>
260 #include <vm/vm_user.h>
261 #include <vm/vm_kern.h>
262 #endif
263
264 #define ADD_TO_ZONE(zone, element) \
265 MACRO_BEGIN \
266 *((vm_offset_t *)(element)) = (zone)->free_elements; \
267 (zone)->free_elements = (vm_offset_t) (element); \
268 (zone)->count--; \
269 MACRO_END
270
271 #define REMOVE_FROM_ZONE(zone, ret, type) \
272 MACRO_BEGIN \
273 (ret) = (type) (zone)->free_elements; \
274 if ((ret) != (type) 0) { \
275 (zone)->count++; \
276 (zone)->free_elements = *((vm_offset_t *)(ret)); \
277 } \
278 MACRO_END
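/*
 * Note that the free list needs no storage of its own: the first word of
 * each free element holds the address of the next free element (0 ends
 * the list), so ADD_TO_ZONE and REMOVE_FROM_ZONE are just pushes and pops
 * on that chain.  Written out as a function, the pop would look roughly
 * like this (hypothetical helper, for illustration only):
 *
 *	vm_offset_t zone_pop(z)
 *		zone_t z;
 *	{
 *		vm_offset_t e = z->free_elements;
 *
 *		if (e != (vm_offset_t) 0) {
 *			z->free_elements = *((vm_offset_t *) e);
 *			z->count++;
 *		}
 *		return(e);
 *	}
 */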
279
280 /*
281 * Support for garbage collection of unused zone pages:
282 */
283
284 struct zone_page_table_entry {
285 struct zone_page_table_entry *next;
286 short in_free_list;
287 short alloc_count;
288 };
289
290 extern struct zone_page_table_entry * zone_page_table;
291 extern vm_offset_t zone_map_min_address;
292
293 #define lock_zone_page_table() simple_lock(&zone_page_table_lock)
294 #define unlock_zone_page_table() simple_unlock(&zone_page_table_lock)
295
296 #define zone_page(addr) \
297 (&(zone_page_table[(atop(((vm_offset_t)addr) - zone_map_min_address))]))
298
299
300 extern void zone_page_alloc();
301 extern void zone_page_dealloc();
302 extern void zone_page_in_use();
303 extern void zone_page_free();
304
305 zone_t zone_zone; /* this is the zone containing other zones */
306
307 boolean_t zone_ignore_overflow = TRUE;
308
309 vm_map_t zone_map = VM_MAP_NULL;
310 vm_size_t zone_map_size = 12 * 1024 * 1024;
311
312 /*
313 * The VM system gives us an initial chunk of memory.
314 * It has to be big enough to allocate the zone_zone
315 * and some initial kernel data structures, like kernel maps.
316 * It is advantageous to make it bigger than really necessary,
317 * because this memory is more efficient than normal kernel
318 * virtual memory. (It doesn't have vm_page structures backing it
319 * and it may have other machine-dependent advantages.)
320 * So for best performance, zdata_size should approximate
321 * the amount of memory you expect the zone system to consume.
322 */
323
324 vm_offset_t zdata;
325 vm_size_t zdata_size = 420 * 1024;
326
327 #define lock_zone(zone) \
328 MACRO_BEGIN \
329 if (zone->pageable) { \
330 lock_write(&zone->complex_lock); \
331 } else { \
332 simple_lock(&zone->lock); \
333 } \
334 MACRO_END
335
336 #define unlock_zone(zone) \
337 MACRO_BEGIN \
338 if (zone->pageable) { \
339 lock_done(&zone->complex_lock); \
340 } else { \
341 simple_unlock(&zone->lock); \
342 } \
343 MACRO_END
344
345 #define lock_zone_init(zone) \
346 MACRO_BEGIN \
347 if (zone->pageable) { \
348 lock_init(&zone->complex_lock, TRUE); \
349 } else { \
350 simple_lock_init(&zone->lock); \
351 } \
352 MACRO_END
353
354 vm_offset_t zget_space();
355
356 decl_simple_lock_data(,zget_space_lock)
357 vm_offset_t zalloc_next_space;
358 vm_offset_t zalloc_end_of_space;
359 vm_size_t zalloc_wasted_space;
360
361 /*
362 * Garbage collection map information
363 */
364 decl_simple_lock_data(,zone_page_table_lock)
365 struct zone_page_table_entry * zone_page_table;
366 vm_offset_t zone_map_min_address;
367 vm_offset_t zone_map_max_address;
368 int zone_pages;
369
370 extern void zone_page_init();
371
372 #define ZONE_PAGE_USED 0
373 #define ZONE_PAGE_UNUSED -1
374
375
376 /*
377 * Protects first_zone, last_zone, num_zones,
378 * and the next_zone field of zones.
379 */
380 decl_simple_lock_data(,all_zones_lock)
381 zone_t first_zone;
382 zone_t *last_zone;
383 int num_zones;
384
385 /*
386 * zinit initializes a new zone. The zone data structures themselves
387 * are stored in a zone, which is initially a static structure that
388 * is initialized by zone_init.
389 */
390 zone_t zinit(size, max, alloc, pageable, name)
391 vm_size_t size; /* the size of an element */
392 vm_size_t max; /* maximum memory to use */
393 vm_size_t alloc; /* allocation size */
394 boolean_t pageable; /* is this zone pageable? */
395 char *name; /* a name for the zone */
396 {
397 register zone_t z;
398
399 if (zone_zone == ZONE_NULL)
400 z = (zone_t) zget_space(sizeof(struct zone));
401 else
402 z = (zone_t) zalloc(zone_zone);
403 if (z == ZONE_NULL)
404 panic("zinit");
405
406 if (alloc == 0)
407 alloc = PAGE_SIZE;
408
409 if (size == 0)
410 size = sizeof(z->free_elements);
411 /*
412 * Round off all the parameters appropriately.
413 */
414
415 if ((max = round_page(max)) < (alloc = round_page(alloc)))
416 max = alloc;
417
418 z->free_elements = 0;
419 z->cur_size = 0;
420 z->max_size = max;
421 z->elem_size = ((size-1) + sizeof(z->free_elements)) -
422 ((size-1) % sizeof(z->free_elements));
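	/*
	 * That is, round the element size up to a multiple of
	 * sizeof(z->free_elements), so that every free element can hold
	 * a free-list pointer; e.g. with 4-byte pointers a request of
	 * 10 bytes yields 12-byte elements.
	 */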
423
424 z->alloc_size = alloc;
425 z->pageable = pageable;
426 z->zone_name = name;
427 z->count = 0;
428 z->doing_alloc = FALSE;
429 z->exhaustible = z->sleepable = FALSE;
430 z->collectable = FALSE;
431 z->expandable = TRUE;
432 lock_zone_init(z);
433
434 /*
435 * Add the zone to the all-zones list.
436 */
437
438 z->next_zone = ZONE_NULL;
439 simple_lock(&all_zones_lock);
440 *last_zone = z;
441 last_zone = &z->next_zone;
442 num_zones++;
443 simple_unlock(&all_zones_lock);
444
445 return(z);
446 }
447
448 /*
449 * Cram the given memory into the specified zone.
450 */
451 void zcram(zone, newmem, size)
452 register zone_t zone;
453 vm_offset_t newmem;
454 vm_size_t size;
455 {
456 register vm_size_t elem_size;
457
458 if (newmem == (vm_offset_t) 0) {
459 panic("zcram - memory at zero");
460 }
461 elem_size = zone->elem_size;
462
463 lock_zone(zone);
464 while (size >= elem_size) {
465 ADD_TO_ZONE(zone, newmem);
466 zone_page_alloc(newmem, elem_size);
467 zone->count++; /* compensate for ADD_TO_ZONE */
468 size -= elem_size;
469 newmem += elem_size;
470 zone->cur_size += elem_size;
471 }
472 unlock_zone(zone);
473 }
474
475 /*
476 * Contiguous space allocator for non-paged zones. Allocates "size" amount
477 * of memory from zone_map.
478 */
479
480 vm_offset_t zget_space(size)
481 vm_offset_t size;
482 {
483 vm_offset_t new_space = 0;
484 vm_offset_t result;
485 vm_size_t space_to_add = 0; /*'=0' to quiet gcc warnings */
486
487 simple_lock(&zget_space_lock);
488 while ((zalloc_next_space + size) > zalloc_end_of_space) {
489 /*
490 * Add at least one page to allocation area.
491 */
492
493 space_to_add = round_page(size);
494
495 if (new_space == 0) {
496 /*
497 * Memory cannot be wired down while holding
498 * any locks that the pageout daemon might
499 * need to free up pages. [Making the zget_space
500 * lock a complex lock does not help in this
501 * regard.]
502 *
503 * Unlock and allocate memory. Because several
504 * threads might try to do this at once, don't
505 * use the memory before checking for available
506 * space again.
507 */
508
509 simple_unlock(&zget_space_lock);
510
511 if (kmem_alloc_wired(zone_map,
512 &new_space, space_to_add)
513 != KERN_SUCCESS)
514 return(0);
515 zone_page_init(new_space, space_to_add,
516 ZONE_PAGE_USED);
517 simple_lock(&zget_space_lock);
518 continue;
519 }
520
521
522 /*
523 * Memory was allocated in a previous iteration.
524 *
525 * Check whether the new region is contiguous
526 * with the old one.
527 */
528
529 if (new_space != zalloc_end_of_space) {
530 /*
531 * Throw away the remainder of the
532 * old space, and start a new one.
533 */
534 zalloc_wasted_space +=
535 zalloc_end_of_space - zalloc_next_space;
536 zalloc_next_space = new_space;
537 }
538
539 zalloc_end_of_space = new_space + space_to_add;
540
541 new_space = 0;
542 }
543 result = zalloc_next_space;
544 zalloc_next_space += size;
545 simple_unlock(&zget_space_lock);
546
547 if (new_space != 0)
548 kmem_free(zone_map, new_space, space_to_add);
549
550 return(result);
551 }
552
553
554 /*
555 * Initialize the "zone of zones" which uses fixed memory allocated
556 * earlier in memory initialization. zone_bootstrap is called
557 * before zone_init.
558 */
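/*
 * Sketch of the startup ordering, for orientation: zone_bootstrap()
 * points zget_space at the static zdata area so that zinit() works
 * before the VM system is up; zone_init(), called later, creates
 * zone_map as a submap of kernel_map and sets up the page table used
 * for garbage collection.
 */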
559 void zone_bootstrap()
560 {
561 simple_lock_init(&all_zones_lock);
562 first_zone = ZONE_NULL;
563 last_zone = &first_zone;
564 num_zones = 0;
565
566 simple_lock_init(&zget_space_lock);
567 zalloc_next_space = zdata;
568 zalloc_end_of_space = zdata + zdata_size;
569 zalloc_wasted_space = 0;
570
571 zone_zone = ZONE_NULL;
572 zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
573 sizeof(struct zone), FALSE, "zones");
574 }
575
576 void zone_init()
577 {
578 vm_offset_t zone_min;
579 vm_offset_t zone_max;
580
581 vm_size_t zone_table_size;
582
583 zone_map = kmem_suballoc(kernel_map, &zone_min, &zone_max,
584 zone_map_size, FALSE);
585
586 /*
587 * Setup garbage collection information:
588 */
589
590 zone_table_size = atop(zone_max - zone_min) *
591 sizeof(struct zone_page_table_entry);
592 if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
593 zone_table_size) != KERN_SUCCESS)
594 panic("zone_init");
595 zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
596 zone_pages = atop(zone_max - zone_min);
597 zone_map_min_address = zone_min;
598 zone_map_max_address = zone_max;
599 simple_lock_init(&zone_page_table_lock);
600 zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
601 }
602
603
604 /*
605 * zalloc returns an element from the specified zone.
606 */
607 vm_offset_t zalloc(zone)
608 register zone_t zone;
609 {
610 vm_offset_t addr;
611
612 if (zone == ZONE_NULL)
613 panic ("zalloc: null zone");
614
615 check_simple_locks();
616
617 lock_zone(zone);
618 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
619 while (addr == 0) {
620 /*
621 * If nothing was there, try to get more
622 */
623 if (zone->doing_alloc) {
624 /*
625 * Someone is allocating memory for this zone.
626 * Wait for it to show up, then try again.
627 */
628 assert_wait((event_t)&zone->doing_alloc, TRUE);
629 /* XXX say wakeup needed */
630 unlock_zone(zone);
631 thread_block((void (*)()) 0);
632 lock_zone(zone);
633 }
634 else {
635 if ((zone->cur_size + (zone->pageable ?
636 zone->alloc_size : zone->elem_size)) >
637 zone->max_size) {
638 if (zone->exhaustible)
639 break;
640 /*
641 * Printf calls logwakeup, which calls
642 * select_wakeup which will do a zfree
 643 					 * (which tries to take the select_zone
 644 					 * lock), and hangs.  Release the lock now
 645 					 * so it can be taken again later.
646 * NOTE: this used to be specific to
647 * the select_zone, but for
648 * cleanliness, we just unlock all
649 * zones before this.
650 */
651 if (zone->expandable) {
652 /*
653 * We're willing to overflow certain
654 * zones, but not without complaining.
655 *
656 * This is best used in conjunction
 657 					 * with the collectable flag. What we
658 * want is an assurance we can get the
659 * memory back, assuming there's no
660 * leak.
661 */
662 zone->max_size += (zone->max_size >> 1);
663 } else if (!zone_ignore_overflow) {
664 unlock_zone(zone);
665 printf("zone \"%s\" empty.\n",
666 zone->zone_name);
667 panic("zalloc");
668 }
669 }
670
671 if (zone->pageable)
672 zone->doing_alloc = TRUE;
673 unlock_zone(zone);
674
675 if (zone->pageable) {
676 if (kmem_alloc_pageable(zone_map, &addr,
677 zone->alloc_size)
678 != KERN_SUCCESS)
679 panic("zalloc");
680 zcram(zone, addr, zone->alloc_size);
681 lock_zone(zone);
682 zone->doing_alloc = FALSE;
683 /* XXX check before doing this */
684 thread_wakeup((event_t)&zone->doing_alloc);
685
686 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
687 } else if (zone->collectable) {
688 if (kmem_alloc_wired(zone_map,
689 &addr, zone->alloc_size)
690 != KERN_SUCCESS)
691 panic("zalloc");
692 zone_page_init(addr, zone->alloc_size,
693 ZONE_PAGE_USED);
694 zcram(zone, addr, zone->alloc_size);
695 lock_zone(zone);
696 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
697 } else {
698 addr = zget_space(zone->elem_size);
699 if (addr == 0)
700 panic("zalloc");
701
702 lock_zone(zone);
703 zone->count++;
704 zone->cur_size += zone->elem_size;
705 unlock_zone(zone);
706 zone_page_alloc(addr, zone->elem_size);
707 return(addr);
708 }
709 }
710 }
711
712 unlock_zone(zone);
713 return(addr);
714 }
715
716
717 /*
 718  * zget returns an element from the specified zone,
 719  * or 0 immediately if the zone's free list is empty.
 720  *
 721  * This form should be used when you cannot block (for example,
 722  * when processing an interrupt).
723 */
724 vm_offset_t zget(zone)
725 register zone_t zone;
726 {
727 register vm_offset_t addr;
728
729 if (zone == ZONE_NULL)
 730 		panic ("zget: null zone");
731
732 lock_zone(zone);
733 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
734 unlock_zone(zone);
735
736 return(addr);
737 }
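/*
 * Illustrative only (hypothetical names): at interrupt level the
 * non-blocking form is used and a zero return must be handled by the
 * caller instead of waiting:
 *
 *	f = (struct foo *) zget(foo_zone);
 *	if (f == (struct foo *) 0)
 *		return;		(drop the request rather than block)
 */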
738
739 boolean_t zone_check = FALSE;
740
741 void zfree(zone, elem)
742 register zone_t zone;
743 vm_offset_t elem;
744 {
745 lock_zone(zone);
746 if (zone_check) {
747 vm_offset_t this;
748
749 /* check the zone's consistency */
750
751 for (this = zone->free_elements;
752 this != 0;
753 this = * (vm_offset_t *) this)
754 if (this == elem)
755 panic("zfree");
756 }
757 ADD_TO_ZONE(zone, elem);
758 unlock_zone(zone);
759 }
760
761 void zcollectable(zone)
762 zone_t zone;
763 {
764 zone->collectable = TRUE;
765 }
766
767 void zchange(zone, pageable, sleepable, exhaustible, collectable)
768 zone_t zone;
769 boolean_t pageable;
770 boolean_t sleepable;
771 boolean_t exhaustible;
772 boolean_t collectable;
773 {
774 zone->pageable = pageable;
775 zone->sleepable = sleepable;
776 zone->exhaustible = exhaustible;
777 zone->collectable = collectable;
778 lock_zone_init(zone);
779 }
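/*
 * For example (hypothetical call, illustrative only), a zone created
 * wired and non-collectable by zinit can later be marked exhaustible
 * and collectable once that is safe:
 *
 *	zchange(foo_zone, FALSE, FALSE, TRUE, TRUE);
 */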
780
781 /*
782 * Zone garbage collection subroutines
783 *
784 * These routines have in common the modification of entries in the
785 * zone_page_table. The latter contains one entry for every page
786 * in the zone_map.
787 *
788 * For each page table entry in the given range:
789 *
790 * zone_page_in_use - decrements in_free_list
791 * zone_page_free - increments in_free_list
792 * zone_page_init - initializes in_free_list and alloc_count
793 * zone_page_alloc - increments alloc_count
794 * zone_page_dealloc - decrements alloc_count
795 * zone_add_free_page_list - adds the page to the free list
796 *
797 * Two counts are maintained for each page, the in_free_list count and
798 * alloc_count. The alloc_count is how many zone elements have been
799 * allocated from a page. (Note that the page could contain elements
800 * that span page boundaries. The count includes these elements so
801 * one element may be counted in two pages.) In_free_list is a count
802 * of how many zone elements are currently free. If in_free_list is
803 * equal to alloc_count then the page is eligible for garbage
804 * collection.
805 *
806 * Alloc_count and in_free_list are initialized to the correct values
807 * for a particular zone when a page is zcram'ed into a zone. Subsequent
808 * gets and frees of zone elements will call zone_page_in_use and
 809  * zone_page_free which modify the in_free_list count. When the zone
 810  * garbage collector runs it will walk through a zone's free element list,
811 * remove the elements that reside on collectable pages, and use
812 * zone_add_free_page_list to create a list of pages to be collected.
813 */
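/*
 * In code, the test zone_gc applies to the page under a free element is
 * simply (sketch of the check used below):
 *
 *	if (zone_page(elt)->in_free_list == zone_page(elt)->alloc_count)
 *		... every counted element on the page is free, so the
 *		    page is a candidate for zone_add_free_page_list ...
 */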
814
815 void zone_page_in_use(addr, size)
816 vm_offset_t addr;
817 vm_size_t size;
818 {
819 int i, j;
820 if ((addr < zone_map_min_address) ||
821 (addr+size > zone_map_max_address)) return;
822 i = atop(addr-zone_map_min_address);
823 j = atop((addr+size-1) - zone_map_min_address);
824 lock_zone_page_table();
825 for (; i <= j; i++) {
826 zone_page_table[i].in_free_list--;
827 }
828 unlock_zone_page_table();
829 }
830
831 void zone_page_free(addr, size)
832 vm_offset_t addr;
833 vm_size_t size;
834 {
835 int i, j;
836 if ((addr < zone_map_min_address) ||
837 (addr+size > zone_map_max_address)) return;
838 i = atop(addr-zone_map_min_address);
839 j = atop((addr+size-1) - zone_map_min_address);
840 lock_zone_page_table();
841 for (; i <= j; i++) {
842 /* Set in_free_list to (ZONE_PAGE_USED + 1) if
843 * it was previously set to ZONE_PAGE_UNUSED.
844 */
845 if (zone_page_table[i].in_free_list == ZONE_PAGE_UNUSED) {
846 zone_page_table[i].in_free_list = 1;
847 } else {
848 zone_page_table[i].in_free_list++;
849 }
850 }
851 unlock_zone_page_table();
852 }
853
854 void zone_page_init(addr, size, value)
855
856 vm_offset_t addr;
857 vm_size_t size;
858 int value;
859 {
860 int i, j;
861 if ((addr < zone_map_min_address) ||
862 (addr+size > zone_map_max_address)) return;
863 i = atop(addr-zone_map_min_address);
864 j = atop((addr+size-1) - zone_map_min_address);
865 lock_zone_page_table();
866 for (; i <= j; i++) {
867 zone_page_table[i].alloc_count = value;
868 zone_page_table[i].in_free_list = 0;
869 }
870 unlock_zone_page_table();
871 }
872
873 void zone_page_alloc(addr, size)
874 vm_offset_t addr;
875 vm_size_t size;
876 {
877 int i, j;
878 if ((addr < zone_map_min_address) ||
879 (addr+size > zone_map_max_address)) return;
880 i = atop(addr-zone_map_min_address);
881 j = atop((addr+size-1) - zone_map_min_address);
882 lock_zone_page_table();
883 for (; i <= j; i++) {
884 /* Set alloc_count to (ZONE_PAGE_USED + 1) if
885 * it was previously set to ZONE_PAGE_UNUSED.
886 */
887 if (zone_page_table[i].alloc_count == ZONE_PAGE_UNUSED) {
888 zone_page_table[i].alloc_count = 1;
889 } else {
890 zone_page_table[i].alloc_count++;
891 }
892 }
893 unlock_zone_page_table();
894 }
895
896 void zone_page_dealloc(addr, size)
897 vm_offset_t addr;
898 vm_size_t size;
899 {
900 int i, j;
901 if ((addr < zone_map_min_address) ||
902 (addr+size > zone_map_max_address)) return;
903 i = atop(addr-zone_map_min_address);
904 j = atop((addr+size-1) - zone_map_min_address);
905 lock_zone_page_table();
906 for (; i <= j; i++) {
907 zone_page_table[i].alloc_count--;
908 }
909 unlock_zone_page_table();
910 }
911
912 void
913 zone_add_free_page_list(free_list, addr, size)
914 struct zone_page_table_entry **free_list;
915 vm_offset_t addr;
916 vm_size_t size;
917 {
918 int i, j;
919 if ((addr < zone_map_min_address) ||
920 (addr+size > zone_map_max_address)) return;
921 i = atop(addr-zone_map_min_address);
922 j = atop((addr+size-1) - zone_map_min_address);
923 lock_zone_page_table();
924 for (; i <= j; i++) {
925 if (zone_page_table[i].alloc_count == 0) {
926 zone_page_table[i].next = *free_list;
927 *free_list = &zone_page_table[i];
928 zone_page_table[i].alloc_count = ZONE_PAGE_UNUSED;
929 zone_page_table[i].in_free_list = 0;
930 }
931 }
932 unlock_zone_page_table();
933 }
934
935
936 /* This is used for walking through a zone's free element list.
937 */
938 struct zone_free_entry {
939 struct zone_free_entry * next;
940 };
941
942
943 /* Zone garbage collection
944 *
945 * zone_gc will walk through all the free elements in all the
946 * zones that are marked collectable looking for reclaimable
947 * pages. zone_gc is called by consider_zone_gc when the system
948 * begins to run out of memory.
949 */
950 void
951 zone_gc()
952 {
953 int max_zones;
954 zone_t z;
955 int i;
956 register spl_t s;
957 struct zone_page_table_entry *freep;
958 struct zone_page_table_entry *zone_free_page_list;
959
960 simple_lock(&all_zones_lock);
961 max_zones = num_zones;
962 z = first_zone;
963 simple_unlock(&all_zones_lock);
964
965 zone_free_page_list = (struct zone_page_table_entry *) 0;
966
967 for (i = 0; i < max_zones; i++) {
968 struct zone_free_entry * last;
969 struct zone_free_entry * elt;
970 assert(z != ZONE_NULL);
 971 	    /* run this at splhigh so that interrupt routines that use zones
 972 	       cannot interrupt while their zone is locked */
973 s=splhigh();
974 lock_zone(z);
975
976 if (!z->pageable && z->collectable) {
977
978 /* Count the free elements in each page. This loop
979 * requires that all in_free_list entries are zero.
980 */
981 elt = (struct zone_free_entry *)(z->free_elements);
982 while ((elt != (struct zone_free_entry *)0)) {
983 zone_page_free((vm_offset_t)elt, z->elem_size);
984 elt = elt->next;
985 }
986
987 /* Now determine which elements should be removed
988 * from the free list and, after all the elements
989 * on a page have been removed, add the element's
990 * page to a list of pages to be freed.
991 */
992 elt = (struct zone_free_entry *)(z->free_elements);
993 last = elt;
994 while ((elt != (struct zone_free_entry *)0)) {
995 if (((vm_offset_t)elt>=zone_map_min_address)&&
996 ((vm_offset_t)elt<=zone_map_max_address)&&
997 (zone_page(elt)->in_free_list ==
998 zone_page(elt)->alloc_count)) {
999
1000 z->cur_size -= z->elem_size;
1001 zone_page_in_use((vm_offset_t)elt, z->elem_size);
1002 zone_page_dealloc((vm_offset_t)elt, z->elem_size);
1003 if (zone_page(elt)->alloc_count == 0 ||
1004 zone_page(elt+(z->elem_size-1))->alloc_count==0) {
1005 zone_add_free_page_list(
1006 &zone_free_page_list,
1007 (vm_offset_t)elt, z->elem_size);
1008 }
1009
1010
1011 if (elt == last) {
1012 elt = elt->next;
1013 z->free_elements =(vm_offset_t)elt;
1014 last = elt;
1015 } else {
1016 last->next = elt->next;
1017 elt = elt->next;
1018 }
1019 } else {
1020 /* This element is not eligible for collection
1021 * so clear in_free_list in preparation for a
1022 * subsequent garbage collection pass.
1023 */
1024 if (((vm_offset_t)elt>=zone_map_min_address)&&
1025 ((vm_offset_t)elt<=zone_map_max_address)) {
1026 zone_page(elt)->in_free_list = 0;
1027 }
1028 last = elt;
1029 elt = elt->next;
1030 }
1031 }
1032 }
1033 unlock_zone(z);
1034 splx(s);
1035 simple_lock(&all_zones_lock);
1036 z = z->next_zone;
1037 simple_unlock(&all_zones_lock);
1038 }
1039
1040 for (freep = zone_free_page_list; freep != 0; freep = freep->next) {
1041 vm_offset_t free_addr;
1042
1043 free_addr = zone_map_min_address +
1044 PAGE_SIZE * (freep - zone_page_table);
1045 kmem_free(zone_map, free_addr, PAGE_SIZE);
1046 }
1047 }
1048
1049 boolean_t zone_gc_allowed = TRUE;
1050 unsigned zone_gc_last_tick = 0;
1051 unsigned zone_gc_max_rate = 0; /* in ticks */
1052
1053 /*
1054 * consider_zone_gc:
1055 *
1056 * Called by the pageout daemon when the system needs more free pages.
1057 */
1058
1059 void
1060 consider_zone_gc()
1061 {
1062 /*
1063 * By default, don't attempt zone GC more frequently
1064 * than once a second.
1065 */
1066
1067 if (zone_gc_max_rate == 0)
1068 zone_gc_max_rate = hz;
1069
1070 if (zone_gc_allowed &&
1071 (sched_tick > (zone_gc_last_tick + zone_gc_max_rate))) {
1072 zone_gc_last_tick = sched_tick;
1073 zone_gc();
1074 }
1075 }
1076
1077 #if MACH_DEBUG
1078 kern_return_t host_zone_info(host, namesp, namesCntp, infop, infoCntp)
1079 host_t host;
1080 zone_name_array_t *namesp;
1081 unsigned int *namesCntp;
1082 zone_info_array_t *infop;
1083 unsigned int *infoCntp;
1084 {
1085 zone_name_t *names;
1086 vm_offset_t names_addr;
1087 vm_size_t names_size = 0; /*'=0' to quiet gcc warnings */
1088 zone_info_t *info;
1089 vm_offset_t info_addr;
1090 vm_size_t info_size = 0; /*'=0' to quiet gcc warnings */
1091 unsigned int max_zones, i;
1092 zone_t z;
1093 kern_return_t kr;
1094
1095 if (host == HOST_NULL)
1096 return KERN_INVALID_HOST;
1097
1098 /*
1099 * We assume that zones aren't freed once allocated.
1100 * We won't pick up any zones that are allocated later.
1101 */
1102
1103 simple_lock(&all_zones_lock);
1104 max_zones = num_zones;
1105 z = first_zone;
1106 simple_unlock(&all_zones_lock);
1107
1108 if (max_zones <= *namesCntp) {
1109 /* use in-line memory */
1110
1111 names = *namesp;
1112 } else {
1113 names_size = round_page(max_zones * sizeof *names);
1114 kr = kmem_alloc_pageable(ipc_kernel_map,
1115 &names_addr, names_size);
1116 if (kr != KERN_SUCCESS)
1117 return kr;
1118
1119 names = (zone_name_t *) names_addr;
1120 }
1121
1122 if (max_zones <= *infoCntp) {
1123 /* use in-line memory */
1124
1125 info = *infop;
1126 } else {
1127 info_size = round_page(max_zones * sizeof *info);
1128 kr = kmem_alloc_pageable(ipc_kernel_map,
1129 &info_addr, info_size);
1130 if (kr != KERN_SUCCESS) {
1131 if (names != *namesp)
1132 kmem_free(ipc_kernel_map,
1133 names_addr, names_size);
1134 return kr;
1135 }
1136
1137 info = (zone_info_t *) info_addr;
1138 }
1139
1140 for (i = 0; i < max_zones; i++) {
1141 zone_name_t *zn = &names[i];
1142 zone_info_t *zi = &info[i];
1143 struct zone zcopy;
1144
1145 assert(z != ZONE_NULL);
1146
1147 lock_zone(z);
1148 zcopy = *z;
1149 unlock_zone(z);
1150
1151 simple_lock(&all_zones_lock);
1152 z = z->next_zone;
1153 simple_unlock(&all_zones_lock);
1154
1155 /* assuming here the name data is static */
1156 (void) strncpy(zn->zn_name, zcopy.zone_name,
1157 sizeof zn->zn_name);
1158
1159 zi->zi_count = zcopy.count;
1160 zi->zi_cur_size = zcopy.cur_size;
1161 zi->zi_max_size = zcopy.max_size;
1162 zi->zi_elem_size = zcopy.elem_size;
1163 zi->zi_alloc_size = zcopy.alloc_size;
1164 zi->zi_pageable = zcopy.pageable;
1165 zi->zi_sleepable = zcopy.sleepable;
1166 zi->zi_exhaustible = zcopy.exhaustible;
1167 zi->zi_collectable = zcopy.collectable;
1168 }
1169
1170 if (names != *namesp) {
1171 vm_size_t used;
1172 vm_map_copy_t copy;
1173
1174 used = max_zones * sizeof *names;
1175
1176 if (used != names_size)
1177 bzero((char *) (names_addr + used), names_size - used);
1178
1179 kr = vm_map_copyin(ipc_kernel_map, names_addr, names_size,
 1180 			TRUE, &copy);
1181 assert(kr == KERN_SUCCESS);
1182
1183 *namesp = (zone_name_t *) copy;
1184 }
1185 *namesCntp = max_zones;
1186
1187 if (info != *infop) {
1188 vm_size_t used;
1189 vm_map_copy_t copy;
1190
1191 used = max_zones * sizeof *info;
1192
1193 if (used != info_size)
1194 bzero((char *) (info_addr + used), info_size - used);
1195
1196 kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size,
 1197 			TRUE, &copy);
1198 assert(kr == KERN_SUCCESS);
1199
1200 *infop = (zone_info_t *) copy;
1201 }
1202 *infoCntp = max_zones;
1203
1204 return KERN_SUCCESS;
1205 }
 1206 #endif	/* MACH_DEBUG */