FreeBSD/Linux Kernel Cross Reference
sys/vm/uma_core.c
1 /*-
2 * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson <jeff@FreeBSD.org>
3 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
4 * Copyright (c) 2004-2005 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * uma_core.c Implementation of the Universal Memory allocator
31 *
32 * This allocator is intended to replace the multitude of similar object caches
33 * in the standard FreeBSD kernel. The intent is to be flexible as well as
34  * efficient. A primary design goal is to return unused memory to the rest of
35 * the system. This will make the system as a whole more flexible due to the
36 * ability to move memory to subsystems which most need it instead of leaving
37 * pools of reserved memory unused.
38 *
39 * The basic ideas stem from similar slab/zone based allocators whose algorithms
40 * are well known.
41 *
42 */
43
44 /*
45 * TODO:
46 * - Improve memory usage for large allocations
47 * - Investigate cache size adjustments
48 */
49
50 #include <sys/cdefs.h>
51 __FBSDID("$FreeBSD: releng/6.0/sys/vm/uma_core.c 148476 2005-07-28 12:10:19Z rwatson $");
52
53 /* I should really use ktr.. */
54 /*
55 #define UMA_DEBUG 1
56 #define UMA_DEBUG_ALLOC 1
57 #define UMA_DEBUG_ALLOC_1 1
58 */
59
60 #include "opt_param.h"
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/kernel.h>
64 #include <sys/types.h>
65 #include <sys/queue.h>
66 #include <sys/malloc.h>
67 #include <sys/ktr.h>
68 #include <sys/lock.h>
69 #include <sys/sysctl.h>
70 #include <sys/mutex.h>
71 #include <sys/proc.h>
72 #include <sys/sbuf.h>
73 #include <sys/smp.h>
74 #include <sys/vmmeter.h>
75
76 #include <vm/vm.h>
77 #include <vm/vm_object.h>
78 #include <vm/vm_page.h>
79 #include <vm/vm_param.h>
80 #include <vm/vm_map.h>
81 #include <vm/vm_kern.h>
82 #include <vm/vm_extern.h>
83 #include <vm/uma.h>
84 #include <vm/uma_int.h>
85 #include <vm/uma_dbg.h>
86
87 #include <machine/vmparam.h>
88
89 /*
90 * This is the zone and keg from which all zones are spawned. The idea is that
91 * even the zone & keg heads are allocated from the allocator, so we use the
92 * bss section to bootstrap us.
93 */
94 static struct uma_keg masterkeg;
95 static struct uma_zone masterzone_k;
96 static struct uma_zone masterzone_z;
97 static uma_zone_t kegs = &masterzone_k;
98 static uma_zone_t zones = &masterzone_z;
99
100 /* This is the zone from which all of uma_slab_t's are allocated. */
101 static uma_zone_t slabzone;
102 static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
103
104 /*
105 * The initial hash tables come out of this zone so they can be allocated
106 * prior to malloc coming up.
107 */
108 static uma_zone_t hashzone;
109
110 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
111
112 /*
113 * Are we allowed to allocate buckets?
114 */
115 static int bucketdisable = 1;
116
117 /* Linked list of all kegs in the system */
118 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
119
120 /* This mutex protects the keg list */
121 static struct mtx uma_mtx;
122
123 /* Linked list of boot time pages */
124 static LIST_HEAD(,uma_slab) uma_boot_pages =
125 LIST_HEAD_INITIALIZER(&uma_boot_pages);
126
127 /* Count of free boottime pages */
128 static int uma_boot_free = 0;
129
130 /* Is the VM done starting up? */
131 static int booted = 0;
132
133 /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
134 static u_int uma_max_ipers;
135 static u_int uma_max_ipers_ref;
136
137 /*
138 * This is the handle used to schedule events that need to happen
139 * outside of the allocation fast path.
140 */
141 static struct callout uma_callout;
142 #define UMA_TIMEOUT 20 /* Seconds for callout interval. */
143
144 /*
145 * This structure is passed as the zone ctor arg so that I don't have to create
146 * a special allocation function just for zones.
147 */
148 struct uma_zctor_args {
149 char *name;
150 size_t size;
151 uma_ctor ctor;
152 uma_dtor dtor;
153 uma_init uminit;
154 uma_fini fini;
155 uma_keg_t keg;
156 int align;
157 u_int32_t flags;
158 };
159
160 struct uma_kctor_args {
161 uma_zone_t zone;
162 size_t size;
163 uma_init uminit;
164 uma_fini fini;
165 int align;
166 u_int32_t flags;
167 };
168
169 struct uma_bucket_zone {
170 uma_zone_t ubz_zone;
171 char *ubz_name;
172 int ubz_entries;
173 };
174
175 #define BUCKET_MAX 128
176
177 struct uma_bucket_zone bucket_zones[] = {
178 { NULL, "16 Bucket", 16 },
179 { NULL, "32 Bucket", 32 },
180 { NULL, "64 Bucket", 64 },
181 { NULL, "128 Bucket", 128 },
182 { NULL, NULL, 0}
183 };
184
185 #define BUCKET_SHIFT 4
186 #define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
187
188 /*
189 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
190 * of approximately the right size.
191 */
192 static uint8_t bucket_size[BUCKET_ZONES];
193
194 /*
195 * Flags and enumerations to be passed to internal functions.
196 */
197 enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
198
199 #define ZFREE_STATFAIL 0x00000001 /* Update zone failure statistic. */
200 #define ZFREE_STATFREE 0x00000002 /* Update zone free statistic. */
201
202 /* Prototypes.. */
203
204 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
205 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
206 static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
207 static void page_free(void *, int, u_int8_t);
208 static uma_slab_t slab_zalloc(uma_zone_t, int);
209 static void cache_drain(uma_zone_t);
210 static void bucket_drain(uma_zone_t, uma_bucket_t);
211 static void bucket_cache_drain(uma_zone_t zone);
212 static int keg_ctor(void *, int, void *, int);
213 static void keg_dtor(void *, int, void *);
214 static int zone_ctor(void *, int, void *, int);
215 static void zone_dtor(void *, int, void *);
216 static int zero_init(void *, int, int);
217 static void zone_small_init(uma_zone_t zone);
218 static void zone_large_init(uma_zone_t zone);
219 static void zone_foreach(void (*zfunc)(uma_zone_t));
220 static void zone_timeout(uma_zone_t zone);
221 static int hash_alloc(struct uma_hash *);
222 static int hash_expand(struct uma_hash *, struct uma_hash *);
223 static void hash_free(struct uma_hash *hash);
224 static void uma_timeout(void *);
225 static void uma_startup3(void);
226 static void *uma_zalloc_internal(uma_zone_t, void *, int);
227 static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip,
228 int);
229 static void bucket_enable(void);
230 static void bucket_init(void);
231 static uma_bucket_t bucket_alloc(int, int);
232 static void bucket_free(uma_bucket_t);
233 static void bucket_zone_drain(void);
234 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
235 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
236 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
237 static void zone_drain(uma_zone_t);
238 static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
239 uma_fini fini, int align, u_int32_t flags);
240
241 void uma_print_zone(uma_zone_t);
242 void uma_print_stats(void);
243 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
244 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
245 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
246
247 #ifdef WITNESS
248 static int nosleepwithlocks = 1;
249 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
250 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
251 #else
252 static int nosleepwithlocks = 0;
253 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
254 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
255 #endif
256 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
257 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
258 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
259
260 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
261 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
262
263 SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
264 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
265
266 /*
267 * This routine checks to see whether or not it's safe to enable buckets.
268 */
269
270 static void
271 bucket_enable(void)
272 {
273 if (cnt.v_free_count < cnt.v_free_min)
274 bucketdisable = 1;
275 else
276 bucketdisable = 0;
277 }
278
279 /*
280 * Initialize bucket_zones, the array of zones of buckets of various sizes.
281 *
282 * For each zone, calculate the memory required for each bucket, consisting
283  * of the header and an array of pointers.  Initialize bucket_size[] so that
284  * requests in the appropriate size range map to that zone.
285 */
286 static void
287 bucket_init(void)
288 {
289 struct uma_bucket_zone *ubz;
290 int i;
291 int j;
292
293 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
294 int size;
295
296 ubz = &bucket_zones[j];
297 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
298 size += sizeof(void *) * ubz->ubz_entries;
299 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
300 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
301 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
302 bucket_size[i >> BUCKET_SHIFT] = j;
303 }
304 }
305
306 /*
307 * Given a desired number of entries for a bucket, return the zone from which
308 * to allocate the bucket.
309 */
310 static struct uma_bucket_zone *
311 bucket_zone_lookup(int entries)
312 {
313 int idx;
314
315 idx = howmany(entries, 1 << BUCKET_SHIFT);
316 return (&bucket_zones[bucket_size[idx]]);
317 }
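
/*
 * Illustrative sketch, assuming the BUCKET_SHIFT/bucket_zones[] setup above:
 * bucket_init() fills bucket_size[] in 16-entry steps, so the lookup returns
 * the smallest bucket zone that can hold the request.  For a hypothetical
 * request of 20 entries:
 *
 *        idx = howmany(20, 1 << BUCKET_SHIFT);     -- idx == 2
 *        ubz = &bucket_zones[bucket_size[2]];      -- "32 Bucket" (32 entries)
 */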
318
319 static uma_bucket_t
320 bucket_alloc(int entries, int bflags)
321 {
322 struct uma_bucket_zone *ubz;
323 uma_bucket_t bucket;
324
325 /*
326 * This is to stop us from allocating per cpu buckets while we're
327 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the
328 * boot pages. This also prevents us from allocating buckets in
329 * low memory situations.
330 */
331 if (bucketdisable)
332 return (NULL);
333
334 ubz = bucket_zone_lookup(entries);
335 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
336 if (bucket) {
337 #ifdef INVARIANTS
338 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
339 #endif
340 bucket->ub_cnt = 0;
341 bucket->ub_entries = ubz->ubz_entries;
342 }
343
344 return (bucket);
345 }
346
347 static void
348 bucket_free(uma_bucket_t bucket)
349 {
350 struct uma_bucket_zone *ubz;
351
352 ubz = bucket_zone_lookup(bucket->ub_entries);
353 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
354 ZFREE_STATFREE);
355 }
356
357 static void
358 bucket_zone_drain(void)
359 {
360 struct uma_bucket_zone *ubz;
361
362 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
363 zone_drain(ubz->ubz_zone);
364 }
365
366
367 /*
368 * Routine called by timeout which is used to fire off some time interval
369 * based calculations. (stats, hash size, etc.)
370 *
371 * Arguments:
372 * arg Unused
373 *
374 * Returns:
375 * Nothing
376 */
377 static void
378 uma_timeout(void *unused)
379 {
380 bucket_enable();
381 zone_foreach(zone_timeout);
382
383 /* Reschedule this event */
384 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
385 }
386
387 /*
388 * Routine to perform timeout driven calculations. This expands the
389 * hashes and does per cpu statistics aggregation.
390 *
391 * Arguments:
392 * zone The zone to operate on
393 *
394 * Returns:
395 * Nothing
396 */
397 static void
398 zone_timeout(uma_zone_t zone)
399 {
400 uma_keg_t keg;
401 u_int64_t alloc;
402
403 keg = zone->uz_keg;
404 alloc = 0;
405
406 /*
407 * Expand the zone hash table.
408 *
409 * This is done if the number of slabs is larger than the hash size.
410  * What I'm trying to do here is completely eliminate collisions.  This
411 * may be a little aggressive. Should I allow for two collisions max?
412 */
413 ZONE_LOCK(zone);
414 if (keg->uk_flags & UMA_ZONE_HASH &&
415 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
416 struct uma_hash newhash;
417 struct uma_hash oldhash;
418 int ret;
419
420 /*
421 * This is so involved because allocating and freeing
422 * while the zone lock is held will lead to deadlock.
423 * I have to do everything in stages and check for
424 * races.
425 */
426 newhash = keg->uk_hash;
427 ZONE_UNLOCK(zone);
428 ret = hash_alloc(&newhash);
429 ZONE_LOCK(zone);
430 if (ret) {
431 if (hash_expand(&keg->uk_hash, &newhash)) {
432 oldhash = keg->uk_hash;
433 keg->uk_hash = newhash;
434 } else
435 oldhash = newhash;
436
437 ZONE_UNLOCK(zone);
438 hash_free(&oldhash);
439 ZONE_LOCK(zone);
440 }
441 }
442 ZONE_UNLOCK(zone);
443 }
444
445 /*
446 * Allocate and zero fill the next sized hash table from the appropriate
447 * backing store.
448 *
449 * Arguments:
450 * hash A new hash structure with the old hash size in uh_hashsize
451 *
452 * Returns:
453  * 1 on success and 0 on failure.
454 */
455 static int
456 hash_alloc(struct uma_hash *hash)
457 {
458 int oldsize;
459 int alloc;
460
461 oldsize = hash->uh_hashsize;
462
463 /* We're just going to go to a power of two greater */
464 if (oldsize) {
465 hash->uh_hashsize = oldsize * 2;
466 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
467 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
468 M_UMAHASH, M_NOWAIT);
469 } else {
470 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
471 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
472 M_WAITOK);
473 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
474 }
475 if (hash->uh_slab_hash) {
476 bzero(hash->uh_slab_hash, alloc);
477 hash->uh_hashmask = hash->uh_hashsize - 1;
478 return (1);
479 }
480
481 return (0);
482 }
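
/*
 * Illustrative sketch of the growth pattern implemented above: the first
 * table for a keg is carved out of 'hashzone' at UMA_HASH_SIZE_INIT entries,
 * and every later call doubles the entry count using malloc(9):
 *
 *        UMA_HASH_SIZE_INIT (hashzone) -> x2 -> x4 -> ...  (M_UMAHASH, M_NOWAIT)
 *
 * A failed M_NOWAIT allocation simply leaves the old, smaller table in place
 * until the next zone_timeout() attempt.
 */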
483
484 /*
485 * Expands the hash table for HASH zones. This is done from zone_timeout
486 * to reduce collisions. This must not be done in the regular allocation
487 * path, otherwise, we can recurse on the vm while allocating pages.
488 *
489 * Arguments:
490 * oldhash The hash you want to expand
491 * newhash The hash structure for the new table
492 *
493 * Returns:
494  *	1 if the table was expanded, 0 otherwise.
495 *
496 * Discussion:
497 */
498 static int
499 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
500 {
501 uma_slab_t slab;
502 int hval;
503 int i;
504
505 if (!newhash->uh_slab_hash)
506 return (0);
507
508 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
509 return (0);
510
511 /*
512 * I need to investigate hash algorithms for resizing without a
513 * full rehash.
514 */
515
516 for (i = 0; i < oldhash->uh_hashsize; i++)
517 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
518 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
519 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
520 hval = UMA_HASH(newhash, slab->us_data);
521 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
522 slab, us_hlink);
523 }
524
525 return (1);
526 }
527
528 /*
529 * Free the hash bucket to the appropriate backing store.
530 *
531 * Arguments:
532  *	hash	The hash table we're freeing
534 *
535 * Returns:
536 * Nothing
537 */
538 static void
539 hash_free(struct uma_hash *hash)
540 {
541 if (hash->uh_slab_hash == NULL)
542 return;
543 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
544 uma_zfree_internal(hashzone,
545 hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
546 else
547 free(hash->uh_slab_hash, M_UMAHASH);
548 }
549
550 /*
551 * Frees all outstanding items in a bucket
552 *
553 * Arguments:
554 * zone The zone to free to, must be unlocked.
555 * bucket The free/alloc bucket with items, cpu queue must be locked.
556 *
557 * Returns:
558 * Nothing
559 */
560
561 static void
562 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
563 {
564 uma_slab_t slab;
565 int mzone;
566 void *item;
567
568 if (bucket == NULL)
569 return;
570
571 slab = NULL;
572 mzone = 0;
573
574 /* We have to lookup the slab again for malloc.. */
575 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
576 mzone = 1;
577
578 while (bucket->ub_cnt > 0) {
579 bucket->ub_cnt--;
580 item = bucket->ub_bucket[bucket->ub_cnt];
581 #ifdef INVARIANTS
582 bucket->ub_bucket[bucket->ub_cnt] = NULL;
583 KASSERT(item != NULL,
584 ("bucket_drain: botched ptr, item is NULL"));
585 #endif
586 /*
587 * This is extremely inefficient. The slab pointer was passed
588 * to uma_zfree_arg, but we lost it because the buckets don't
589 * hold them. This will go away when free() gets a size passed
590 * to it.
591 */
592 if (mzone)
593 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
594 uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0);
595 }
596 }
597
598 /*
599 * Drains the per cpu caches for a zone.
600 *
601  * NOTE: This may only be called while the zone is being torn down, and not
602 * during normal operation. This is necessary in order that we do not have
603 * to migrate CPUs to drain the per-CPU caches.
604 *
605 * Arguments:
606 * zone The zone to drain, must be unlocked.
607 *
608 * Returns:
609 * Nothing
610 */
611 static void
612 cache_drain(uma_zone_t zone)
613 {
614 uma_cache_t cache;
615 int cpu;
616
617 /*
618 * XXX: It is safe to not lock the per-CPU caches, because we're
619 * tearing down the zone anyway. I.e., there will be no further use
620 * of the caches at this point.
621 *
622  * XXX: It would be good to be able to assert that the zone is being
623 * torn down to prevent improper use of cache_drain().
624 *
625 * XXX: We lock the zone before passing into bucket_cache_drain() as
626 * it is used elsewhere. Should the tear-down path be made special
627 * there in some form?
628 */
629 for (cpu = 0; cpu <= mp_maxid; cpu++) {
630 if (CPU_ABSENT(cpu))
631 continue;
632 cache = &zone->uz_cpu[cpu];
633 bucket_drain(zone, cache->uc_allocbucket);
634 bucket_drain(zone, cache->uc_freebucket);
635 if (cache->uc_allocbucket != NULL)
636 bucket_free(cache->uc_allocbucket);
637 if (cache->uc_freebucket != NULL)
638 bucket_free(cache->uc_freebucket);
639 cache->uc_allocbucket = cache->uc_freebucket = NULL;
640 }
641 ZONE_LOCK(zone);
642 bucket_cache_drain(zone);
643 ZONE_UNLOCK(zone);
644 }
645
646 /*
647 * Drain the cached buckets from a zone. Expects a locked zone on entry.
648 */
649 static void
650 bucket_cache_drain(uma_zone_t zone)
651 {
652 uma_bucket_t bucket;
653
654 /*
655 * Drain the bucket queues and free the buckets, we just keep two per
656 * cpu (alloc/free).
657 */
658 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
659 LIST_REMOVE(bucket, ub_link);
660 ZONE_UNLOCK(zone);
661 bucket_drain(zone, bucket);
662 bucket_free(bucket);
663 ZONE_LOCK(zone);
664 }
665
666 /* Now we do the free queue.. */
667 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
668 LIST_REMOVE(bucket, ub_link);
669 bucket_free(bucket);
670 }
671 }
672
673 /*
674 * Frees pages from a zone back to the system. This is done on demand from
675 * the pageout daemon.
676 *
677 * Arguments:
678 * zone The zone to free pages from
680 *
681 * Returns:
682 * Nothing.
683 */
684 static void
685 zone_drain(uma_zone_t zone)
686 {
687 struct slabhead freeslabs = { 0 };
688 uma_keg_t keg;
689 uma_slab_t slab;
690 uma_slab_t n;
691 u_int8_t flags;
692 u_int8_t *mem;
693 int i;
694
695 keg = zone->uz_keg;
696
697 /*
698 * We don't want to take pages from statically allocated zones at this
699 * time
700 */
701 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
702 return;
703
704 ZONE_LOCK(zone);
705
706 #ifdef UMA_DEBUG
707 printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
708 #endif
709 bucket_cache_drain(zone);
710 if (keg->uk_free == 0)
711 goto finished;
712
713 slab = LIST_FIRST(&keg->uk_free_slab);
714 while (slab) {
715 n = LIST_NEXT(slab, us_link);
716
717 		/* We have nowhere to free these to */
718 if (slab->us_flags & UMA_SLAB_BOOT) {
719 slab = n;
720 continue;
721 }
722
723 LIST_REMOVE(slab, us_link);
724 keg->uk_pages -= keg->uk_ppera;
725 keg->uk_free -= keg->uk_ipers;
726
727 if (keg->uk_flags & UMA_ZONE_HASH)
728 UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
729
730 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
731
732 slab = n;
733 }
734 finished:
735 ZONE_UNLOCK(zone);
736
737 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
738 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
739 if (keg->uk_fini)
740 for (i = 0; i < keg->uk_ipers; i++)
741 keg->uk_fini(
742 slab->us_data + (keg->uk_rsize * i),
743 keg->uk_size);
744 flags = slab->us_flags;
745 mem = slab->us_data;
746
747 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
748 (keg->uk_flags & UMA_ZONE_REFCNT)) {
749 vm_object_t obj;
750
751 if (flags & UMA_SLAB_KMEM)
752 obj = kmem_object;
753 else
754 obj = NULL;
755 for (i = 0; i < keg->uk_ppera; i++)
756 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
757 obj);
758 }
759 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
760 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
761 SKIP_NONE, ZFREE_STATFREE);
762 #ifdef UMA_DEBUG
763 printf("%s: Returning %d bytes.\n",
764 zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
765 #endif
766 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
767 }
768 }
769
770 /*
771 * Allocate a new slab for a zone. This does not insert the slab onto a list.
772 *
773 * Arguments:
774 * zone The zone to allocate slabs for
775 * wait Shall we wait?
776 *
777 * Returns:
778 * The slab that was allocated or NULL if there is no memory and the
779 * caller specified M_NOWAIT.
780 */
781 static uma_slab_t
782 slab_zalloc(uma_zone_t zone, int wait)
783 {
784 uma_slabrefcnt_t slabref;
785 uma_slab_t slab;
786 uma_keg_t keg;
787 u_int8_t *mem;
788 u_int8_t flags;
789 int i;
790
791 slab = NULL;
792 keg = zone->uz_keg;
793
794 #ifdef UMA_DEBUG
795 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
796 #endif
797 ZONE_UNLOCK(zone);
798
799 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
800 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
801 if (slab == NULL) {
802 ZONE_LOCK(zone);
803 return NULL;
804 }
805 }
806
807 /*
808 * This reproduces the old vm_zone behavior of zero filling pages the
809 * first time they are added to a zone.
810 *
811 * Malloced items are zeroed in uma_zalloc.
812 */
813
814 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
815 wait |= M_ZERO;
816 else
817 wait &= ~M_ZERO;
818
819 mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
820 &flags, wait);
821 if (mem == NULL) {
822 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
823 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
824 SKIP_NONE, ZFREE_STATFREE);
825 ZONE_LOCK(zone);
826 return (NULL);
827 }
828
829 /* Point the slab into the allocated memory */
830 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
831 slab = (uma_slab_t )(mem + keg->uk_pgoff);
832
833 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
834 (keg->uk_flags & UMA_ZONE_REFCNT))
835 for (i = 0; i < keg->uk_ppera; i++)
836 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
837
838 slab->us_keg = keg;
839 slab->us_data = mem;
840 slab->us_freecount = keg->uk_ipers;
841 slab->us_firstfree = 0;
842 slab->us_flags = flags;
843
844 if (keg->uk_flags & UMA_ZONE_REFCNT) {
845 slabref = (uma_slabrefcnt_t)slab;
846 for (i = 0; i < keg->uk_ipers; i++) {
847 slabref->us_freelist[i].us_refcnt = 0;
848 slabref->us_freelist[i].us_item = i+1;
849 }
850 } else {
851 for (i = 0; i < keg->uk_ipers; i++)
852 slab->us_freelist[i].us_item = i+1;
853 }
854
855 if (keg->uk_init != NULL) {
856 for (i = 0; i < keg->uk_ipers; i++)
857 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
858 keg->uk_size, wait) != 0)
859 break;
860 if (i != keg->uk_ipers) {
861 if (keg->uk_fini != NULL) {
862 for (i--; i > -1; i--)
863 keg->uk_fini(slab->us_data +
864 (keg->uk_rsize * i),
865 keg->uk_size);
866 }
867 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
868 (keg->uk_flags & UMA_ZONE_REFCNT)) {
869 vm_object_t obj;
870
871 if (flags & UMA_SLAB_KMEM)
872 obj = kmem_object;
873 else
874 obj = NULL;
875 for (i = 0; i < keg->uk_ppera; i++)
876 vsetobj((vm_offset_t)mem +
877 (i * PAGE_SIZE), obj);
878 }
879 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
880 uma_zfree_internal(keg->uk_slabzone, slab,
881 NULL, SKIP_NONE, ZFREE_STATFREE);
882 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
883 flags);
884 ZONE_LOCK(zone);
885 return (NULL);
886 }
887 }
888 ZONE_LOCK(zone);
889
890 if (keg->uk_flags & UMA_ZONE_HASH)
891 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
892
893 keg->uk_pages += keg->uk_ppera;
894 keg->uk_free += keg->uk_ipers;
895
896 return (slab);
897 }
898
899 /*
900 * This function is intended to be used early on in place of page_alloc() so
901 * that we may use the boot time page cache to satisfy allocations before
902 * the VM is ready.
903 */
904 static void *
905 startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
906 {
907 uma_keg_t keg;
908
909 keg = zone->uz_keg;
910
911 /*
912 * Check our small startup cache to see if it has pages remaining.
913 */
914 mtx_lock(&uma_mtx);
915 if (uma_boot_free != 0) {
916 uma_slab_t tmps;
917
918 tmps = LIST_FIRST(&uma_boot_pages);
919 LIST_REMOVE(tmps, us_link);
920 uma_boot_free--;
921 mtx_unlock(&uma_mtx);
922 *pflag = tmps->us_flags;
923 return (tmps->us_data);
924 }
925 mtx_unlock(&uma_mtx);
926 if (booted == 0)
927 panic("UMA: Increase UMA_BOOT_PAGES");
928 /*
929 	 * Now that we've booted, reset these users to their real allocator.
930 */
931 #ifdef UMA_MD_SMALL_ALLOC
932 keg->uk_allocf = uma_small_alloc;
933 #else
934 keg->uk_allocf = page_alloc;
935 #endif
936 return keg->uk_allocf(zone, bytes, pflag, wait);
937 }
938
939 /*
940 * Allocates a number of pages from the system
941 *
942 * Arguments:
943 * zone Unused
944 * bytes The number of bytes requested
945 * wait Shall we wait?
946 *
947 * Returns:
948 * A pointer to the alloced memory or possibly
949 * NULL if M_NOWAIT is set.
950 */
951 static void *
952 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
953 {
954 void *p; /* Returned page */
955
956 *pflag = UMA_SLAB_KMEM;
957 p = (void *) kmem_malloc(kmem_map, bytes, wait);
958
959 return (p);
960 }
961
962 /*
963 * Allocates a number of pages from within an object
964 *
965 * Arguments:
966 * zone Unused
967 * bytes The number of bytes requested
968 * wait Shall we wait?
969 *
970 * Returns:
971 * A pointer to the alloced memory or possibly
972 * NULL if M_NOWAIT is set.
973 */
974 static void *
975 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
976 {
977 vm_object_t object;
978 vm_offset_t retkva, zkva;
979 vm_page_t p;
980 int pages, startpages;
981
982 object = zone->uz_keg->uk_obj;
983 retkva = 0;
984
985 /*
986 * This looks a little weird since we're getting one page at a time.
987 */
988 VM_OBJECT_LOCK(object);
989 p = TAILQ_LAST(&object->memq, pglist);
990 pages = p != NULL ? p->pindex + 1 : 0;
991 startpages = pages;
992 zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
993 for (; bytes > 0; bytes -= PAGE_SIZE) {
994 p = vm_page_alloc(object, pages,
995 VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
996 if (p == NULL) {
997 if (pages != startpages)
998 pmap_qremove(retkva, pages - startpages);
999 while (pages != startpages) {
1000 pages--;
1001 p = TAILQ_LAST(&object->memq, pglist);
1002 vm_page_lock_queues();
1003 vm_page_unwire(p, 0);
1004 vm_page_free(p);
1005 vm_page_unlock_queues();
1006 }
1007 retkva = 0;
1008 goto done;
1009 }
1010 pmap_qenter(zkva, &p, 1);
1011 if (retkva == 0)
1012 retkva = zkva;
1013 zkva += PAGE_SIZE;
1014 pages += 1;
1015 }
1016 done:
1017 VM_OBJECT_UNLOCK(object);
1018 *flags = UMA_SLAB_PRIV;
1019
1020 return ((void *)retkva);
1021 }
1022
1023 /*
1024 * Frees a number of pages to the system
1025 *
1026 * Arguments:
1027 * mem A pointer to the memory to be freed
1028 * size The size of the memory being freed
1029 * flags The original p->us_flags field
1030 *
1031 * Returns:
1032 * Nothing
1033 */
1034 static void
1035 page_free(void *mem, int size, u_int8_t flags)
1036 {
1037 vm_map_t map;
1038
1039 if (flags & UMA_SLAB_KMEM)
1040 map = kmem_map;
1041 else
1042 panic("UMA: page_free used with invalid flags %d\n", flags);
1043
1044 kmem_free(map, (vm_offset_t)mem, size);
1045 }
1046
1047 /*
1048 * Zero fill initializer
1049 *
1050 * Arguments/Returns follow uma_init specifications
1051 */
1052 static int
1053 zero_init(void *mem, int size, int flags)
1054 {
1055 bzero(mem, size);
1056 return (0);
1057 }
1058
1059 /*
1060 * Finish creating a small uma zone. This calculates ipers, and the zone size.
1061 *
1062 * Arguments
1063 * zone The zone we should initialize
1064 *
1065 * Returns
1066 * Nothing
1067 */
1068 static void
1069 zone_small_init(uma_zone_t zone)
1070 {
1071 uma_keg_t keg;
1072 u_int rsize;
1073 u_int memused;
1074 u_int wastedspace;
1075 u_int shsize;
1076
1077 keg = zone->uz_keg;
1078 KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
1079 rsize = keg->uk_size;
1080
1081 if (rsize < UMA_SMALLEST_UNIT)
1082 rsize = UMA_SMALLEST_UNIT;
1083 if (rsize & keg->uk_align)
1084 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1085
1086 keg->uk_rsize = rsize;
1087 keg->uk_ppera = 1;
1088
1089 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1090 rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */
1091 shsize = sizeof(struct uma_slab_refcnt);
1092 } else {
1093 rsize += UMA_FRITM_SZ; /* Account for linkage */
1094 shsize = sizeof(struct uma_slab);
1095 }
1096
1097 keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1098 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
1099 memused = keg->uk_ipers * rsize + shsize;
1100 wastedspace = UMA_SLAB_SIZE - memused;
1101
1102 /*
1103 	 * We can't do OFFPAGE if we're internal or if we've been
1104 	 * asked not to go to the VM for buckets.  If we did, we might
1105 	 * end up going to the VM (kmem_map) for slabs, which we do not
1106 	 * want when we're UMA_ZFLAG_CACHEONLY as a result of
1107 	 * UMA_ZONE_VM, which clearly forbids it.
1108 */
1109 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1110 (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1111 return;
1112
1113 if ((wastedspace >= UMA_MAX_WASTE) &&
1114 (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1115 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1116 KASSERT(keg->uk_ipers <= 255,
1117 ("zone_small_init: keg->uk_ipers too high!"));
1118 #ifdef UMA_DEBUG
1119 printf("UMA decided we need offpage slab headers for "
1120 "zone: %s, calculated wastedspace = %d, "
1121 "maximum wasted space allowed = %d, "
1122 "calculated ipers = %d, "
1123 "new wasted space = %d\n", zone->uz_name, wastedspace,
1124 UMA_MAX_WASTE, keg->uk_ipers,
1125 UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1126 #endif
1127 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1128 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1129 keg->uk_flags |= UMA_ZONE_HASH;
1130 }
1131 }
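
/*
 * Illustrative sketch with made-up numbers: for a hypothetical keg with
 * uk_size == 100, uk_align == 7 (8-byte alignment) and no UMA_ZONE_REFCNT,
 * and assuming UMA_SLAB_SIZE is a single 4096-byte page, the calculation
 * above works out roughly as:
 *
 *        rsize  = (100 & ~7) + 8 = 104;            -- aligned item size
 *        rsize += UMA_FRITM_SZ;                    -- plus freelist linkage
 *        ipers  = (4096 - sizeof(struct uma_slab)) / rsize;
 *        waste  = 4096 - (ipers * rsize + sizeof(struct uma_slab));
 *
 * Only when 'waste' reaches UMA_MAX_WASTE and an offpage header would buy
 * additional items does the keg get UMA_ZONE_OFFPAGE (plus UMA_ZONE_HASH
 * unless it is a malloc keg).
 */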
1132
1133 /*
1134 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
1135 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
1136 * more complicated.
1137 *
1138 * Arguments
1139 * zone The zone we should initialize
1140 *
1141 * Returns
1142 * Nothing
1143 */
1144 static void
1145 zone_large_init(uma_zone_t zone)
1146 {
1147 uma_keg_t keg;
1148 int pages;
1149
1150 keg = zone->uz_keg;
1151
1152 KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
1153 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1154 ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
1155
1156 pages = keg->uk_size / UMA_SLAB_SIZE;
1157
1158 /* Account for remainder */
1159 if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1160 pages++;
1161
1162 keg->uk_ppera = pages;
1163 keg->uk_ipers = 1;
1164
1165 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1166 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1167 keg->uk_flags |= UMA_ZONE_HASH;
1168
1169 keg->uk_rsize = keg->uk_size;
1170 }
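
/*
 * Illustrative sketch with made-up numbers: a hypothetical keg with
 * uk_size == 10000 and a 4096-byte UMA_SLAB_SIZE ends up with
 *
 *        pages = 10000 / 4096 = 2;  pages++;       -- remainder, so 3 pages
 *        uk_ppera = 3;  uk_ipers = 1;  uk_rsize = 10000;
 *
 * and its slab header always lives offpage, in slabzone or, for
 * UMA_ZONE_REFCNT kegs, slabrefzone (selected in keg_ctor() below).
 */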
1171
1172 /*
1173 * Keg header ctor. This initializes all fields, locks, etc. And inserts
1174 * the keg onto the global keg list.
1175 *
1176 * Arguments/Returns follow uma_ctor specifications
1177 * udata Actually uma_kctor_args
1178 */
1179 static int
1180 keg_ctor(void *mem, int size, void *udata, int flags)
1181 {
1182 struct uma_kctor_args *arg = udata;
1183 uma_keg_t keg = mem;
1184 uma_zone_t zone;
1185
1186 bzero(keg, size);
1187 keg->uk_size = arg->size;
1188 keg->uk_init = arg->uminit;
1189 keg->uk_fini = arg->fini;
1190 keg->uk_align = arg->align;
1191 keg->uk_free = 0;
1192 keg->uk_pages = 0;
1193 keg->uk_flags = arg->flags;
1194 keg->uk_allocf = page_alloc;
1195 keg->uk_freef = page_free;
1196 keg->uk_recurse = 0;
1197 keg->uk_slabzone = NULL;
1198
1199 /*
1200 * The master zone is passed to us at keg-creation time.
1201 */
1202 zone = arg->zone;
1203 zone->uz_keg = keg;
1204
1205 if (arg->flags & UMA_ZONE_VM)
1206 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1207
1208 if (arg->flags & UMA_ZONE_ZINIT)
1209 keg->uk_init = zero_init;
1210
1211 /*
1212 * The +UMA_FRITM_SZ added to uk_size is to account for the
1213 * linkage that is added to the size in zone_small_init(). If
1214 * we don't account for this here then we may end up in
1215 * zone_small_init() with a calculated 'ipers' of 0.
1216 */
1217 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1218 if ((keg->uk_size+UMA_FRITMREF_SZ) >
1219 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1220 zone_large_init(zone);
1221 else
1222 zone_small_init(zone);
1223 } else {
1224 if ((keg->uk_size+UMA_FRITM_SZ) >
1225 (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1226 zone_large_init(zone);
1227 else
1228 zone_small_init(zone);
1229 }
1230
1231 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1232 if (keg->uk_flags & UMA_ZONE_REFCNT)
1233 keg->uk_slabzone = slabrefzone;
1234 else
1235 keg->uk_slabzone = slabzone;
1236 }
1237
1238 /*
1239 * If we haven't booted yet we need allocations to go through the
1240 * startup cache until the vm is ready.
1241 */
1242 if (keg->uk_ppera == 1) {
1243 #ifdef UMA_MD_SMALL_ALLOC
1244 keg->uk_allocf = uma_small_alloc;
1245 keg->uk_freef = uma_small_free;
1246 #endif
1247 if (booted == 0)
1248 keg->uk_allocf = startup_alloc;
1249 }
1250
1251 /*
1252 * Initialize keg's lock (shared among zones) through
1253 * Master zone
1254 */
1255 zone->uz_lock = &keg->uk_lock;
1256 if (arg->flags & UMA_ZONE_MTXCLASS)
1257 ZONE_LOCK_INIT(zone, 1);
1258 else
1259 ZONE_LOCK_INIT(zone, 0);
1260
1261 /*
1262 * If we're putting the slab header in the actual page we need to
1263 * figure out where in each page it goes. This calculates a right
1264 * justified offset into the memory on an ALIGN_PTR boundary.
1265 */
1266 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1267 u_int totsize;
1268
1269 /* Size of the slab struct and free list */
1270 if (keg->uk_flags & UMA_ZONE_REFCNT)
1271 totsize = sizeof(struct uma_slab_refcnt) +
1272 keg->uk_ipers * UMA_FRITMREF_SZ;
1273 else
1274 totsize = sizeof(struct uma_slab) +
1275 keg->uk_ipers * UMA_FRITM_SZ;
1276
1277 if (totsize & UMA_ALIGN_PTR)
1278 totsize = (totsize & ~UMA_ALIGN_PTR) +
1279 (UMA_ALIGN_PTR + 1);
1280 keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
1281
1282 if (keg->uk_flags & UMA_ZONE_REFCNT)
1283 totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1284 + keg->uk_ipers * UMA_FRITMREF_SZ;
1285 else
1286 totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1287 + keg->uk_ipers * UMA_FRITM_SZ;
1288
1289 /*
1290 		 * The only way the following is possible is if our
1291 		 * UMA_ALIGN_PTR adjustments have now made us bigger than
1292 		 * UMA_SLAB_SIZE.  I haven't checked whether this is
1293 * mathematically possible for all cases, so we make
1294 * sure here anyway.
1295 */
1296 if (totsize > UMA_SLAB_SIZE) {
1297 printf("zone %s ipers %d rsize %d size %d\n",
1298 zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1299 keg->uk_size);
1300 panic("UMA slab won't fit.\n");
1301 }
1302 }
1303
1304 if (keg->uk_flags & UMA_ZONE_HASH)
1305 hash_alloc(&keg->uk_hash);
1306
1307 #ifdef UMA_DEBUG
1308 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1309 zone->uz_name, zone,
1310 keg->uk_size, keg->uk_ipers,
1311 keg->uk_ppera, keg->uk_pgoff);
1312 #endif
1313
1314 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1315
1316 mtx_lock(&uma_mtx);
1317 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1318 mtx_unlock(&uma_mtx);
1319 return (0);
1320 }
1321
1322 /*
1323 * Zone header ctor. This initializes all fields, locks, etc.
1324 *
1325 * Arguments/Returns follow uma_ctor specifications
1326 * udata Actually uma_zctor_args
1327 */
1328
1329 static int
1330 zone_ctor(void *mem, int size, void *udata, int flags)
1331 {
1332 struct uma_zctor_args *arg = udata;
1333 uma_zone_t zone = mem;
1334 uma_zone_t z;
1335 uma_keg_t keg;
1336
1337 bzero(zone, size);
1338 zone->uz_name = arg->name;
1339 zone->uz_ctor = arg->ctor;
1340 zone->uz_dtor = arg->dtor;
1341 zone->uz_init = NULL;
1342 zone->uz_fini = NULL;
1343 zone->uz_allocs = 0;
1344 zone->uz_frees = 0;
1345 zone->uz_fails = 0;
1346 zone->uz_fills = zone->uz_count = 0;
1347
1348 if (arg->flags & UMA_ZONE_SECONDARY) {
1349 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1350 keg = arg->keg;
1351 zone->uz_keg = keg;
1352 zone->uz_init = arg->uminit;
1353 zone->uz_fini = arg->fini;
1354 zone->uz_lock = &keg->uk_lock;
1355 mtx_lock(&uma_mtx);
1356 ZONE_LOCK(zone);
1357 keg->uk_flags |= UMA_ZONE_SECONDARY;
1358 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1359 if (LIST_NEXT(z, uz_link) == NULL) {
1360 LIST_INSERT_AFTER(z, zone, uz_link);
1361 break;
1362 }
1363 }
1364 ZONE_UNLOCK(zone);
1365 mtx_unlock(&uma_mtx);
1366 } else if (arg->keg == NULL) {
1367 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1368 arg->align, arg->flags) == NULL)
1369 return (ENOMEM);
1370 } else {
1371 struct uma_kctor_args karg;
1372 int error;
1373
1374 /* We should only be here from uma_startup() */
1375 karg.size = arg->size;
1376 karg.uminit = arg->uminit;
1377 karg.fini = arg->fini;
1378 karg.align = arg->align;
1379 karg.flags = arg->flags;
1380 karg.zone = zone;
1381 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1382 flags);
1383 if (error)
1384 return (error);
1385 }
1386 keg = zone->uz_keg;
1387 zone->uz_lock = &keg->uk_lock;
1388
1389 /*
1390 * Some internal zones don't have room allocated for the per cpu
1391 * caches. If we're internal, bail out here.
1392 */
1393 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1394 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
1395 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1396 return (0);
1397 }
1398
1399 if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1400 zone->uz_count = BUCKET_MAX;
1401 else if (keg->uk_ipers <= BUCKET_MAX)
1402 zone->uz_count = keg->uk_ipers;
1403 else
1404 zone->uz_count = BUCKET_MAX;
1405 return (0);
1406 }
1407
1408 /*
1409 * Keg header dtor. This frees all data, destroys locks, frees the hash
1410 * table and removes the keg from the global list.
1411 *
1412 * Arguments/Returns follow uma_dtor specifications
1413 * udata unused
1414 */
1415 static void
1416 keg_dtor(void *arg, int size, void *udata)
1417 {
1418 uma_keg_t keg;
1419
1420 keg = (uma_keg_t)arg;
1421 mtx_lock(&keg->uk_lock);
1422 if (keg->uk_free != 0) {
1423 printf("Freed UMA keg was not empty (%d items). "
1424 		    "Lost %d pages of memory.\n",
1425 keg->uk_free, keg->uk_pages);
1426 }
1427 mtx_unlock(&keg->uk_lock);
1428
1429 if (keg->uk_flags & UMA_ZONE_HASH)
1430 hash_free(&keg->uk_hash);
1431
1432 mtx_destroy(&keg->uk_lock);
1433 }
1434
1435 /*
1436 * Zone header dtor.
1437 *
1438 * Arguments/Returns follow uma_dtor specifications
1439 * udata unused
1440 */
1441 static void
1442 zone_dtor(void *arg, int size, void *udata)
1443 {
1444 uma_zone_t zone;
1445 uma_keg_t keg;
1446
1447 zone = (uma_zone_t)arg;
1448 keg = zone->uz_keg;
1449
1450 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
1451 cache_drain(zone);
1452
1453 mtx_lock(&uma_mtx);
1454 zone_drain(zone);
1455 if (keg->uk_flags & UMA_ZONE_SECONDARY) {
1456 LIST_REMOVE(zone, uz_link);
1457 /*
1458 * XXX there are some races here where
1459 * the zone can be drained but zone lock
1460 * released and then refilled before we
1461 		 * remove it... we don't care for now
1462 */
1463 ZONE_LOCK(zone);
1464 if (LIST_EMPTY(&keg->uk_zones))
1465 keg->uk_flags &= ~UMA_ZONE_SECONDARY;
1466 ZONE_UNLOCK(zone);
1467 mtx_unlock(&uma_mtx);
1468 } else {
1469 LIST_REMOVE(keg, uk_link);
1470 LIST_REMOVE(zone, uz_link);
1471 mtx_unlock(&uma_mtx);
1472 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE,
1473 ZFREE_STATFREE);
1474 }
1475 zone->uz_keg = NULL;
1476 }
1477
1478 /*
1479 * Traverses every zone in the system and calls a callback
1480 *
1481 * Arguments:
1482 * zfunc A pointer to a function which accepts a zone
1483 * as an argument.
1484 *
1485 * Returns:
1486 * Nothing
1487 */
1488 static void
1489 zone_foreach(void (*zfunc)(uma_zone_t))
1490 {
1491 uma_keg_t keg;
1492 uma_zone_t zone;
1493
1494 mtx_lock(&uma_mtx);
1495 LIST_FOREACH(keg, &uma_kegs, uk_link) {
1496 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1497 zfunc(zone);
1498 }
1499 mtx_unlock(&uma_mtx);
1500 }
1501
1502 /* Public functions */
1503 /* See uma.h */
1504 void
1505 uma_startup(void *bootmem)
1506 {
1507 struct uma_zctor_args args;
1508 uma_slab_t slab;
1509 u_int slabsize;
1510 u_int objsize, totsize, wsize;
1511 int i;
1512
1513 #ifdef UMA_DEBUG
1514 printf("Creating uma keg headers zone and keg.\n");
1515 #endif
1516 /*
1517 * The general UMA lock is a recursion-allowed lock because
1518 * there is a code path where, while we're still configured
1519 * to use startup_alloc() for backend page allocations, we
1520 * may end up in uma_reclaim() which calls zone_foreach(zone_drain),
1521 * which grabs uma_mtx, only to later call into startup_alloc()
1522 * because while freeing we needed to allocate a bucket. Since
1523 * startup_alloc() also takes uma_mtx, we need to be able to
1524 * recurse on it.
1525 */
1526 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF | MTX_RECURSE);
1527
1528 /*
1529 * Figure out the maximum number of items-per-slab we'll have if
1530 * we're using the OFFPAGE slab header to track free items, given
1531 * all possible object sizes and the maximum desired wastage
1532 * (UMA_MAX_WASTE).
1533 *
1534 * We iterate until we find an object size for
1535 * which the calculated wastage in zone_small_init() will be
1536 * enough to warrant OFFPAGE. Since wastedspace versus objsize
1537 * is an overall increasing see-saw function, we find the smallest
1538 * objsize such that the wastage is always acceptable for objects
1539 * with that objsize or smaller. Since a smaller objsize always
1540 * generates a larger possible uma_max_ipers, we use this computed
1541 * objsize to calculate the largest ipers possible. Since the
1542 * ipers calculated for OFFPAGE slab headers is always larger than
1543 * the ipers initially calculated in zone_small_init(), we use
1544 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1545 * obtain the maximum ipers possible for offpage slab headers.
1546 *
1547 	 * It should be noted that ipers versus objsize is an inversely
1548 * proportional function which drops off rather quickly so as
1549 * long as our UMA_MAX_WASTE is such that the objsize we calculate
1550 * falls into the portion of the inverse relation AFTER the steep
1551 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1552 *
1553 * Note that we have 8-bits (1 byte) to use as a freelist index
1554 * inside the actual slab header itself and this is enough to
1555 	 * accommodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
1556 * object with offpage slab header would have ipers =
1557 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1558 * 1 greater than what our byte-integer freelist index can
1559 	 * accommodate, but we know that this situation never occurs as
1560 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1561 * that we need to go to offpage slab headers. Or, if we do,
1562 * then we trap that condition below and panic in the INVARIANTS case.
1563 */
1564 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1565 totsize = wsize;
1566 objsize = UMA_SMALLEST_UNIT;
1567 while (totsize >= wsize) {
1568 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1569 (objsize + UMA_FRITM_SZ);
1570 totsize *= (UMA_FRITM_SZ + objsize);
1571 objsize++;
1572 }
1573 if (objsize > UMA_SMALLEST_UNIT)
1574 objsize--;
1575 uma_max_ipers = UMA_SLAB_SIZE / objsize;
1576
1577 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1578 totsize = wsize;
1579 objsize = UMA_SMALLEST_UNIT;
1580 while (totsize >= wsize) {
1581 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1582 (objsize + UMA_FRITMREF_SZ);
1583 totsize *= (UMA_FRITMREF_SZ + objsize);
1584 objsize++;
1585 }
1586 if (objsize > UMA_SMALLEST_UNIT)
1587 objsize--;
1588 uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;
1589
1590 KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1591 ("uma_startup: calculated uma_max_ipers values too large!"));
1592
1593 #ifdef UMA_DEBUG
1594 printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1595 	printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n",
1596 uma_max_ipers_ref);
1597 #endif
1598
1599 /* "manually" create the initial zone */
1600 args.name = "UMA Kegs";
1601 args.size = sizeof(struct uma_keg);
1602 args.ctor = keg_ctor;
1603 args.dtor = keg_dtor;
1604 args.uminit = zero_init;
1605 args.fini = NULL;
1606 args.keg = &masterkeg;
1607 args.align = 32 - 1;
1608 args.flags = UMA_ZFLAG_INTERNAL;
1609 /* The initial zone has no Per cpu queues so it's smaller */
1610 zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1611
1612 #ifdef UMA_DEBUG
1613 printf("Filling boot free list.\n");
1614 #endif
1615 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1616 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1617 slab->us_data = (u_int8_t *)slab;
1618 slab->us_flags = UMA_SLAB_BOOT;
1619 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1620 uma_boot_free++;
1621 }
1622
1623 #ifdef UMA_DEBUG
1624 printf("Creating uma zone headers zone and keg.\n");
1625 #endif
1626 args.name = "UMA Zones";
1627 args.size = sizeof(struct uma_zone) +
1628 (sizeof(struct uma_cache) * (mp_maxid + 1));
1629 args.ctor = zone_ctor;
1630 args.dtor = zone_dtor;
1631 args.uminit = zero_init;
1632 args.fini = NULL;
1633 args.keg = NULL;
1634 args.align = 32 - 1;
1635 args.flags = UMA_ZFLAG_INTERNAL;
1636 /* The initial zone has no Per cpu queues so it's smaller */
1637 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1638
1639 #ifdef UMA_DEBUG
1640 printf("Initializing pcpu cache locks.\n");
1641 #endif
1642 #ifdef UMA_DEBUG
1643 printf("Creating slab and hash zones.\n");
1644 #endif
1645
1646 /*
1647 * This is the max number of free list items we'll have with
1648 * offpage slabs.
1649 */
1650 slabsize = uma_max_ipers * UMA_FRITM_SZ;
1651 slabsize += sizeof(struct uma_slab);
1652
1653 /* Now make a zone for slab headers */
1654 slabzone = uma_zcreate("UMA Slabs",
1655 slabsize,
1656 NULL, NULL, NULL, NULL,
1657 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1658
1659 /*
1660 * We also create a zone for the bigger slabs with reference
1661 	 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1662 */
1663 slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1664 slabsize += sizeof(struct uma_slab_refcnt);
1665 slabrefzone = uma_zcreate("UMA RCntSlabs",
1666 slabsize,
1667 NULL, NULL, NULL, NULL,
1668 UMA_ALIGN_PTR,
1669 UMA_ZFLAG_INTERNAL);
1670
1671 hashzone = uma_zcreate("UMA Hash",
1672 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1673 NULL, NULL, NULL, NULL,
1674 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1675
1676 bucket_init();
1677
1678 #ifdef UMA_MD_SMALL_ALLOC
1679 booted = 1;
1680 #endif
1681
1682 #ifdef UMA_DEBUG
1683 printf("UMA startup complete.\n");
1684 #endif
1685 }
1686
1687 /* see uma.h */
1688 void
1689 uma_startup2(void)
1690 {
1691 booted = 1;
1692 bucket_enable();
1693 #ifdef UMA_DEBUG
1694 printf("UMA startup2 complete.\n");
1695 #endif
1696 }
1697
1698 /*
1699 * Initialize our callout handle
1700 *
1701 */
1702
1703 static void
1704 uma_startup3(void)
1705 {
1706 #ifdef UMA_DEBUG
1707 printf("Starting callout.\n");
1708 #endif
1709 callout_init(&uma_callout, CALLOUT_MPSAFE);
1710 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1711 #ifdef UMA_DEBUG
1712 printf("UMA startup3 complete.\n");
1713 #endif
1714 }
1715
1716 static uma_zone_t
1717 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1718 int align, u_int32_t flags)
1719 {
1720 struct uma_kctor_args args;
1721
1722 args.size = size;
1723 args.uminit = uminit;
1724 args.fini = fini;
1725 args.align = align;
1726 args.flags = flags;
1727 args.zone = zone;
1728 return (uma_zalloc_internal(kegs, &args, M_WAITOK));
1729 }
1730
1731 /* See uma.h */
1732 uma_zone_t
1733 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1734 uma_init uminit, uma_fini fini, int align, u_int32_t flags)
1735
1736 {
1737 struct uma_zctor_args args;
1738
1739 /* This stuff is essential for the zone ctor */
1740 args.name = name;
1741 args.size = size;
1742 args.ctor = ctor;
1743 args.dtor = dtor;
1744 args.uminit = uminit;
1745 args.fini = fini;
1746 args.align = align;
1747 args.flags = flags;
1748 args.keg = NULL;
1749
1750 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1751 }
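
/*
 * Illustrative sketch of typical use of the public interface declared in
 * <vm/uma.h>, for a hypothetical "foo" subsystem:
 *
 *        foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *        fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *        ...
 *        uma_zfree(foo_zone, fp);
 *        uma_zdestroy(foo_zone);
 */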
1752
1753 /* See uma.h */
1754 uma_zone_t
1755 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1756 uma_init zinit, uma_fini zfini, uma_zone_t master)
1757 {
1758 struct uma_zctor_args args;
1759
1760 args.name = name;
1761 args.size = master->uz_keg->uk_size;
1762 args.ctor = ctor;
1763 args.dtor = dtor;
1764 args.uminit = zinit;
1765 args.fini = zfini;
1766 args.align = master->uz_keg->uk_align;
1767 args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
1768 args.keg = master->uz_keg;
1769
1770 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1771 }
1772
1773 /* See uma.h */
1774 void
1775 uma_zdestroy(uma_zone_t zone)
1776 {
1777
1778 uma_zfree_internal(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
1779 }
1780
1781 /* See uma.h */
1782 void *
1783 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1784 {
1785 void *item;
1786 uma_cache_t cache;
1787 uma_bucket_t bucket;
1788 int cpu;
1789 int badness;
1790
1791 /* This is the fast path allocation */
1792 #ifdef UMA_DEBUG_ALLOC_1
1793 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1794 #endif
1795 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1796 zone->uz_name, flags);
1797
1798 if (!(flags & M_NOWAIT)) {
1799 KASSERT(curthread->td_intr_nesting_level == 0,
1800 ("malloc(M_WAITOK) in interrupt context"));
1801 if (nosleepwithlocks) {
1802 #ifdef WITNESS
1803 badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
1804 NULL,
1805 "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT",
1806 zone->uz_name);
1807 #else
1808 badness = 1;
1809 #endif
1810 } else {
1811 badness = 0;
1812 #ifdef WITNESS
1813 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1814 "malloc(M_WAITOK) of \"%s\"", zone->uz_name);
1815 #endif
1816 }
1817 if (badness) {
1818 flags &= ~M_WAITOK;
1819 flags |= M_NOWAIT;
1820 }
1821 }
1822
1823 /*
1824 * If possible, allocate from the per-CPU cache. There are two
1825 * requirements for safe access to the per-CPU cache: (1) the thread
1826 * accessing the cache must not be preempted or yield during access,
1827 * and (2) the thread must not migrate CPUs without switching which
1828 * cache it accesses. We rely on a critical section to prevent
1829 * preemption and migration. We release the critical section in
1830 * order to acquire the zone mutex if we are unable to allocate from
1831 * the current cache; when we re-acquire the critical section, we
1832 * must detect and handle migration if it has occurred.
1833 */
1834 zalloc_restart:
1835 critical_enter();
1836 cpu = curcpu;
1837 cache = &zone->uz_cpu[cpu];
1838
1839 zalloc_start:
1840 bucket = cache->uc_allocbucket;
1841
1842 if (bucket) {
1843 if (bucket->ub_cnt > 0) {
1844 bucket->ub_cnt--;
1845 item = bucket->ub_bucket[bucket->ub_cnt];
1846 #ifdef INVARIANTS
1847 bucket->ub_bucket[bucket->ub_cnt] = NULL;
1848 #endif
1849 KASSERT(item != NULL,
1850 ("uma_zalloc: Bucket pointer mangled."));
1851 cache->uc_allocs++;
1852 critical_exit();
1853 #ifdef INVARIANTS
1854 ZONE_LOCK(zone);
1855 uma_dbg_alloc(zone, NULL, item);
1856 ZONE_UNLOCK(zone);
1857 #endif
1858 if (zone->uz_ctor != NULL) {
1859 if (zone->uz_ctor(item, zone->uz_keg->uk_size,
1860 udata, flags) != 0) {
1861 uma_zfree_internal(zone, item, udata,
1862 SKIP_DTOR, ZFREE_STATFAIL |
1863 ZFREE_STATFREE);
1864 return (NULL);
1865 }
1866 }
1867 if (flags & M_ZERO)
1868 bzero(item, zone->uz_keg->uk_size);
1869 return (item);
1870 } else if (cache->uc_freebucket) {
1871 /*
1872 * We have run out of items in our allocbucket.
1873 * See if we can switch with our free bucket.
1874 */
1875 if (cache->uc_freebucket->ub_cnt > 0) {
1876 #ifdef UMA_DEBUG_ALLOC
1877 printf("uma_zalloc: Swapping empty with"
1878 " alloc.\n");
1879 #endif
1880 bucket = cache->uc_freebucket;
1881 cache->uc_freebucket = cache->uc_allocbucket;
1882 cache->uc_allocbucket = bucket;
1883
1884 goto zalloc_start;
1885 }
1886 }
1887 }
1888 /*
1889 	 * The attempt to retrieve the item from the per-CPU cache has failed, so
1890 * we must go back to the zone. This requires the zone lock, so we
1891 * must drop the critical section, then re-acquire it when we go back
1892 * to the cache. Since the critical section is released, we may be
1893 * preempted or migrate. As such, make sure not to maintain any
1894 * thread-local state specific to the cache from prior to releasing
1895 * the critical section.
1896 */
1897 critical_exit();
1898 ZONE_LOCK(zone);
1899 critical_enter();
1900 cpu = curcpu;
1901 cache = &zone->uz_cpu[cpu];
1902 bucket = cache->uc_allocbucket;
1903 if (bucket != NULL) {
1904 if (bucket->ub_cnt > 0) {
1905 ZONE_UNLOCK(zone);
1906 goto zalloc_start;
1907 }
1908 bucket = cache->uc_freebucket;
1909 if (bucket != NULL && bucket->ub_cnt > 0) {
1910 ZONE_UNLOCK(zone);
1911 goto zalloc_start;
1912 }
1913 }
1914
1915 /* Since we have locked the zone we may as well send back our stats */
1916 zone->uz_allocs += cache->uc_allocs;
1917 cache->uc_allocs = 0;
1918 zone->uz_frees += cache->uc_frees;
1919 cache->uc_frees = 0;
1920
1921 /* Our old one is now a free bucket */
1922 if (cache->uc_allocbucket) {
1923 KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1924 ("uma_zalloc_arg: Freeing a non free bucket."));
1925 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1926 cache->uc_allocbucket, ub_link);
1927 cache->uc_allocbucket = NULL;
1928 }
1929
1930 /* Check the free list for a new alloc bucket */
1931 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1932 KASSERT(bucket->ub_cnt != 0,
1933 ("uma_zalloc_arg: Returning an empty bucket."));
1934
1935 LIST_REMOVE(bucket, ub_link);
1936 cache->uc_allocbucket = bucket;
1937 ZONE_UNLOCK(zone);
1938 goto zalloc_start;
1939 }
1940 /* We are no longer associated with this CPU. */
1941 critical_exit();
1942
1943 /* Bump up our uz_count so we get here less */
1944 if (zone->uz_count < BUCKET_MAX)
1945 zone->uz_count++;
1946
1947 /*
1948 	 * Now let's just fill a bucket and put it on the free list.  If that
1949 	 * works, we'll restart the allocation from the beginning.
1950 */
1951 if (uma_zalloc_bucket(zone, flags)) {
1952 ZONE_UNLOCK(zone);
1953 goto zalloc_restart;
1954 }
1955 ZONE_UNLOCK(zone);
1956 /*
1957 * We may not be able to get a bucket so return an actual item.
1958 */
1959 #ifdef UMA_DEBUG
1960 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1961 #endif
1962
1963 return (uma_zalloc_internal(zone, udata, flags));
1964 }
1965
1966 static uma_slab_t
1967 uma_zone_slab(uma_zone_t zone, int flags)
1968 {
1969 uma_slab_t slab;
1970 uma_keg_t keg;
1971
1972 keg = zone->uz_keg;
1973
1974 /*
1975 * This is to prevent us from recursively trying to allocate
1976 * buckets. The problem is that if an allocation forces us to
1977 * grab a new bucket, we will call page_alloc, which will go off
1978 * and cause the VM to allocate vm_map_entries. If we need new
1979 * buckets there too, we will recurse in kmem_alloc and bad
1980 * things happen. So instead we return a NULL bucket, and make
1981 * the code that allocates buckets smart enough to deal with it.
1982 *
1983 * XXX: While we want this protection for the bucket zones so that
1984 * recursion from the VM is handled (and the calling code that
1985 * allocates buckets knows how to deal with it), we do not want
1986 * to prevent allocation from the slab header zones (slabzone
1987 * and slabrefzone) if uk_recurse is not zero for them. The
1988 * reason is that it could lead to NULL being returned for
1989 * slab header allocations even in the M_WAITOK case, and the
1990 * caller can't handle that.
1991 */
1992 if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
1993 if ((zone != slabzone) && (zone != slabrefzone))
1994 return (NULL);
1995
1996 slab = NULL;
1997
1998 for (;;) {
1999 /*
2000 * Find a slab with some space. Prefer slabs that are partially
2001 * used over those that are totally free. This helps to reduce
2002 * fragmentation.
2003 */
2004 if (keg->uk_free != 0) {
2005 if (!LIST_EMPTY(&keg->uk_part_slab)) {
2006 slab = LIST_FIRST(&keg->uk_part_slab);
2007 } else {
2008 slab = LIST_FIRST(&keg->uk_free_slab);
2009 LIST_REMOVE(slab, us_link);
2010 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2011 us_link);
2012 }
2013 return (slab);
2014 }
2015
2016 /*
2017 * M_NOVM means don't ask at all!
2018 */
2019 if (flags & M_NOVM)
2020 break;
2021
2022 if (keg->uk_maxpages &&
2023 keg->uk_pages >= keg->uk_maxpages) {
2024 keg->uk_flags |= UMA_ZFLAG_FULL;
2025
2026 if (flags & M_NOWAIT)
2027 break;
2028 else
2029 msleep(keg, &keg->uk_lock, PVM,
2030 "zonelimit", 0);
2031 continue;
2032 }
2033 keg->uk_recurse++;
2034 slab = slab_zalloc(zone, flags);
2035 keg->uk_recurse--;
2036
2037 /*
2038 * If we got a slab here it's safe to mark it partially used
2039 * and return. We assume that the caller is going to remove
2040 * at least one item.
2041 */
2042 if (slab) {
2043 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2044 return (slab);
2045 }
2046 /*
2047 * We might not have been able to get a slab, but another CPU
2048 * could have while we were unlocked. Check again before we
2049 * fail.
2050 */
2051 if (flags & M_NOWAIT)
2052 flags |= M_NOVM;
2053 }
2054 return (slab);
2055 }
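
/*
 * A brief summary of the loop above: when the keg has reached uk_maxpages,
 * M_WAITOK callers sleep on the keg ("zonelimit") and retry, while
 * M_NOWAIT callers break out.  After a failed slab_zalloc(), an M_NOWAIT
 * request also picks up M_NOVM, so the next pass can only succeed if
 * another thread replenished uk_free in the meantime.
 */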
2056
2057 static void *
2058 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
2059 {
2060 uma_keg_t keg;
2061 uma_slabrefcnt_t slabref;
2062 void *item;
2063 u_int8_t freei;
2064
2065 keg = zone->uz_keg;
2066
2067 freei = slab->us_firstfree;
2068 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2069 slabref = (uma_slabrefcnt_t)slab;
2070 slab->us_firstfree = slabref->us_freelist[freei].us_item;
2071 } else {
2072 slab->us_firstfree = slab->us_freelist[freei].us_item;
2073 }
2074 item = slab->us_data + (keg->uk_rsize * freei);
2075
2076 slab->us_freecount--;
2077 keg->uk_free--;
2078 #ifdef INVARIANTS
2079 uma_dbg_alloc(zone, slab, item);
2080 #endif
2081 /* Move this slab to the full list */
2082 if (slab->us_freecount == 0) {
2083 LIST_REMOVE(slab, us_link);
2084 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2085 }
2086
2087 return (item);
2088 }
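
/*
 * Worked example (illustrative values): with uk_rsize == 128, a slab whose
 * us_firstfree == 3 and us_freelist[3].us_item == 7 yields the item at
 * us_data + 3 * 128 and leaves us_firstfree == 7.  The per-slab free list
 * is thus a singly-linked list of item indices threaded through the
 * freelist array.
 */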
2089
2090 static int
2091 uma_zalloc_bucket(uma_zone_t zone, int flags)
2092 {
2093 uma_bucket_t bucket;
2094 uma_slab_t slab;
2095 int16_t saved;
2096 int max, origflags = flags;
2097
2098 /*
2099 * Try this zone's free list first so we don't allocate extra buckets.
2100 */
2101 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2102 KASSERT(bucket->ub_cnt == 0,
2103 ("uma_zalloc_bucket: Bucket on free list is not empty."));
2104 LIST_REMOVE(bucket, ub_link);
2105 } else {
2106 int bflags;
2107
2108 bflags = (flags & ~M_ZERO);
2109 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2110 bflags |= M_NOVM;
2111
2112 ZONE_UNLOCK(zone);
2113 bucket = bucket_alloc(zone->uz_count, bflags);
2114 ZONE_LOCK(zone);
2115 }
2116
2117 if (bucket == NULL)
2118 return (0);
2119
2120 #ifdef SMP
2121 /*
2122 * This code is here to limit the number of simultaneous bucket fills
2123 * for any given zone to the number of per-CPU caches in this zone. This
2124 * is done so that we don't allocate more memory than we really need.
2125 */
2126 if (zone->uz_fills >= mp_ncpus)
2127 goto done;
2128
2129 #endif
2130 zone->uz_fills++;
2131
2132 max = MIN(bucket->ub_entries, zone->uz_count);
2133 /* Try to keep the buckets totally full */
2134 saved = bucket->ub_cnt;
2135 while (bucket->ub_cnt < max &&
2136 (slab = uma_zone_slab(zone, flags)) != NULL) {
2137 while (slab->us_freecount && bucket->ub_cnt < max) {
2138 bucket->ub_bucket[bucket->ub_cnt++] =
2139 uma_slab_alloc(zone, slab);
2140 }
2141
2142 /* Don't block on the next fill */
2143 flags |= M_NOWAIT;
2144 }
2145
2146 /*
2147 * We unlock here because we need to call the zone's init.
2148 * It should be safe to unlock because the slab dealt with
2149 * above is already on the appropriate list within the keg
2150 * and the bucket we filled is not yet on any list, so we
2151 * own it.
2152 */
2153 if (zone->uz_init != NULL) {
2154 int i;
2155
2156 ZONE_UNLOCK(zone);
2157 for (i = saved; i < bucket->ub_cnt; i++)
2158 if (zone->uz_init(bucket->ub_bucket[i],
2159 zone->uz_keg->uk_size, origflags) != 0)
2160 break;
2161 /*
2162 * If we couldn't initialize the whole bucket, put the
2163 * rest back onto the freelist.
2164 */
2165 if (i != bucket->ub_cnt) {
2166 int j;
2167
2168 for (j = i; j < bucket->ub_cnt; j++) {
2169 uma_zfree_internal(zone, bucket->ub_bucket[j],
2170 NULL, SKIP_FINI, 0);
2171 #ifdef INVARIANTS
2172 bucket->ub_bucket[j] = NULL;
2173 #endif
2174 }
2175 bucket->ub_cnt = i;
2176 }
2177 ZONE_LOCK(zone);
2178 }
2179
2180 zone->uz_fills--;
2181 if (bucket->ub_cnt != 0) {
2182 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2183 bucket, ub_link);
2184 return (1);
2185 }
2186 #ifdef SMP
2187 done:
2188 #endif
2189 bucket_free(bucket);
2190
2191 return (0);
2192 }
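
/*
 * On SMP, the uz_fills counter above caps concurrent bucket fills at
 * mp_ncpus so that a burst of cache misses does not over-allocate bucket
 * memory.  A successfully filled bucket is placed on uz_full_bucket, and
 * the caller, uma_zalloc_arg(), restarts its allocation path to pick it up.
 */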
2193 /*
2194 * Allocates an item for an internal zone
2195 *
2196 * Arguments
2197 * zone The zone to alloc for.
2198 * udata The data to be passed to the constructor.
2199 * flags M_WAITOK, M_NOWAIT, M_ZERO.
2200 *
2201 * Returns
2202 * NULL if there is no memory and M_NOWAIT is set
2203 * An item if successful
2204 */
2205
2206 static void *
2207 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
2208 {
2209 uma_keg_t keg;
2210 uma_slab_t slab;
2211 void *item;
2212
2213 item = NULL;
2214 keg = zone->uz_keg;
2215
2216 #ifdef UMA_DEBUG_ALLOC
2217 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2218 #endif
2219 ZONE_LOCK(zone);
2220
2221 slab = uma_zone_slab(zone, flags);
2222 if (slab == NULL) {
2223 zone->uz_fails++;
2224 ZONE_UNLOCK(zone);
2225 return (NULL);
2226 }
2227
2228 item = uma_slab_alloc(zone, slab);
2229
2230 zone->uz_allocs++;
2231
2232 ZONE_UNLOCK(zone);
2233
2234 /*
2235 * We have to call both the zone's init (not the keg's init)
2236 * and the zone's ctor. This is because the item is going from
2237 * a keg slab directly to the user, and the user is expecting it
2238 * to be both zone-init'd and zone-ctor'd.
2239 */
2240 if (zone->uz_init != NULL) {
2241 if (zone->uz_init(item, keg->uk_size, flags) != 0) {
2242 uma_zfree_internal(zone, item, udata, SKIP_FINI,
2243 ZFREE_STATFAIL | ZFREE_STATFREE);
2244 return (NULL);
2245 }
2246 }
2247 if (zone->uz_ctor != NULL) {
2248 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) {
2249 uma_zfree_internal(zone, item, udata, SKIP_DTOR,
2250 ZFREE_STATFAIL | ZFREE_STATFREE);
2251 return (NULL);
2252 }
2253 }
2254 if (flags & M_ZERO)
2255 bzero(item, keg->uk_size);
2256
2257 return (item);
2258 }
2259
2260 /* See uma.h */
2261 void
2262 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2263 {
2264 uma_keg_t keg;
2265 uma_cache_t cache;
2266 uma_bucket_t bucket;
2267 int bflags;
2268 int cpu;
2269
2270 keg = zone->uz_keg;
2271
2272 #ifdef UMA_DEBUG_ALLOC_1
2273 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2274 #endif
2275 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2276 zone->uz_name);
2277
2278 if (zone->uz_dtor)
2279 zone->uz_dtor(item, keg->uk_size, udata);
2280 #ifdef INVARIANTS
2281 ZONE_LOCK(zone);
2282 if (keg->uk_flags & UMA_ZONE_MALLOC)
2283 uma_dbg_free(zone, udata, item);
2284 else
2285 uma_dbg_free(zone, NULL, item);
2286 ZONE_UNLOCK(zone);
2287 #endif
2288 /*
2289 * The race here is acceptable. If we miss it we'll just have to wait
2290 * a little longer for the limits to be reset.
2291 */
2292 if (keg->uk_flags & UMA_ZFLAG_FULL)
2293 goto zfree_internal;
2294
2295 /*
2296 * If possible, free to the per-CPU cache. There are two
2297 * requirements for safe access to the per-CPU cache: (1) the thread
2298 * accessing the cache must not be preempted or yield during access,
2299 * and (2) the thread must not migrate CPUs without switching which
2300 * cache it accesses. We rely on a critical section to prevent
2301 * preemption and migration. We release the critical section in
2302 * order to acquire the zone mutex if we are unable to free to the
2303 * current cache; when we re-acquire the critical section, we must
2304 * detect and handle migration if it has occurred.
2305 */
2306 zfree_restart:
2307 critical_enter();
2308 cpu = curcpu;
2309 cache = &zone->uz_cpu[cpu];
2310
2311 zfree_start:
2312 bucket = cache->uc_freebucket;
2313
2314 if (bucket) {
2315 /*
2316 * Do we have room in our bucket? It is OK for this uz count
2317 * check to be slightly out of sync.
2318 */
2319
2320 if (bucket->ub_cnt < bucket->ub_entries) {
2321 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2322 ("uma_zfree: Freeing to non free bucket index."));
2323 bucket->ub_bucket[bucket->ub_cnt] = item;
2324 bucket->ub_cnt++;
2325 cache->uc_frees++;
2326 critical_exit();
2327 return;
2328 } else if (cache->uc_allocbucket) {
2329 #ifdef UMA_DEBUG_ALLOC
2330 printf("uma_zfree: Swapping buckets.\n");
2331 #endif
2332 /*
2333 * We have run out of space in our freebucket.
2334 * See if we can switch with our alloc bucket.
2335 */
2336 if (cache->uc_allocbucket->ub_cnt <
2337 cache->uc_freebucket->ub_cnt) {
2338 bucket = cache->uc_freebucket;
2339 cache->uc_freebucket = cache->uc_allocbucket;
2340 cache->uc_allocbucket = bucket;
2341 goto zfree_start;
2342 }
2343 }
2344 }
2345 /*
2346 * We can get here for two reasons:
2347 *
2348 * 1) The buckets are NULL
2349 * 2) The alloc and free buckets are both somewhat full.
2350 *
2351 * We must go back to the zone, which requires acquiring the zone lock,
2352 * which in turn means we must release and re-acquire the critical
2353 * section. Since the critical section is released, we may be
2354 * preempted or migrate. As such, make sure not to maintain any
2355 * thread-local state specific to the cache from prior to releasing
2356 * the critical section.
2357 */
2358 critical_exit();
2359 ZONE_LOCK(zone);
2360 critical_enter();
2361 cpu = curcpu;
2362 cache = &zone->uz_cpu[cpu];
2363 if (cache->uc_freebucket != NULL) {
2364 if (cache->uc_freebucket->ub_cnt <
2365 cache->uc_freebucket->ub_entries) {
2366 ZONE_UNLOCK(zone);
2367 goto zfree_start;
2368 }
2369 if (cache->uc_allocbucket != NULL &&
2370 (cache->uc_allocbucket->ub_cnt <
2371 cache->uc_freebucket->ub_cnt)) {
2372 ZONE_UNLOCK(zone);
2373 goto zfree_start;
2374 }
2375 }
2376
2377 /* Since we have locked the zone we may as well send back our stats */
2378 zone->uz_allocs += cache->uc_allocs;
2379 cache->uc_allocs = 0;
2380 zone->uz_frees += cache->uc_frees;
2381 cache->uc_frees = 0;
2382
2383 bucket = cache->uc_freebucket;
2384 cache->uc_freebucket = NULL;
2385
2386 /* Can we throw this on the zone full list? */
2387 if (bucket != NULL) {
2388 #ifdef UMA_DEBUG_ALLOC
2389 printf("uma_zfree: Putting old bucket on the free list.\n");
2390 #endif
2391 /* ub_cnt is pointing to the last free item */
2392 KASSERT(bucket->ub_cnt != 0,
2393 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2394 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2395 bucket, ub_link);
2396 }
2397 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2398 LIST_REMOVE(bucket, ub_link);
2399 ZONE_UNLOCK(zone);
2400 cache->uc_freebucket = bucket;
2401 goto zfree_start;
2402 }
2403 /* We are no longer associated with this CPU. */
2404 critical_exit();
2405
2406 /* And the zone.. */
2407 ZONE_UNLOCK(zone);
2408
2409 #ifdef UMA_DEBUG_ALLOC
2410 printf("uma_zfree: Allocating new free bucket.\n");
2411 #endif
2412 bflags = M_NOWAIT;
2413
2414 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2415 bflags |= M_NOVM;
2416 bucket = bucket_alloc(zone->uz_count, bflags);
2417 if (bucket) {
2418 ZONE_LOCK(zone);
2419 LIST_INSERT_HEAD(&zone->uz_free_bucket,
2420 bucket, ub_link);
2421 ZONE_UNLOCK(zone);
2422 goto zfree_restart;
2423 }
2424
2425 /*
2426 * If nothing else caught this, we'll just do an internal free.
2427 */
2428 zfree_internal:
2429 uma_zfree_internal(zone, item, udata, SKIP_DTOR, ZFREE_STATFAIL |
2430 ZFREE_STATFREE);
2431
2432 return;
2433 }
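
/*
 * Summary of the free path above: the item goes into the per-CPU free
 * bucket when there is room; otherwise the alloc and free buckets are
 * swapped if the alloc bucket is emptier.  Failing that, the full free
 * bucket is pushed onto uz_full_bucket, an empty bucket is taken from
 * uz_free_bucket or allocated with M_NOWAIT, and only as a last resort
 * (or when the keg is marked UMA_ZFLAG_FULL) is the item handed to
 * uma_zfree_internal().
 */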
2434
2435 /*
2436 * Frees an item to an INTERNAL zone.
2437 *
2438 * Arguments:
2439 * zone The zone to free to
2440 * item The item we're freeing
2441 * udata User supplied data for the dtor
2442 * skip Skip dtors and finis
2443 */
2444 static void
2445 uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
2446 enum zfreeskip skip, int flags)
2447 {
2448 uma_slab_t slab;
2449 uma_slabrefcnt_t slabref;
2450 uma_keg_t keg;
2451 u_int8_t *mem;
2452 u_int8_t freei;
2453
2454 keg = zone->uz_keg;
2455
2456 if (skip < SKIP_DTOR && zone->uz_dtor)
2457 zone->uz_dtor(item, keg->uk_size, udata);
2458 if (skip < SKIP_FINI && zone->uz_fini)
2459 zone->uz_fini(item, keg->uk_size);
2460
2461 ZONE_LOCK(zone);
2462
2463 if (flags & ZFREE_STATFAIL)
2464 zone->uz_fails++;
2465 if (flags & ZFREE_STATFREE)
2466 zone->uz_frees++;
2467
2468 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
2469 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2470 if (keg->uk_flags & UMA_ZONE_HASH)
2471 slab = hash_sfind(&keg->uk_hash, mem);
2472 else {
2473 mem += keg->uk_pgoff;
2474 slab = (uma_slab_t)mem;
2475 }
2476 } else {
2477 slab = (uma_slab_t)udata;
2478 }
2479
2480 /* Do we need to remove from any lists? */
2481 if (slab->us_freecount+1 == keg->uk_ipers) {
2482 LIST_REMOVE(slab, us_link);
2483 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2484 } else if (slab->us_freecount == 0) {
2485 LIST_REMOVE(slab, us_link);
2486 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2487 }
2488
2489 /* Return the item to the slab's embedded free list. */
2490 freei = ((unsigned long)item - (unsigned long)slab->us_data)
2491 / keg->uk_rsize;
2492
2493 #ifdef INVARIANTS
2494 if (!skip)
2495 uma_dbg_free(zone, slab, item);
2496 #endif
2497
2498 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2499 slabref = (uma_slabrefcnt_t)slab;
2500 slabref->us_freelist[freei].us_item = slab->us_firstfree;
2501 } else {
2502 slab->us_freelist[freei].us_item = slab->us_firstfree;
2503 }
2504 slab->us_firstfree = freei;
2505 slab->us_freecount++;
2506
2507 /* Zone statistics */
2508 keg->uk_free++;
2509
2510 if (keg->uk_flags & UMA_ZFLAG_FULL) {
2511 if (keg->uk_pages < keg->uk_maxpages)
2512 keg->uk_flags &= ~UMA_ZFLAG_FULL;
2513
2514 /* We can handle one more allocation */
2515 wakeup_one(keg);
2516 }
2517
2518 ZONE_UNLOCK(zone);
2519 }
2520
2521 /* See uma.h */
2522 void
2523 uma_zone_set_max(uma_zone_t zone, int nitems)
2524 {
2525 uma_keg_t keg;
2526
2527 keg = zone->uz_keg;
2528 ZONE_LOCK(zone);
2529 if (keg->uk_ppera > 1)
2530 keg->uk_maxpages = nitems * keg->uk_ppera;
2531 else
2532 keg->uk_maxpages = nitems / keg->uk_ipers;
2533
2534 if (keg->uk_maxpages * keg->uk_ipers < nitems)
2535 keg->uk_maxpages++;
2536
2537 ZONE_UNLOCK(zone);
2538 }
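
/*
 * Worked example (illustrative values): for a single-page keg with
 * uk_ipers == 10, uma_zone_set_max(zone, 25) computes uk_maxpages as
 * 25 / 10 == 2 and then bumps it to 3 because 2 * 10 < 25, so the limit
 * is effectively rounded up to a whole number of slabs (30 items here).
 */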
2539
2540 /* See uma.h */
2541 void
2542 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2543 {
2544 ZONE_LOCK(zone);
2545 KASSERT(zone->uz_keg->uk_pages == 0,
2546 ("uma_zone_set_init on non-empty keg"));
2547 zone->uz_keg->uk_init = uminit;
2548 ZONE_UNLOCK(zone);
2549 }
2550
2551 /* See uma.h */
2552 void
2553 uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2554 {
2555 ZONE_LOCK(zone);
2556 KASSERT(zone->uz_keg->uk_pages == 0,
2557 ("uma_zone_set_fini on non-empty keg"));
2558 zone->uz_keg->uk_fini = fini;
2559 ZONE_UNLOCK(zone);
2560 }
2561
2562 /* See uma.h */
2563 void
2564 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2565 {
2566 ZONE_LOCK(zone);
2567 KASSERT(zone->uz_keg->uk_pages == 0,
2568 ("uma_zone_set_zinit on non-empty keg"));
2569 zone->uz_init = zinit;
2570 ZONE_UNLOCK(zone);
2571 }
2572
2573 /* See uma.h */
2574 void
2575 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2576 {
2577 ZONE_LOCK(zone);
2578 KASSERT(zone->uz_keg->uk_pages == 0,
2579 ("uma_zone_set_zfini on non-empty keg"));
2580 zone->uz_fini = zfini;
2581 ZONE_UNLOCK(zone);
2582 }
2583
2584 /* See uma.h */
2585 /* XXX uk_freef is not actually used with the zone locked */
2586 void
2587 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2588 {
2589 ZONE_LOCK(zone);
2590 zone->uz_keg->uk_freef = freef;
2591 ZONE_UNLOCK(zone);
2592 }
2593
2594 /* See uma.h */
2595 /* XXX uk_allocf is not actually used with the zone locked */
2596 void
2597 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2598 {
2599 ZONE_LOCK(zone);
2600 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2601 zone->uz_keg->uk_allocf = allocf;
2602 ZONE_UNLOCK(zone);
2603 }
2604
2605 /* See uma.h */
2606 int
2607 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
2608 {
2609 uma_keg_t keg;
2610 vm_offset_t kva;
2611 int pages;
2612
2613 keg = zone->uz_keg;
2614 pages = count / keg->uk_ipers;
2615
2616 if (pages * keg->uk_ipers < count)
2617 pages++;
2618
2619 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
2620
2621 if (kva == 0)
2622 return (0);
2623 if (obj == NULL) {
2624 obj = vm_object_allocate(OBJT_DEFAULT,
2625 pages);
2626 } else {
2627 VM_OBJECT_LOCK_INIT(obj, "uma object");
2628 _vm_object_allocate(OBJT_DEFAULT,
2629 pages, obj);
2630 }
2631 ZONE_LOCK(zone);
2632 keg->uk_kva = kva;
2633 keg->uk_obj = obj;
2634 keg->uk_maxpages = pages;
2635 keg->uk_allocf = obj_alloc;
2636 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2637 ZONE_UNLOCK(zone);
2638 return (1);
2639 }
2640
2641 /* See uma.h */
2642 void
2643 uma_prealloc(uma_zone_t zone, int items)
2644 {
2645 int slabs;
2646 uma_slab_t slab;
2647 uma_keg_t keg;
2648
2649 keg = zone->uz_keg;
2650 ZONE_LOCK(zone);
2651 slabs = items / keg->uk_ipers;
2652 if (slabs * keg->uk_ipers < items)
2653 slabs++;
2654 while (slabs > 0) {
2655 slab = slab_zalloc(zone, M_WAITOK);
2656 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2657 slabs--;
2658 }
2659 ZONE_UNLOCK(zone);
2660 }
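
/*
 * Illustrative use: a subsystem that cannot tolerate early allocation
 * failures might call uma_prealloc(foo_zone, 256) at boot (foo_zone being
 * a hypothetical zone); the request is rounded up to a whole number of
 * slabs, which are allocated with M_WAITOK and parked on the keg's free
 * slab list before any items are handed out.
 */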
2661
2662 /* See uma.h */
2663 u_int32_t *
2664 uma_find_refcnt(uma_zone_t zone, void *item)
2665 {
2666 uma_slabrefcnt_t slabref;
2667 uma_keg_t keg;
2668 u_int32_t *refcnt;
2669 int idx;
2670
2671 keg = zone->uz_keg;
2672 slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
2673 (~UMA_SLAB_MASK));
2674 KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
2675 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
2676 idx = ((unsigned long)item - (unsigned long)slabref->us_data)
2677 / keg->uk_rsize;
2678 refcnt = &slabref->us_freelist[idx].us_refcnt;
2679 return (refcnt);
2680 }
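
/*
 * Illustrative use (assumes a zone created with UMA_ZONE_REFCNT): callers
 * that need a per-item reference count, such as the mbuf cluster zones,
 * typically do something like
 *
 *	refp = uma_find_refcnt(foo_zone, item);
 *	*refp = 1;
 *
 * and then adjust *refp as references come and go; the counter itself
 * lives in the slab's us_freelist entry for that item.
 */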
2681
2682 /* See uma.h */
2683 void
2684 uma_reclaim(void)
2685 {
2686 #ifdef UMA_DEBUG
2687 printf("UMA: vm asked us to release pages!\n");
2688 #endif
2689 bucket_enable();
2690 zone_foreach(zone_drain);
2691 /*
2692 * Some slabs may have been freed but this zone will be visited early;
2693 * we visit it again so that we can free pages that are empty once other
2694 * zones are drained. We have to do the same for buckets.
2695 */
2696 zone_drain(slabzone);
2697 zone_drain(slabrefzone);
2698 bucket_zone_drain();
2699 }
2700
2701 void *
2702 uma_large_malloc(int size, int wait)
2703 {
2704 void *mem;
2705 uma_slab_t slab;
2706 u_int8_t flags;
2707
2708 slab = uma_zalloc_internal(slabzone, NULL, wait);
2709 if (slab == NULL)
2710 return (NULL);
2711 mem = page_alloc(NULL, size, &flags, wait);
2712 if (mem) {
2713 vsetslab((vm_offset_t)mem, slab);
2714 slab->us_data = mem;
2715 slab->us_flags = flags | UMA_SLAB_MALLOC;
2716 slab->us_size = size;
2717 } else {
2718 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE,
2719 ZFREE_STATFAIL | ZFREE_STATFREE);
2720 }
2721
2722 return (mem);
2723 }
2724
2725 void
2726 uma_large_free(uma_slab_t slab)
2727 {
2728 vsetobj((vm_offset_t)slab->us_data, kmem_object);
2729 page_free(slab->us_data, slab->us_size, slab->us_flags);
2730 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE);
2731 }
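
/*
 * uma_large_malloc() and uma_large_free() back malloc(9) requests that are
 * too big for the fixed-size malloc buckets: the slab header comes from
 * slabzone via uma_zalloc_internal(), the pages come directly from
 * page_alloc(), and UMA_SLAB_MALLOC in us_flags lets free(9) recognize
 * such allocations later.
 */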
2732
2733 void
2734 uma_print_stats(void)
2735 {
2736 zone_foreach(uma_print_zone);
2737 }
2738
2739 static void
2740 slab_print(uma_slab_t slab)
2741 {
2742 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
2743 slab->us_keg, slab->us_data, slab->us_freecount,
2744 slab->us_firstfree);
2745 }
2746
2747 static void
2748 cache_print(uma_cache_t cache)
2749 {
2750 printf("alloc: %p(%d), free: %p(%d)\n",
2751 cache->uc_allocbucket,
2752 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2753 cache->uc_freebucket,
2754 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2755 }
2756
2757 void
2758 uma_print_zone(uma_zone_t zone)
2759 {
2760 uma_cache_t cache;
2761 uma_keg_t keg;
2762 uma_slab_t slab;
2763 int i;
2764
2765 keg = zone->uz_keg;
2766 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2767 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
2768 keg->uk_ipers, keg->uk_ppera,
2769 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
2770 printf("Part slabs:\n");
2771 LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
2772 slab_print(slab);
2773 printf("Free slabs:\n");
2774 LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
2775 slab_print(slab);
2776 printf("Full slabs:\n");
2777 LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
2778 slab_print(slab);
2779 for (i = 0; i <= mp_maxid; i++) {
2780 if (CPU_ABSENT(i))
2781 continue;
2782 cache = &zone->uz_cpu[i];
2783 printf("CPU %d Cache:\n", i);
2784 cache_print(cache);
2785 }
2786 }
2787
2788 /*
2789 * Generate statistics across both the zone and its per-CPU caches. Return
2790 * desired statistics if the pointer is non-NULL for that statistic.
2791 *
2792 * Note: does not update the zone statistics, as it can't safely clear the
2793 * per-CPU cache statistic.
2794 *
2795 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
2796 * safe from off-CPU; we should modify the caches to track this information
2797 * directly so that we don't have to.
2798 */
2799 static void
2800 uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
2801 u_int64_t *freesp)
2802 {
2803 uma_cache_t cache;
2804 u_int64_t allocs, frees;
2805 int cachefree, cpu;
2806
2807 allocs = frees = 0;
2808 cachefree = 0;
2809 for (cpu = 0; cpu <= mp_maxid; cpu++) {
2810 if (CPU_ABSENT(cpu))
2811 continue;
2812 cache = &z->uz_cpu[cpu];
2813 if (cache->uc_allocbucket != NULL)
2814 cachefree += cache->uc_allocbucket->ub_cnt;
2815 if (cache->uc_freebucket != NULL)
2816 cachefree += cache->uc_freebucket->ub_cnt;
2817 allocs += cache->uc_allocs;
2818 frees += cache->uc_frees;
2819 }
2820 allocs += z->uz_allocs;
2821 frees += z->uz_frees;
2822 if (cachefreep != NULL)
2823 *cachefreep = cachefree;
2824 if (allocsp != NULL)
2825 *allocsp = allocs;
2826 if (freesp != NULL)
2827 *freesp = frees;
2828 }
2829
2830 /*
2831 * Sysctl handler for vm.zone
2832 *
2833 * stolen from vm_zone.c
2834 */
2835 static int
2836 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2837 {
2838 int error, len, cnt;
2839 const int linesize = 128; /* conservative */
2840 int totalfree;
2841 char *tmpbuf, *offset;
2842 uma_zone_t z;
2843 uma_keg_t zk;
2844 char *p;
2845 int cachefree;
2846 uma_bucket_t bucket;
2847 u_int64_t allocs, frees;
2848
2849 cnt = 0;
2850 mtx_lock(&uma_mtx);
2851 LIST_FOREACH(zk, &uma_kegs, uk_link) {
2852 LIST_FOREACH(z, &zk->uk_zones, uz_link)
2853 cnt++;
2854 }
2855 mtx_unlock(&uma_mtx);
2856 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2857 M_TEMP, M_WAITOK);
2858 len = snprintf(tmpbuf, linesize,
2859 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
2860 if (cnt == 0)
2861 tmpbuf[len - 1] = '\0';
2862 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2863 if (error || cnt == 0)
2864 goto out;
2865 offset = tmpbuf;
2866 mtx_lock(&uma_mtx);
2867 LIST_FOREACH(zk, &uma_kegs, uk_link) {
2868 LIST_FOREACH(z, &zk->uk_zones, uz_link) {
2869 if (cnt == 0) /* list may have changed size */
2870 break;
2871 ZONE_LOCK(z);
2872 cachefree = 0;
2873 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
2874 uma_zone_sumstat(z, &cachefree, &allocs, &frees);
2875 } else {
2876 allocs = z->uz_allocs;
2877 frees = z->uz_frees;
2878 }
2879
2880 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
2881 cachefree += bucket->ub_cnt;
2882 }
2883 totalfree = zk->uk_free + cachefree;
2884 len = snprintf(offset, linesize,
2885 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2886 z->uz_name, zk->uk_size,
2887 zk->uk_maxpages * zk->uk_ipers,
2888 (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
2889 totalfree,
2890 (unsigned long long)allocs);
2891 ZONE_UNLOCK(z);
2892 for (p = offset + 12; p > offset && *p == ' '; --p)
2893 /* nothing */ ;
2894 p[1] = ':';
2895 cnt--;
2896 offset += len;
2897 }
2898 }
2899 mtx_unlock(&uma_mtx);
2900 *offset++ = '\0';
2901 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2902 out:
2903 FREE(tmpbuf, M_TEMP);
2904 return (error);
2905 }
2906
2907 static int
2908 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
2909 {
2910 uma_keg_t kz;
2911 uma_zone_t z;
2912 int count;
2913
2914 count = 0;
2915 mtx_lock(&uma_mtx);
2916 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2917 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2918 count++;
2919 }
2920 mtx_unlock(&uma_mtx);
2921 return (sysctl_handle_int(oidp, &count, 0, req));
2922 }
2923
2924 static int
2925 sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
2926 {
2927 struct uma_stream_header ush;
2928 struct uma_type_header uth;
2929 struct uma_percpu_stat ups;
2930 uma_bucket_t bucket;
2931 struct sbuf sbuf;
2932 uma_cache_t cache;
2933 uma_keg_t kz;
2934 uma_zone_t z;
2935 char *buffer;
2936 int buflen, count, error, i;
2937
2938 mtx_lock(&uma_mtx);
2939 restart:
2940 mtx_assert(&uma_mtx, MA_OWNED);
2941 count = 0;
2942 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2943 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2944 count++;
2945 }
2946 mtx_unlock(&uma_mtx);
2947
2948 buflen = sizeof(ush) + count * (sizeof(uth) + sizeof(ups) *
2949 (mp_maxid + 1)) + 1;
2950 buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
2951
2952 mtx_lock(&uma_mtx);
2953 i = 0;
2954 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2955 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2956 i++;
2957 }
2958 if (i > count) {
2959 free(buffer, M_TEMP);
2960 goto restart;
2961 }
2962 count = i;
2963
2964 sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN);
2965
2966 /*
2967 * Insert stream header.
2968 */
2969 bzero(&ush, sizeof(ush));
2970 ush.ush_version = UMA_STREAM_VERSION;
2971 ush.ush_maxcpus = (mp_maxid + 1);
2972 ush.ush_count = count;
2973 if (sbuf_bcat(&sbuf, &ush, sizeof(ush)) < 0) {
2974 mtx_unlock(&uma_mtx);
2975 error = ENOMEM;
2976 goto out;
2977 }
2978
2979 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2980 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
2981 bzero(&uth, sizeof(uth));
2982 ZONE_LOCK(z);
2983 strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
2984 uth.uth_align = kz->uk_align;
2985 uth.uth_pages = kz->uk_pages;
2986 uth.uth_keg_free = kz->uk_free;
2987 uth.uth_size = kz->uk_size;
2988 uth.uth_rsize = kz->uk_rsize;
2989 uth.uth_maxpages = kz->uk_maxpages;
2990 if (kz->uk_ppera > 1)
2991 uth.uth_limit = kz->uk_maxpages /
2992 kz->uk_ppera;
2993 else
2994 uth.uth_limit = kz->uk_maxpages *
2995 kz->uk_ipers;
2996
2997 /*
2998 * A zone is secondary if it is not the first entry
2999 * on the keg's zone list.
3000 */
3001 if ((kz->uk_flags & UMA_ZONE_SECONDARY) &&
3002 (LIST_FIRST(&kz->uk_zones) != z))
3003 uth.uth_zone_flags = UTH_ZONE_SECONDARY;
3004
3005 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
3006 uth.uth_zone_free += bucket->ub_cnt;
3007 uth.uth_allocs = z->uz_allocs;
3008 uth.uth_frees = z->uz_frees;
3009 uth.uth_fails = z->uz_fails;
3010 if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) {
3011 ZONE_UNLOCK(z);
3012 mtx_unlock(&uma_mtx);
3013 error = ENOMEM;
3014 goto out;
3015 }
3016 /*
3017 * While it is not normally safe to access the cache
3018 * bucket pointers while not on the CPU that owns the
3019 * cache, we only allow the pointers to be exchanged
3020 * without the zone lock held, not invalidated, so
3021 * accept the possible race associated with bucket
3022 * exchange during monitoring.
3023 */
3024 for (i = 0; i < (mp_maxid + 1); i++) {
3025 bzero(&ups, sizeof(ups));
3026 if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
3027 goto skip;
3028 cache = &z->uz_cpu[i];
3029 if (cache->uc_allocbucket != NULL)
3030 ups.ups_cache_free +=
3031 cache->uc_allocbucket->ub_cnt;
3032 if (cache->uc_freebucket != NULL)
3033 ups.ups_cache_free +=
3034 cache->uc_freebucket->ub_cnt;
3035 ups.ups_allocs = cache->uc_allocs;
3036 ups.ups_frees = cache->uc_frees;
3037 skip:
3038 if (sbuf_bcat(&sbuf, &ups, sizeof(ups)) < 0) {
3039 ZONE_UNLOCK(z);
3040 mtx_unlock(&uma_mtx);
3041 error = ENOMEM;
3042 goto out;
3043 }
3044 }
3045 ZONE_UNLOCK(z);
3046 }
3047 }
3048 mtx_unlock(&uma_mtx);
3049 sbuf_finish(&sbuf);
3050 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
3051 out:
3052 free(buffer, M_TEMP);
3053 return (error);
3054 }