FreeBSD/Linux Kernel Cross Reference
sys/vm/uma_core.c
1 /*-
2 * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson <jeff@FreeBSD.org>
3 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
4 * Copyright (c) 2004-2005 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * uma_core.c Implementation of the Universal Memory allocator
31 *
32 * This allocator is intended to replace the multitude of similar object caches
33 * in the standard FreeBSD kernel. The intent is to be flexible as well as
34 * efficient. A primary design goal is to return unused memory to the rest of
35 * the system. This will make the system as a whole more flexible due to the
36 * ability to move memory to subsystems which most need it instead of leaving
37 * pools of reserved memory unused.
38 *
39 * The basic ideas stem from similar slab/zone based allocators whose algorithms
40 * are well known.
41 *
42 */
43
44 /*
45 * TODO:
46 * - Improve memory usage for large allocations
47 * - Investigate cache size adjustments
48 */
49
50 #include <sys/cdefs.h>
51 __FBSDID("$FreeBSD$");
52
53 /* I should really use ktr.. */
54 /*
55 #define UMA_DEBUG 1
56 #define UMA_DEBUG_ALLOC 1
57 #define UMA_DEBUG_ALLOC_1 1
58 */
59
60 #include "opt_ddb.h"
61 #include "opt_param.h"
62
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/kernel.h>
66 #include <sys/types.h>
67 #include <sys/queue.h>
68 #include <sys/malloc.h>
69 #include <sys/ktr.h>
70 #include <sys/lock.h>
71 #include <sys/sysctl.h>
72 #include <sys/mutex.h>
73 #include <sys/proc.h>
74 #include <sys/sbuf.h>
75 #include <sys/smp.h>
76 #include <sys/vmmeter.h>
77
78 #include <vm/vm.h>
79 #include <vm/vm_object.h>
80 #include <vm/vm_page.h>
81 #include <vm/vm_param.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_kern.h>
84 #include <vm/vm_extern.h>
85 #include <vm/uma.h>
86 #include <vm/uma_int.h>
87 #include <vm/uma_dbg.h>
88
89 #include <machine/vmparam.h>
90
91 #include <ddb/ddb.h>
92
93 /*
94 * This is the zone and keg from which all zones are spawned. The idea is that
95 * even the zone & keg heads are allocated from the allocator, so we use the
96 * bss section to bootstrap us.
97 */
98 static struct uma_keg masterkeg;
99 static struct uma_zone masterzone_k;
100 static struct uma_zone masterzone_z;
101 static uma_zone_t kegs = &masterzone_k;
102 static uma_zone_t zones = &masterzone_z;
103
104 /* This is the zone from which all of uma_slab_t's are allocated. */
105 static uma_zone_t slabzone;
106 static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
107
108 /*
109 * The initial hash tables come out of this zone so they can be allocated
110 * prior to malloc coming up.
111 */
112 static uma_zone_t hashzone;
113
114 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
115
116 /*
117 * Are we allowed to allocate buckets?
118 */
119 static int bucketdisable = 1;
120
121 /* Linked list of all kegs in the system */
122 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(&uma_kegs);
123
124 /* This mutex protects the keg list */
125 static struct mtx uma_mtx;
126
127 /* Linked list of boot time pages */
128 static LIST_HEAD(,uma_slab) uma_boot_pages =
129 LIST_HEAD_INITIALIZER(&uma_boot_pages);
130
131 /* This mutex protects the boot time pages list */
132 static struct mtx uma_boot_pages_mtx;
133
134 /* Is the VM done starting up? */
135 static int booted = 0;
136
137 /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
138 static u_int uma_max_ipers;
139 static u_int uma_max_ipers_ref;
140
141 /*
142 * This is the handle used to schedule events that need to happen
143 * outside of the allocation fast path.
144 */
145 static struct callout uma_callout;
146 #define UMA_TIMEOUT 20 /* Seconds for callout interval. */
147
148 /*
149 * This structure is passed as the zone ctor arg so that I don't have to create
150 * a special allocation function just for zones.
151 */
152 struct uma_zctor_args {
153 char *name;
154 size_t size;
155 uma_ctor ctor;
156 uma_dtor dtor;
157 uma_init uminit;
158 uma_fini fini;
159 uma_keg_t keg;
160 int align;
161 u_int32_t flags;
162 };
163
164 struct uma_kctor_args {
165 uma_zone_t zone;
166 size_t size;
167 uma_init uminit;
168 uma_fini fini;
169 int align;
170 u_int32_t flags;
171 };
172
173 struct uma_bucket_zone {
174 uma_zone_t ubz_zone;
175 char *ubz_name;
176 int ubz_entries;
177 };
178
179 #define BUCKET_MAX 128
180
181 struct uma_bucket_zone bucket_zones[] = {
182 { NULL, "16 Bucket", 16 },
183 { NULL, "32 Bucket", 32 },
184 { NULL, "64 Bucket", 64 },
185 { NULL, "128 Bucket", 128 },
186 { NULL, NULL, 0}
187 };
188
189 #define BUCKET_SHIFT 4
190 #define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
191
192 /*
193 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
194 * of approximately the right size.
195 */
196 static uint8_t bucket_size[BUCKET_ZONES];
197
198 /*
199 * Flags and enumerations to be passed to internal functions.
200 */
201 enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
202
203 #define ZFREE_STATFAIL 0x00000001 /* Update zone failure statistic. */
204 #define ZFREE_STATFREE 0x00000002 /* Update zone free statistic. */
205
206 /* Prototypes.. */
207
208 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
209 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
210 static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
211 static void page_free(void *, int, u_int8_t);
212 static uma_slab_t slab_zalloc(uma_zone_t, int);
213 static void cache_drain(uma_zone_t);
214 static void bucket_drain(uma_zone_t, uma_bucket_t);
215 static void bucket_cache_drain(uma_zone_t zone);
216 static int keg_ctor(void *, int, void *, int);
217 static void keg_dtor(void *, int, void *);
218 static int zone_ctor(void *, int, void *, int);
219 static void zone_dtor(void *, int, void *);
220 static int zero_init(void *, int, int);
221 static void zone_small_init(uma_zone_t zone);
222 static void zone_large_init(uma_zone_t zone);
223 static void zone_foreach(void (*zfunc)(uma_zone_t));
224 static void zone_timeout(uma_zone_t zone);
225 static int hash_alloc(struct uma_hash *);
226 static int hash_expand(struct uma_hash *, struct uma_hash *);
227 static void hash_free(struct uma_hash *hash);
228 static void uma_timeout(void *);
229 static void uma_startup3(void);
230 static void *uma_zalloc_internal(uma_zone_t, void *, int);
231 static void uma_zfree_internal(uma_zone_t, void *, void *, enum zfreeskip,
232 int);
233 static void bucket_enable(void);
234 static void bucket_init(void);
235 static uma_bucket_t bucket_alloc(int, int);
236 static void bucket_free(uma_bucket_t);
237 static void bucket_zone_drain(void);
238 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
239 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
240 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
241 static uma_zone_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
242 uma_fini fini, int align, u_int32_t flags);
243
244 void uma_print_zone(uma_zone_t);
245 void uma_print_stats(void);
246 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
247 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
248 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
249
250 #ifdef WITNESS
251 static int nosleepwithlocks = 1;
252 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
253 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
254 #else
255 static int nosleepwithlocks = 0;
256 SYSCTL_INT(_debug, OID_AUTO, nosleepwithlocks, CTLFLAG_RW, &nosleepwithlocks,
257 0, "Convert M_WAITOK to M_NOWAIT to avoid lock-held-across-sleep paths");
258 #endif
259 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
260 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
261 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
262
263 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
264 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
265
266 SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
267 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
268
269 /*
270 * This routine checks to see whether or not it's safe to enable buckets.
271 */
272
273 static void
274 bucket_enable(void)
275 {
276 if (cnt.v_free_count < cnt.v_free_min)
277 bucketdisable = 1;
278 else
279 bucketdisable = 0;
280 }
281
282 /*
283 * Initialize bucket_zones, the array of zones of buckets of various sizes.
284 *
285 * For each zone, calculate the memory required for each bucket, consisting
286 * of the header and an array of pointers. Initialize bucket_size[] so that
287 * requests in the appropriate range of bucket sizes map to the zone.
288 */
289 static void
290 bucket_init(void)
291 {
292 struct uma_bucket_zone *ubz;
293 int i;
294 int j;
295
296 for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
297 int size;
298
299 ubz = &bucket_zones[j];
300 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
301 size += sizeof(void *) * ubz->ubz_entries;
302 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
303 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
304 for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
305 bucket_size[i >> BUCKET_SHIFT] = j;
306 }
307 }
308
309 /*
310 * Given a desired number of entries for a bucket, return the zone from which
311 * to allocate the bucket.
312 */
313 static struct uma_bucket_zone *
314 bucket_zone_lookup(int entries)
315 {
316 int idx;
317
318 idx = howmany(entries, 1 << BUCKET_SHIFT);
319 return (&bucket_zones[bucket_size[idx]]);
320 }
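
/*
 * Illustrative sketch (standalone, userland): a minimal model of the
 * bucket_size[] mapping that bucket_init() builds and bucket_zone_lookup()
 * consults above.  The EX_* names are hypothetical stand-ins for the real
 * BUCKET_* constants and bucket_zones[] table; ex_howmany() mirrors the
 * howmany() macro from <sys/param.h>.
 */
#include <stdio.h>

#define EX_BUCKET_MAX           128
#define EX_BUCKET_SHIFT         4
#define EX_BUCKET_ZONES         ((EX_BUCKET_MAX >> EX_BUCKET_SHIFT) + 1)
#define ex_howmany(x, y)        (((x) + ((y) - 1)) / (y))

static const int ex_zone_entries[] = { 16, 32, 64, 128 };
static unsigned char ex_bucket_size[EX_BUCKET_ZONES];

int
main(void)
{
        int i, j, entries, idx;

        /* Mirror of the fill loop in bucket_init(). */
        for (i = 0, j = 0; j < 4; j++)
                for (; i <= ex_zone_entries[j]; i += (1 << EX_BUCKET_SHIFT))
                        ex_bucket_size[i >> EX_BUCKET_SHIFT] = j;

        /* Mirror of bucket_zone_lookup(): 20 entries round up to "32 Bucket". */
        entries = 20;
        idx = ex_howmany(entries, 1 << EX_BUCKET_SHIFT);
        printf("request for %d entries -> bucket_zones[%d] (%d entries)\n",
            entries, ex_bucket_size[idx], ex_zone_entries[ex_bucket_size[idx]]);
        return (0);
}
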
321
322 static uma_bucket_t
323 bucket_alloc(int entries, int bflags)
324 {
325 struct uma_bucket_zone *ubz;
326 uma_bucket_t bucket;
327
328 /*
329 * This is to stop us from allocating per cpu buckets while we're
330 * running out of vm.boot_pages. Otherwise, we would exhaust the
331 * boot pages. This also prevents us from allocating buckets in
332 * low memory situations.
333 */
334 if (bucketdisable)
335 return (NULL);
336
337 ubz = bucket_zone_lookup(entries);
338 bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
339 if (bucket) {
340 #ifdef INVARIANTS
341 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
342 #endif
343 bucket->ub_cnt = 0;
344 bucket->ub_entries = ubz->ubz_entries;
345 }
346
347 return (bucket);
348 }
349
350 static void
351 bucket_free(uma_bucket_t bucket)
352 {
353 struct uma_bucket_zone *ubz;
354
355 ubz = bucket_zone_lookup(bucket->ub_entries);
356 uma_zfree_internal(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
357 ZFREE_STATFREE);
358 }
359
360 static void
361 bucket_zone_drain(void)
362 {
363 struct uma_bucket_zone *ubz;
364
365 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
366 zone_drain(ubz->ubz_zone);
367 }
368
369
370 /*
371 * Routine called by timeout which is used to fire off some time interval
372 * based calculations. (stats, hash size, etc.)
373 *
374 * Arguments:
375 * arg Unused
376 *
377 * Returns:
378 * Nothing
379 */
380 static void
381 uma_timeout(void *unused)
382 {
383 bucket_enable();
384 zone_foreach(zone_timeout);
385
386 /* Reschedule this event */
387 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
388 }
389
390 /*
391 * Routine to perform timeout driven calculations. This expands the
392 * hashes and does per cpu statistics aggregation.
393 *
394 * Arguments:
395 * zone The zone to operate on
396 *
397 * Returns:
398 * Nothing
399 */
400 static void
401 zone_timeout(uma_zone_t zone)
402 {
403 uma_keg_t keg;
404 u_int64_t alloc;
405
406 keg = zone->uz_keg;
407 alloc = 0;
408
409 /*
410 * Expand the zone hash table.
411 *
412 * This is done if the number of slabs is larger than the hash size.
413 * What I'm trying to do here is eliminate collisions entirely. This
414 * may be a little aggressive. Should I allow for two collisions max?
415 */
416 ZONE_LOCK(zone);
417 if (keg->uk_flags & UMA_ZONE_HASH &&
418 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
419 struct uma_hash newhash;
420 struct uma_hash oldhash;
421 int ret;
422
423 /*
424 * This is so involved because allocating and freeing
425 * while the zone lock is held will lead to deadlock.
426 * I have to do everything in stages and check for
427 * races.
428 */
429 newhash = keg->uk_hash;
430 ZONE_UNLOCK(zone);
431 ret = hash_alloc(&newhash);
432 ZONE_LOCK(zone);
433 if (ret) {
434 if (hash_expand(&keg->uk_hash, &newhash)) {
435 oldhash = keg->uk_hash;
436 keg->uk_hash = newhash;
437 } else
438 oldhash = newhash;
439
440 ZONE_UNLOCK(zone);
441 hash_free(&oldhash);
442 ZONE_LOCK(zone);
443 }
444 }
445 ZONE_UNLOCK(zone);
446 }
447
448 /*
449 * Allocate and zero fill the next sized hash table from the appropriate
450 * backing store.
451 *
452 * Arguments:
453 * hash A new hash structure with the old hash size in uh_hashsize
454 *
455 * Returns:
456 * 1 on success and 0 on failure.
457 */
458 static int
459 hash_alloc(struct uma_hash *hash)
460 {
461 int oldsize;
462 int alloc;
463
464 oldsize = hash->uh_hashsize;
465
466 /* We're just going to go to a power of two greater */
467 if (oldsize) {
468 hash->uh_hashsize = oldsize * 2;
469 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
470 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
471 M_UMAHASH, M_NOWAIT);
472 } else {
473 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
474 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
475 M_WAITOK);
476 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
477 }
478 if (hash->uh_slab_hash) {
479 bzero(hash->uh_slab_hash, alloc);
480 hash->uh_hashmask = hash->uh_hashsize - 1;
481 return (1);
482 }
483
484 return (0);
485 }
486
487 /*
488 * Expands the hash table for HASH zones. This is done from zone_timeout
489 * to reduce collisions. This must not be done in the regular allocation
490 * path, otherwise, we can recurse on the vm while allocating pages.
491 *
492 * Arguments:
493 * oldhash The hash you want to expand
494 * newhash The hash structure for the new table
495 *
496 * Returns:
497 * Nothing
498 *
499 * Discussion:
500 */
501 static int
502 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
503 {
504 uma_slab_t slab;
505 int hval;
506 int i;
507
508 if (!newhash->uh_slab_hash)
509 return (0);
510
511 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
512 return (0);
513
514 /*
515 * I need to investigate hash algorithms for resizing without a
516 * full rehash.
517 */
518
519 for (i = 0; i < oldhash->uh_hashsize; i++)
520 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
521 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
522 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
523 hval = UMA_HASH(newhash, slab->us_data);
524 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
525 slab, us_hlink);
526 }
527
528 return (1);
529 }
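
/*
 * Illustrative sketch (standalone, userland): the "full rehash" that
 * hash_expand() performs, modeled with a doubled power-of-two chained table.
 * The EX_HASH() macro is a hypothetical stand-in for UMA_HASH(), which hashes
 * the slab's data address; the real definition lives in uma_int.h.
 */
#include <stdint.h>
#include <stdio.h>

struct ex_slab {
        uintptr_t       data;           /* stand-in for us_data */
        struct ex_slab  *next;          /* stand-in for us_hlink */
};

#define EX_SLAB_SHIFT   12
#define EX_HASH(mask, d)        (((d) >> EX_SLAB_SHIFT) & (mask))

int
main(void)
{
        struct ex_slab slabs[8], *s;
        struct ex_slab *oldtab[4] = { NULL }, *newtab[8] = { NULL };
        uintptr_t oldmask = 3, newmask = 7;
        int i, h;

        /* Populate the old, smaller table. */
        for (i = 0; i < 8; i++) {
                slabs[i].data = (uintptr_t)i * 4096;
                h = EX_HASH(oldmask, slabs[i].data);
                slabs[i].next = oldtab[h];
                oldtab[h] = &slabs[i];
        }
        /* Re-hash every entry into a table twice the size. */
        for (i = 0; i < 4; i++)
                while ((s = oldtab[i]) != NULL) {
                        oldtab[i] = s->next;
                        h = EX_HASH(newmask, s->data);
                        s->next = newtab[h];
                        newtab[h] = s;
                }
        for (i = 0; i < 8; i++)
                for (s = newtab[i]; s != NULL; s = s->next)
                        printf("bucket %d: slab data at %#lx\n", i,
                            (unsigned long)s->data);
        return (0);
}
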
530
531 /*
532 * Free the hash bucket to the appropriate backing store.
533 *
534 * Arguments:
535 * hash The hash structure whose backing store is being freed
537 *
538 * Returns:
539 * Nothing
540 */
541 static void
542 hash_free(struct uma_hash *hash)
543 {
544 if (hash->uh_slab_hash == NULL)
545 return;
546 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
547 uma_zfree_internal(hashzone,
548 hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
549 else
550 free(hash->uh_slab_hash, M_UMAHASH);
551 }
552
553 /*
554 * Frees all outstanding items in a bucket
555 *
556 * Arguments:
557 * zone The zone to free to, must be unlocked.
558 * bucket The free/alloc bucket with items, cpu queue must be locked.
559 *
560 * Returns:
561 * Nothing
562 */
563
564 static void
565 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
566 {
567 uma_slab_t slab;
568 int mzone;
569 void *item;
570
571 if (bucket == NULL)
572 return;
573
574 slab = NULL;
575 mzone = 0;
576
577 /* We have to look up the slab again for malloc.. */
578 if (zone->uz_keg->uk_flags & UMA_ZONE_MALLOC)
579 mzone = 1;
580
581 while (bucket->ub_cnt > 0) {
582 bucket->ub_cnt--;
583 item = bucket->ub_bucket[bucket->ub_cnt];
584 #ifdef INVARIANTS
585 bucket->ub_bucket[bucket->ub_cnt] = NULL;
586 KASSERT(item != NULL,
587 ("bucket_drain: botched ptr, item is NULL"));
588 #endif
589 /*
590 * This is extremely inefficient. The slab pointer was passed
591 * to uma_zfree_arg, but we lost it because the buckets don't
592 * hold them. This will go away when free() gets a size passed
593 * to it.
594 */
595 if (mzone)
596 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
597 uma_zfree_internal(zone, item, slab, SKIP_DTOR, 0);
598 }
599 }
600
601 /*
602 * Drains the per cpu caches for a zone.
603 *
604 * NOTE: This may only be called while the zone is being torn down, and not
605 * during normal operation. This is necessary in order that we do not have
606 * to migrate CPUs to drain the per-CPU caches.
607 *
608 * Arguments:
609 * zone The zone to drain, must be unlocked.
610 *
611 * Returns:
612 * Nothing
613 */
614 static void
615 cache_drain(uma_zone_t zone)
616 {
617 uma_cache_t cache;
618 int cpu;
619
620 /*
621 * XXX: It is safe to not lock the per-CPU caches, because we're
622 * tearing down the zone anyway. I.e., there will be no further use
623 * of the caches at this point.
624 *
625 * XXX: It would be good to be able to assert that the zone is being
626 * torn down to prevent improper use of cache_drain().
627 *
628 * XXX: We lock the zone before passing into bucket_cache_drain() as
629 * it is used elsewhere. Should the tear-down path be made special
630 * there in some form?
631 */
632 for (cpu = 0; cpu <= mp_maxid; cpu++) {
633 if (CPU_ABSENT(cpu))
634 continue;
635 cache = &zone->uz_cpu[cpu];
636 bucket_drain(zone, cache->uc_allocbucket);
637 bucket_drain(zone, cache->uc_freebucket);
638 if (cache->uc_allocbucket != NULL)
639 bucket_free(cache->uc_allocbucket);
640 if (cache->uc_freebucket != NULL)
641 bucket_free(cache->uc_freebucket);
642 cache->uc_allocbucket = cache->uc_freebucket = NULL;
643 }
644 ZONE_LOCK(zone);
645 bucket_cache_drain(zone);
646 ZONE_UNLOCK(zone);
647 }
648
649 /*
650 * Drain the cached buckets from a zone. Expects a locked zone on entry.
651 */
652 static void
653 bucket_cache_drain(uma_zone_t zone)
654 {
655 uma_bucket_t bucket;
656
657 /*
658 * Drain the bucket queues and free the buckets, we just keep two per
659 * cpu (alloc/free).
660 */
661 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
662 LIST_REMOVE(bucket, ub_link);
663 ZONE_UNLOCK(zone);
664 bucket_drain(zone, bucket);
665 bucket_free(bucket);
666 ZONE_LOCK(zone);
667 }
668
669 /* Now we do the free queue.. */
670 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
671 LIST_REMOVE(bucket, ub_link);
672 bucket_free(bucket);
673 }
674 }
675
676 /*
677 * Frees pages from a zone back to the system. This is done on demand from
678 * the pageout daemon.
679 *
680 * Arguments:
681 * zone The zone to free pages from
683 *
684 * Returns:
685 * Nothing.
686 */
687 void
688 zone_drain(uma_zone_t zone)
689 {
690 struct slabhead freeslabs = { 0 };
691 uma_keg_t keg;
692 uma_slab_t slab;
693 uma_slab_t n;
694 u_int8_t flags;
695 u_int8_t *mem;
696 int i;
697
698 keg = zone->uz_keg;
699
700 /*
701 * We don't want to take pages from statically allocated zones at this
702 * time
703 */
704 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
705 return;
706
707 ZONE_LOCK(zone);
708
709 #ifdef UMA_DEBUG
710 printf("%s free items: %u\n", zone->uz_name, keg->uk_free);
711 #endif
712 bucket_cache_drain(zone);
713 if (keg->uk_free == 0)
714 goto finished;
715
716 slab = LIST_FIRST(&keg->uk_free_slab);
717 while (slab) {
718 n = LIST_NEXT(slab, us_link);
719
720 /* We have nowhere to free these to */
721 if (slab->us_flags & UMA_SLAB_BOOT) {
722 slab = n;
723 continue;
724 }
725
726 LIST_REMOVE(slab, us_link);
727 keg->uk_pages -= keg->uk_ppera;
728 keg->uk_free -= keg->uk_ipers;
729
730 if (keg->uk_flags & UMA_ZONE_HASH)
731 UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
732
733 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
734
735 slab = n;
736 }
737 finished:
738 ZONE_UNLOCK(zone);
739
740 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
741 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
742 if (keg->uk_fini)
743 for (i = 0; i < keg->uk_ipers; i++)
744 keg->uk_fini(
745 slab->us_data + (keg->uk_rsize * i),
746 keg->uk_size);
747 flags = slab->us_flags;
748 mem = slab->us_data;
749
750 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
751 (keg->uk_flags & UMA_ZONE_REFCNT)) {
752 vm_object_t obj;
753
754 if (flags & UMA_SLAB_KMEM)
755 obj = kmem_object;
756 else
757 obj = NULL;
758 for (i = 0; i < keg->uk_ppera; i++)
759 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
760 obj);
761 }
762 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
763 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
764 SKIP_NONE, ZFREE_STATFREE);
765 #ifdef UMA_DEBUG
766 printf("%s: Returning %d bytes.\n",
767 zone->uz_name, UMA_SLAB_SIZE * keg->uk_ppera);
768 #endif
769 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
770 }
771 }
772
773 /*
774 * Allocate a new slab for a zone. This does not insert the slab onto a list.
775 *
776 * Arguments:
777 * zone The zone to allocate slabs for
778 * wait Shall we wait?
779 *
780 * Returns:
781 * The slab that was allocated or NULL if there is no memory and the
782 * caller specified M_NOWAIT.
783 */
784 static uma_slab_t
785 slab_zalloc(uma_zone_t zone, int wait)
786 {
787 uma_slabrefcnt_t slabref;
788 uma_slab_t slab;
789 uma_keg_t keg;
790 u_int8_t *mem;
791 u_int8_t flags;
792 int i;
793
794 slab = NULL;
795 keg = zone->uz_keg;
796
797 #ifdef UMA_DEBUG
798 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
799 #endif
800 ZONE_UNLOCK(zone);
801
802 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
803 slab = uma_zalloc_internal(keg->uk_slabzone, NULL, wait);
804 if (slab == NULL) {
805 ZONE_LOCK(zone);
806 return NULL;
807 }
808 }
809
810 /*
811 * This reproduces the old vm_zone behavior of zero filling pages the
812 * first time they are added to a zone.
813 *
814 * Malloced items are zeroed in uma_zalloc.
815 */
816
817 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
818 wait |= M_ZERO;
819 else
820 wait &= ~M_ZERO;
821
822 mem = keg->uk_allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE,
823 &flags, wait);
824 if (mem == NULL) {
825 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
826 uma_zfree_internal(keg->uk_slabzone, slab, NULL,
827 SKIP_NONE, ZFREE_STATFREE);
828 ZONE_LOCK(zone);
829 return (NULL);
830 }
831
832 /* Point the slab into the allocated memory */
833 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
834 slab = (uma_slab_t )(mem + keg->uk_pgoff);
835
836 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
837 (keg->uk_flags & UMA_ZONE_REFCNT))
838 for (i = 0; i < keg->uk_ppera; i++)
839 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
840
841 slab->us_keg = keg;
842 slab->us_data = mem;
843 slab->us_freecount = keg->uk_ipers;
844 slab->us_firstfree = 0;
845 slab->us_flags = flags;
846
847 if (keg->uk_flags & UMA_ZONE_REFCNT) {
848 slabref = (uma_slabrefcnt_t)slab;
849 for (i = 0; i < keg->uk_ipers; i++) {
850 slabref->us_freelist[i].us_refcnt = 0;
851 slabref->us_freelist[i].us_item = i+1;
852 }
853 } else {
854 for (i = 0; i < keg->uk_ipers; i++)
855 slab->us_freelist[i].us_item = i+1;
856 }
857
858 if (keg->uk_init != NULL) {
859 for (i = 0; i < keg->uk_ipers; i++)
860 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
861 keg->uk_size, wait) != 0)
862 break;
863 if (i != keg->uk_ipers) {
864 if (keg->uk_fini != NULL) {
865 for (i--; i > -1; i--)
866 keg->uk_fini(slab->us_data +
867 (keg->uk_rsize * i),
868 keg->uk_size);
869 }
870 if ((keg->uk_flags & UMA_ZONE_MALLOC) ||
871 (keg->uk_flags & UMA_ZONE_REFCNT)) {
872 vm_object_t obj;
873
874 if (flags & UMA_SLAB_KMEM)
875 obj = kmem_object;
876 else
877 obj = NULL;
878 for (i = 0; i < keg->uk_ppera; i++)
879 vsetobj((vm_offset_t)mem +
880 (i * PAGE_SIZE), obj);
881 }
882 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
883 uma_zfree_internal(keg->uk_slabzone, slab,
884 NULL, SKIP_NONE, ZFREE_STATFREE);
885 keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
886 flags);
887 ZONE_LOCK(zone);
888 return (NULL);
889 }
890 }
891 ZONE_LOCK(zone);
892
893 if (keg->uk_flags & UMA_ZONE_HASH)
894 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
895
896 keg->uk_pages += keg->uk_ppera;
897 keg->uk_free += keg->uk_ipers;
898
899 return (slab);
900 }
901
902 /*
903 * This function is intended to be used early on in place of page_alloc() so
904 * that we may use the boot time page cache to satisfy allocations before
905 * the VM is ready.
906 */
907 static void *
908 startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
909 {
910 uma_keg_t keg;
911 uma_slab_t tmps;
912
913 keg = zone->uz_keg;
914
915 /*
916 * Check our small startup cache to see if it has pages remaining.
917 */
918 mtx_lock(&uma_boot_pages_mtx);
919 if ((tmps = LIST_FIRST(&uma_boot_pages)) != NULL) {
920 LIST_REMOVE(tmps, us_link);
921 mtx_unlock(&uma_boot_pages_mtx);
922 *pflag = tmps->us_flags;
923 return (tmps->us_data);
924 }
925 mtx_unlock(&uma_boot_pages_mtx);
926 if (booted == 0)
927 panic("UMA: Increase vm.boot_pages");
928 /*
929 * Now that we've booted, reset these users to their real allocator.
930 */
931 #ifdef UMA_MD_SMALL_ALLOC
932 keg->uk_allocf = uma_small_alloc;
933 #else
934 keg->uk_allocf = page_alloc;
935 #endif
936 return keg->uk_allocf(zone, bytes, pflag, wait);
937 }
938
939 /*
940 * Allocates a number of pages from the system
941 *
942 * Arguments:
943 * zone Unused
944 * bytes The number of bytes requested
945 * wait Shall we wait?
946 *
947 * Returns:
948 * A pointer to the alloced memory or possibly
949 * NULL if M_NOWAIT is set.
950 */
951 static void *
952 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
953 {
954 void *p; /* Returned page */
955
956 *pflag = UMA_SLAB_KMEM;
957 p = (void *) kmem_malloc(kmem_map, bytes, wait);
958
959 return (p);
960 }
961
962 /*
963 * Allocates a number of pages from within an object
964 *
965 * Arguments:
966 * zone Unused
967 * bytes The number of bytes requested
968 * wait Shall we wait?
969 *
970 * Returns:
971 * A pointer to the alloced memory or possibly
972 * NULL if M_NOWAIT is set.
973 */
974 static void *
975 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
976 {
977 vm_object_t object;
978 vm_offset_t retkva, zkva;
979 vm_page_t p;
980 int pages, startpages;
981
982 object = zone->uz_keg->uk_obj;
983 retkva = 0;
984
985 /*
986 * This looks a little weird since we're getting one page at a time.
987 */
988 VM_OBJECT_LOCK(object);
989 p = TAILQ_LAST(&object->memq, pglist);
990 pages = p != NULL ? p->pindex + 1 : 0;
991 startpages = pages;
992 zkva = zone->uz_keg->uk_kva + pages * PAGE_SIZE;
993 for (; bytes > 0; bytes -= PAGE_SIZE) {
994 p = vm_page_alloc(object, pages,
995 VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
996 if (p == NULL) {
997 if (pages != startpages)
998 pmap_qremove(retkva, pages - startpages);
999 while (pages != startpages) {
1000 pages--;
1001 p = TAILQ_LAST(&object->memq, pglist);
1002 vm_page_lock_queues();
1003 vm_page_unwire(p, 0);
1004 vm_page_free(p);
1005 vm_page_unlock_queues();
1006 }
1007 retkva = 0;
1008 goto done;
1009 }
1010 pmap_qenter(zkva, &p, 1);
1011 if (retkva == 0)
1012 retkva = zkva;
1013 zkva += PAGE_SIZE;
1014 pages += 1;
1015 }
1016 done:
1017 VM_OBJECT_UNLOCK(object);
1018 *flags = UMA_SLAB_PRIV;
1019
1020 return ((void *)retkva);
1021 }
1022
1023 /*
1024 * Frees a number of pages to the system
1025 *
1026 * Arguments:
1027 * mem A pointer to the memory to be freed
1028 * size The size of the memory being freed
1029 * flags The original p->us_flags field
1030 *
1031 * Returns:
1032 * Nothing
1033 */
1034 static void
1035 page_free(void *mem, int size, u_int8_t flags)
1036 {
1037 vm_map_t map;
1038
1039 if (flags & UMA_SLAB_KMEM)
1040 map = kmem_map;
1041 else
1042 panic("UMA: page_free used with invalid flags %d\n", flags);
1043
1044 kmem_free(map, (vm_offset_t)mem, size);
1045 }
1046
1047 /*
1048 * Zero fill initializer
1049 *
1050 * Arguments/Returns follow uma_init specifications
1051 */
1052 static int
1053 zero_init(void *mem, int size, int flags)
1054 {
1055 bzero(mem, size);
1056 return (0);
1057 }
1058
1059 /*
1060 * Finish creating a small uma zone. This calculates ipers, and the zone size.
1061 *
1062 * Arguments
1063 * zone The zone we should initialize
1064 *
1065 * Returns
1066 * Nothing
1067 */
1068 static void
1069 zone_small_init(uma_zone_t zone)
1070 {
1071 uma_keg_t keg;
1072 u_int rsize;
1073 u_int memused;
1074 u_int wastedspace;
1075 u_int shsize;
1076
1077 keg = zone->uz_keg;
1078 KASSERT(keg != NULL, ("Keg is null in zone_small_init"));
1079 rsize = keg->uk_size;
1080
1081 if (rsize < UMA_SMALLEST_UNIT)
1082 rsize = UMA_SMALLEST_UNIT;
1083 if (rsize & keg->uk_align)
1084 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1085
1086 keg->uk_rsize = rsize;
1087 keg->uk_ppera = 1;
1088
1089 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1090 rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */
1091 shsize = sizeof(struct uma_slab_refcnt);
1092 } else {
1093 rsize += UMA_FRITM_SZ; /* Account for linkage */
1094 shsize = sizeof(struct uma_slab);
1095 }
1096
1097 keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1098 KASSERT(keg->uk_ipers != 0, ("zone_small_init: ipers is 0"));
1099 memused = keg->uk_ipers * rsize + shsize;
1100 wastedspace = UMA_SLAB_SIZE - memused;
1101
1102 /*
1103 * We can't do OFFPAGE if we're internal or if we've been
1104 * asked not to go to the VM for buckets. If we did, we
1105 * might end up going to the VM (kmem_map) for slabs, which
1106 * we must not do when we're UMA_ZFLAG_CACHEONLY as a
1107 * result of UMA_ZONE_VM, which clearly forbids it.
1108 */
1109 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1110 (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1111 return;
1112
1113 if ((wastedspace >= UMA_MAX_WASTE) &&
1114 (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1115 keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1116 KASSERT(keg->uk_ipers <= 255,
1117 ("zone_small_init: keg->uk_ipers too high!"));
1118 #ifdef UMA_DEBUG
1119 printf("UMA decided we need offpage slab headers for "
1120 "zone: %s, calculated wastedspace = %d, "
1121 "maximum wasted space allowed = %d, "
1122 "calculated ipers = %d, "
1123 "new wasted space = %d\n", zone->uz_name, wastedspace,
1124 UMA_MAX_WASTE, keg->uk_ipers,
1125 UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1126 #endif
1127 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1128 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1129 keg->uk_flags |= UMA_ZONE_HASH;
1130 }
1131 }
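
/*
 * Illustrative sketch (standalone, userland) of the items-per-slab and
 * wasted-space arithmetic above.  The EX_* constants are assumed example
 * values, not the real uma_int.h definitions, which vary by architecture.
 */
#include <stdio.h>

#define EX_SLAB_SIZE    4096    /* stand-in for UMA_SLAB_SIZE */
#define EX_SLAB_HDR     64      /* assumed sizeof(struct uma_slab) */
#define EX_FRITM_SZ     1       /* assumed per-item freelist linkage */
#define EX_MAX_WASTE    256     /* stand-in for UMA_MAX_WASTE */
#define EX_ALIGN        7       /* pointer-style alignment mask */

int
main(void)
{
        unsigned size = 100, rsize, ipers, wasted;

        rsize = size;
        if (rsize & EX_ALIGN)           /* round up to the alignment */
                rsize = (rsize & ~EX_ALIGN) + (EX_ALIGN + 1);
        ipers = (EX_SLAB_SIZE - EX_SLAB_HDR) / (rsize + EX_FRITM_SZ);
        wasted = EX_SLAB_SIZE - (ipers * (rsize + EX_FRITM_SZ) + EX_SLAB_HDR);
        printf("rsize %u, ipers %u, wasted %u -> %s slab header\n",
            rsize, ipers, wasted,
            wasted >= EX_MAX_WASTE ? "OFFPAGE" : "in-page");
        return (0);
}
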
1132
1133 /*
1134 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
1135 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
1136 * more complicated.
1137 *
1138 * Arguments
1139 * zone The zone we should initialize
1140 *
1141 * Returns
1142 * Nothing
1143 */
1144 static void
1145 zone_large_init(uma_zone_t zone)
1146 {
1147 uma_keg_t keg;
1148 int pages;
1149
1150 keg = zone->uz_keg;
1151
1152 KASSERT(keg != NULL, ("Keg is null in zone_large_init"));
1153 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1154 ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
1155
1156 pages = keg->uk_size / UMA_SLAB_SIZE;
1157
1158 /* Account for remainder */
1159 if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1160 pages++;
1161
1162 keg->uk_ppera = pages;
1163 keg->uk_ipers = 1;
1164
1165 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1166 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1167 keg->uk_flags |= UMA_ZONE_HASH;
1168
1169 keg->uk_rsize = keg->uk_size;
1170 }
1171
1172 /*
1173 * Keg header ctor. This initializes all fields, locks, etc. And inserts
1174 * the keg onto the global keg list.
1175 *
1176 * Arguments/Returns follow uma_ctor specifications
1177 * udata Actually uma_kctor_args
1178 */
1179 static int
1180 keg_ctor(void *mem, int size, void *udata, int flags)
1181 {
1182 struct uma_kctor_args *arg = udata;
1183 uma_keg_t keg = mem;
1184 uma_zone_t zone;
1185
1186 bzero(keg, size);
1187 keg->uk_size = arg->size;
1188 keg->uk_init = arg->uminit;
1189 keg->uk_fini = arg->fini;
1190 keg->uk_align = arg->align;
1191 keg->uk_free = 0;
1192 keg->uk_pages = 0;
1193 keg->uk_flags = arg->flags;
1194 keg->uk_allocf = page_alloc;
1195 keg->uk_freef = page_free;
1196 keg->uk_recurse = 0;
1197 keg->uk_slabzone = NULL;
1198
1199 /*
1200 * The master zone is passed to us at keg-creation time.
1201 */
1202 zone = arg->zone;
1203 zone->uz_keg = keg;
1204
1205 if (arg->flags & UMA_ZONE_VM)
1206 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1207
1208 if (arg->flags & UMA_ZONE_ZINIT)
1209 keg->uk_init = zero_init;
1210
1211 /*
1212 * The +UMA_FRITM_SZ added to uk_size is to account for the
1213 * linkage that is added to the size in zone_small_init(). If
1214 * we don't account for this here then we may end up in
1215 * zone_small_init() with a calculated 'ipers' of 0.
1216 */
1217 if (keg->uk_flags & UMA_ZONE_REFCNT) {
1218 if ((keg->uk_size+UMA_FRITMREF_SZ) >
1219 (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1220 zone_large_init(zone);
1221 else
1222 zone_small_init(zone);
1223 } else {
1224 if ((keg->uk_size+UMA_FRITM_SZ) >
1225 (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1226 zone_large_init(zone);
1227 else
1228 zone_small_init(zone);
1229 }
1230
1231 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1232 if (keg->uk_flags & UMA_ZONE_REFCNT)
1233 keg->uk_slabzone = slabrefzone;
1234 else
1235 keg->uk_slabzone = slabzone;
1236 }
1237
1238 /*
1239 * If we haven't booted yet we need allocations to go through the
1240 * startup cache until the vm is ready.
1241 */
1242 if (keg->uk_ppera == 1) {
1243 #ifdef UMA_MD_SMALL_ALLOC
1244 keg->uk_allocf = uma_small_alloc;
1245 keg->uk_freef = uma_small_free;
1246 #endif
1247 if (booted == 0)
1248 keg->uk_allocf = startup_alloc;
1249 }
1250
1251 /*
1252 * Initialize keg's lock (shared among zones) through
1253 * Master zone
1254 */
1255 zone->uz_lock = &keg->uk_lock;
1256 if (arg->flags & UMA_ZONE_MTXCLASS)
1257 ZONE_LOCK_INIT(zone, 1);
1258 else
1259 ZONE_LOCK_INIT(zone, 0);
1260
1261 /*
1262 * If we're putting the slab header in the actual page we need to
1263 * figure out where in each page it goes. This calculates a right
1264 * justified offset into the memory on an ALIGN_PTR boundary.
1265 */
1266 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1267 u_int totsize;
1268
1269 /* Size of the slab struct and free list */
1270 if (keg->uk_flags & UMA_ZONE_REFCNT)
1271 totsize = sizeof(struct uma_slab_refcnt) +
1272 keg->uk_ipers * UMA_FRITMREF_SZ;
1273 else
1274 totsize = sizeof(struct uma_slab) +
1275 keg->uk_ipers * UMA_FRITM_SZ;
1276
1277 if (totsize & UMA_ALIGN_PTR)
1278 totsize = (totsize & ~UMA_ALIGN_PTR) +
1279 (UMA_ALIGN_PTR + 1);
1280 keg->uk_pgoff = UMA_SLAB_SIZE - totsize;
1281
1282 if (keg->uk_flags & UMA_ZONE_REFCNT)
1283 totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1284 + keg->uk_ipers * UMA_FRITMREF_SZ;
1285 else
1286 totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1287 + keg->uk_ipers * UMA_FRITM_SZ;
1288
1289 /*
1290 * The only way the following is possible is if our
1291 * UMA_ALIGN_PTR adjustments have pushed the total size beyond
1292 * UMA_SLAB_SIZE. I haven't checked whether this is
1293 * mathematically possible for all cases, so we make
1294 * sure here anyway.
1295 */
1296 if (totsize > UMA_SLAB_SIZE) {
1297 printf("zone %s ipers %d rsize %d size %d\n",
1298 zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1299 keg->uk_size);
1300 panic("UMA slab won't fit.\n");
1301 }
1302 }
1303
1304 if (keg->uk_flags & UMA_ZONE_HASH)
1305 hash_alloc(&keg->uk_hash);
1306
1307 #ifdef UMA_DEBUG
1308 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1309 zone->uz_name, zone,
1310 keg->uk_size, keg->uk_ipers,
1311 keg->uk_ppera, keg->uk_pgoff);
1312 #endif
1313
1314 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1315
1316 mtx_lock(&uma_mtx);
1317 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1318 mtx_unlock(&uma_mtx);
1319 return (0);
1320 }
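
/*
 * Illustrative sketch (standalone, userland) of the right-justified uk_pgoff
 * calculation above: the slab header plus its free list is rounded up to a
 * pointer boundary and placed at the end of the slab.  The EX_* constants
 * are assumed example values, not the real uma_int.h definitions.
 */
#include <stdio.h>

#define EX_SLAB_SIZE    4096    /* stand-in for UMA_SLAB_SIZE */
#define EX_SLAB_HDR     64      /* assumed sizeof(struct uma_slab) */
#define EX_FRITM_SZ     1       /* assumed per-item freelist entry size */
#define EX_ALIGN_PTR    7       /* stand-in for UMA_ALIGN_PTR */

int
main(void)
{
        unsigned ipers = 38, totsize, pgoff;

        /* Size of the slab struct and its ipers-entry free list. */
        totsize = EX_SLAB_HDR + ipers * EX_FRITM_SZ;
        if (totsize & EX_ALIGN_PTR)
                totsize = (totsize & ~EX_ALIGN_PTR) + (EX_ALIGN_PTR + 1);
        pgoff = EX_SLAB_SIZE - totsize;
        printf("slab header occupies the last %u bytes; uk_pgoff = %u\n",
            totsize, pgoff);
        return (0);
}
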
1321
1322 /*
1323 * Zone header ctor. This initializes all fields, locks, etc.
1324 *
1325 * Arguments/Returns follow uma_ctor specifications
1326 * udata Actually uma_zctor_args
1327 */
1328
1329 static int
1330 zone_ctor(void *mem, int size, void *udata, int flags)
1331 {
1332 struct uma_zctor_args *arg = udata;
1333 uma_zone_t zone = mem;
1334 uma_zone_t z;
1335 uma_keg_t keg;
1336
1337 bzero(zone, size);
1338 zone->uz_name = arg->name;
1339 zone->uz_ctor = arg->ctor;
1340 zone->uz_dtor = arg->dtor;
1341 zone->uz_init = NULL;
1342 zone->uz_fini = NULL;
1343 zone->uz_allocs = 0;
1344 zone->uz_frees = 0;
1345 zone->uz_fails = 0;
1346 zone->uz_fills = zone->uz_count = 0;
1347
1348 if (arg->flags & UMA_ZONE_SECONDARY) {
1349 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1350 keg = arg->keg;
1351 zone->uz_keg = keg;
1352 zone->uz_init = arg->uminit;
1353 zone->uz_fini = arg->fini;
1354 zone->uz_lock = &keg->uk_lock;
1355 mtx_lock(&uma_mtx);
1356 ZONE_LOCK(zone);
1357 keg->uk_flags |= UMA_ZONE_SECONDARY;
1358 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1359 if (LIST_NEXT(z, uz_link) == NULL) {
1360 LIST_INSERT_AFTER(z, zone, uz_link);
1361 break;
1362 }
1363 }
1364 ZONE_UNLOCK(zone);
1365 mtx_unlock(&uma_mtx);
1366 } else if (arg->keg == NULL) {
1367 if (uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1368 arg->align, arg->flags) == NULL)
1369 return (ENOMEM);
1370 } else {
1371 struct uma_kctor_args karg;
1372 int error;
1373
1374 /* We should only be here from uma_startup() */
1375 karg.size = arg->size;
1376 karg.uminit = arg->uminit;
1377 karg.fini = arg->fini;
1378 karg.align = arg->align;
1379 karg.flags = arg->flags;
1380 karg.zone = zone;
1381 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1382 flags);
1383 if (error)
1384 return (error);
1385 }
1386 keg = zone->uz_keg;
1387 zone->uz_lock = &keg->uk_lock;
1388
1389 /*
1390 * Some internal zones don't have room allocated for the per cpu
1391 * caches. If we're internal, bail out here.
1392 */
1393 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1394 KASSERT((keg->uk_flags & UMA_ZONE_SECONDARY) == 0,
1395 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1396 return (0);
1397 }
1398
1399 if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1400 zone->uz_count = BUCKET_MAX;
1401 else if (keg->uk_ipers <= BUCKET_MAX)
1402 zone->uz_count = keg->uk_ipers;
1403 else
1404 zone->uz_count = BUCKET_MAX;
1405 return (0);
1406 }
1407
1408 /*
1409 * Keg header dtor. This frees all data, destroys locks, frees the hash
1410 * table and removes the keg from the global list.
1411 *
1412 * Arguments/Returns follow uma_dtor specifications
1413 * udata unused
1414 */
1415 static void
1416 keg_dtor(void *arg, int size, void *udata)
1417 {
1418 uma_keg_t keg;
1419
1420 keg = (uma_keg_t)arg;
1421 mtx_lock(&keg->uk_lock);
1422 if (keg->uk_free != 0) {
1423 printf("Freed UMA keg was not empty (%d items). "
1424 " Lost %d pages of memory.\n",
1425 keg->uk_free, keg->uk_pages);
1426 }
1427 mtx_unlock(&keg->uk_lock);
1428
1429 if (keg->uk_flags & UMA_ZONE_HASH)
1430 hash_free(&keg->uk_hash);
1431
1432 mtx_destroy(&keg->uk_lock);
1433 }
1434
1435 /*
1436 * Zone header dtor.
1437 *
1438 * Arguments/Returns follow uma_dtor specifications
1439 * udata unused
1440 */
1441 static void
1442 zone_dtor(void *arg, int size, void *udata)
1443 {
1444 uma_zone_t zone;
1445 uma_keg_t keg;
1446
1447 zone = (uma_zone_t)arg;
1448 keg = zone->uz_keg;
1449
1450 if (!(keg->uk_flags & UMA_ZFLAG_INTERNAL))
1451 cache_drain(zone);
1452
1453 mtx_lock(&uma_mtx);
1454 zone_drain(zone);
1455 if (keg->uk_flags & UMA_ZONE_SECONDARY) {
1456 LIST_REMOVE(zone, uz_link);
1457 /*
1458 * XXX there are some races here where
1459 * the zone can be drained but zone lock
1460 * released and then refilled before we
1461 * remove it... we dont care for now
1462 */
1463 ZONE_LOCK(zone);
1464 if (LIST_EMPTY(&keg->uk_zones))
1465 keg->uk_flags &= ~UMA_ZONE_SECONDARY;
1466 ZONE_UNLOCK(zone);
1467 mtx_unlock(&uma_mtx);
1468 } else {
1469 LIST_REMOVE(keg, uk_link);
1470 LIST_REMOVE(zone, uz_link);
1471 mtx_unlock(&uma_mtx);
1472 uma_zfree_internal(kegs, keg, NULL, SKIP_NONE,
1473 ZFREE_STATFREE);
1474 }
1475 zone->uz_keg = NULL;
1476 }
1477
1478 /*
1479 * Traverses every zone in the system and calls a callback
1480 *
1481 * Arguments:
1482 * zfunc A pointer to a function which accepts a zone
1483 * as an argument.
1484 *
1485 * Returns:
1486 * Nothing
1487 */
1488 static void
1489 zone_foreach(void (*zfunc)(uma_zone_t))
1490 {
1491 uma_keg_t keg;
1492 uma_zone_t zone;
1493
1494 mtx_lock(&uma_mtx);
1495 LIST_FOREACH(keg, &uma_kegs, uk_link) {
1496 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1497 zfunc(zone);
1498 }
1499 mtx_unlock(&uma_mtx);
1500 }
1501
1502 /* Public functions */
1503 /* See uma.h */
1504 void
1505 uma_startup(void *bootmem, int boot_pages)
1506 {
1507 struct uma_zctor_args args;
1508 uma_slab_t slab;
1509 u_int slabsize;
1510 u_int objsize, totsize, wsize;
1511 int i;
1512
1513 #ifdef UMA_DEBUG
1514 printf("Creating uma keg headers zone and keg.\n");
1515 #endif
1516 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1517
1518 /*
1519 * Figure out the maximum number of items-per-slab we'll have if
1520 * we're using the OFFPAGE slab header to track free items, given
1521 * all possible object sizes and the maximum desired wastage
1522 * (UMA_MAX_WASTE).
1523 *
1524 * We iterate until we find an object size for
1525 * which the calculated wastage in zone_small_init() will be
1526 * enough to warrant OFFPAGE. Since wastedspace versus objsize
1527 * is an overall increasing see-saw function, we find the smallest
1528 * objsize such that the wastage is always acceptable for objects
1529 * with that objsize or smaller. Since a smaller objsize always
1530 * generates a larger possible uma_max_ipers, we use this computed
1531 * objsize to calculate the largest ipers possible. Since the
1532 * ipers calculated for OFFPAGE slab headers is always larger than
1533 * the ipers initially calculated in zone_small_init(), we use
1534 * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1535 * obtain the maximum ipers possible for offpage slab headers.
1536 *
1537 * It should be noted that ipers versus objsize is an inversely
1538 * proportional function which drops off rather quickly so as
1539 * long as our UMA_MAX_WASTE is such that the objsize we calculate
1540 * falls into the portion of the inverse relation AFTER the steep
1541 * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1542 *
1543 * Note that we have 8-bits (1 byte) to use as a freelist index
1544 * inside the actual slab header itself and this is enough to
1545 * accommodate us. In the worst case, a UMA_SMALLEST_UNIT sized
1546 * object with offpage slab header would have ipers =
1547 * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1548 * 1 greater than what our byte-integer freelist index can
1549 * accommodate, but we know that this situation never occurs as
1550 * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1551 * that we need to go to offpage slab headers. Or, if we do,
1552 * then we trap that condition below and panic in the INVARIANTS case.
1553 */
1554 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1555 totsize = wsize;
1556 objsize = UMA_SMALLEST_UNIT;
1557 while (totsize >= wsize) {
1558 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1559 (objsize + UMA_FRITM_SZ);
1560 totsize *= (UMA_FRITM_SZ + objsize);
1561 objsize++;
1562 }
1563 if (objsize > UMA_SMALLEST_UNIT)
1564 objsize--;
1565 uma_max_ipers = UMA_SLAB_SIZE / objsize;
1566
1567 wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1568 totsize = wsize;
1569 objsize = UMA_SMALLEST_UNIT;
1570 while (totsize >= wsize) {
1571 totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1572 (objsize + UMA_FRITMREF_SZ);
1573 totsize *= (UMA_FRITMREF_SZ + objsize);
1574 objsize++;
1575 }
1576 if (objsize > UMA_SMALLEST_UNIT)
1577 objsize--;
1578 uma_max_ipers_ref = UMA_SLAB_SIZE / objsize;
1579
1580 KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1581 ("uma_startup: calculated uma_max_ipers values too large!"));
1582
1583 #ifdef UMA_DEBUG
1584 printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1585 printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
1586 uma_max_ipers_ref);
1587 #endif
1588
1589 /* "manually" create the initial zone */
1590 args.name = "UMA Kegs";
1591 args.size = sizeof(struct uma_keg);
1592 args.ctor = keg_ctor;
1593 args.dtor = keg_dtor;
1594 args.uminit = zero_init;
1595 args.fini = NULL;
1596 args.keg = &masterkeg;
1597 args.align = 32 - 1;
1598 args.flags = UMA_ZFLAG_INTERNAL;
1599 	/* The initial zone has no per-CPU queues so it's smaller */
1600 zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1601
1602 #ifdef UMA_DEBUG
1603 printf("Filling boot free list.\n");
1604 #endif
1605 for (i = 0; i < boot_pages; i++) {
1606 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1607 slab->us_data = (u_int8_t *)slab;
1608 slab->us_flags = UMA_SLAB_BOOT;
1609 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1610 }
1611 mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1612
1613 #ifdef UMA_DEBUG
1614 printf("Creating uma zone headers zone and keg.\n");
1615 #endif
1616 args.name = "UMA Zones";
1617 args.size = sizeof(struct uma_zone) +
1618 (sizeof(struct uma_cache) * (mp_maxid + 1));
1619 args.ctor = zone_ctor;
1620 args.dtor = zone_dtor;
1621 args.uminit = zero_init;
1622 args.fini = NULL;
1623 args.keg = NULL;
1624 args.align = 32 - 1;
1625 args.flags = UMA_ZFLAG_INTERNAL;
1626 	/* The initial zone has no per-CPU queues so it's smaller */
1627 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1628
1629 #ifdef UMA_DEBUG
1630 printf("Initializing pcpu cache locks.\n");
1631 #endif
1632 #ifdef UMA_DEBUG
1633 printf("Creating slab and hash zones.\n");
1634 #endif
1635
1636 /*
1637 * This is the max number of free list items we'll have with
1638 * offpage slabs.
1639 */
1640 slabsize = uma_max_ipers * UMA_FRITM_SZ;
1641 slabsize += sizeof(struct uma_slab);
1642
1643 /* Now make a zone for slab headers */
1644 slabzone = uma_zcreate("UMA Slabs",
1645 slabsize,
1646 NULL, NULL, NULL, NULL,
1647 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1648
1649 /*
1650 * We also create a zone for the bigger slabs with reference
1651 	 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1652 */
1653 slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1654 slabsize += sizeof(struct uma_slab_refcnt);
1655 slabrefzone = uma_zcreate("UMA RCntSlabs",
1656 slabsize,
1657 NULL, NULL, NULL, NULL,
1658 UMA_ALIGN_PTR,
1659 UMA_ZFLAG_INTERNAL);
1660
1661 hashzone = uma_zcreate("UMA Hash",
1662 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1663 NULL, NULL, NULL, NULL,
1664 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1665
1666 bucket_init();
1667
1668 #ifdef UMA_MD_SMALL_ALLOC
1669 booted = 1;
1670 #endif
1671
1672 #ifdef UMA_DEBUG
1673 printf("UMA startup complete.\n");
1674 #endif
1675 }
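
/*
 * Illustrative sketch (standalone, userland): the uma_max_ipers search loop
 * from uma_startup() above, run with assumed example constants.  The EX_*
 * values are hypothetical stand-ins for the uma_int.h definitions, so the
 * numbers printed are only indicative of how the loop behaves.
 */
#include <stdio.h>

#define EX_SLAB_SIZE    4096    /* stand-in for UMA_SLAB_SIZE */
#define EX_SLAB_HDR     64      /* assumed sizeof(struct uma_slab) */
#define EX_FRITM_SZ     1       /* assumed per-item freelist linkage */
#define EX_MAX_WASTE    256     /* stand-in for UMA_MAX_WASTE */
#define EX_SMALLEST     16      /* stand-in for UMA_SMALLEST_UNIT */

int
main(void)
{
        unsigned wsize, totsize, objsize, max_ipers;

        wsize = EX_SLAB_SIZE - EX_SLAB_HDR - EX_MAX_WASTE;
        totsize = wsize;
        objsize = EX_SMALLEST;
        while (totsize >= wsize) {
                totsize = (EX_SLAB_SIZE - EX_SLAB_HDR) /
                    (objsize + EX_FRITM_SZ);
                totsize *= (objsize + EX_FRITM_SZ);
                objsize++;
        }
        if (objsize > EX_SMALLEST)
                objsize--;
        max_ipers = EX_SLAB_SIZE / objsize;
        printf("loop settles on objsize %u -> max ipers %u\n",
            objsize, max_ipers);
        return (0);
}
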
1676
1677 /* see uma.h */
1678 void
1679 uma_startup2(void)
1680 {
1681 booted = 1;
1682 bucket_enable();
1683 #ifdef UMA_DEBUG
1684 printf("UMA startup2 complete.\n");
1685 #endif
1686 }
1687
1688 /*
1689 * Initialize our callout handle
1690 *
1691 */
1692
1693 static void
1694 uma_startup3(void)
1695 {
1696 #ifdef UMA_DEBUG
1697 printf("Starting callout.\n");
1698 #endif
1699 callout_init(&uma_callout, CALLOUT_MPSAFE);
1700 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1701 #ifdef UMA_DEBUG
1702 printf("UMA startup3 complete.\n");
1703 #endif
1704 }
1705
1706 static uma_zone_t
1707 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1708 int align, u_int32_t flags)
1709 {
1710 struct uma_kctor_args args;
1711
1712 args.size = size;
1713 args.uminit = uminit;
1714 args.fini = fini;
1715 args.align = align;
1716 args.flags = flags;
1717 args.zone = zone;
1718 return (uma_zalloc_internal(kegs, &args, M_WAITOK));
1719 }
1720
1721 /* See uma.h */
1722 uma_zone_t
1723 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1724 uma_init uminit, uma_fini fini, int align, u_int32_t flags)
1725
1726 {
1727 struct uma_zctor_args args;
1728
1729 /* This stuff is essential for the zone ctor */
1730 args.name = name;
1731 args.size = size;
1732 args.ctor = ctor;
1733 args.dtor = dtor;
1734 args.uminit = uminit;
1735 args.fini = fini;
1736 args.align = align;
1737 args.flags = flags;
1738 args.keg = NULL;
1739
1740 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1741 }
1742
1743 /* See uma.h */
1744 uma_zone_t
1745 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1746 uma_init zinit, uma_fini zfini, uma_zone_t master)
1747 {
1748 struct uma_zctor_args args;
1749
1750 args.name = name;
1751 args.size = master->uz_keg->uk_size;
1752 args.ctor = ctor;
1753 args.dtor = dtor;
1754 args.uminit = zinit;
1755 args.fini = zfini;
1756 args.align = master->uz_keg->uk_align;
1757 args.flags = master->uz_keg->uk_flags | UMA_ZONE_SECONDARY;
1758 args.keg = master->uz_keg;
1759
1760 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1761 }
1762
1763 /* See uma.h */
1764 void
1765 uma_zdestroy(uma_zone_t zone)
1766 {
1767
1768 uma_zfree_internal(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
1769 }
1770
1771 /* See uma.h */
1772 void *
1773 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1774 {
1775 void *item;
1776 uma_cache_t cache;
1777 uma_bucket_t bucket;
1778 int cpu;
1779 int badness;
1780
1781 /* This is the fast path allocation */
1782 #ifdef UMA_DEBUG_ALLOC_1
1783 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1784 #endif
1785 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1786 zone->uz_name, flags);
1787
1788 if (!(flags & M_NOWAIT)) {
1789 KASSERT(curthread->td_intr_nesting_level == 0,
1790 ("malloc(M_WAITOK) in interrupt context"));
1791 if (nosleepwithlocks) {
1792 #ifdef WITNESS
1793 badness = WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
1794 NULL,
1795 "malloc(M_WAITOK) of \"%s\", forcing M_NOWAIT",
1796 zone->uz_name);
1797 #else
1798 badness = 1;
1799 #endif
1800 } else {
1801 badness = 0;
1802 #ifdef WITNESS
1803 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1804 "malloc(M_WAITOK) of \"%s\"", zone->uz_name);
1805 #endif
1806 }
1807 if (badness) {
1808 flags &= ~M_WAITOK;
1809 flags |= M_NOWAIT;
1810 }
1811 }
1812
1813 /*
1814 * If possible, allocate from the per-CPU cache. There are two
1815 * requirements for safe access to the per-CPU cache: (1) the thread
1816 * accessing the cache must not be preempted or yield during access,
1817 * and (2) the thread must not migrate CPUs without switching which
1818 * cache it accesses. We rely on a critical section to prevent
1819 * preemption and migration. We release the critical section in
1820 * order to acquire the zone mutex if we are unable to allocate from
1821 * the current cache; when we re-acquire the critical section, we
1822 * must detect and handle migration if it has occurred.
1823 */
1824 zalloc_restart:
1825 critical_enter();
1826 cpu = curcpu;
1827 cache = &zone->uz_cpu[cpu];
1828
1829 zalloc_start:
1830 bucket = cache->uc_allocbucket;
1831
1832 if (bucket) {
1833 if (bucket->ub_cnt > 0) {
1834 bucket->ub_cnt--;
1835 item = bucket->ub_bucket[bucket->ub_cnt];
1836 #ifdef INVARIANTS
1837 bucket->ub_bucket[bucket->ub_cnt] = NULL;
1838 #endif
1839 KASSERT(item != NULL,
1840 ("uma_zalloc: Bucket pointer mangled."));
1841 cache->uc_allocs++;
1842 critical_exit();
1843 #ifdef INVARIANTS
1844 ZONE_LOCK(zone);
1845 uma_dbg_alloc(zone, NULL, item);
1846 ZONE_UNLOCK(zone);
1847 #endif
1848 if (zone->uz_ctor != NULL) {
1849 if (zone->uz_ctor(item, zone->uz_keg->uk_size,
1850 udata, flags) != 0) {
1851 uma_zfree_internal(zone, item, udata,
1852 SKIP_DTOR, ZFREE_STATFAIL |
1853 ZFREE_STATFREE);
1854 return (NULL);
1855 }
1856 }
1857 if (flags & M_ZERO)
1858 bzero(item, zone->uz_keg->uk_size);
1859 return (item);
1860 } else if (cache->uc_freebucket) {
1861 /*
1862 * We have run out of items in our allocbucket.
1863 * See if we can switch with our free bucket.
1864 */
1865 if (cache->uc_freebucket->ub_cnt > 0) {
1866 #ifdef UMA_DEBUG_ALLOC
1867 printf("uma_zalloc: Swapping empty with"
1868 " alloc.\n");
1869 #endif
1870 bucket = cache->uc_freebucket;
1871 cache->uc_freebucket = cache->uc_allocbucket;
1872 cache->uc_allocbucket = bucket;
1873
1874 goto zalloc_start;
1875 }
1876 }
1877 }
1878 /*
1879 	 * The attempt to retrieve the item from the per-CPU cache has failed, so
1880 * we must go back to the zone. This requires the zone lock, so we
1881 * must drop the critical section, then re-acquire it when we go back
1882 * to the cache. Since the critical section is released, we may be
1883 * preempted or migrate. As such, make sure not to maintain any
1884 * thread-local state specific to the cache from prior to releasing
1885 * the critical section.
1886 */
1887 critical_exit();
1888 ZONE_LOCK(zone);
1889 critical_enter();
1890 cpu = curcpu;
1891 cache = &zone->uz_cpu[cpu];
1892 bucket = cache->uc_allocbucket;
1893 if (bucket != NULL) {
1894 if (bucket->ub_cnt > 0) {
1895 ZONE_UNLOCK(zone);
1896 goto zalloc_start;
1897 }
1898 bucket = cache->uc_freebucket;
1899 if (bucket != NULL && bucket->ub_cnt > 0) {
1900 ZONE_UNLOCK(zone);
1901 goto zalloc_start;
1902 }
1903 }
1904
1905 /* Since we have locked the zone we may as well send back our stats */
1906 zone->uz_allocs += cache->uc_allocs;
1907 cache->uc_allocs = 0;
1908 zone->uz_frees += cache->uc_frees;
1909 cache->uc_frees = 0;
1910
1911 /* Our old one is now a free bucket */
1912 if (cache->uc_allocbucket) {
1913 KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1914 ("uma_zalloc_arg: Freeing a non free bucket."));
1915 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1916 cache->uc_allocbucket, ub_link);
1917 cache->uc_allocbucket = NULL;
1918 }
1919
1920 /* Check the free list for a new alloc bucket */
1921 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1922 KASSERT(bucket->ub_cnt != 0,
1923 ("uma_zalloc_arg: Returning an empty bucket."));
1924
1925 LIST_REMOVE(bucket, ub_link);
1926 cache->uc_allocbucket = bucket;
1927 ZONE_UNLOCK(zone);
1928 goto zalloc_start;
1929 }
1930 /* We are no longer associated with this CPU. */
1931 critical_exit();
1932
1933 /* Bump up our uz_count so we get here less */
1934 if (zone->uz_count < BUCKET_MAX)
1935 zone->uz_count++;
1936
1937 /*
1938 * Now lets just fill a bucket and put it on the free list. If that
1939 	 * works we'll restart the allocation from the beginning.
1940 */
1941 if (uma_zalloc_bucket(zone, flags)) {
1942 ZONE_UNLOCK(zone);
1943 goto zalloc_restart;
1944 }
1945 ZONE_UNLOCK(zone);
1946 /*
1947 * We may not be able to get a bucket so return an actual item.
1948 */
1949 #ifdef UMA_DEBUG
1950 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1951 #endif
1952
1953 return (uma_zalloc_internal(zone, udata, flags));
1954 }
1955
1956 static uma_slab_t
1957 uma_zone_slab(uma_zone_t zone, int flags)
1958 {
1959 uma_slab_t slab;
1960 uma_keg_t keg;
1961
1962 keg = zone->uz_keg;
1963
1964 /*
1965 * This is to prevent us from recursively trying to allocate
1966 * buckets. The problem is that if an allocation forces us to
1967 * grab a new bucket we will call page_alloc, which will go off
1968 * and cause the vm to allocate vm_map_entries. If we need new
1969 * buckets there too we will recurse in kmem_alloc and bad
1970 * things happen. So instead we return a NULL bucket, and make
1971 	 * the code that allocates buckets smart enough to deal with it.
1972 *
1973 * XXX: While we want this protection for the bucket zones so that
1974 * recursion from the VM is handled (and the calling code that
1975 * allocates buckets knows how to deal with it), we do not want
1976 * to prevent allocation from the slab header zones (slabzone
1977 * and slabrefzone) if uk_recurse is not zero for them. The
1978 * reason is that it could lead to NULL being returned for
1979 * slab header allocations even in the M_WAITOK case, and the
1980 * caller can't handle that.
1981 */
1982 if (keg->uk_flags & UMA_ZFLAG_INTERNAL && keg->uk_recurse != 0)
1983 if (zone != slabzone && zone != slabrefzone && zone != zones)
1984 return (NULL);
1985
1986 slab = NULL;
1987
1988 for (;;) {
1989 /*
1990 * Find a slab with some space. Prefer slabs that are partially
1991 * used over those that are totally full. This helps to reduce
1992 * fragmentation.
1993 */
1994 if (keg->uk_free != 0) {
1995 if (!LIST_EMPTY(&keg->uk_part_slab)) {
1996 slab = LIST_FIRST(&keg->uk_part_slab);
1997 } else {
1998 slab = LIST_FIRST(&keg->uk_free_slab);
1999 LIST_REMOVE(slab, us_link);
2000 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2001 us_link);
2002 }
2003 return (slab);
2004 }
2005
2006 /*
2007 * M_NOVM means don't ask at all!
2008 */
2009 if (flags & M_NOVM)
2010 break;
2011
2012 if (keg->uk_maxpages &&
2013 keg->uk_pages >= keg->uk_maxpages) {
2014 keg->uk_flags |= UMA_ZFLAG_FULL;
2015
2016 if (flags & M_NOWAIT)
2017 break;
2018 else
2019 msleep(keg, &keg->uk_lock, PVM,
2020 "zonelimit", 0);
2021 continue;
2022 }
2023 keg->uk_recurse++;
2024 slab = slab_zalloc(zone, flags);
2025 keg->uk_recurse--;
2026
2027 /*
2028 * If we got a slab here it's safe to mark it partially used
2029 * and return. We assume that the caller is going to remove
2030 * at least one item.
2031 */
2032 if (slab) {
2033 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2034 return (slab);
2035 }
2036 /*
2037 * We might not have been able to get a slab but another cpu
2038 * could have while we were unlocked. Check again before we
2039 * fail.
2040 */
2041 if (flags & M_NOWAIT)
2042 flags |= M_NOVM;
2043 }
2044 return (slab);
2045 }
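/*
 * Note on the retry loop above: once M_NOVM has been OR'ed in for an
 * M_NOWAIT caller after a failed slab_zalloc(), the next iteration can
 * only succeed from uk_free (a slab freed by another CPU while the zone
 * was unlocked inside slab_zalloc()); otherwise it breaks out and
 * returns NULL.
 */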
2046
2047 static void *
2048 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
2049 {
2050 uma_keg_t keg;
2051 uma_slabrefcnt_t slabref;
2052 void *item;
2053 u_int8_t freei;
2054
2055 keg = zone->uz_keg;
2056
2057 freei = slab->us_firstfree;
2058 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2059 slabref = (uma_slabrefcnt_t)slab;
2060 slab->us_firstfree = slabref->us_freelist[freei].us_item;
2061 } else {
2062 slab->us_firstfree = slab->us_freelist[freei].us_item;
2063 }
2064 item = slab->us_data + (keg->uk_rsize * freei);
2065
2066 slab->us_freecount--;
2067 keg->uk_free--;
2068 #ifdef INVARIANTS
2069 uma_dbg_alloc(zone, slab, item);
2070 #endif
2071 /* Move this slab to the full list */
2072 if (slab->us_freecount == 0) {
2073 LIST_REMOVE(slab, us_link);
2074 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2075 }
2076
2077 return (item);
2078 }
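/*
 * The address computation above is simple offset arithmetic into the
 * slab's data area.  A worked example, assuming a hypothetical keg with
 * uk_rsize = 64 and us_firstfree = 3:
 *
 *	item = us_data + 64 * 3 = us_data + 192
 *
 * us_firstfree is then advanced to the index stored in free-list slot 3,
 * so the next allocation from this slab continues from there.
 */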
2079
2080 static int
2081 uma_zalloc_bucket(uma_zone_t zone, int flags)
2082 {
2083 uma_bucket_t bucket;
2084 uma_slab_t slab;
2085 int16_t saved;
2086 int max, origflags = flags;
2087
2088 /*
2089 * Try this zone's free list first so we don't allocate extra buckets.
2090 */
2091 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2092 KASSERT(bucket->ub_cnt == 0,
2093 ("uma_zalloc_bucket: Bucket on free list is not empty."));
2094 LIST_REMOVE(bucket, ub_link);
2095 } else {
2096 int bflags;
2097
2098 bflags = (flags & ~M_ZERO);
2099 if (zone->uz_keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2100 bflags |= M_NOVM;
2101
2102 ZONE_UNLOCK(zone);
2103 bucket = bucket_alloc(zone->uz_count, bflags);
2104 ZONE_LOCK(zone);
2105 }
2106
2107 if (bucket == NULL)
2108 return (0);
2109
2110 #ifdef SMP
2111 /*
2112 * This code is here to limit the number of simultaneous bucket fills
2113 	 * for any given zone to the number of per-CPU caches in this zone. This
2114 * is done so that we don't allocate more memory than we really need.
2115 */
2116 if (zone->uz_fills >= mp_ncpus)
2117 goto done;
2118
2119 #endif
2120 zone->uz_fills++;
2121
2122 max = MIN(bucket->ub_entries, zone->uz_count);
2123 /* Try to keep the buckets totally full */
2124 saved = bucket->ub_cnt;
2125 while (bucket->ub_cnt < max &&
2126 (slab = uma_zone_slab(zone, flags)) != NULL) {
2127 while (slab->us_freecount && bucket->ub_cnt < max) {
2128 bucket->ub_bucket[bucket->ub_cnt++] =
2129 uma_slab_alloc(zone, slab);
2130 }
2131
2132 /* Don't block on the next fill */
2133 flags |= M_NOWAIT;
2134 }
2135
2136 /*
2137 * We unlock here because we need to call the zone's init.
2138 * It should be safe to unlock because the slab dealt with
2139 * above is already on the appropriate list within the keg
2140 * and the bucket we filled is not yet on any list, so we
2141 * own it.
2142 */
2143 if (zone->uz_init != NULL) {
2144 int i;
2145
2146 ZONE_UNLOCK(zone);
2147 for (i = saved; i < bucket->ub_cnt; i++)
2148 if (zone->uz_init(bucket->ub_bucket[i],
2149 zone->uz_keg->uk_size, origflags) != 0)
2150 break;
2151 /*
2152 * If we couldn't initialize the whole bucket, put the
2153 * rest back onto the freelist.
2154 */
2155 if (i != bucket->ub_cnt) {
2156 int j;
2157
2158 for (j = i; j < bucket->ub_cnt; j++) {
2159 uma_zfree_internal(zone, bucket->ub_bucket[j],
2160 NULL, SKIP_FINI, 0);
2161 #ifdef INVARIANTS
2162 bucket->ub_bucket[j] = NULL;
2163 #endif
2164 }
2165 bucket->ub_cnt = i;
2166 }
2167 ZONE_LOCK(zone);
2168 }
2169
2170 zone->uz_fills--;
2171 if (bucket->ub_cnt != 0) {
2172 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2173 bucket, ub_link);
2174 return (1);
2175 }
2176 #ifdef SMP
2177 done:
2178 #endif
2179 bucket_free(bucket);
2180
2181 return (0);
2182 }
2183 /*
2184 * Allocates an item for an internal zone
2185 *
2186 * Arguments
2187 * zone The zone to alloc for.
2188 * udata The data to be passed to the constructor.
2189 * flags M_WAITOK, M_NOWAIT, M_ZERO.
2190 *
2191 * Returns
2192 * NULL if there is no memory and M_NOWAIT is set
2193 * An item if successful
2194 */
2195
2196 static void *
2197 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
2198 {
2199 uma_keg_t keg;
2200 uma_slab_t slab;
2201 void *item;
2202
2203 item = NULL;
2204 keg = zone->uz_keg;
2205
2206 #ifdef UMA_DEBUG_ALLOC
2207 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2208 #endif
2209 ZONE_LOCK(zone);
2210
2211 slab = uma_zone_slab(zone, flags);
2212 if (slab == NULL) {
2213 zone->uz_fails++;
2214 ZONE_UNLOCK(zone);
2215 return (NULL);
2216 }
2217
2218 item = uma_slab_alloc(zone, slab);
2219
2220 zone->uz_allocs++;
2221
2222 ZONE_UNLOCK(zone);
2223
2224 /*
2225 * We have to call both the zone's init (not the keg's init)
2226 * and the zone's ctor. This is because the item is going from
2227 * a keg slab directly to the user, and the user is expecting it
2228 * to be both zone-init'd as well as zone-ctor'd.
2229 */
2230 if (zone->uz_init != NULL) {
2231 if (zone->uz_init(item, keg->uk_size, flags) != 0) {
2232 uma_zfree_internal(zone, item, udata, SKIP_FINI,
2233 ZFREE_STATFAIL | ZFREE_STATFREE);
2234 return (NULL);
2235 }
2236 }
2237 if (zone->uz_ctor != NULL) {
2238 if (zone->uz_ctor(item, keg->uk_size, udata, flags) != 0) {
2239 uma_zfree_internal(zone, item, udata, SKIP_DTOR,
2240 ZFREE_STATFAIL | ZFREE_STATFREE);
2241 return (NULL);
2242 }
2243 }
2244 if (flags & M_ZERO)
2245 bzero(item, keg->uk_size);
2246
2247 return (item);
2248 }
2249
2250 /* See uma.h */
2251 void
2252 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2253 {
2254 uma_keg_t keg;
2255 uma_cache_t cache;
2256 uma_bucket_t bucket;
2257 int bflags;
2258 int cpu;
2259
2260 keg = zone->uz_keg;
2261
2262 #ifdef UMA_DEBUG_ALLOC_1
2263 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2264 #endif
2265 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2266 zone->uz_name);
2267
2268 if (zone->uz_dtor)
2269 zone->uz_dtor(item, keg->uk_size, udata);
2270 #ifdef INVARIANTS
2271 ZONE_LOCK(zone);
2272 if (keg->uk_flags & UMA_ZONE_MALLOC)
2273 uma_dbg_free(zone, udata, item);
2274 else
2275 uma_dbg_free(zone, NULL, item);
2276 ZONE_UNLOCK(zone);
2277 #endif
2278 /*
2279 * The race here is acceptable. If we miss it we'll just have to wait
2280 * a little longer for the limits to be reset.
2281 */
2282 if (keg->uk_flags & UMA_ZFLAG_FULL)
2283 goto zfree_internal;
2284
2285 /*
2286 * If possible, free to the per-CPU cache. There are two
2287 * requirements for safe access to the per-CPU cache: (1) the thread
2288 * accessing the cache must not be preempted or yield during access,
2289 * and (2) the thread must not migrate CPUs without switching which
2290 * cache it accesses. We rely on a critical section to prevent
2291 * preemption and migration. We release the critical section in
2292 * order to acquire the zone mutex if we are unable to free to the
2293 * current cache; when we re-acquire the critical section, we must
2294 * detect and handle migration if it has occurred.
2295 */
2296 zfree_restart:
2297 critical_enter();
2298 cpu = curcpu;
2299 cache = &zone->uz_cpu[cpu];
2300
2301 zfree_start:
2302 bucket = cache->uc_freebucket;
2303
2304 if (bucket) {
2305 /*
2306 * Do we have room in our bucket? It is OK for this uz count
2307 * check to be slightly out of sync.
2308 */
2309
2310 if (bucket->ub_cnt < bucket->ub_entries) {
2311 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2312 ("uma_zfree: Freeing to non free bucket index."));
2313 bucket->ub_bucket[bucket->ub_cnt] = item;
2314 bucket->ub_cnt++;
2315 cache->uc_frees++;
2316 critical_exit();
2317 return;
2318 } else if (cache->uc_allocbucket) {
2319 #ifdef UMA_DEBUG_ALLOC
2320 printf("uma_zfree: Swapping buckets.\n");
2321 #endif
2322 /*
2323 * We have run out of space in our freebucket.
2324 * See if we can switch with our alloc bucket.
2325 */
2326 if (cache->uc_allocbucket->ub_cnt <
2327 cache->uc_freebucket->ub_cnt) {
2328 bucket = cache->uc_freebucket;
2329 cache->uc_freebucket = cache->uc_allocbucket;
2330 cache->uc_allocbucket = bucket;
2331 goto zfree_start;
2332 }
2333 }
2334 }
2335 /*
2336 * We can get here for two reasons:
2337 *
2338 * 1) The buckets are NULL
2339 * 2) The alloc and free buckets are both somewhat full.
2340 *
2341 	 * We must go back to the zone, which requires acquiring the zone lock,
2342 * which in turn means we must release and re-acquire the critical
2343 * section. Since the critical section is released, we may be
2344 * preempted or migrate. As such, make sure not to maintain any
2345 * thread-local state specific to the cache from prior to releasing
2346 * the critical section.
2347 */
2348 critical_exit();
2349 ZONE_LOCK(zone);
2350 critical_enter();
2351 cpu = curcpu;
2352 cache = &zone->uz_cpu[cpu];
2353 if (cache->uc_freebucket != NULL) {
2354 if (cache->uc_freebucket->ub_cnt <
2355 cache->uc_freebucket->ub_entries) {
2356 ZONE_UNLOCK(zone);
2357 goto zfree_start;
2358 }
2359 if (cache->uc_allocbucket != NULL &&
2360 (cache->uc_allocbucket->ub_cnt <
2361 cache->uc_freebucket->ub_cnt)) {
2362 ZONE_UNLOCK(zone);
2363 goto zfree_start;
2364 }
2365 }
2366
2367 /* Since we have locked the zone we may as well send back our stats */
2368 zone->uz_allocs += cache->uc_allocs;
2369 cache->uc_allocs = 0;
2370 zone->uz_frees += cache->uc_frees;
2371 cache->uc_frees = 0;
2372
2373 bucket = cache->uc_freebucket;
2374 cache->uc_freebucket = NULL;
2375
2376 /* Can we throw this on the zone full list? */
2377 if (bucket != NULL) {
2378 #ifdef UMA_DEBUG_ALLOC
2379 printf("uma_zfree: Putting old bucket on the free list.\n");
2380 #endif
2381 /* ub_cnt is pointing to the last free item */
2382 KASSERT(bucket->ub_cnt != 0,
2383 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2384 LIST_INSERT_HEAD(&zone->uz_full_bucket,
2385 bucket, ub_link);
2386 }
2387 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2388 LIST_REMOVE(bucket, ub_link);
2389 ZONE_UNLOCK(zone);
2390 cache->uc_freebucket = bucket;
2391 goto zfree_start;
2392 }
2393 /* We are no longer associated with this CPU. */
2394 critical_exit();
2395
2396 /* And the zone.. */
2397 ZONE_UNLOCK(zone);
2398
2399 #ifdef UMA_DEBUG_ALLOC
2400 printf("uma_zfree: Allocating new free bucket.\n");
2401 #endif
2402 bflags = M_NOWAIT;
2403
2404 if (keg->uk_flags & UMA_ZFLAG_CACHEONLY)
2405 bflags |= M_NOVM;
2406 bucket = bucket_alloc(zone->uz_count, bflags);
2407 if (bucket) {
2408 ZONE_LOCK(zone);
2409 LIST_INSERT_HEAD(&zone->uz_free_bucket,
2410 bucket, ub_link);
2411 ZONE_UNLOCK(zone);
2412 goto zfree_restart;
2413 }
2414
2415 /*
2416 * If nothing else caught this, we'll just do an internal free.
2417 */
2418 zfree_internal:
2419 uma_zfree_internal(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);
2420
2421 return;
2422 }
2423
2424 /*
2425 * Frees an item to an INTERNAL zone or allocates a free bucket
2426 *
2427 * Arguments:
2428 * zone The zone to free to
2429 * item The item we're freeing
2430 * udata User supplied data for the dtor
2431 * skip Skip dtors and finis
2432 */
2433 static void
2434 uma_zfree_internal(uma_zone_t zone, void *item, void *udata,
2435 enum zfreeskip skip, int flags)
2436 {
2437 uma_slab_t slab;
2438 uma_slabrefcnt_t slabref;
2439 uma_keg_t keg;
2440 u_int8_t *mem;
2441 u_int8_t freei;
2442
2443 keg = zone->uz_keg;
2444
2445 if (skip < SKIP_DTOR && zone->uz_dtor)
2446 zone->uz_dtor(item, keg->uk_size, udata);
2447 if (skip < SKIP_FINI && zone->uz_fini)
2448 zone->uz_fini(item, keg->uk_size);
2449
2450 ZONE_LOCK(zone);
2451
2452 if (flags & ZFREE_STATFAIL)
2453 zone->uz_fails++;
2454 if (flags & ZFREE_STATFREE)
2455 zone->uz_frees++;
2456
2457 if (!(keg->uk_flags & UMA_ZONE_MALLOC)) {
2458 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2459 if (keg->uk_flags & UMA_ZONE_HASH)
2460 slab = hash_sfind(&keg->uk_hash, mem);
2461 else {
2462 mem += keg->uk_pgoff;
2463 slab = (uma_slab_t)mem;
2464 }
2465 } else {
2466 slab = (uma_slab_t)udata;
2467 }
2468
2469 /* Do we need to remove from any lists? */
2470 if (slab->us_freecount+1 == keg->uk_ipers) {
2471 LIST_REMOVE(slab, us_link);
2472 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2473 } else if (slab->us_freecount == 0) {
2474 LIST_REMOVE(slab, us_link);
2475 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2476 }
2477
2478 /* Slab management stuff */
2479 freei = ((unsigned long)item - (unsigned long)slab->us_data)
2480 / keg->uk_rsize;
2481
2482 #ifdef INVARIANTS
2483 if (!skip)
2484 uma_dbg_free(zone, slab, item);
2485 #endif
2486
2487 if (keg->uk_flags & UMA_ZONE_REFCNT) {
2488 slabref = (uma_slabrefcnt_t)slab;
2489 slabref->us_freelist[freei].us_item = slab->us_firstfree;
2490 } else {
2491 slab->us_freelist[freei].us_item = slab->us_firstfree;
2492 }
2493 slab->us_firstfree = freei;
2494 slab->us_freecount++;
2495
2496 /* Zone statistics */
2497 keg->uk_free++;
2498
2499 if (keg->uk_flags & UMA_ZFLAG_FULL) {
2500 if (keg->uk_pages < keg->uk_maxpages)
2501 keg->uk_flags &= ~UMA_ZFLAG_FULL;
2502
2503 /*
2504 * We can handle one more allocation. Since we're clearing ZFLAG_FULL,
2505 * wake up all procs blocked on pages. This should be uncommon, so
2506 * keeping this simple for now (rather than adding count of blocked
2507 * threads etc).
2508 */
2509 wakeup(keg);
2510 }
2511
2512 ZONE_UNLOCK(zone);
2513 }
2514
2515 /* See uma.h */
2516 void
2517 uma_zone_set_max(uma_zone_t zone, int nitems)
2518 {
2519 uma_keg_t keg;
2520
2521 keg = zone->uz_keg;
2522 ZONE_LOCK(zone);
2523 if (keg->uk_ppera > 1)
2524 keg->uk_maxpages = nitems * keg->uk_ppera;
2525 else
2526 keg->uk_maxpages = nitems / keg->uk_ipers;
2527
2528 if (keg->uk_maxpages * keg->uk_ipers < nitems)
2529 keg->uk_maxpages++;
2530
2531 ZONE_UNLOCK(zone);
2532 }
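/*
 * Because the limit is kept in pages, it is effectively rounded up to a
 * whole number of slabs.  A worked example, assuming a hypothetical keg
 * with uk_ipers = 30 and uk_ppera = 1:
 *
 *	uma_zone_set_max(zone, 1000);
 *	uk_maxpages = 1000 / 30 = 33;	33 * 30 = 990 < 1000, so
 *	uk_maxpages = 34;		allowing up to 34 * 30 = 1020 items
 */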
2533
2534 /* See uma.h */
2535 void
2536 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2537 {
2538 ZONE_LOCK(zone);
2539 KASSERT(zone->uz_keg->uk_pages == 0,
2540 ("uma_zone_set_init on non-empty keg"));
2541 zone->uz_keg->uk_init = uminit;
2542 ZONE_UNLOCK(zone);
2543 }
2544
2545 /* See uma.h */
2546 void
2547 uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2548 {
2549 ZONE_LOCK(zone);
2550 KASSERT(zone->uz_keg->uk_pages == 0,
2551 ("uma_zone_set_fini on non-empty keg"));
2552 zone->uz_keg->uk_fini = fini;
2553 ZONE_UNLOCK(zone);
2554 }
2555
2556 /* See uma.h */
2557 void
2558 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2559 {
2560 ZONE_LOCK(zone);
2561 KASSERT(zone->uz_keg->uk_pages == 0,
2562 ("uma_zone_set_zinit on non-empty keg"));
2563 zone->uz_init = zinit;
2564 ZONE_UNLOCK(zone);
2565 }
2566
2567 /* See uma.h */
2568 void
2569 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2570 {
2571 ZONE_LOCK(zone);
2572 KASSERT(zone->uz_keg->uk_pages == 0,
2573 ("uma_zone_set_zfini on non-empty keg"));
2574 zone->uz_fini = zfini;
2575 ZONE_UNLOCK(zone);
2576 }
2577
2578 /* See uma.h */
2579 /* XXX uk_freef is not actually used with the zone locked */
2580 void
2581 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2582 {
2583 ZONE_LOCK(zone);
2584 zone->uz_keg->uk_freef = freef;
2585 ZONE_UNLOCK(zone);
2586 }
2587
2588 /* See uma.h */
2589 /* XXX uk_allocf is not actually used with the zone locked */
2590 void
2591 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2592 {
2593 ZONE_LOCK(zone);
2594 zone->uz_keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2595 zone->uz_keg->uk_allocf = allocf;
2596 ZONE_UNLOCK(zone);
2597 }
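/*
 * A hedged sketch of a private back-end allocator, assuming the
 * uma_alloc typedef from <vm/uma.h> (the same signature page_alloc()
 * has in this file); my_contig_alloc() and the M_DEVBUF type are
 * illustrative only, and a matching uma_zone_set_freef() would
 * normally be installed as well:
 *
 *	static void *
 *	my_contig_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag,
 *	    int wait)
 *	{
 *		*pflag = UMA_SLAB_PRIV;
 *		return (contigmalloc(bytes, M_DEVBUF, wait, (vm_paddr_t)0,
 *		    ~(vm_paddr_t)0, PAGE_SIZE, 0));
 *	}
 *
 *	uma_zone_set_allocf(foo_zone, my_contig_alloc);
 */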
2598
2599 /* See uma.h */
2600 int
2601 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
2602 {
2603 uma_keg_t keg;
2604 vm_offset_t kva;
2605 int pages;
2606
2607 keg = zone->uz_keg;
2608 pages = count / keg->uk_ipers;
2609
2610 if (pages * keg->uk_ipers < count)
2611 pages++;
2612
2613 kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
2614
2615 if (kva == 0)
2616 return (0);
2617 if (obj == NULL) {
2618 obj = vm_object_allocate(OBJT_DEFAULT,
2619 pages);
2620 } else {
2621 VM_OBJECT_LOCK_INIT(obj, "uma object");
2622 _vm_object_allocate(OBJT_DEFAULT,
2623 pages, obj);
2624 }
2625 ZONE_LOCK(zone);
2626 keg->uk_kva = kva;
2627 keg->uk_obj = obj;
2628 keg->uk_maxpages = pages;
2629 keg->uk_allocf = obj_alloc;
2630 keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
2631 ZONE_UNLOCK(zone);
2632 return (1);
2633 }
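/*
 * A minimal sketch, with foo_zone and FOO_MAX hypothetical: passing
 * obj == NULL lets the routine allocate the backing VM object itself,
 * and a return value of 0 indicates the KVA reservation failed.
 *
 *	if (uma_zone_set_obj(foo_zone, NULL, FOO_MAX) == 0)
 *		panic("foo: could not reserve KVA");
 */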
2634
2635 /* See uma.h */
2636 void
2637 uma_prealloc(uma_zone_t zone, int items)
2638 {
2639 int slabs;
2640 uma_slab_t slab;
2641 uma_keg_t keg;
2642
2643 keg = zone->uz_keg;
2644 ZONE_LOCK(zone);
2645 slabs = items / keg->uk_ipers;
2646 if (slabs * keg->uk_ipers < items)
2647 slabs++;
2648 while (slabs > 0) {
2649 slab = slab_zalloc(zone, M_WAITOK);
2650 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2651 slabs--;
2652 }
2653 ZONE_UNLOCK(zone);
2654 }
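/*
 * A minimal sketch, with foo_zone and FOO_RESERVE hypothetical: the
 * preallocated slabs land on uk_free_slab, so the first FOO_RESERVE
 * items can be allocated without going back to the VM.
 *
 *	uma_prealloc(foo_zone, FOO_RESERVE);
 */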
2655
2656 /* See uma.h */
2657 u_int32_t *
2658 uma_find_refcnt(uma_zone_t zone, void *item)
2659 {
2660 uma_slabrefcnt_t slabref;
2661 uma_keg_t keg;
2662 u_int32_t *refcnt;
2663 int idx;
2664
2665 keg = zone->uz_keg;
2666 slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
2667 (~UMA_SLAB_MASK));
2668 KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
2669 ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
2670 idx = ((unsigned long)item - (unsigned long)slabref->us_data)
2671 / keg->uk_rsize;
2672 refcnt = &slabref->us_freelist[idx].us_refcnt;
2673 	return (refcnt);
2674 }
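/*
 * A hedged sketch of typical use with a UMA_ZONE_REFCNT zone, in the
 * style of the mbuf cluster code; clust_zone and mem are hypothetical:
 *
 *	u_int32_t *refcnt;
 *
 *	refcnt = uma_find_refcnt(clust_zone, mem);
 *	*refcnt = 1;
 */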
2675
2676 /* See uma.h */
2677 void
2678 uma_reclaim(void)
2679 {
2680 #ifdef UMA_DEBUG
2681 printf("UMA: vm asked us to release pages!\n");
2682 #endif
2683 bucket_enable();
2684 zone_foreach(zone_drain);
2685 /*
2686 	 * Some slabs may have been freed while draining the zones above, but the
2687 	 * slab zones were visited early in that pass; drain them again so that
2688 	 * pages which have since become empty can be freed. We do the same for buckets.
2689 */
2690 zone_drain(slabzone);
2691 zone_drain(slabrefzone);
2692 bucket_zone_drain();
2693 }
2694
2695 /* See uma.h */
2696 int
2697 uma_zone_exhausted(uma_zone_t zone)
2698 {
2699 int full;
2700
2701 ZONE_LOCK(zone);
2702 full = (zone->uz_keg->uk_flags & UMA_ZFLAG_FULL);
2703 ZONE_UNLOCK(zone);
2704 return (full);
2705 }
2706
2707 int
2708 uma_zone_exhausted_nolock(uma_zone_t zone)
2709 {
2710 return (zone->uz_keg->uk_flags & UMA_ZFLAG_FULL);
2711 }
2712
2713 void *
2714 uma_large_malloc(int size, int wait)
2715 {
2716 void *mem;
2717 uma_slab_t slab;
2718 u_int8_t flags;
2719
2720 slab = uma_zalloc_internal(slabzone, NULL, wait);
2721 if (slab == NULL)
2722 return (NULL);
2723 mem = page_alloc(NULL, size, &flags, wait);
2724 if (mem) {
2725 vsetslab((vm_offset_t)mem, slab);
2726 slab->us_data = mem;
2727 slab->us_flags = flags | UMA_SLAB_MALLOC;
2728 slab->us_size = size;
2729 } else {
2730 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE,
2731 ZFREE_STATFAIL | ZFREE_STATFREE);
2732 }
2733
2734 return (mem);
2735 }
2736
2737 void
2738 uma_large_free(uma_slab_t slab)
2739 {
2740 vsetobj((vm_offset_t)slab->us_data, kmem_object);
2741 page_free(slab->us_data, slab->us_size, slab->us_flags);
2742 uma_zfree_internal(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE);
2743 }
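/*
 * uma_large_malloc() and uma_large_free() back the malloc(9) path for
 * requests too large for the fixed-size malloc zones; the slab header
 * allocated from slabzone records the size and flags needed to hand
 * the pages back to page_free() later.
 */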
2744
2745 void
2746 uma_print_stats(void)
2747 {
2748 zone_foreach(uma_print_zone);
2749 }
2750
2751 static void
2752 slab_print(uma_slab_t slab)
2753 {
2754 printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
2755 slab->us_keg, slab->us_data, slab->us_freecount,
2756 slab->us_firstfree);
2757 }
2758
2759 static void
2760 cache_print(uma_cache_t cache)
2761 {
2762 printf("alloc: %p(%d), free: %p(%d)\n",
2763 cache->uc_allocbucket,
2764 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2765 cache->uc_freebucket,
2766 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2767 }
2768
2769 void
2770 uma_print_zone(uma_zone_t zone)
2771 {
2772 uma_cache_t cache;
2773 uma_keg_t keg;
2774 uma_slab_t slab;
2775 int i;
2776
2777 keg = zone->uz_keg;
2778 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2779 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
2780 keg->uk_ipers, keg->uk_ppera,
2781 (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
2782 printf("Part slabs:\n");
2783 LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
2784 slab_print(slab);
2785 printf("Free slabs:\n");
2786 LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
2787 slab_print(slab);
2788 printf("Full slabs:\n");
2789 LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
2790 slab_print(slab);
2791 for (i = 0; i <= mp_maxid; i++) {
2792 if (CPU_ABSENT(i))
2793 continue;
2794 cache = &zone->uz_cpu[i];
2795 printf("CPU %d Cache:\n", i);
2796 cache_print(cache);
2797 }
2798 }
2799
2800 /*
2801  * Generate statistics across both the zone and its per-CPU caches. Return
2802 * desired statistics if the pointer is non-NULL for that statistic.
2803 *
2804 * Note: does not update the zone statistics, as it can't safely clear the
2805 * per-CPU cache statistic.
2806 *
2807 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
2808 * safe from off-CPU; we should modify the caches to track this information
2809 * directly so that we don't have to.
2810 */
2811 static void
2812 uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
2813 u_int64_t *freesp)
2814 {
2815 uma_cache_t cache;
2816 u_int64_t allocs, frees;
2817 int cachefree, cpu;
2818
2819 allocs = frees = 0;
2820 cachefree = 0;
2821 for (cpu = 0; cpu <= mp_maxid; cpu++) {
2822 if (CPU_ABSENT(cpu))
2823 continue;
2824 cache = &z->uz_cpu[cpu];
2825 if (cache->uc_allocbucket != NULL)
2826 cachefree += cache->uc_allocbucket->ub_cnt;
2827 if (cache->uc_freebucket != NULL)
2828 cachefree += cache->uc_freebucket->ub_cnt;
2829 allocs += cache->uc_allocs;
2830 frees += cache->uc_frees;
2831 }
2832 allocs += z->uz_allocs;
2833 frees += z->uz_frees;
2834 if (cachefreep != NULL)
2835 *cachefreep = cachefree;
2836 if (allocsp != NULL)
2837 *allocsp = allocs;
2838 if (freesp != NULL)
2839 *freesp = frees;
2840 }
2841
2842 /*
2843 * Sysctl handler for vm.zone
2844 *
2845 * stolen from vm_zone.c
2846 */
2847 static int
2848 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2849 {
2850 int error, len, cnt;
2851 const int linesize = 128; /* conservative */
2852 int totalfree;
2853 char *tmpbuf, *offset;
2854 uma_zone_t z;
2855 uma_keg_t zk;
2856 char *p;
2857 int cachefree;
2858 uma_bucket_t bucket;
2859 u_int64_t allocs, frees;
2860
2861 cnt = 0;
2862 mtx_lock(&uma_mtx);
2863 LIST_FOREACH(zk, &uma_kegs, uk_link) {
2864 LIST_FOREACH(z, &zk->uk_zones, uz_link)
2865 cnt++;
2866 }
2867 mtx_unlock(&uma_mtx);
2868 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2869 M_TEMP, M_WAITOK);
2870 len = snprintf(tmpbuf, linesize,
2871 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
2872 if (cnt == 0)
2873 tmpbuf[len - 1] = '\0';
2874 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2875 if (error || cnt == 0)
2876 goto out;
2877 offset = tmpbuf;
2878 mtx_lock(&uma_mtx);
2879 LIST_FOREACH(zk, &uma_kegs, uk_link) {
2880 LIST_FOREACH(z, &zk->uk_zones, uz_link) {
2881 if (cnt == 0) /* list may have changed size */
2882 break;
2883 ZONE_LOCK(z);
2884 cachefree = 0;
2885 if (!(zk->uk_flags & UMA_ZFLAG_INTERNAL)) {
2886 uma_zone_sumstat(z, &cachefree, &allocs, &frees);
2887 } else {
2888 allocs = z->uz_allocs;
2889 frees = z->uz_frees;
2890 }
2891
2892 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
2893 cachefree += bucket->ub_cnt;
2894 }
2895 totalfree = zk->uk_free + cachefree;
2896 len = snprintf(offset, linesize,
2897 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2898 z->uz_name, zk->uk_size,
2899 zk->uk_maxpages * zk->uk_ipers,
2900 (zk->uk_ipers * (zk->uk_pages / zk->uk_ppera)) - totalfree,
2901 totalfree,
2902 (unsigned long long)allocs);
2903 ZONE_UNLOCK(z);
2904 for (p = offset + 12; p > offset && *p == ' '; --p)
2905 /* nothing */ ;
2906 p[1] = ':';
2907 cnt--;
2908 offset += len;
2909 }
2910 }
2911 mtx_unlock(&uma_mtx);
2912 *offset++ = '\0';
2913 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2914 out:
2915 FREE(tmpbuf, M_TEMP);
2916 return (error);
2917 }
2918
2919 static int
2920 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
2921 {
2922 uma_keg_t kz;
2923 uma_zone_t z;
2924 int count;
2925
2926 count = 0;
2927 mtx_lock(&uma_mtx);
2928 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2929 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2930 count++;
2931 }
2932 mtx_unlock(&uma_mtx);
2933 return (sysctl_handle_int(oidp, &count, 0, req));
2934 }
2935
2936 static int
2937 sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
2938 {
2939 struct uma_stream_header ush;
2940 struct uma_type_header uth;
2941 struct uma_percpu_stat ups;
2942 uma_bucket_t bucket;
2943 struct sbuf sbuf;
2944 uma_cache_t cache;
2945 uma_keg_t kz;
2946 uma_zone_t z;
2947 char *buffer;
2948 int buflen, count, error, i;
2949
2950 mtx_lock(&uma_mtx);
2951 restart:
2952 mtx_assert(&uma_mtx, MA_OWNED);
2953 count = 0;
2954 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2955 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2956 count++;
2957 }
2958 mtx_unlock(&uma_mtx);
2959
2960 buflen = sizeof(ush) + count * (sizeof(uth) + sizeof(ups) *
2961 (mp_maxid + 1)) + 1;
2962 buffer = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
2963
2964 mtx_lock(&uma_mtx);
2965 i = 0;
2966 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2967 LIST_FOREACH(z, &kz->uk_zones, uz_link)
2968 i++;
2969 }
2970 if (i > count) {
2971 free(buffer, M_TEMP);
2972 goto restart;
2973 }
2974 count = i;
2975
2976 sbuf_new(&sbuf, buffer, buflen, SBUF_FIXEDLEN);
2977
2978 /*
2979 * Insert stream header.
2980 */
2981 bzero(&ush, sizeof(ush));
2982 ush.ush_version = UMA_STREAM_VERSION;
2983 ush.ush_maxcpus = (mp_maxid + 1);
2984 ush.ush_count = count;
2985 if (sbuf_bcat(&sbuf, &ush, sizeof(ush)) < 0) {
2986 mtx_unlock(&uma_mtx);
2987 error = ENOMEM;
2988 goto out;
2989 }
2990
2991 LIST_FOREACH(kz, &uma_kegs, uk_link) {
2992 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
2993 bzero(&uth, sizeof(uth));
2994 ZONE_LOCK(z);
2995 strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
2996 uth.uth_align = kz->uk_align;
2997 uth.uth_pages = kz->uk_pages;
2998 uth.uth_keg_free = kz->uk_free;
2999 uth.uth_size = kz->uk_size;
3000 uth.uth_rsize = kz->uk_rsize;
3001 uth.uth_maxpages = kz->uk_maxpages;
3002 if (kz->uk_ppera > 1)
3003 uth.uth_limit = kz->uk_maxpages /
3004 kz->uk_ppera;
3005 else
3006 uth.uth_limit = kz->uk_maxpages *
3007 kz->uk_ipers;
3008
3009 /*
3010 			 * A zone is secondary if it is not the first entry
3011 * on the keg's zone list.
3012 */
3013 if ((kz->uk_flags & UMA_ZONE_SECONDARY) &&
3014 (LIST_FIRST(&kz->uk_zones) != z))
3015 uth.uth_zone_flags = UTH_ZONE_SECONDARY;
3016
3017 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
3018 uth.uth_zone_free += bucket->ub_cnt;
3019 uth.uth_allocs = z->uz_allocs;
3020 uth.uth_frees = z->uz_frees;
3021 uth.uth_fails = z->uz_fails;
3022 if (sbuf_bcat(&sbuf, &uth, sizeof(uth)) < 0) {
3023 ZONE_UNLOCK(z);
3024 mtx_unlock(&uma_mtx);
3025 error = ENOMEM;
3026 goto out;
3027 }
3028 /*
3029 * While it is not normally safe to access the cache
3030 * bucket pointers while not on the CPU that owns the
3031 * cache, we only allow the pointers to be exchanged
3032 * without the zone lock held, not invalidated, so
3033 * accept the possible race associated with bucket
3034 * exchange during monitoring.
3035 */
3036 for (i = 0; i < (mp_maxid + 1); i++) {
3037 bzero(&ups, sizeof(ups));
3038 if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
3039 goto skip;
3040 if (CPU_ABSENT(i))
3041 goto skip;
3042 cache = &z->uz_cpu[i];
3043 if (cache->uc_allocbucket != NULL)
3044 ups.ups_cache_free +=
3045 cache->uc_allocbucket->ub_cnt;
3046 if (cache->uc_freebucket != NULL)
3047 ups.ups_cache_free +=
3048 cache->uc_freebucket->ub_cnt;
3049 ups.ups_allocs = cache->uc_allocs;
3050 ups.ups_frees = cache->uc_frees;
3051 skip:
3052 if (sbuf_bcat(&sbuf, &ups, sizeof(ups)) < 0) {
3053 ZONE_UNLOCK(z);
3054 mtx_unlock(&uma_mtx);
3055 error = ENOMEM;
3056 goto out;
3057 }
3058 }
3059 ZONE_UNLOCK(z);
3060 }
3061 }
3062 mtx_unlock(&uma_mtx);
3063 sbuf_finish(&sbuf);
3064 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
3065 out:
3066 free(buffer, M_TEMP);
3067 return (error);
3068 }
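/*
 * The stream emitted above (a uma_stream_header followed, for each
 * zone, by a uma_type_header and ush_maxcpus uma_percpu_stat records)
 * is the format that userland consumers such as vmstat -z, via
 * memstat(3), are expected to parse.
 */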
3069
3070 #ifdef DDB
3071 DB_SHOW_COMMAND(uma, db_show_uma)
3072 {
3073 u_int64_t allocs, frees;
3074 uma_bucket_t bucket;
3075 uma_keg_t kz;
3076 uma_zone_t z;
3077 int cachefree;
3078
3079 db_printf("%18s %12s %12s %12s %8s\n", "Zone", "Allocs", "Frees",
3080 "Used", "Cache");
3081 LIST_FOREACH(kz, &uma_kegs, uk_link) {
3082 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3083 if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
3084 allocs = z->uz_allocs;
3085 frees = z->uz_frees;
3086 cachefree = 0;
3087 } else
3088 uma_zone_sumstat(z, &cachefree, &allocs,
3089 &frees);
3090 if (!((kz->uk_flags & UMA_ZONE_SECONDARY) &&
3091 (LIST_FIRST(&kz->uk_zones) != z)))
3092 cachefree += kz->uk_free;
3093 LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
3094 cachefree += bucket->ub_cnt;
3095 db_printf("%18s %12ju %12ju %12ju %8d\n", z->uz_name,
3096 allocs, frees, allocs - frees, cachefree);
3097 }
3098 }
3099 }
3100 #endif