FreeBSD/Linux Kernel Cross Reference
sys/vm/uma_core.c
1 /*-
2 * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
3 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
4 * Copyright (c) 2004-2006 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * uma_core.c Implementation of the Universal Memory allocator
31 *
32 * This allocator is intended to replace the multitude of similar object caches
33 * in the standard FreeBSD kernel. The intent is to be flexible as well as
34 * efficient. A primary design goal is to return unused memory to the rest of
35 * the system. This will make the system as a whole more flexible due to the
36 * ability to move memory to subsystems which most need it instead of leaving
37 * pools of reserved memory unused.
38 *
39 * The basic ideas stem from similar slab/zone based allocators whose algorithms
40 * are well known.
41 *
42 */
43
44 /*
45 * TODO:
46 * - Improve memory usage for large allocations
47 * - Investigate cache size adjustments
48 */
49
50 #include <sys/cdefs.h>
51 __FBSDID("$FreeBSD: releng/11.2/sys/vm/uma_core.c 332572 2018-04-16 15:07:19Z glebius $");
52
53 /* I should really use ktr.. */
54 /*
55 #define UMA_DEBUG 1
56 #define UMA_DEBUG_ALLOC 1
57 #define UMA_DEBUG_ALLOC_1 1
58 */
59
60 #include "opt_ddb.h"
61 #include "opt_param.h"
62 #include "opt_vm.h"
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/bitset.h>
67 #include <sys/eventhandler.h>
68 #include <sys/kernel.h>
69 #include <sys/types.h>
70 #include <sys/queue.h>
71 #include <sys/malloc.h>
72 #include <sys/ktr.h>
73 #include <sys/lock.h>
74 #include <sys/sysctl.h>
75 #include <sys/mutex.h>
76 #include <sys/proc.h>
77 #include <sys/random.h>
78 #include <sys/rwlock.h>
79 #include <sys/sbuf.h>
80 #include <sys/sched.h>
81 #include <sys/smp.h>
82 #include <sys/taskqueue.h>
83 #include <sys/vmmeter.h>
84
85 #include <vm/vm.h>
86 #include <vm/vm_object.h>
87 #include <vm/vm_page.h>
88 #include <vm/vm_pageout.h>
89 #include <vm/vm_param.h>
90 #include <vm/vm_map.h>
91 #include <vm/vm_kern.h>
92 #include <vm/vm_extern.h>
93 #include <vm/uma.h>
94 #include <vm/uma_int.h>
95 #include <vm/uma_dbg.h>
96
97 #include <ddb/ddb.h>
98
99 #ifdef DEBUG_MEMGUARD
100 #include <vm/memguard.h>
101 #endif
102
103 /*
104 * This is the zone and keg from which all zones are spawned. The idea is that
105 * even the zone & keg heads are allocated from the allocator, so we use the
106 * bss section to bootstrap us.
107 */
108 static struct uma_keg masterkeg;
109 static struct uma_zone masterzone_k;
110 static struct uma_zone masterzone_z;
111 static uma_zone_t kegs = &masterzone_k;
112 static uma_zone_t zones = &masterzone_z;
113
114 /* This is the zone from which all of uma_slab_t's are allocated. */
115 static uma_zone_t slabzone;
116
117 /*
118 * The initial hash tables come out of this zone so they can be allocated
119 * prior to malloc coming up.
120 */
121 static uma_zone_t hashzone;
122
123 /* The boot-time adjusted value for cache line alignment. */
124 int uma_align_cache = 64 - 1;
125
126 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
127
128 /*
129 * Are we allowed to allocate buckets?
130 */
131 static int bucketdisable = 1;
132
133 /* Linked list of all kegs in the system */
134 static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
135
136 /* Linked list of all cache-only zones in the system */
137 static LIST_HEAD(,uma_zone) uma_cachezones =
138 LIST_HEAD_INITIALIZER(uma_cachezones);
139
140 /* This RW lock protects the keg list */
141 static struct rwlock_padalign __exclusive_cache_line uma_rwlock;
142
143 /* Linked list of boot time pages */
144 static LIST_HEAD(,uma_slab) uma_boot_pages =
145 LIST_HEAD_INITIALIZER(uma_boot_pages);
146
147 /* This mutex protects the boot time pages list */
148 static struct mtx_padalign uma_boot_pages_mtx;
149
150 static struct sx uma_drain_lock;
151
152 /* Is the VM done starting up? */
153 static int booted = 0;
154 #define UMA_STARTUP 1
155 #define UMA_STARTUP2 2
156
157 /*
158 * This is the handle used to schedule events that need to happen
159 * outside of the allocation fast path.
160 */
161 static struct callout uma_callout;
162 #define UMA_TIMEOUT 20 /* Seconds for callout interval. */
163
164 /*
165 * This structure is passed as the zone ctor arg so that I don't have to create
166 * a special allocation function just for zones.
167 */
168 struct uma_zctor_args {
169 const char *name;
170 size_t size;
171 uma_ctor ctor;
172 uma_dtor dtor;
173 uma_init uminit;
174 uma_fini fini;
175 uma_import import;
176 uma_release release;
177 void *arg;
178 uma_keg_t keg;
179 int align;
180 uint32_t flags;
181 };
182
183 struct uma_kctor_args {
184 uma_zone_t zone;
185 size_t size;
186 uma_init uminit;
187 uma_fini fini;
188 int align;
189 uint32_t flags;
190 };
191
192 struct uma_bucket_zone {
193 uma_zone_t ubz_zone;
194 char *ubz_name;
195 int ubz_entries; /* Number of items it can hold. */
196 int ubz_maxsize; /* Maximum allocation size per-item. */
197 };
198
199 /*
200  * Compute the actual number of bucket entries so that buckets pack into
201  * power-of-two allocation sizes for more efficient space utilization.
202 */
203 #define BUCKET_SIZE(n) \
204 (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
205
206 #define BUCKET_MAX BUCKET_SIZE(256)
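
/*
 * Editor's worked example (illustrative, not part of the original file):
 * on an LP64 machine sizeof(void *) is 8, so BUCKET_SIZE(n) carves n
 * pointer-sized slots out of an allocation and subtracts the header.
 * If we assume, purely for illustration, that sizeof(struct uma_bucket)
 * rounds to 24 bytes, then:
 *
 *	BUCKET_SIZE(16)  = ((8 * 16)  - 24) / 8 = 13 item pointers
 *	BUCKET_SIZE(128) = ((8 * 128) - 24) / 8 = 125 item pointers
 *
 * i.e. each "N Bucket" zone holds slightly fewer than N items so that the
 * header plus pointer array fills exactly an N-pointer-sized allocation.
 * The real counts depend on the actual struct uma_bucket layout.
 */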
207
208 struct uma_bucket_zone bucket_zones[] = {
209 { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
210 { NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
211 { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
212 { NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
213 { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
214 { NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
215 { NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
216 { NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
217 { NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
218 { NULL, NULL, 0}
219 };
220
221 /*
222 * Flags and enumerations to be passed to internal functions.
223 */
224 enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
225
226 /* Prototypes.. */
227
228 static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
229 static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
230 static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
231 static void page_free(void *, vm_size_t, uint8_t);
232 static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
233 static void cache_drain(uma_zone_t);
234 static void bucket_drain(uma_zone_t, uma_bucket_t);
235 static void bucket_cache_drain(uma_zone_t zone);
236 static int keg_ctor(void *, int, void *, int);
237 static void keg_dtor(void *, int, void *);
238 static int zone_ctor(void *, int, void *, int);
239 static void zone_dtor(void *, int, void *);
240 static int zero_init(void *, int, int);
241 static void keg_small_init(uma_keg_t keg);
242 static void keg_large_init(uma_keg_t keg);
243 static void zone_foreach(void (*zfunc)(uma_zone_t));
244 static void zone_timeout(uma_zone_t zone);
245 static int hash_alloc(struct uma_hash *);
246 static int hash_expand(struct uma_hash *, struct uma_hash *);
247 static void hash_free(struct uma_hash *hash);
248 static void uma_timeout(void *);
249 static void uma_startup3(void);
250 static void *zone_alloc_item(uma_zone_t, void *, int);
251 static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
252 static void bucket_enable(void);
253 static void bucket_init(void);
254 static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
255 static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
256 static void bucket_zone_drain(void);
257 static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
258 static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
259 static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
260 static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
261 static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
262 static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
263 uma_fini fini, int align, uint32_t flags);
264 static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
265 static void zone_release(uma_zone_t zone, void **bucket, int cnt);
266 static void uma_zero_item(void *item, uma_zone_t zone);
267
268 void uma_print_zone(uma_zone_t);
269 void uma_print_stats(void);
270 static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
271 static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
272
273 #ifdef INVARIANTS
274 static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
275 static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
276 #endif
277
278 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
279
280 SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
281 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
282
283 SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
284 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
285
286 static int zone_warnings = 1;
287 SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
288     "Warn when UMA zones become full");
289
290 /*
291 * This routine checks to see whether or not it's safe to enable buckets.
292 */
293 static void
294 bucket_enable(void)
295 {
296 bucketdisable = vm_page_count_min();
297 }
298
299 /*
300 * Initialize bucket_zones, the array of zones of buckets of various sizes.
301 *
302 * For each zone, calculate the memory required for each bucket, consisting
303 * of the header and an array of pointers.
304 */
305 static void
306 bucket_init(void)
307 {
308 struct uma_bucket_zone *ubz;
309 int size;
310
311 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
312 size = roundup(sizeof(struct uma_bucket), sizeof(void *));
313 size += sizeof(void *) * ubz->ubz_entries;
314 ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
315 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
316 UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
317 }
318 }
319
320 /*
321 * Given a desired number of entries for a bucket, return the zone from which
322 * to allocate the bucket.
323 */
324 static struct uma_bucket_zone *
325 bucket_zone_lookup(int entries)
326 {
327 struct uma_bucket_zone *ubz;
328
329 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
330 if (ubz->ubz_entries >= entries)
331 return (ubz);
332 ubz--;
333 return (ubz);
334 }
335
336 static int
337 bucket_select(int size)
338 {
339 struct uma_bucket_zone *ubz;
340
341 ubz = &bucket_zones[0];
342 if (size > ubz->ubz_maxsize)
343 return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
344
345 for (; ubz->ubz_entries != 0; ubz++)
346 if (ubz->ubz_maxsize < size)
347 break;
348 ubz--;
349 return (ubz->ubz_entries);
350 }
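
/*
 * Editor's illustrative trace (not part of the original file), using the
 * bucket_zones[] table above: for a zone whose item size is 1000 bytes the
 * loop stops at "32 Bucket" (ubz_maxsize 512 < 1000), steps back one entry
 * and returns the entry count of the "16 Bucket" zone (ubz_maxsize 1024).
 * For items larger than the 4096-byte maximum of the first table entry,
 * the early return scales the count down so that a full bucket still
 * caches roughly ubz_maxsize * ubz_entries bytes, but never fewer than
 * one item.
 */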
351
352 static uma_bucket_t
353 bucket_alloc(uma_zone_t zone, void *udata, int flags)
354 {
355 struct uma_bucket_zone *ubz;
356 uma_bucket_t bucket;
357
358 /*
359 * This is to stop us from allocating per cpu buckets while we're
360 * running out of vm.boot_pages. Otherwise, we would exhaust the
361 * boot pages. This also prevents us from allocating buckets in
362 * low memory situations.
363 */
364 if (bucketdisable)
365 return (NULL);
366 /*
367 * To limit bucket recursion we store the original zone flags
368 * in a cookie passed via zalloc_arg/zfree_arg. This allows the
369 * NOVM flag to persist even through deep recursions. We also
370 * store ZFLAG_BUCKET once we have recursed attempting to allocate
371 * a bucket for a bucket zone so we do not allow infinite bucket
372 * recursion. This cookie will even persist to frees of unused
373 * buckets via the allocation path or bucket allocations in the
374 * free path.
375 */
376 if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
377 udata = (void *)(uintptr_t)zone->uz_flags;
378 else {
379 if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
380 return (NULL);
381 udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
382 }
383 if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
384 flags |= M_NOVM;
385 ubz = bucket_zone_lookup(zone->uz_count);
386 if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
387 ubz++;
388 bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
389 if (bucket) {
390 #ifdef INVARIANTS
391 bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
392 #endif
393 bucket->ub_cnt = 0;
394 bucket->ub_entries = ubz->ubz_entries;
395 }
396
397 return (bucket);
398 }
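
/*
 * Editor's note (illustrative, not part of the original file): the cookie
 * bounds the recursion because an ordinary zone passes its own uz_flags as
 * udata, while a bucket zone only ORs UMA_ZFLAG_BUCKET into the cookie it
 * was handed.  The first nested bucket allocation therefore succeeds, but
 * a second level finds UMA_ZFLAG_BUCKET already set in udata and returns
 * NULL, so bucket allocation never recurses more than one level deep.
 */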
399
400 static void
401 bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
402 {
403 struct uma_bucket_zone *ubz;
404
405 KASSERT(bucket->ub_cnt == 0,
406 ("bucket_free: Freeing a non free bucket."));
407 if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
408 udata = (void *)(uintptr_t)zone->uz_flags;
409 ubz = bucket_zone_lookup(bucket->ub_entries);
410 uma_zfree_arg(ubz->ubz_zone, bucket, udata);
411 }
412
413 static void
414 bucket_zone_drain(void)
415 {
416 struct uma_bucket_zone *ubz;
417
418 for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
419 zone_drain(ubz->ubz_zone);
420 }
421
422 static void
423 zone_log_warning(uma_zone_t zone)
424 {
425 static const struct timeval warninterval = { 300, 0 };
426
427 if (!zone_warnings || zone->uz_warning == NULL)
428 return;
429
430 if (ratecheck(&zone->uz_ratecheck, &warninterval))
431 printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
432 }
433
434 static inline void
435 zone_maxaction(uma_zone_t zone)
436 {
437
438 if (zone->uz_maxaction.ta_func != NULL)
439 taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
440 }
441
442 static void
443 zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
444 {
445 uma_klink_t klink;
446
447 LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
448 kegfn(klink->kl_keg);
449 }
450
451 /*
452  * Routine called by the callout to fire off time-interval based
453  * calculations (stats, hash size, etc.).
454 *
455 * Arguments:
456 * arg Unused
457 *
458 * Returns:
459 * Nothing
460 */
461 static void
462 uma_timeout(void *unused)
463 {
464 bucket_enable();
465 zone_foreach(zone_timeout);
466
467 /* Reschedule this event */
468 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
469 }
470
471 /*
472  * Routine to perform timeout driven calculations.  This expands the
473  * keg hash table when the slab count outgrows it.
474 *
475 * Returns nothing.
476 */
477 static void
478 keg_timeout(uma_keg_t keg)
479 {
480
481 KEG_LOCK(keg);
482 /*
483 * Expand the keg hash table.
484 *
485 * This is done if the number of slabs is larger than the hash size.
486 * What I'm trying to do here is completely reduce collisions. This
487 * may be a little aggressive. Should I allow for two collisions max?
488 */
489 if (keg->uk_flags & UMA_ZONE_HASH &&
490 keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
491 struct uma_hash newhash;
492 struct uma_hash oldhash;
493 int ret;
494
495 /*
496 * This is so involved because allocating and freeing
497 * while the keg lock is held will lead to deadlock.
498 * I have to do everything in stages and check for
499 * races.
500 */
501 newhash = keg->uk_hash;
502 KEG_UNLOCK(keg);
503 ret = hash_alloc(&newhash);
504 KEG_LOCK(keg);
505 if (ret) {
506 if (hash_expand(&keg->uk_hash, &newhash)) {
507 oldhash = keg->uk_hash;
508 keg->uk_hash = newhash;
509 } else
510 oldhash = newhash;
511
512 KEG_UNLOCK(keg);
513 hash_free(&oldhash);
514 return;
515 }
516 }
517 KEG_UNLOCK(keg);
518 }
519
520 static void
521 zone_timeout(uma_zone_t zone)
522 {
523
524 zone_foreach_keg(zone, &keg_timeout);
525 }
526
527 /*
528 * Allocate and zero fill the next sized hash table from the appropriate
529 * backing store.
530 *
531 * Arguments:
532 * hash A new hash structure with the old hash size in uh_hashsize
533 *
534 * Returns:
535 * 1 on success and 0 on failure.
536 */
537 static int
538 hash_alloc(struct uma_hash *hash)
539 {
540 int oldsize;
541 int alloc;
542
543 oldsize = hash->uh_hashsize;
544
545 /* We're just going to go to a power of two greater */
546 if (oldsize) {
547 hash->uh_hashsize = oldsize * 2;
548 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
549 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
550 M_UMAHASH, M_NOWAIT);
551 } else {
552 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
553 hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
554 M_WAITOK);
555 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
556 }
557 if (hash->uh_slab_hash) {
558 bzero(hash->uh_slab_hash, alloc);
559 hash->uh_hashmask = hash->uh_hashsize - 1;
560 return (1);
561 }
562
563 return (0);
564 }
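
/*
 * Editor's note (illustrative, not part of the original file): the first
 * table for a keg is a fixed UMA_HASH_SIZE_INIT-entry array taken from
 * hashzone so it can be built before malloc(9) is usable; every later
 * expansion doubles uh_hashsize and switches to malloc(M_UMAHASH), so a
 * steadily growing keg sees UMA_HASH_SIZE_INIT, then 2x, 4x, ... entries,
 * with hash_expand() rehashing all slabs into the new table each time.
 */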
565
566 /*
567 * Expands the hash table for HASH zones. This is done from zone_timeout
568 * to reduce collisions. This must not be done in the regular allocation
569  * path; otherwise, we can recurse on the VM while allocating pages.
570 *
571 * Arguments:
572 * oldhash The hash you want to expand
573 * newhash The hash structure for the new table
574 *
575 * Returns:
576 * Nothing
577 *
578 * Discussion:
579 */
580 static int
581 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
582 {
583 uma_slab_t slab;
584 int hval;
585 int i;
586
587 if (!newhash->uh_slab_hash)
588 return (0);
589
590 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
591 return (0);
592
593 /*
594 * I need to investigate hash algorithms for resizing without a
595 * full rehash.
596 */
597
598 for (i = 0; i < oldhash->uh_hashsize; i++)
599 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
600 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
601 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
602 hval = UMA_HASH(newhash, slab->us_data);
603 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
604 slab, us_hlink);
605 }
606
607 return (1);
608 }
609
610 /*
611 * Free the hash bucket to the appropriate backing store.
612 *
613 * Arguments:
614  *	hash  The hash structure whose uh_slab_hash array is being freed;
615  *	      uh_hashsize selects which backing store it returns to.
616 *
617 * Returns:
618 * Nothing
619 */
620 static void
621 hash_free(struct uma_hash *hash)
622 {
623 if (hash->uh_slab_hash == NULL)
624 return;
625 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
626 zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
627 else
628 free(hash->uh_slab_hash, M_UMAHASH);
629 }
630
631 /*
632 * Frees all outstanding items in a bucket
633 *
634 * Arguments:
635 * zone The zone to free to, must be unlocked.
636 * bucket The free/alloc bucket with items, cpu queue must be locked.
637 *
638 * Returns:
639 * Nothing
640 */
641
642 static void
643 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
644 {
645 int i;
646
647 if (bucket == NULL)
648 return;
649
650 if (zone->uz_fini)
651 for (i = 0; i < bucket->ub_cnt; i++)
652 zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
653 zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
654 bucket->ub_cnt = 0;
655 }
656
657 /*
658 * Drains the per cpu caches for a zone.
659 *
660  * NOTE: This may only be called while the zone is being torn down, and not
661 * during normal operation. This is necessary in order that we do not have
662 * to migrate CPUs to drain the per-CPU caches.
663 *
664 * Arguments:
665 * zone The zone to drain, must be unlocked.
666 *
667 * Returns:
668 * Nothing
669 */
670 static void
671 cache_drain(uma_zone_t zone)
672 {
673 uma_cache_t cache;
674 int cpu;
675
676 /*
677 * XXX: It is safe to not lock the per-CPU caches, because we're
678 * tearing down the zone anyway. I.e., there will be no further use
679 * of the caches at this point.
680 *
681  * XXX: It would be good to be able to assert that the zone is being
682 * torn down to prevent improper use of cache_drain().
683 *
684 * XXX: We lock the zone before passing into bucket_cache_drain() as
685 * it is used elsewhere. Should the tear-down path be made special
686 * there in some form?
687 */
688 CPU_FOREACH(cpu) {
689 cache = &zone->uz_cpu[cpu];
690 bucket_drain(zone, cache->uc_allocbucket);
691 bucket_drain(zone, cache->uc_freebucket);
692 if (cache->uc_allocbucket != NULL)
693 bucket_free(zone, cache->uc_allocbucket, NULL);
694 if (cache->uc_freebucket != NULL)
695 bucket_free(zone, cache->uc_freebucket, NULL);
696 cache->uc_allocbucket = cache->uc_freebucket = NULL;
697 }
698 ZONE_LOCK(zone);
699 bucket_cache_drain(zone);
700 ZONE_UNLOCK(zone);
701 }
702
703 static void
704 cache_shrink(uma_zone_t zone)
705 {
706
707 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
708 return;
709
710 ZONE_LOCK(zone);
711 zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
712 ZONE_UNLOCK(zone);
713 }
714
715 static void
716 cache_drain_safe_cpu(uma_zone_t zone)
717 {
718 uma_cache_t cache;
719 uma_bucket_t b1, b2;
720
721 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
722 return;
723
724 b1 = b2 = NULL;
725 ZONE_LOCK(zone);
726 critical_enter();
727 cache = &zone->uz_cpu[curcpu];
728 if (cache->uc_allocbucket) {
729 if (cache->uc_allocbucket->ub_cnt != 0)
730 LIST_INSERT_HEAD(&zone->uz_buckets,
731 cache->uc_allocbucket, ub_link);
732 else
733 b1 = cache->uc_allocbucket;
734 cache->uc_allocbucket = NULL;
735 }
736 if (cache->uc_freebucket) {
737 if (cache->uc_freebucket->ub_cnt != 0)
738 LIST_INSERT_HEAD(&zone->uz_buckets,
739 cache->uc_freebucket, ub_link);
740 else
741 b2 = cache->uc_freebucket;
742 cache->uc_freebucket = NULL;
743 }
744 critical_exit();
745 ZONE_UNLOCK(zone);
746 if (b1)
747 bucket_free(zone, b1, NULL);
748 if (b2)
749 bucket_free(zone, b2, NULL);
750 }
751
752 /*
753  * Safely drain the per-CPU caches of a zone (or of all zones) into the
754  * zone bucket cache.  This is an expensive call because it needs to bind
755  * to all CPUs one by one and enter a critical section on each of them in
756  * order to safely access their cache buckets.
757  * The zone lock must not be held when calling this function.
758 */
759 static void
760 cache_drain_safe(uma_zone_t zone)
761 {
762 int cpu;
763
764 /*
765  * Polite bucket size shrinking was not enough, so shrink aggressively.
766 */
767 if (zone)
768 cache_shrink(zone);
769 else
770 zone_foreach(cache_shrink);
771
772 CPU_FOREACH(cpu) {
773 thread_lock(curthread);
774 sched_bind(curthread, cpu);
775 thread_unlock(curthread);
776
777 if (zone)
778 cache_drain_safe_cpu(zone);
779 else
780 zone_foreach(cache_drain_safe_cpu);
781 }
782 thread_lock(curthread);
783 sched_unbind(curthread);
784 thread_unlock(curthread);
785 }
786
787 /*
788 * Drain the cached buckets from a zone. Expects a locked zone on entry.
789 */
790 static void
791 bucket_cache_drain(uma_zone_t zone)
792 {
793 uma_bucket_t bucket;
794
795 /*
796  * Drain the bucket queues and free the buckets; we just keep two per
797  * CPU (alloc/free).
798 */
799 while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
800 LIST_REMOVE(bucket, ub_link);
801 ZONE_UNLOCK(zone);
802 bucket_drain(zone, bucket);
803 bucket_free(zone, bucket, NULL);
804 ZONE_LOCK(zone);
805 }
806
807 /*
808  * Shrink bucket sizes further.  The price of a single zone lock collision
809  * is probably lower than the price of a global cache drain.
810 */
811 if (zone->uz_count > zone->uz_count_min)
812 zone->uz_count--;
813 }
814
815 static void
816 keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
817 {
818 uint8_t *mem;
819 int i;
820 uint8_t flags;
821
822 mem = slab->us_data;
823 flags = slab->us_flags;
824 i = start;
825 if (keg->uk_fini != NULL) {
826 for (i--; i > -1; i--)
827 keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
828 keg->uk_size);
829 }
830 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
831 zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
832 #ifdef UMA_DEBUG
833 printf("%s: Returning %d bytes.\n", keg->uk_name,
834 PAGE_SIZE * keg->uk_ppera);
835 #endif
836 keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
837 }
838
839 /*
840 * Frees pages from a keg back to the system. This is done on demand from
841 * the pageout daemon.
842 *
843 * Returns nothing.
844 */
845 static void
846 keg_drain(uma_keg_t keg)
847 {
848 struct slabhead freeslabs = { 0 };
849 uma_slab_t slab, tmp;
850
851 /*
852 * We don't want to take pages from statically allocated kegs at this
853 * time
854 */
855 if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
856 return;
857
858 #ifdef UMA_DEBUG
859 printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
860 #endif
861 KEG_LOCK(keg);
862 if (keg->uk_free == 0)
863 goto finished;
864
865 LIST_FOREACH_SAFE(slab, &keg->uk_free_slab, us_link, tmp) {
866 /* We have nowhere to free these to. */
867 if (slab->us_flags & UMA_SLAB_BOOT)
868 continue;
869
870 LIST_REMOVE(slab, us_link);
871 keg->uk_pages -= keg->uk_ppera;
872 keg->uk_free -= keg->uk_ipers;
873
874 if (keg->uk_flags & UMA_ZONE_HASH)
875 UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
876
877 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
878 }
879 finished:
880 KEG_UNLOCK(keg);
881
882 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
883 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
884 keg_free_slab(keg, slab, keg->uk_ipers);
885 }
886 }
887
888 static void
889 zone_drain_wait(uma_zone_t zone, int waitok)
890 {
891
892 /*
893 * Set draining to interlock with zone_dtor() so we can release our
894 * locks as we go. Only dtor() should do a WAITOK call since it
895 * is the only call that knows the structure will still be available
896 * when it wakes up.
897 */
898 ZONE_LOCK(zone);
899 while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
900 if (waitok == M_NOWAIT)
901 goto out;
902 msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
903 }
904 zone->uz_flags |= UMA_ZFLAG_DRAINING;
905 bucket_cache_drain(zone);
906 ZONE_UNLOCK(zone);
907 /*
908 * The DRAINING flag protects us from being freed while
909 * we're running. Normally the uma_rwlock would protect us but we
910 * must be able to release and acquire the right lock for each keg.
911 */
912 zone_foreach_keg(zone, &keg_drain);
913 ZONE_LOCK(zone);
914 zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
915 wakeup(zone);
916 out:
917 ZONE_UNLOCK(zone);
918 }
919
920 void
921 zone_drain(uma_zone_t zone)
922 {
923
924 zone_drain_wait(zone, M_NOWAIT);
925 }
926
927 /*
928 * Allocate a new slab for a keg. This does not insert the slab onto a list.
929 *
930 * Arguments:
931 * wait Shall we wait?
932 *
933 * Returns:
934 * The slab that was allocated or NULL if there is no memory and the
935 * caller specified M_NOWAIT.
936 */
937 static uma_slab_t
938 keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
939 {
940 uma_alloc allocf;
941 uma_slab_t slab;
942 uint8_t *mem;
943 uint8_t flags;
944 int i;
945
946 mtx_assert(&keg->uk_lock, MA_OWNED);
947 slab = NULL;
948 mem = NULL;
949
950 #ifdef UMA_DEBUG
951 printf("alloc_slab: Allocating a new slab for %s\n", keg->uk_name);
952 #endif
953 allocf = keg->uk_allocf;
954 KEG_UNLOCK(keg);
955
956 if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
957 slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
958 if (slab == NULL)
959 goto out;
960 }
961
962 /*
963 * This reproduces the old vm_zone behavior of zero filling pages the
964 * first time they are added to a zone.
965 *
966 * Malloced items are zeroed in uma_zalloc.
967 */
968
969 if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
970 wait |= M_ZERO;
971 else
972 wait &= ~M_ZERO;
973
974 if (keg->uk_flags & UMA_ZONE_NODUMP)
975 wait |= M_NODUMP;
976
977 /* zone is passed for legacy reasons. */
978 mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
979 if (mem == NULL) {
980 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
981 zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
982 slab = NULL;
983 goto out;
984 }
985
986 /* Point the slab into the allocated memory */
987 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
988 slab = (uma_slab_t )(mem + keg->uk_pgoff);
989
990 if (keg->uk_flags & UMA_ZONE_VTOSLAB)
991 for (i = 0; i < keg->uk_ppera; i++)
992 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
993
994 slab->us_keg = keg;
995 slab->us_data = mem;
996 slab->us_freecount = keg->uk_ipers;
997 slab->us_flags = flags;
998 BIT_FILL(SLAB_SETSIZE, &slab->us_free);
999 #ifdef INVARIANTS
1000 BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
1001 #endif
1002
1003 if (keg->uk_init != NULL) {
1004 for (i = 0; i < keg->uk_ipers; i++)
1005 if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
1006 keg->uk_size, wait) != 0)
1007 break;
1008 if (i != keg->uk_ipers) {
1009 keg_free_slab(keg, slab, i);
1010 slab = NULL;
1011 goto out;
1012 }
1013 }
1014 out:
1015 KEG_LOCK(keg);
1016
1017 if (slab != NULL) {
1018 if (keg->uk_flags & UMA_ZONE_HASH)
1019 UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
1020
1021 keg->uk_pages += keg->uk_ppera;
1022 keg->uk_free += keg->uk_ipers;
1023 }
1024
1025 return (slab);
1026 }
1027
1028 /*
1029 * This function is intended to be used early on in place of page_alloc() so
1030 * that we may use the boot time page cache to satisfy allocations before
1031 * the VM is ready.
1032 */
1033 static void *
1034 startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
1035 {
1036 uma_keg_t keg;
1037 uma_slab_t tmps;
1038 int pages, check_pages;
1039
1040 keg = zone_first_keg(zone);
1041 pages = howmany(bytes, PAGE_SIZE);
1042 check_pages = pages - 1;
1043 KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
1044
1045 /*
1046 * Check our small startup cache to see if it has pages remaining.
1047 */
1048 mtx_lock(&uma_boot_pages_mtx);
1049
1050 /* First check if we have enough room. */
1051 tmps = LIST_FIRST(&uma_boot_pages);
1052 while (tmps != NULL && check_pages-- > 0)
1053 tmps = LIST_NEXT(tmps, us_link);
1054 if (tmps != NULL) {
1055 /*
1056 * It's ok to lose tmps references. The last one will
1057 * have tmps->us_data pointing to the start address of
1058 * "pages" contiguous pages of memory.
1059 */
1060 while (pages-- > 0) {
1061 tmps = LIST_FIRST(&uma_boot_pages);
1062 LIST_REMOVE(tmps, us_link);
1063 }
1064 mtx_unlock(&uma_boot_pages_mtx);
1065 *pflag = tmps->us_flags;
1066 return (tmps->us_data);
1067 }
1068 mtx_unlock(&uma_boot_pages_mtx);
1069 if (booted < UMA_STARTUP2)
1070 panic("UMA: Increase vm.boot_pages");
1071 /*
1072 * Now that we've booted reset these users to their real allocator.
1073 */
1074 #ifdef UMA_MD_SMALL_ALLOC
1075 keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
1076 #else
1077 keg->uk_allocf = page_alloc;
1078 #endif
1079 return keg->uk_allocf(zone, bytes, pflag, wait);
1080 }
1081
1082 /*
1083 * Allocates a number of pages from the system
1084 *
1085 * Arguments:
1086 * bytes The number of bytes requested
1087 * wait Shall we wait?
1088 *
1089 * Returns:
1090 * A pointer to the alloced memory or possibly
1091 * NULL if M_NOWAIT is set.
1092 */
1093 static void *
1094 page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
1095 {
1096 void *p; /* Returned page */
1097
1098 *pflag = UMA_SLAB_KMEM;
1099 p = (void *) kmem_malloc(kmem_arena, bytes, wait);
1100
1101 return (p);
1102 }
1103
1104 /*
1105  * Allocates a number of pages not belonging to a VM object
1106 *
1107 * Arguments:
1108 * bytes The number of bytes requested
1109 * wait Shall we wait?
1110 *
1111 * Returns:
1112 * A pointer to the alloced memory or possibly
1113 * NULL if M_NOWAIT is set.
1114 */
1115 static void *
1116 noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
1117 {
1118 TAILQ_HEAD(, vm_page) alloctail;
1119 u_long npages;
1120 vm_offset_t retkva, zkva;
1121 vm_page_t p, p_next;
1122 uma_keg_t keg;
1123
1124 TAILQ_INIT(&alloctail);
1125 keg = zone_first_keg(zone);
1126
1127 npages = howmany(bytes, PAGE_SIZE);
1128 while (npages > 0) {
1129 p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
1130 VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
1131 ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK :
1132 VM_ALLOC_NOWAIT));
1133 if (p != NULL) {
1134 /*
1135 * Since the page does not belong to an object, its
1136 * listq is unused.
1137 */
1138 TAILQ_INSERT_TAIL(&alloctail, p, listq);
1139 npages--;
1140 continue;
1141 }
1142 /*
1143 * Page allocation failed, free intermediate pages and
1144 * exit.
1145 */
1146 TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
1147 vm_page_unwire(p, PQ_NONE);
1148 vm_page_free(p);
1149 }
1150 return (NULL);
1151 }
1152 *flags = UMA_SLAB_PRIV;
1153 zkva = keg->uk_kva +
1154 atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1155 retkva = zkva;
1156 TAILQ_FOREACH(p, &alloctail, listq) {
1157 pmap_qenter(zkva, &p, 1);
1158 zkva += PAGE_SIZE;
1159 }
1160
1161 return ((void *)retkva);
1162 }
1163
1164 /*
1165 * Frees a number of pages to the system
1166 *
1167 * Arguments:
1168 * mem A pointer to the memory to be freed
1169 * size The size of the memory being freed
1170 * flags The original p->us_flags field
1171 *
1172 * Returns:
1173 * Nothing
1174 */
1175 static void
1176 page_free(void *mem, vm_size_t size, uint8_t flags)
1177 {
1178 struct vmem *vmem;
1179
1180 if (flags & UMA_SLAB_KMEM)
1181 vmem = kmem_arena;
1182 else if (flags & UMA_SLAB_KERNEL)
1183 vmem = kernel_arena;
1184 else
1185 panic("UMA: page_free used with invalid flags %d", flags);
1186
1187 kmem_free(vmem, (vm_offset_t)mem, size);
1188 }
1189
1190 /*
1191 * Zero fill initializer
1192 *
1193 * Arguments/Returns follow uma_init specifications
1194 */
1195 static int
1196 zero_init(void *mem, int size, int flags)
1197 {
1198 bzero(mem, size);
1199 return (0);
1200 }
1201
1202 /*
1203 * Finish creating a small uma keg. This calculates ipers, and the keg size.
1204 *
1205 * Arguments
1206  *	keg  The keg we should initialize
1207 *
1208 * Returns
1209 * Nothing
1210 */
1211 static void
1212 keg_small_init(uma_keg_t keg)
1213 {
1214 u_int rsize;
1215 u_int memused;
1216 u_int wastedspace;
1217 u_int shsize;
1218 u_int slabsize;
1219
1220 if (keg->uk_flags & UMA_ZONE_PCPU) {
1221 u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
1222
1223 slabsize = sizeof(struct pcpu);
1224 keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
1225 PAGE_SIZE);
1226 } else {
1227 slabsize = UMA_SLAB_SIZE;
1228 keg->uk_ppera = 1;
1229 }
1230
1231 /*
1232 * Calculate the size of each allocation (rsize) according to
1233  * alignment.  If the requested size is smaller than we have
1234  * allocation bits for, we round it up.
1235 */
1236 rsize = keg->uk_size;
1237 if (rsize < slabsize / SLAB_SETSIZE)
1238 rsize = slabsize / SLAB_SETSIZE;
1239 if (rsize & keg->uk_align)
1240 rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1241 keg->uk_rsize = rsize;
1242
1243 KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
1244 keg->uk_rsize < sizeof(struct pcpu),
1245 ("%s: size %u too large", __func__, keg->uk_rsize));
1246
1247 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1248 shsize = 0;
1249 else
1250 shsize = sizeof(struct uma_slab);
1251
1252 if (rsize <= slabsize - shsize)
1253 keg->uk_ipers = (slabsize - shsize) / rsize;
1254 else {
1255 /* Handle special case when we have 1 item per slab, so
1256 * alignment requirement can be relaxed. */
1257 KASSERT(keg->uk_size <= slabsize - shsize,
1258 ("%s: size %u greater than slab", __func__, keg->uk_size));
1259 keg->uk_ipers = 1;
1260 }
1261 KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1262 ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1263
1264 memused = keg->uk_ipers * rsize + shsize;
1265 wastedspace = slabsize - memused;
1266
1267 /*
1268 * We can't do OFFPAGE if we're internal or if we've been
1269 * asked to not go to the VM for buckets. If we do this we
1270 * may end up going to the VM for slabs which we do not
1271 * want to do if we're UMA_ZFLAG_CACHEONLY as a result
1272 * of UMA_ZONE_VM, which clearly forbids it.
1273 */
1274 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1275 (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1276 return;
1277
1278 /*
1279 * See if using an OFFPAGE slab will limit our waste. Only do
1280 * this if it permits more items per-slab.
1281 *
1282 * XXX We could try growing slabsize to limit max waste as well.
1283 * Historically this was not done because the VM could not
1284 * efficiently handle contiguous allocations.
1285 */
1286 if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
1287 (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
1288 keg->uk_ipers = slabsize / keg->uk_rsize;
1289 KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1290 ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1291 #ifdef UMA_DEBUG
1292 printf("UMA decided we need offpage slab headers for "
1293 "keg: %s, calculated wastedspace = %d, "
1294 "maximum wasted space allowed = %d, "
1295 "calculated ipers = %d, "
1296 "new wasted space = %d\n", keg->uk_name, wastedspace,
1297 slabsize / UMA_MAX_WASTE, keg->uk_ipers,
1298 slabsize - keg->uk_ipers * keg->uk_rsize);
1299 #endif
1300 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1301 }
1302
1303 if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1304 (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1305 keg->uk_flags |= UMA_ZONE_HASH;
1306 }
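
/*
 * Editor's worked example (illustrative assumptions, not from the file):
 * take a 4 KB slab, a hypothetical 80-byte struct uma_slab header and a
 * keg with uk_size 256 and 8-byte alignment.  Then rsize stays 256,
 * ipers = (4096 - 80) / 256 = 15, memused = 15 * 256 + 80 = 3920 and
 * wastedspace = 176 bytes.  If UMA_MAX_WASTE allows one tenth of the slab
 * (409 bytes here), 176 is acceptable and the keg keeps its in-page slab
 * header; only kegs that waste more than that, and that would also gain
 * items from it, are switched to OFFPAGE above.
 */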
1307
1308 /*
1309  * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
1310 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
1311 * more complicated.
1312 *
1313 * Arguments
1314 * keg The keg we should initialize
1315 *
1316 * Returns
1317 * Nothing
1318 */
1319 static void
1320 keg_large_init(uma_keg_t keg)
1321 {
1322 u_int shsize;
1323
1324 KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1325 KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1326 ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1327 KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1328 ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
1329
1330 keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1331 keg->uk_ipers = 1;
1332 keg->uk_rsize = keg->uk_size;
1333
1334 /* Check whether we have enough space to not do OFFPAGE. */
1335 if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
1336 shsize = sizeof(struct uma_slab);
1337 if (shsize & UMA_ALIGN_PTR)
1338 shsize = (shsize & ~UMA_ALIGN_PTR) +
1339 (UMA_ALIGN_PTR + 1);
1340
1341 if (PAGE_SIZE * keg->uk_ppera - keg->uk_rsize < shsize) {
1342 /*
1343 * We can't do OFFPAGE if we're internal, in which case
1344 * we need an extra page per allocation to contain the
1345 * slab header.
1346 */
1347 if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) == 0)
1348 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1349 else
1350 keg->uk_ppera++;
1351 }
1352 }
1353
1354 if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1355 (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1356 keg->uk_flags |= UMA_ZONE_HASH;
1357 }
1358
1359 static void
1360 keg_cachespread_init(uma_keg_t keg)
1361 {
1362 int alignsize;
1363 int trailer;
1364 int pages;
1365 int rsize;
1366
1367 KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1368 ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1369
1370 alignsize = keg->uk_align + 1;
1371 rsize = keg->uk_size;
1372 /*
1373 * We want one item to start on every align boundary in a page. To
1374 * do this we will span pages. We will also extend the item by the
1375 * size of align if it is an even multiple of align. Otherwise, it
1376 * would fall on the same boundary every time.
1377 */
1378 if (rsize & keg->uk_align)
1379 rsize = (rsize & ~keg->uk_align) + alignsize;
1380 if ((rsize & alignsize) == 0)
1381 rsize += alignsize;
1382 trailer = rsize - keg->uk_size;
1383 pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1384 pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1385 keg->uk_rsize = rsize;
1386 keg->uk_ppera = pages;
1387 keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1388 keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1389 KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
1390 ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1391 keg->uk_ipers));
1392 }
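
/*
 * Editor's worked example (illustrative, assuming 4 KB pages and a
 * 64-byte cache line, i.e. alignsize = 64): a 128-byte item is already
 * aligned but is an even multiple of the line size, so rsize becomes
 * 128 + 64 = 192.  Then pages = (192 * (4096 / 64)) / 4096 = 3 and
 * ipers = (3 * 4096 + 64) / 192 = 64, so item start offsets cycle through
 * every 64-byte boundary of a page instead of always reusing the same one.
 */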
1393
1394 /*
1395 * Keg header ctor. This initializes all fields, locks, etc. And inserts
1396 * the keg onto the global keg list.
1397 *
1398 * Arguments/Returns follow uma_ctor specifications
1399 * udata Actually uma_kctor_args
1400 */
1401 static int
1402 keg_ctor(void *mem, int size, void *udata, int flags)
1403 {
1404 struct uma_kctor_args *arg = udata;
1405 uma_keg_t keg = mem;
1406 uma_zone_t zone;
1407
1408 bzero(keg, size);
1409 keg->uk_size = arg->size;
1410 keg->uk_init = arg->uminit;
1411 keg->uk_fini = arg->fini;
1412 keg->uk_align = arg->align;
1413 keg->uk_free = 0;
1414 keg->uk_reserve = 0;
1415 keg->uk_pages = 0;
1416 keg->uk_flags = arg->flags;
1417 keg->uk_allocf = page_alloc;
1418 keg->uk_freef = page_free;
1419 keg->uk_slabzone = NULL;
1420
1421 /*
1422 * The master zone is passed to us at keg-creation time.
1423 */
1424 zone = arg->zone;
1425 keg->uk_name = zone->uz_name;
1426
1427 if (arg->flags & UMA_ZONE_VM)
1428 keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1429
1430 if (arg->flags & UMA_ZONE_ZINIT)
1431 keg->uk_init = zero_init;
1432
1433 if (arg->flags & UMA_ZONE_MALLOC)
1434 keg->uk_flags |= UMA_ZONE_VTOSLAB;
1435
1436 if (arg->flags & UMA_ZONE_PCPU)
1437 #ifdef SMP
1438 keg->uk_flags |= UMA_ZONE_OFFPAGE;
1439 #else
1440 keg->uk_flags &= ~UMA_ZONE_PCPU;
1441 #endif
1442
1443 if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
1444 keg_cachespread_init(keg);
1445 } else {
1446 if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1447 keg_large_init(keg);
1448 else
1449 keg_small_init(keg);
1450 }
1451
1452 if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1453 keg->uk_slabzone = slabzone;
1454
1455 /*
1456 * If we haven't booted yet we need allocations to go through the
1457 * startup cache until the vm is ready.
1458 */
1459 if (keg->uk_ppera == 1) {
1460 #ifdef UMA_MD_SMALL_ALLOC
1461 keg->uk_allocf = uma_small_alloc;
1462 keg->uk_freef = uma_small_free;
1463
1464 if (booted < UMA_STARTUP)
1465 keg->uk_allocf = startup_alloc;
1466 #else
1467 if (booted < UMA_STARTUP2)
1468 keg->uk_allocf = startup_alloc;
1469 #endif
1470 } else if (booted < UMA_STARTUP2 &&
1471 (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1472 keg->uk_allocf = startup_alloc;
1473
1474 /*
1475 * Initialize keg's lock
1476 */
1477 KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
1478
1479 /*
1480 * If we're putting the slab header in the actual page we need to
1481 * figure out where in each page it goes. This calculates a right
1482 * justified offset into the memory on an ALIGN_PTR boundary.
1483 */
1484 if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1485 u_int totsize;
1486
1487 /* Size of the slab struct and free list */
1488 totsize = sizeof(struct uma_slab);
1489
1490 if (totsize & UMA_ALIGN_PTR)
1491 totsize = (totsize & ~UMA_ALIGN_PTR) +
1492 (UMA_ALIGN_PTR + 1);
1493 keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
1494
1495 /*
1496  * The only way the following is possible is if, with our
1497  * UMA_ALIGN_PTR adjustments, we are now bigger than
1498  * UMA_SLAB_SIZE.  I haven't checked whether this is
1499 * mathematically possible for all cases, so we make
1500 * sure here anyway.
1501 */
1502 totsize = keg->uk_pgoff + sizeof(struct uma_slab);
1503 if (totsize > PAGE_SIZE * keg->uk_ppera) {
1504 printf("zone %s ipers %d rsize %d size %d\n",
1505 zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1506 keg->uk_size);
1507 panic("UMA slab won't fit.");
1508 }
1509 }
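
/*
 * Editor's worked example (illustrative; the header size is hypothetical):
 * with 4 KB pages, uk_ppera == 1 and a struct uma_slab that rounds up to
 * 80 bytes on a pointer boundary, uk_pgoff = 4096 - 80 = 4016, i.e. the
 * slab header occupies the last 80 bytes of the page and the items are
 * laid out from the front of the page up to that offset.
 */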
1510
1511 if (keg->uk_flags & UMA_ZONE_HASH)
1512 hash_alloc(&keg->uk_hash);
1513
1514 #ifdef UMA_DEBUG
1515 printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
1516 zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1517 keg->uk_ipers, keg->uk_ppera,
1518 (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
1519 keg->uk_free);
1520 #endif
1521
1522 LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1523
1524 rw_wlock(&uma_rwlock);
1525 LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1526 rw_wunlock(&uma_rwlock);
1527 return (0);
1528 }
1529
1530 /*
1531 * Zone header ctor. This initializes all fields, locks, etc.
1532 *
1533 * Arguments/Returns follow uma_ctor specifications
1534 * udata Actually uma_zctor_args
1535 */
1536 static int
1537 zone_ctor(void *mem, int size, void *udata, int flags)
1538 {
1539 struct uma_zctor_args *arg = udata;
1540 uma_zone_t zone = mem;
1541 uma_zone_t z;
1542 uma_keg_t keg;
1543
1544 bzero(zone, size);
1545 zone->uz_name = arg->name;
1546 zone->uz_ctor = arg->ctor;
1547 zone->uz_dtor = arg->dtor;
1548 zone->uz_slab = zone_fetch_slab;
1549 zone->uz_init = NULL;
1550 zone->uz_fini = NULL;
1551 zone->uz_allocs = 0;
1552 zone->uz_frees = 0;
1553 zone->uz_fails = 0;
1554 zone->uz_sleeps = 0;
1555 zone->uz_count = 0;
1556 zone->uz_count_min = 0;
1557 zone->uz_flags = 0;
1558 zone->uz_warning = NULL;
1559 timevalclear(&zone->uz_ratecheck);
1560 keg = arg->keg;
1561
1562 ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
1563
1564 /*
1565 * This is a pure cache zone, no kegs.
1566 */
1567 if (arg->import) {
1568 if (arg->flags & UMA_ZONE_VM)
1569 arg->flags |= UMA_ZFLAG_CACHEONLY;
1570 zone->uz_flags = arg->flags;
1571 zone->uz_size = arg->size;
1572 zone->uz_import = arg->import;
1573 zone->uz_release = arg->release;
1574 zone->uz_arg = arg->arg;
1575 zone->uz_lockptr = &zone->uz_lock;
1576 rw_wlock(&uma_rwlock);
1577 LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
1578 rw_wunlock(&uma_rwlock);
1579 goto out;
1580 }
1581
1582 /*
1583 * Use the regular zone/keg/slab allocator.
1584 */
1585 zone->uz_import = (uma_import)zone_import;
1586 zone->uz_release = (uma_release)zone_release;
1587 zone->uz_arg = zone;
1588
1589 if (arg->flags & UMA_ZONE_SECONDARY) {
1590 KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1591 zone->uz_init = arg->uminit;
1592 zone->uz_fini = arg->fini;
1593 zone->uz_lockptr = &keg->uk_lock;
1594 zone->uz_flags |= UMA_ZONE_SECONDARY;
1595 rw_wlock(&uma_rwlock);
1596 ZONE_LOCK(zone);
1597 LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1598 if (LIST_NEXT(z, uz_link) == NULL) {
1599 LIST_INSERT_AFTER(z, zone, uz_link);
1600 break;
1601 }
1602 }
1603 ZONE_UNLOCK(zone);
1604 rw_wunlock(&uma_rwlock);
1605 } else if (keg == NULL) {
1606 if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1607 arg->align, arg->flags)) == NULL)
1608 return (ENOMEM);
1609 } else {
1610 struct uma_kctor_args karg;
1611 int error;
1612
1613 /* We should only be here from uma_startup() */
1614 karg.size = arg->size;
1615 karg.uminit = arg->uminit;
1616 karg.fini = arg->fini;
1617 karg.align = arg->align;
1618 karg.flags = arg->flags;
1619 karg.zone = zone;
1620 error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1621 flags);
1622 if (error)
1623 return (error);
1624 }
1625
1626 /*
1627 * Link in the first keg.
1628 */
1629 zone->uz_klink.kl_keg = keg;
1630 LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1631 zone->uz_lockptr = &keg->uk_lock;
1632 zone->uz_size = keg->uk_size;
1633 zone->uz_flags |= (keg->uk_flags &
1634 (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
1635
1636 /*
1637 * Some internal zones don't have room allocated for the per cpu
1638 * caches. If we're internal, bail out here.
1639 */
1640 if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1641 KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1642 ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1643 return (0);
1644 }
1645
1646 out:
1647 if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
1648 zone->uz_count = bucket_select(zone->uz_size);
1649 else
1650 zone->uz_count = BUCKET_MAX;
1651 zone->uz_count_min = zone->uz_count;
1652
1653 return (0);
1654 }
1655
1656 /*
1657  * Keg header dtor.  This frees all data, destroys locks and frees the
1658  * hash table; the keg is removed from the global list by zone_dtor().
1659 *
1660 * Arguments/Returns follow uma_dtor specifications
1661 * udata unused
1662 */
1663 static void
1664 keg_dtor(void *arg, int size, void *udata)
1665 {
1666 uma_keg_t keg;
1667
1668 keg = (uma_keg_t)arg;
1669 KEG_LOCK(keg);
1670 if (keg->uk_free != 0) {
1671 printf("Freed UMA keg (%s) was not empty (%d items). "
1672 " Lost %d pages of memory.\n",
1673 keg->uk_name ? keg->uk_name : "",
1674 keg->uk_free, keg->uk_pages);
1675 }
1676 KEG_UNLOCK(keg);
1677
1678 hash_free(&keg->uk_hash);
1679
1680 KEG_LOCK_FINI(keg);
1681 }
1682
1683 /*
1684 * Zone header dtor.
1685 *
1686 * Arguments/Returns follow uma_dtor specifications
1687 * udata unused
1688 */
1689 static void
1690 zone_dtor(void *arg, int size, void *udata)
1691 {
1692 uma_klink_t klink;
1693 uma_zone_t zone;
1694 uma_keg_t keg;
1695
1696 zone = (uma_zone_t)arg;
1697 keg = zone_first_keg(zone);
1698
1699 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1700 cache_drain(zone);
1701
1702 rw_wlock(&uma_rwlock);
1703 LIST_REMOVE(zone, uz_link);
1704 rw_wunlock(&uma_rwlock);
1705 /*
1706 * XXX there are some races here where
1707 * the zone can be drained but zone lock
1708 * released and then refilled before we
1709  * remove it... we don't care for now
1710 */
1711 zone_drain_wait(zone, M_WAITOK);
1712 /*
1713 * Unlink all of our kegs.
1714 */
1715 while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1716 klink->kl_keg = NULL;
1717 LIST_REMOVE(klink, kl_link);
1718 if (klink == &zone->uz_klink)
1719 continue;
1720 free(klink, M_TEMP);
1721 }
1722 /*
1723 * We only destroy kegs from non secondary zones.
1724 */
1725 if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
1726 rw_wlock(&uma_rwlock);
1727 LIST_REMOVE(keg, uk_link);
1728 rw_wunlock(&uma_rwlock);
1729 zone_free_item(kegs, keg, NULL, SKIP_NONE);
1730 }
1731 ZONE_LOCK_FINI(zone);
1732 }
1733
1734 /*
1735 * Traverses every zone in the system and calls a callback
1736 *
1737 * Arguments:
1738 * zfunc A pointer to a function which accepts a zone
1739 * as an argument.
1740 *
1741 * Returns:
1742 * Nothing
1743 */
1744 static void
1745 zone_foreach(void (*zfunc)(uma_zone_t))
1746 {
1747 uma_keg_t keg;
1748 uma_zone_t zone;
1749
1750 rw_rlock(&uma_rwlock);
1751 LIST_FOREACH(keg, &uma_kegs, uk_link) {
1752 LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1753 zfunc(zone);
1754 }
1755 rw_runlock(&uma_rwlock);
1756 }
1757
1758 /* Public functions */
1759 /* See uma.h */
1760 void
1761 uma_startup(void *bootmem, int boot_pages)
1762 {
1763 struct uma_zctor_args args;
1764 uma_slab_t slab;
1765 int i;
1766
1767 #ifdef UMA_DEBUG
1768 printf("Creating uma keg headers zone and keg.\n");
1769 #endif
1770 rw_init(&uma_rwlock, "UMA lock");
1771
1772 /* "manually" create the initial zone */
1773 memset(&args, 0, sizeof(args));
1774 args.name = "UMA Kegs";
1775 args.size = sizeof(struct uma_keg);
1776 args.ctor = keg_ctor;
1777 args.dtor = keg_dtor;
1778 args.uminit = zero_init;
1779 args.fini = NULL;
1780 args.keg = &masterkeg;
1781 args.align = 32 - 1;
1782 args.flags = UMA_ZFLAG_INTERNAL;
783     /* The initial zone has no per-CPU queues, so it's smaller */
1784 zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1785
1786 #ifdef UMA_DEBUG
1787 printf("Filling boot free list.\n");
1788 #endif
1789 for (i = 0; i < boot_pages; i++) {
1790 slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
1791 slab->us_data = (uint8_t *)slab;
1792 slab->us_flags = UMA_SLAB_BOOT;
1793 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1794 }
1795 mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1796
1797 #ifdef UMA_DEBUG
1798 printf("Creating uma zone headers zone and keg.\n");
1799 #endif
1800 args.name = "UMA Zones";
1801 args.size = sizeof(struct uma_zone) +
1802 (sizeof(struct uma_cache) * (mp_maxid + 1));
1803 args.ctor = zone_ctor;
1804 args.dtor = zone_dtor;
1805 args.uminit = zero_init;
1806 args.fini = NULL;
1807 args.keg = NULL;
1808 args.align = 32 - 1;
1809 args.flags = UMA_ZFLAG_INTERNAL;
810     /* The initial zone has no per-CPU queues, so it's smaller */
1811 zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1812
1813 #ifdef UMA_DEBUG
1814 printf("Creating slab and hash zones.\n");
1815 #endif
1816
1817 /* Now make a zone for slab headers */
1818 slabzone = uma_zcreate("UMA Slabs",
1819 sizeof(struct uma_slab),
1820 NULL, NULL, NULL, NULL,
1821 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1822
1823 hashzone = uma_zcreate("UMA Hash",
1824 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1825 NULL, NULL, NULL, NULL,
1826 UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1827
1828 bucket_init();
1829
1830 booted = UMA_STARTUP;
1831
1832 #ifdef UMA_DEBUG
1833 printf("UMA startup complete.\n");
1834 #endif
1835 }
1836
1837 /* see uma.h */
1838 void
1839 uma_startup2(void)
1840 {
1841 booted = UMA_STARTUP2;
1842 bucket_enable();
1843 sx_init(&uma_drain_lock, "umadrain");
1844 #ifdef UMA_DEBUG
1845 printf("UMA startup2 complete.\n");
1846 #endif
1847 }
1848
1849 /*
1850 * Initialize our callout handle
1851 *
1852 */
1853
1854 static void
1855 uma_startup3(void)
1856 {
1857 #ifdef UMA_DEBUG
1858 printf("Starting callout.\n");
1859 #endif
1860 callout_init(&uma_callout, 1);
1861 callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1862 #ifdef UMA_DEBUG
1863 printf("UMA startup3 complete.\n");
1864 #endif
1865 }
1866
1867 static uma_keg_t
1868 uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1869 int align, uint32_t flags)
1870 {
1871 struct uma_kctor_args args;
1872
1873 args.size = size;
1874 args.uminit = uminit;
1875 args.fini = fini;
1876 args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1877 args.flags = flags;
1878 args.zone = zone;
1879 return (zone_alloc_item(kegs, &args, M_WAITOK));
1880 }
1881
1882 /* See uma.h */
1883 void
1884 uma_set_align(int align)
1885 {
1886
1887 if (align != UMA_ALIGN_CACHE)
1888 uma_align_cache = align;
1889 }
1890
1891 /* See uma.h */
1892 uma_zone_t
1893 uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1894 uma_init uminit, uma_fini fini, int align, uint32_t flags)
1895
1896 {
1897 struct uma_zctor_args args;
1898 uma_zone_t res;
1899 bool locked;
1900
1901 KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"",
1902 align, name));
1903
1904 /* This stuff is essential for the zone ctor */
1905 memset(&args, 0, sizeof(args));
1906 args.name = name;
1907 args.size = size;
1908 args.ctor = ctor;
1909 args.dtor = dtor;
1910 args.uminit = uminit;
1911 args.fini = fini;
1912 #ifdef INVARIANTS
1913 /*
1914 * If a zone is being created with an empty constructor and
1915 * destructor, pass UMA constructor/destructor which checks for
1916 * memory use after free.
1917 */
1918 if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
1919 ctor == NULL && dtor == NULL && uminit == NULL && fini == NULL) {
1920 args.ctor = trash_ctor;
1921 args.dtor = trash_dtor;
1922 args.uminit = trash_init;
1923 args.fini = trash_fini;
1924 }
1925 #endif
1926 args.align = align;
1927 args.flags = flags;
1928 args.keg = NULL;
1929
1930 if (booted < UMA_STARTUP2) {
1931 locked = false;
1932 } else {
1933 sx_slock(&uma_drain_lock);
1934 locked = true;
1935 }
1936 res = zone_alloc_item(zones, &args, M_WAITOK);
1937 if (locked)
1938 sx_sunlock(&uma_drain_lock);
1939 return (res);
1940 }
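
/*
 * Editor's usage sketch (illustrative; "struct foo" and the zone name are
 * invented for the example, the uma(9) calls are the public API declared
 * in <vm/uma.h>):
 *
 *	static uma_zone_t foo_zone;
 *
 *	void
 *	foo_init(void)
 *	{
 *		foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	}
 *
 *	struct foo *
 *	foo_alloc(void)
 *	{
 *		return (uma_zalloc(foo_zone, M_WAITOK | M_ZERO));
 *	}
 *
 *	void
 *	foo_free(struct foo *fp)
 *	{
 *		uma_zfree(foo_zone, fp);
 *	}
 *
 *	void
 *	foo_uninit(void)
 *	{
 *		uma_zdestroy(foo_zone);
 *	}
 */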
1941
1942 /* See uma.h */
1943 uma_zone_t
1944 uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1945 uma_init zinit, uma_fini zfini, uma_zone_t master)
1946 {
1947 struct uma_zctor_args args;
1948 uma_keg_t keg;
1949 uma_zone_t res;
1950 bool locked;
1951
1952 keg = zone_first_keg(master);
1953 memset(&args, 0, sizeof(args));
1954 args.name = name;
1955 args.size = keg->uk_size;
1956 args.ctor = ctor;
1957 args.dtor = dtor;
1958 args.uminit = zinit;
1959 args.fini = zfini;
1960 args.align = keg->uk_align;
1961 args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
1962 args.keg = keg;
1963
1964 if (booted < UMA_STARTUP2) {
1965 locked = false;
1966 } else {
1967 sx_slock(&uma_drain_lock);
1968 locked = true;
1969 }
1970 /* XXX Attaches only one keg of potentially many. */
1971 res = zone_alloc_item(zones, &args, M_WAITOK);
1972 if (locked)
1973 sx_sunlock(&uma_drain_lock);
1974 return (res);
1975 }
1976
1977 /* See uma.h */
1978 uma_zone_t
1979 uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
1980 uma_init zinit, uma_fini zfini, uma_import zimport,
1981 uma_release zrelease, void *arg, int flags)
1982 {
1983 struct uma_zctor_args args;
1984
1985 memset(&args, 0, sizeof(args));
1986 args.name = name;
1987 args.size = size;
1988 args.ctor = ctor;
1989 args.dtor = dtor;
1990 args.uminit = zinit;
1991 args.fini = zfini;
1992 args.import = zimport;
1993 args.release = zrelease;
1994 args.arg = arg;
1995 args.align = 0;
1996 args.flags = flags;
1997
1998 return (zone_alloc_item(zones, &args, M_WAITOK));
1999 }
2000
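/*
 * Illustrative sketch (not part of uma_core.c): a cache-only zone created
 * with uma_zcache_create().  There is no keg; instead the import/release
 * callbacks move batches of items between UMA's per-CPU buckets and some
 * external backend.  Here the backend is a hypothetical mutex-protected
 * free list of "struct bar" objects; sys/queue.h and sys/mutex.h are
 * assumed to be included.
 */
#if 0	/* example only */
struct bar {
	SLIST_ENTRY(bar) b_link;
	char		 b_payload[128];
};

static SLIST_HEAD(, bar) bar_backend = SLIST_HEAD_INITIALIZER(bar_backend);
static struct mtx bar_backend_mtx;
static uma_zone_t bar_zone;

static int
bar_import(void *arg __unused, void **store, int cnt, int flags __unused)
{
	struct bar *bp;
	int i;

	mtx_lock(&bar_backend_mtx);
	for (i = 0; i < cnt; i++) {
		bp = SLIST_FIRST(&bar_backend);
		if (bp == NULL)
			break;
		SLIST_REMOVE_HEAD(&bar_backend, b_link);
		store[i] = bp;
	}
	mtx_unlock(&bar_backend_mtx);
	return (i);
}

static void
bar_release(void *arg __unused, void **store, int cnt)
{
	int i;

	mtx_lock(&bar_backend_mtx);
	for (i = 0; i < cnt; i++)
		SLIST_INSERT_HEAD(&bar_backend, (struct bar *)store[i], b_link);
	mtx_unlock(&bar_backend_mtx);
}

static void
bar_zone_init(void *arg __unused)
{

	mtx_init(&bar_backend_mtx, "bar backend", NULL, MTX_DEF);
	bar_zone = uma_zcache_create("bar", sizeof(struct bar),
	    NULL, NULL, NULL, NULL, bar_import, bar_release, NULL, 0);
}
#endif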
2001 static void
2002 zone_lock_pair(uma_zone_t a, uma_zone_t b)
2003 {
2004 if (a < b) {
2005 ZONE_LOCK(a);
2006 mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
2007 } else {
2008 ZONE_LOCK(b);
2009 mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
2010 }
2011 }
2012
2013 static void
2014 zone_unlock_pair(uma_zone_t a, uma_zone_t b)
2015 {
2016
2017 ZONE_UNLOCK(a);
2018 ZONE_UNLOCK(b);
2019 }
2020
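/*
 * Illustrative sketch (not part of uma_core.c): zone_lock_pair() above is an
 * instance of the usual address-ordered locking idiom.  By always taking the
 * lower-addressed lock first, two threads locking the same pair in opposite
 * argument order cannot deadlock; MTX_DUPOK silences WITNESS, which would
 * otherwise warn about holding two locks of the same class.  The helper
 * below is hypothetical.
 */
#if 0	/* example only */
static void
mtx_lock_pair(struct mtx *a, struct mtx *b)
{

	if ((uintptr_t)a < (uintptr_t)b) {
		mtx_lock(a);
		mtx_lock_flags(b, MTX_DUPOK);
	} else {
		mtx_lock(b);
		mtx_lock_flags(a, MTX_DUPOK);
	}
}
#endif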
2021 int
2022 uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
2023 {
2024 uma_klink_t klink;
2025 uma_klink_t kl;
2026 int error;
2027
2028 error = 0;
2029 klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
2030
2031 zone_lock_pair(zone, master);
2032 /*
2033 * zone must use vtoslab() to resolve objects and must already be
2034 * a secondary.
2035 */
2036 if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
2037 != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
2038 error = EINVAL;
2039 goto out;
2040 }
2041 /*
2042 * The new master must also use vtoslab().
2043 */
2044 if ((master->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
2045 error = EINVAL;
2046 goto out;
2047 }
2048
2049 /*
2050 * The underlying object must be the same size. rsize
2051 * may be different.
2052 */
2053 if (master->uz_size != zone->uz_size) {
2054 error = E2BIG;
2055 goto out;
2056 }
2057 /*
2058 * Put it at the end of the list.
2059 */
2060 klink->kl_keg = zone_first_keg(master);
2061 LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
2062 if (LIST_NEXT(kl, kl_link) == NULL) {
2063 LIST_INSERT_AFTER(kl, klink, kl_link);
2064 break;
2065 }
2066 }
2067 klink = NULL;
2068 zone->uz_flags |= UMA_ZFLAG_MULTI;
2069 zone->uz_slab = zone_fetch_slab_multi;
2070
2071 out:
2072 zone_unlock_pair(zone, master);
2073 if (klink != NULL)
2074 free(klink, M_TEMP);
2075
2076 return (error);
2077 }
2078
2079
2080 /* See uma.h */
2081 void
2082 uma_zdestroy(uma_zone_t zone)
2083 {
2084
2085 sx_slock(&uma_drain_lock);
2086 zone_free_item(zones, zone, NULL, SKIP_NONE);
2087 sx_sunlock(&uma_drain_lock);
2088 }
2089
2090 void
2091 uma_zwait(uma_zone_t zone)
2092 {
2093 void *item;
2094
2095 item = uma_zalloc_arg(zone, NULL, M_WAITOK);
2096 uma_zfree(zone, item);
2097 }
2098
2099 /* See uma.h */
2100 void *
2101 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
2102 {
2103 void *item;
2104 uma_cache_t cache;
2105 uma_bucket_t bucket;
2106 int lockfail;
2107 int cpu;
2108
2109 /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
2110 random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
2111
2112 /* This is the fast path allocation */
2113 #ifdef UMA_DEBUG_ALLOC_1
2114 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
2115 #endif
2116 CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
2117 zone->uz_name, flags);
2118
2119 if (flags & M_WAITOK) {
2120 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2121 "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
2122 }
2123 KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
2124 ("uma_zalloc_arg: called with spinlock or critical section held"));
2125
2126 #ifdef DEBUG_MEMGUARD
2127 if (memguard_cmp_zone(zone)) {
2128 item = memguard_alloc(zone->uz_size, flags);
2129 if (item != NULL) {
2130 if (zone->uz_init != NULL &&
2131 zone->uz_init(item, zone->uz_size, flags) != 0)
2132 return (NULL);
2133 if (zone->uz_ctor != NULL &&
2134 zone->uz_ctor(item, zone->uz_size, udata,
2135 flags) != 0) {
2136 zone->uz_fini(item, zone->uz_size);
2137 return (NULL);
2138 }
2139 return (item);
2140 }
2141 /* This is unfortunate but should not be fatal. */
2142 }
2143 #endif
2144 /*
2145 * If possible, allocate from the per-CPU cache. There are two
2146 * requirements for safe access to the per-CPU cache: (1) the thread
2147 * accessing the cache must not be preempted or yield during access,
2148 * and (2) the thread must not migrate CPUs without switching which
2149 * cache it accesses. We rely on a critical section to prevent
2150 * preemption and migration. We release the critical section in
2151 * order to acquire the zone mutex if we are unable to allocate from
2152 * the current cache; when we re-acquire the critical section, we
2153 * must detect and handle migration if it has occurred.
2154 */
2155 critical_enter();
2156 cpu = curcpu;
2157 cache = &zone->uz_cpu[cpu];
2158
2159 zalloc_start:
2160 bucket = cache->uc_allocbucket;
2161 if (bucket != NULL && bucket->ub_cnt > 0) {
2162 bucket->ub_cnt--;
2163 item = bucket->ub_bucket[bucket->ub_cnt];
2164 #ifdef INVARIANTS
2165 bucket->ub_bucket[bucket->ub_cnt] = NULL;
2166 #endif
2167 KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
2168 cache->uc_allocs++;
2169 critical_exit();
2170 if (zone->uz_ctor != NULL &&
2171 zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2172 atomic_add_long(&zone->uz_fails, 1);
2173 zone_free_item(zone, item, udata, SKIP_DTOR);
2174 return (NULL);
2175 }
2176 #ifdef INVARIANTS
2177 uma_dbg_alloc(zone, NULL, item);
2178 #endif
2179 if (flags & M_ZERO)
2180 uma_zero_item(item, zone);
2181 return (item);
2182 }
2183
2184 /*
2185 * We have run out of items in our alloc bucket.
2186 * See if we can switch with our free bucket.
2187 */
2188 bucket = cache->uc_freebucket;
2189 if (bucket != NULL && bucket->ub_cnt > 0) {
2190 #ifdef UMA_DEBUG_ALLOC
2191 printf("uma_zalloc: Swapping empty with alloc.\n");
2192 #endif
2193 cache->uc_freebucket = cache->uc_allocbucket;
2194 cache->uc_allocbucket = bucket;
2195 goto zalloc_start;
2196 }
2197
2198 /*
2199 * Discard any empty allocation bucket while we hold no locks.
2200 */
2201 bucket = cache->uc_allocbucket;
2202 cache->uc_allocbucket = NULL;
2203 critical_exit();
2204 if (bucket != NULL)
2205 bucket_free(zone, bucket, udata);
2206
2207 /* Short-circuit for zones without buckets and low memory. */
2208 if (zone->uz_count == 0 || bucketdisable)
2209 goto zalloc_item;
2210
2211 /*
2212 * The attempt to retrieve the item from the per-CPU cache has failed, so
2213 * we must go back to the zone. This requires the zone lock, so we
2214 * must drop the critical section, then re-acquire it when we go back
2215 * to the cache. Since the critical section is released, we may be
2216 * preempted or migrate. As such, make sure not to maintain any
2217 * thread-local state specific to the cache from prior to releasing
2218 * the critical section.
2219 */
2220 lockfail = 0;
2221 if (ZONE_TRYLOCK(zone) == 0) {
2222 /* Record contention to size the buckets. */
2223 ZONE_LOCK(zone);
2224 lockfail = 1;
2225 }
2226 critical_enter();
2227 cpu = curcpu;
2228 cache = &zone->uz_cpu[cpu];
2229
2230 /*
2231 * Since we have locked the zone we may as well send back our stats.
2232 */
2233 atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2234 atomic_add_long(&zone->uz_frees, cache->uc_frees);
2235 cache->uc_allocs = 0;
2236 cache->uc_frees = 0;
2237
2238 /* See if we lost the race to fill the cache. */
2239 if (cache->uc_allocbucket != NULL) {
2240 ZONE_UNLOCK(zone);
2241 goto zalloc_start;
2242 }
2243
2244 /*
2245 * Check the zone's cache of buckets.
2246 */
2247 if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
2248 KASSERT(bucket->ub_cnt != 0,
2249 ("uma_zalloc_arg: Returning an empty bucket."));
2250
2251 LIST_REMOVE(bucket, ub_link);
2252 cache->uc_allocbucket = bucket;
2253 ZONE_UNLOCK(zone);
2254 goto zalloc_start;
2255 }
2256 /* We are no longer associated with this CPU. */
2257 critical_exit();
2258
2259 /*
2260 * We bump the uz count when the cache size is insufficient to
2261 * handle the working set.
2262 */
2263 if (lockfail && zone->uz_count < BUCKET_MAX)
2264 zone->uz_count++;
2265 ZONE_UNLOCK(zone);
2266
2267 /*
2268 * Now let's just fill a bucket and put it on the free list. If that
2269 * works we'll restart the allocation from the beginning and it
2270 * will use the just filled bucket.
2271 */
2272 bucket = zone_alloc_bucket(zone, udata, flags);
2273 if (bucket != NULL) {
2274 ZONE_LOCK(zone);
2275 critical_enter();
2276 cpu = curcpu;
2277 cache = &zone->uz_cpu[cpu];
2278 /*
2279 * See if we lost the race or were migrated. Cache the
2280 * initialized bucket to make this less likely or claim
2281 * the memory directly.
2282 */
2283 if (cache->uc_allocbucket == NULL)
2284 cache->uc_allocbucket = bucket;
2285 else
2286 LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2287 ZONE_UNLOCK(zone);
2288 goto zalloc_start;
2289 }
2290
2291 /*
2292 * We may not be able to get a bucket so return an actual item.
2293 */
2294 #ifdef UMA_DEBUG
2295 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
2296 #endif
2297
2298 zalloc_item:
2299 item = zone_alloc_item(zone, udata, flags);
2300
2301 return (item);
2302 }
2303
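/*
 * Illustrative sketch (not part of uma_core.c): the caller-side contract
 * implied by the WITNESS/KASSERT checks at the top of uma_zalloc_arg().
 * M_WAITOK may sleep, so it must not be used from interrupt filters, with a
 * spin lock held, or inside a critical section; M_NOWAIT callers must
 * tolerate NULL.  foo_zone and the failure counter are hypothetical.
 */
#if 0	/* example only */
static struct foo *
foo_alloc_fastpath(void)
{
	struct foo *fp;

	fp = uma_zalloc(foo_zone, M_NOWAIT | M_ZERO);
	if (fp == NULL) {
		/* Defer or drop the work; do not spin waiting for memory. */
		counter_u64_add(foo_alloc_failures, 1);
	}
	return (fp);
}
#endif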
2304 static uma_slab_t
2305 keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
2306 {
2307 uma_slab_t slab;
2308 int reserve;
2309
2310 mtx_assert(&keg->uk_lock, MA_OWNED);
2311 slab = NULL;
2312 reserve = 0;
2313 if ((flags & M_USE_RESERVE) == 0)
2314 reserve = keg->uk_reserve;
2315
2316 for (;;) {
2317 /*
2318 * Find a slab with some space. Prefer slabs that are partially
2319 * used over those that are completely free. This helps to reduce
2320 * fragmentation.
2321 */
2322 if (keg->uk_free > reserve) {
2323 if (!LIST_EMPTY(&keg->uk_part_slab)) {
2324 slab = LIST_FIRST(&keg->uk_part_slab);
2325 } else {
2326 slab = LIST_FIRST(&keg->uk_free_slab);
2327 LIST_REMOVE(slab, us_link);
2328 LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2329 us_link);
2330 }
2331 MPASS(slab->us_keg == keg);
2332 return (slab);
2333 }
2334
2335 /*
2336 * M_NOVM means don't ask at all!
2337 */
2338 if (flags & M_NOVM)
2339 break;
2340
2341 if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
2342 keg->uk_flags |= UMA_ZFLAG_FULL;
2343 /*
2344 * If this is not a multi-zone, set the FULL bit.
2345 * Otherwise zone_fetch_slab_multi() takes care of it.
2346 */
2347 if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
2348 zone->uz_flags |= UMA_ZFLAG_FULL;
2349 zone_log_warning(zone);
2350 zone_maxaction(zone);
2351 }
2352 if (flags & M_NOWAIT)
2353 break;
2354 zone->uz_sleeps++;
2355 msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
2356 continue;
2357 }
2358 slab = keg_alloc_slab(keg, zone, flags);
2359 /*
2360 * If we got a slab here it's safe to mark it partially used
2361 * and return. We assume that the caller is going to remove
2362 * at least one item.
2363 */
2364 if (slab) {
2365 MPASS(slab->us_keg == keg);
2366 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2367 return (slab);
2368 }
2369 /*
2370 * We might not have been able to get a slab but another cpu
2371 * could have while we were unlocked. Check again before we
2372 * fail.
2373 */
2374 flags |= M_NOVM;
2375 }
2376 return (slab);
2377 }
2378
2379 static uma_slab_t
2380 zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2381 {
2382 uma_slab_t slab;
2383
2384 if (keg == NULL) {
2385 keg = zone_first_keg(zone);
2386 KEG_LOCK(keg);
2387 }
2388
2389 for (;;) {
2390 slab = keg_fetch_slab(keg, zone, flags);
2391 if (slab)
2392 return (slab);
2393 if (flags & (M_NOWAIT | M_NOVM))
2394 break;
2395 }
2396 KEG_UNLOCK(keg);
2397 return (NULL);
2398 }
2399
2400 /*
2401 * zone_fetch_slab_multi: Fetches a slab from one available keg. Returns
2402 * with the keg locked. If NULL is returned, no lock is held.
2403 *
2404 * The last pointer is used to seed the search. It is not required.
2405 */
2406 static uma_slab_t
2407 zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
2408 {
2409 uma_klink_t klink;
2410 uma_slab_t slab;
2411 uma_keg_t keg;
2412 int flags;
2413 int empty;
2414 int full;
2415
2416 /*
2417 * Don't wait on the first pass. This will skip limit tests
2418 * as well. There is no point in blocking if some other keg
2419 * can satisfy the request without blocking.
2420 */
2421 flags = (rflags & ~M_WAITOK) | M_NOWAIT;
2422 /*
2423 * Use the last slab allocated as a hint for where to start
2424 * the search.
2425 */
2426 if (last != NULL) {
2427 slab = keg_fetch_slab(last, zone, flags);
2428 if (slab)
2429 return (slab);
2430 KEG_UNLOCK(last);
2431 }
2432 /*
2433 * Loop until we have a slab in case of transient failures
2434 * while M_WAITOK is specified. It is not clear that this is
2435 * strictly required, but it has been the behavior for a long time.
2436 */
2437 for (;;) {
2438 empty = 0;
2439 full = 0;
2440 /*
2441 * Search the available kegs for slabs. Be careful to hold the
2442 * correct lock while calling into the keg layer.
2443 */
2444 LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
2445 keg = klink->kl_keg;
2446 KEG_LOCK(keg);
2447 if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2448 slab = keg_fetch_slab(keg, zone, flags);
2449 if (slab)
2450 return (slab);
2451 }
2452 if (keg->uk_flags & UMA_ZFLAG_FULL)
2453 full++;
2454 else
2455 empty++;
2456 KEG_UNLOCK(keg);
2457 }
2458 if (rflags & (M_NOWAIT | M_NOVM))
2459 break;
2460 flags = rflags;
2461 /*
2462 * All kegs are full. XXX We can't atomically check all kegs
2463 * and sleep so just sleep for a short period and retry.
2464 */
2465 if (full && !empty) {
2466 ZONE_LOCK(zone);
2467 zone->uz_flags |= UMA_ZFLAG_FULL;
2468 zone->uz_sleeps++;
2469 zone_log_warning(zone);
2470 zone_maxaction(zone);
2471 msleep(zone, zone->uz_lockptr, PVM,
2472 "zonelimit", hz/100);
2473 zone->uz_flags &= ~UMA_ZFLAG_FULL;
2474 ZONE_UNLOCK(zone);
2475 continue;
2476 }
2477 }
2478 return (NULL);
2479 }
2480
2481 static void *
2482 slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
2483 {
2484 void *item;
2485 uint8_t freei;
2486
2487 MPASS(keg == slab->us_keg);
2488 mtx_assert(&keg->uk_lock, MA_OWNED);
2489
2490 freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
2491 BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
2492 item = slab->us_data + (keg->uk_rsize * freei);
2493 slab->us_freecount--;
2494 keg->uk_free--;
2495
2496 /* Move this slab to the full list */
2497 if (slab->us_freecount == 0) {
2498 LIST_REMOVE(slab, us_link);
2499 LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2500 }
2501
2502 return (item);
2503 }
2504
2505 static int
2506 zone_import(uma_zone_t zone, void **bucket, int max, int flags)
2507 {
2508 uma_slab_t slab;
2509 uma_keg_t keg;
2510 int i;
2511
2512 slab = NULL;
2513 keg = NULL;
2514 /* Try to keep the buckets totally full */
2515 for (i = 0; i < max; ) {
2516 if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
2517 break;
2518 keg = slab->us_keg;
2519 while (slab->us_freecount && i < max) {
2520 bucket[i++] = slab_alloc_item(keg, slab);
2521 if (keg->uk_free <= keg->uk_reserve)
2522 break;
2523 }
2524 /* Don't grab more than one slab at a time. */
2525 flags &= ~M_WAITOK;
2526 flags |= M_NOWAIT;
2527 }
2528 if (slab != NULL)
2529 KEG_UNLOCK(keg);
2530
2531 return (i);
2532 }
2533
2534 static uma_bucket_t
2535 zone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
2536 {
2537 uma_bucket_t bucket;
2538 int max;
2539
2540 /* Don't wait for buckets, preserve caller's NOVM setting. */
2541 bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
2542 if (bucket == NULL)
2543 return (NULL);
2544
2545 max = MIN(bucket->ub_entries, zone->uz_count);
2546 bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
2547 max, flags);
2548
2549 /*
2550 * Initialize the memory if necessary.
2551 */
2552 if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
2553 int i;
2554
2555 for (i = 0; i < bucket->ub_cnt; i++)
2556 if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2557 flags) != 0)
2558 break;
2559 /*
2560 * If we couldn't initialize the whole bucket, put the
2561 * rest back onto the freelist.
2562 */
2563 if (i != bucket->ub_cnt) {
2564 zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
2565 bucket->ub_cnt - i);
2566 #ifdef INVARIANTS
2567 bzero(&bucket->ub_bucket[i],
2568 sizeof(void *) * (bucket->ub_cnt - i));
2569 #endif
2570 bucket->ub_cnt = i;
2571 }
2572 }
2573
2574 if (bucket->ub_cnt == 0) {
2575 bucket_free(zone, bucket, udata);
2576 atomic_add_long(&zone->uz_fails, 1);
2577 return (NULL);
2578 }
2579
2580 return (bucket);
2581 }
2582
2583 /*
2584 * Allocates a single item from a zone.
2585 *
2586 * Arguments
2587 * zone The zone to alloc for.
2588 * udata The data to be passed to the constructor.
2589 * flags M_WAITOK, M_NOWAIT, M_ZERO.
2590 *
2591 * Returns
2592 * NULL if there is no memory and M_NOWAIT is set
2593 * An item if successful
2594 */
2595
2596 static void *
2597 zone_alloc_item(uma_zone_t zone, void *udata, int flags)
2598 {
2599 void *item;
2600
2601 item = NULL;
2602
2603 #ifdef UMA_DEBUG_ALLOC
2604 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2605 #endif
2606 if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
2607 goto fail;
2608 atomic_add_long(&zone->uz_allocs, 1);
2609
2610 /*
2611 * We have to call both the zone's init (not the keg's init)
2612 * and the zone's ctor. This is because the item is going from
2613 * a keg slab directly to the user, and the user is expecting it
2614 * to be both zone-init'd as well as zone-ctor'd.
2615 */
2616 if (zone->uz_init != NULL) {
2617 if (zone->uz_init(item, zone->uz_size, flags) != 0) {
2618 zone_free_item(zone, item, udata, SKIP_FINI);
2619 goto fail;
2620 }
2621 }
2622 if (zone->uz_ctor != NULL) {
2623 if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2624 zone_free_item(zone, item, udata, SKIP_DTOR);
2625 goto fail;
2626 }
2627 }
2628 #ifdef INVARIANTS
2629 uma_dbg_alloc(zone, NULL, item);
2630 #endif
2631 if (flags & M_ZERO)
2632 uma_zero_item(item, zone);
2633
2634 return (item);
2635
2636 fail:
2637 atomic_add_long(&zone->uz_fails, 1);
2638 return (NULL);
2639 }
2640
2641 /* See uma.h */
2642 void
2643 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2644 {
2645 uma_cache_t cache;
2646 uma_bucket_t bucket;
2647 int lockfail;
2648 int cpu;
2649
2650 /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
2651 random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
2652
2653 #ifdef UMA_DEBUG_ALLOC_1
2654 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2655 #endif
2656 CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2657 zone->uz_name);
2658
2659 KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
2660 ("uma_zfree_arg: called with spinlock or critical section held"));
2661
2662 /* uma_zfree(..., NULL) does nothing, to match free(9). */
2663 if (item == NULL)
2664 return;
2665 #ifdef DEBUG_MEMGUARD
2666 if (is_memguard_addr(item)) {
2667 if (zone->uz_dtor != NULL)
2668 zone->uz_dtor(item, zone->uz_size, udata);
2669 if (zone->uz_fini != NULL)
2670 zone->uz_fini(item, zone->uz_size);
2671 memguard_free(item);
2672 return;
2673 }
2674 #endif
2675 #ifdef INVARIANTS
2676 if (zone->uz_flags & UMA_ZONE_MALLOC)
2677 uma_dbg_free(zone, udata, item);
2678 else
2679 uma_dbg_free(zone, NULL, item);
2680 #endif
2681 if (zone->uz_dtor != NULL)
2682 zone->uz_dtor(item, zone->uz_size, udata);
2683
2684 /*
2685 * The race here is acceptable. If we miss it we'll just have to wait
2686 * a little longer for the limits to be reset.
2687 */
2688 if (zone->uz_flags & UMA_ZFLAG_FULL)
2689 goto zfree_item;
2690
2691 /*
2692 * If possible, free to the per-CPU cache. There are two
2693 * requirements for safe access to the per-CPU cache: (1) the thread
2694 * accessing the cache must not be preempted or yield during access,
2695 * and (2) the thread must not migrate CPUs without switching which
2696 * cache it accesses. We rely on a critical section to prevent
2697 * preemption and migration. We release the critical section in
2698 * order to acquire the zone mutex if we are unable to free to the
2699 * current cache; when we re-acquire the critical section, we must
2700 * detect and handle migration if it has occurred.
2701 */
2702 zfree_restart:
2703 critical_enter();
2704 cpu = curcpu;
2705 cache = &zone->uz_cpu[cpu];
2706
2707 zfree_start:
2708 /*
2709 * Try to free into the allocbucket first to give LIFO ordering
2710 * for cache-hot data structures. Spill over into the freebucket
2711 * if necessary. Alloc will swap them if one runs dry.
2712 */
2713 bucket = cache->uc_allocbucket;
2714 if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
2715 bucket = cache->uc_freebucket;
2716 if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2717 KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2718 ("uma_zfree: Freeing to non free bucket index."));
2719 bucket->ub_bucket[bucket->ub_cnt] = item;
2720 bucket->ub_cnt++;
2721 cache->uc_frees++;
2722 critical_exit();
2723 return;
2724 }
2725
2726 /*
2727 * We must go back to the zone, which requires acquiring the zone lock,
2728 * which in turn means we must release and re-acquire the critical
2729 * section. Since the critical section is released, we may be
2730 * preempted or migrate. As such, make sure not to maintain any
2731 * thread-local state specific to the cache from prior to releasing
2732 * the critical section.
2733 */
2734 critical_exit();
2735 if (zone->uz_count == 0 || bucketdisable)
2736 goto zfree_item;
2737
2738 lockfail = 0;
2739 if (ZONE_TRYLOCK(zone) == 0) {
2740 /* Record contention to size the buckets. */
2741 ZONE_LOCK(zone);
2742 lockfail = 1;
2743 }
2744 critical_enter();
2745 cpu = curcpu;
2746 cache = &zone->uz_cpu[cpu];
2747
2748 /*
2749 * Since we have locked the zone we may as well send back our stats.
2750 */
2751 atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2752 atomic_add_long(&zone->uz_frees, cache->uc_frees);
2753 cache->uc_allocs = 0;
2754 cache->uc_frees = 0;
2755
2756 bucket = cache->uc_freebucket;
2757 if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2758 ZONE_UNLOCK(zone);
2759 goto zfree_start;
2760 }
2761 cache->uc_freebucket = NULL;
2762 /* We are no longer associated with this CPU. */
2763 critical_exit();
2764
2765 /* Can we throw this on the zone full list? */
2766 if (bucket != NULL) {
2767 #ifdef UMA_DEBUG_ALLOC
2768 printf("uma_zfree: Putting old bucket on the free list.\n");
2769 #endif
2770 /* ub_cnt counts the filled entries in the bucket. */
2771 KASSERT(bucket->ub_cnt != 0,
2772 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2773 LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2774 }
2775
2776 /*
2777 * We bump the uz count when the cache size is insufficient to
2778 * handle the working set.
2779 */
2780 if (lockfail && zone->uz_count < BUCKET_MAX)
2781 zone->uz_count++;
2782 ZONE_UNLOCK(zone);
2783
2784 #ifdef UMA_DEBUG_ALLOC
2785 printf("uma_zfree: Allocating new free bucket.\n");
2786 #endif
2787 bucket = bucket_alloc(zone, udata, M_NOWAIT);
2788 if (bucket) {
2789 critical_enter();
2790 cpu = curcpu;
2791 cache = &zone->uz_cpu[cpu];
2792 if (cache->uc_freebucket == NULL) {
2793 cache->uc_freebucket = bucket;
2794 goto zfree_start;
2795 }
2796 /*
2797 * We lost the race, start over. We have to drop our
2798 * critical section to free the bucket.
2799 */
2800 critical_exit();
2801 bucket_free(zone, bucket, udata);
2802 goto zfree_restart;
2803 }
2804
2805 /*
2806 * If nothing else caught this, we'll just do an internal free.
2807 */
2808 zfree_item:
2809 zone_free_item(zone, item, udata, SKIP_DTOR);
2810
2811 return;
2812 }
2813
2814 static void
2815 slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
2816 {
2817 uint8_t freei;
2818
2819 mtx_assert(&keg->uk_lock, MA_OWNED);
2820 MPASS(keg == slab->us_keg);
2821
2822 /* Do we need to remove from any lists? */
2823 if (slab->us_freecount+1 == keg->uk_ipers) {
2824 LIST_REMOVE(slab, us_link);
2825 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2826 } else if (slab->us_freecount == 0) {
2827 LIST_REMOVE(slab, us_link);
2828 LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2829 }
2830
2831 /* Slab management. */
2832 freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
2833 BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
2834 slab->us_freecount++;
2835
2836 /* Keg statistics. */
2837 keg->uk_free++;
2838 }
2839
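/*
 * Illustrative sketch (not part of uma_core.c): slab_alloc_item() and
 * slab_free_item() above implement a per-slab bitmap allocator; allocation
 * is find-first-set on the free map plus index arithmetic, and freeing is
 * the inverse.  The same idea in self-contained plain C (the kernel code
 * uses the bitset(9) macros and a map embedded in struct uma_slab):
 */
#if 0	/* example only */
#include <stddef.h>
#include <stdint.h>
#include <strings.h>	/* ffsll() */

#define	MINI_ITEM_SIZE	64
#define	MINI_NITEMS	64	/* one 64-bit word of free bits */

static unsigned char	mini_arena[MINI_NITEMS * MINI_ITEM_SIZE];
static uint64_t		mini_freemap = ~(uint64_t)0;	/* all items free */

static void *
mini_slab_alloc(void)
{
	int idx;

	idx = ffsll((long long)mini_freemap);
	if (idx == 0)
		return (NULL);			/* slab is full */
	idx--;
	mini_freemap &= ~((uint64_t)1 << idx);
	return (mini_arena + (size_t)idx * MINI_ITEM_SIZE);
}

static void
mini_slab_free(void *item)
{
	size_t idx;

	idx = ((unsigned char *)item - mini_arena) / MINI_ITEM_SIZE;
	mini_freemap |= (uint64_t)1 << idx;
}
#endif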
2840 static void
2841 zone_release(uma_zone_t zone, void **bucket, int cnt)
2842 {
2843 void *item;
2844 uma_slab_t slab;
2845 uma_keg_t keg;
2846 uint8_t *mem;
2847 int clearfull;
2848 int i;
2849
2850 clearfull = 0;
2851 keg = zone_first_keg(zone);
2852 KEG_LOCK(keg);
2853 for (i = 0; i < cnt; i++) {
2854 item = bucket[i];
2855 if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
2856 mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
2857 if (zone->uz_flags & UMA_ZONE_HASH) {
2858 slab = hash_sfind(&keg->uk_hash, mem);
2859 } else {
2860 mem += keg->uk_pgoff;
2861 slab = (uma_slab_t)mem;
2862 }
2863 } else {
2864 slab = vtoslab((vm_offset_t)item);
2865 if (slab->us_keg != keg) {
2866 KEG_UNLOCK(keg);
2867 keg = slab->us_keg;
2868 KEG_LOCK(keg);
2869 }
2870 }
2871 slab_free_item(keg, slab, item);
2872 if (keg->uk_flags & UMA_ZFLAG_FULL) {
2873 if (keg->uk_pages < keg->uk_maxpages) {
2874 keg->uk_flags &= ~UMA_ZFLAG_FULL;
2875 clearfull = 1;
2876 }
2877
2878 /*
2879 * We can handle one more allocation. Since we're
2880 * clearing ZFLAG_FULL, wake up all procs blocked
2881 * on pages. This should be uncommon, so keeping this
2882 * simple for now (rather than adding count of blocked
2883 * threads etc).
2884 */
2885 wakeup(keg);
2886 }
2887 }
2888 KEG_UNLOCK(keg);
2889 if (clearfull) {
2890 ZONE_LOCK(zone);
2891 zone->uz_flags &= ~UMA_ZFLAG_FULL;
2892 wakeup(zone);
2893 ZONE_UNLOCK(zone);
2894 }
2895
2896 }
2897
2898 /*
2899 * Frees a single item to any zone.
2900 *
2901 * Arguments:
2902 * zone The zone to free to
2903 * item The item we're freeing
2904 * udata User supplied data for the dtor
2905 * skip Skip dtors and finis
2906 */
2907 static void
2908 zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
2909 {
2910
2911 #ifdef INVARIANTS
2912 if (skip == SKIP_NONE) {
2913 if (zone->uz_flags & UMA_ZONE_MALLOC)
2914 uma_dbg_free(zone, udata, item);
2915 else
2916 uma_dbg_free(zone, NULL, item);
2917 }
2918 #endif
2919 if (skip < SKIP_DTOR && zone->uz_dtor)
2920 zone->uz_dtor(item, zone->uz_size, udata);
2921
2922 if (skip < SKIP_FINI && zone->uz_fini)
2923 zone->uz_fini(item, zone->uz_size);
2924
2925 atomic_add_long(&zone->uz_frees, 1);
2926 zone->uz_release(zone->uz_arg, &item, 1);
2927 }
2928
2929 /* See uma.h */
2930 int
2931 uma_zone_set_max(uma_zone_t zone, int nitems)
2932 {
2933 uma_keg_t keg;
2934
2935 keg = zone_first_keg(zone);
2936 if (keg == NULL)
2937 return (0);
2938 KEG_LOCK(keg);
2939 keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
2940 if (keg->uk_maxpages * keg->uk_ipers < nitems)
2941 keg->uk_maxpages += keg->uk_ppera;
2942 nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
2943 KEG_UNLOCK(keg);
2944
2945 return (nitems);
2946 }
2947
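/*
 * Illustrative worked example (not part of uma_core.c): uma_zone_set_max()
 * rounds the requested limit up to whole slabs, so the enforced maximum
 * (also the return value) can exceed the request.  For a hypothetical keg
 * with uk_ipers = 50 items per slab and uk_ppera = 1 page per slab, a
 * request of 1001 items yields maxpages = 20, the round-up check bumps it
 * to 21, and the effective limit becomes 21 * 50 = 1050 items.
 */
#if 0	/* example only */
static int
uma_effective_max(int nitems, int ipers, int ppera)
{
	int maxpages;

	maxpages = (nitems / ipers) * ppera;
	if (maxpages * ipers < nitems)
		maxpages += ppera;
	return ((maxpages / ppera) * ipers);
}
#endif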
2948 /* See uma.h */
2949 int
2950 uma_zone_get_max(uma_zone_t zone)
2951 {
2952 int nitems;
2953 uma_keg_t keg;
2954
2955 keg = zone_first_keg(zone);
2956 if (keg == NULL)
2957 return (0);
2958 KEG_LOCK(keg);
2959 nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
2960 KEG_UNLOCK(keg);
2961
2962 return (nitems);
2963 }
2964
2965 /* See uma.h */
2966 void
2967 uma_zone_set_warning(uma_zone_t zone, const char *warning)
2968 {
2969
2970 ZONE_LOCK(zone);
2971 zone->uz_warning = warning;
2972 ZONE_UNLOCK(zone);
2973 }
2974
2975 /* See uma.h */
2976 void
2977 uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
2978 {
2979
2980 ZONE_LOCK(zone);
2981 TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
2982 ZONE_UNLOCK(zone);
2983 }
2984
2985 /* See uma.h */
2986 int
2987 uma_zone_get_cur(uma_zone_t zone)
2988 {
2989 int64_t nitems;
2990 u_int i;
2991
2992 ZONE_LOCK(zone);
2993 nitems = zone->uz_allocs - zone->uz_frees;
2994 CPU_FOREACH(i) {
2995 /*
2996 * See the comment in sysctl_vm_zone_stats() regarding the
2997 * safety of accessing the per-cpu caches. With the zone lock
2998 * held, it is safe, but can potentially result in stale data.
2999 */
3000 nitems += zone->uz_cpu[i].uc_allocs -
3001 zone->uz_cpu[i].uc_frees;
3002 }
3003 ZONE_UNLOCK(zone);
3004
3005 return (nitems < 0 ? 0 : nitems);
3006 }
3007
3008 /* See uma.h */
3009 void
3010 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
3011 {
3012 uma_keg_t keg;
3013
3014 keg = zone_first_keg(zone);
3015 KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
3016 KEG_LOCK(keg);
3017 KASSERT(keg->uk_pages == 0,
3018 ("uma_zone_set_init on non-empty keg"));
3019 keg->uk_init = uminit;
3020 KEG_UNLOCK(keg);
3021 }
3022
3023 /* See uma.h */
3024 void
3025 uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
3026 {
3027 uma_keg_t keg;
3028
3029 keg = zone_first_keg(zone);
3030 KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
3031 KEG_LOCK(keg);
3032 KASSERT(keg->uk_pages == 0,
3033 ("uma_zone_set_fini on non-empty keg"));
3034 keg->uk_fini = fini;
3035 KEG_UNLOCK(keg);
3036 }
3037
3038 /* See uma.h */
3039 void
3040 uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
3041 {
3042
3043 ZONE_LOCK(zone);
3044 KASSERT(zone_first_keg(zone)->uk_pages == 0,
3045 ("uma_zone_set_zinit on non-empty keg"));
3046 zone->uz_init = zinit;
3047 ZONE_UNLOCK(zone);
3048 }
3049
3050 /* See uma.h */
3051 void
3052 uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
3053 {
3054
3055 ZONE_LOCK(zone);
3056 KASSERT(zone_first_keg(zone)->uk_pages == 0,
3057 ("uma_zone_set_zfini on non-empty keg"));
3058 zone->uz_fini = zfini;
3059 ZONE_UNLOCK(zone);
3060 }
3061
3062 /* See uma.h */
3063 /* XXX uk_freef is not actually used with the zone locked */
3064 void
3065 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
3066 {
3067 uma_keg_t keg;
3068
3069 keg = zone_first_keg(zone);
3070 KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
3071 KEG_LOCK(keg);
3072 keg->uk_freef = freef;
3073 KEG_UNLOCK(keg);
3074 }
3075
3076 /* See uma.h */
3077 /* XXX uk_allocf is not actually used with the zone locked */
3078 void
3079 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
3080 {
3081 uma_keg_t keg;
3082
3083 keg = zone_first_keg(zone);
3084 KEG_LOCK(keg);
3085 keg->uk_allocf = allocf;
3086 KEG_UNLOCK(keg);
3087 }
3088
3089 /* See uma.h */
3090 void
3091 uma_zone_reserve(uma_zone_t zone, int items)
3092 {
3093 uma_keg_t keg;
3094
3095 keg = zone_first_keg(zone);
3096 if (keg == NULL)
3097 return;
3098 KEG_LOCK(keg);
3099 keg->uk_reserve = items;
3100 KEG_UNLOCK(keg);
3101
3102 return;
3103 }
3104
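/*
 * Illustrative sketch (not part of uma_core.c): uma_zone_reserve() holds the
 * last "items" worth of keg space back from ordinary allocations;
 * keg_fetch_slab() above only dips below the reserve when the caller passes
 * M_USE_RESERVE.  A common pattern is to populate a small reserve at
 * initialization and consume it only on a path that must not fail.
 * foo_zone and the sizes are hypothetical.
 */
#if 0	/* example only */
static void
foo_zone_setup_reserve(void)
{

	uma_zone_reserve(foo_zone, 32);
	uma_prealloc(foo_zone, 32);	/* ensure the reserve is backed by slabs */
}

static struct foo *
foo_alloc_emergency(void)
{

	/* Allowed to consume the reserved items; may still return NULL. */
	return (uma_zalloc(foo_zone, M_NOWAIT | M_USE_RESERVE));
}
#endif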
3105 /* See uma.h */
3106 int
3107 uma_zone_reserve_kva(uma_zone_t zone, int count)
3108 {
3109 uma_keg_t keg;
3110 vm_offset_t kva;
3111 u_int pages;
3112
3113 keg = zone_first_keg(zone);
3114 if (keg == NULL)
3115 return (0);
3116 pages = count / keg->uk_ipers;
3117
3118 if (pages * keg->uk_ipers < count)
3119 pages++;
3120 pages *= keg->uk_ppera;
3121
3122 #ifdef UMA_MD_SMALL_ALLOC
3123 if (keg->uk_ppera > 1) {
3124 #else
3125 if (1) {
3126 #endif
3127 kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
3128 if (kva == 0)
3129 return (0);
3130 } else
3131 kva = 0;
3132 KEG_LOCK(keg);
3133 keg->uk_kva = kva;
3134 keg->uk_offset = 0;
3135 keg->uk_maxpages = pages;
3136 #ifdef UMA_MD_SMALL_ALLOC
3137 keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
3138 #else
3139 keg->uk_allocf = noobj_alloc;
3140 #endif
3141 keg->uk_flags |= UMA_ZONE_NOFREE;
3142 KEG_UNLOCK(keg);
3143
3144 return (1);
3145 }
3146
3147 /* See uma.h */
3148 void
3149 uma_prealloc(uma_zone_t zone, int items)
3150 {
3151 int slabs;
3152 uma_slab_t slab;
3153 uma_keg_t keg;
3154
3155 keg = zone_first_keg(zone);
3156 if (keg == NULL)
3157 return;
3158 KEG_LOCK(keg);
3159 slabs = items / keg->uk_ipers;
3160 if (slabs * keg->uk_ipers < items)
3161 slabs++;
3162 while (slabs > 0) {
3163 slab = keg_alloc_slab(keg, zone, M_WAITOK);
3164 if (slab == NULL)
3165 break;
3166 MPASS(slab->us_keg == keg);
3167 LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
3168 slabs--;
3169 }
3170 KEG_UNLOCK(keg);
3171 }
3172
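/*
 * Illustrative sketch (not part of uma_core.c): combining uma_zone_set_max()
 * with uma_prealloc() gives a bounded pool whose backing slabs exist before
 * the data path runs, so allocations are normally satisfied without going to
 * the VM system.  Note that zone_drain() can still reclaim completely free
 * slabs unless the zone was created with UMA_ZONE_NOFREE.  FOO_MAX and
 * foo_zone are hypothetical.
 */
#if 0	/* example only */
#define	FOO_MAX	1024

static void
foo_pool_init(void)
{

	uma_zone_set_max(foo_zone, FOO_MAX);
	uma_prealloc(foo_zone, FOO_MAX);
}
#endif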
3173 /* See uma.h */
3174 static void
3175 uma_reclaim_locked(bool kmem_danger)
3176 {
3177
3178 #ifdef UMA_DEBUG
3179 printf("UMA: vm asked us to release pages!\n");
3180 #endif
3181 sx_assert(&uma_drain_lock, SA_XLOCKED);
3182 bucket_enable();
3183 zone_foreach(zone_drain);
3184 if (vm_page_count_min() || kmem_danger) {
3185 cache_drain_safe(NULL);
3186 zone_foreach(zone_drain);
3187 }
3188 /*
3189 * The slab zone is visited early in the loop above, so some of its
3190 * slabs may only become free after the other zones have been drained.
3191 * Drain it again so those empty pages can be freed; likewise for buckets.
3192 */
3193 zone_drain(slabzone);
3194 bucket_zone_drain();
3195 }
3196
3197 void
3198 uma_reclaim(void)
3199 {
3200
3201 sx_xlock(&uma_drain_lock);
3202 uma_reclaim_locked(false);
3203 sx_xunlock(&uma_drain_lock);
3204 }
3205
3206 static int uma_reclaim_needed;
3207
3208 void
3209 uma_reclaim_wakeup(void)
3210 {
3211
3212 uma_reclaim_needed = 1;
3213 wakeup(&uma_reclaim_needed);
3214 }
3215
3216 void
3217 uma_reclaim_worker(void *arg __unused)
3218 {
3219
3220 sx_xlock(&uma_drain_lock);
3221 for (;;) {
3222 sx_sleep(&uma_reclaim_needed, &uma_drain_lock, PVM,
3223 "umarcl", 0);
3224 if (uma_reclaim_needed) {
3225 uma_reclaim_needed = 0;
3226 sx_xunlock(&uma_drain_lock);
3227 EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
3228 sx_xlock(&uma_drain_lock);
3229 uma_reclaim_locked(true);
3230 }
3231 }
3232 }
3233
3234 /* See uma.h */
3235 int
3236 uma_zone_exhausted(uma_zone_t zone)
3237 {
3238 int full;
3239
3240 ZONE_LOCK(zone);
3241 full = (zone->uz_flags & UMA_ZFLAG_FULL);
3242 ZONE_UNLOCK(zone);
3243 return (full);
3244 }
3245
3246 int
3247 uma_zone_exhausted_nolock(uma_zone_t zone)
3248 {
3249 return (zone->uz_flags & UMA_ZFLAG_FULL);
3250 }
3251
3252 void *
3253 uma_large_malloc(vm_size_t size, int wait)
3254 {
3255 void *mem;
3256 uma_slab_t slab;
3257 uint8_t flags;
3258
3259 slab = zone_alloc_item(slabzone, NULL, wait);
3260 if (slab == NULL)
3261 return (NULL);
3262 mem = page_alloc(NULL, size, &flags, wait);
3263 if (mem) {
3264 vsetslab((vm_offset_t)mem, slab);
3265 slab->us_data = mem;
3266 slab->us_flags = flags | UMA_SLAB_MALLOC;
3267 slab->us_size = size;
3268 } else {
3269 zone_free_item(slabzone, slab, NULL, SKIP_NONE);
3270 }
3271
3272 return (mem);
3273 }
3274
3275 void
3276 uma_large_free(uma_slab_t slab)
3277 {
3278
3279 page_free(slab->us_data, slab->us_size, slab->us_flags);
3280 zone_free_item(slabzone, slab, NULL, SKIP_NONE);
3281 }
3282
3283 static void
3284 uma_zero_item(void *item, uma_zone_t zone)
3285 {
3286 int i;
3287
3288 if (zone->uz_flags & UMA_ZONE_PCPU) {
3289 CPU_FOREACH(i)
3290 bzero(zpcpu_get_cpu(item, i), zone->uz_size);
3291 } else
3292 bzero(item, zone->uz_size);
3293 }
3294
3295 void
3296 uma_print_stats(void)
3297 {
3298 zone_foreach(uma_print_zone);
3299 }
3300
3301 static void
3302 slab_print(uma_slab_t slab)
3303 {
3304 printf("slab: keg %p, data %p, freecount %d\n",
3305 slab->us_keg, slab->us_data, slab->us_freecount);
3306 }
3307
3308 static void
3309 cache_print(uma_cache_t cache)
3310 {
3311 printf("alloc: %p(%d), free: %p(%d)\n",
3312 cache->uc_allocbucket,
3313 cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
3314 cache->uc_freebucket,
3315 cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
3316 }
3317
3318 static void
3319 uma_print_keg(uma_keg_t keg)
3320 {
3321 uma_slab_t slab;
3322
3323 printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
3324 "out %d free %d limit %d\n",
3325 keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
3326 keg->uk_ipers, keg->uk_ppera,
3327 (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
3328 keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
3329 printf("Part slabs:\n");
3330 LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
3331 slab_print(slab);
3332 printf("Free slabs:\n");
3333 LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
3334 slab_print(slab);
3335 printf("Full slabs:\n");
3336 LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
3337 slab_print(slab);
3338 }
3339
3340 void
3341 uma_print_zone(uma_zone_t zone)
3342 {
3343 uma_cache_t cache;
3344 uma_klink_t kl;
3345 int i;
3346
3347 printf("zone: %s(%p) size %d flags %#x\n",
3348 zone->uz_name, zone, zone->uz_size, zone->uz_flags);
3349 LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
3350 uma_print_keg(kl->kl_keg);
3351 CPU_FOREACH(i) {
3352 cache = &zone->uz_cpu[i];
3353 printf("CPU %d Cache:\n", i);
3354 cache_print(cache);
3355 }
3356 }
3357
3358 #ifdef DDB
3359 /*
3360 * Generate statistics across both the zone and its per-CPU caches. Return
3361 * desired statistics if the pointer is non-NULL for that statistic.
3362 *
3363 * Note: does not update the zone statistics, as it can't safely clear the
3364 * per-CPU cache statistic.
3365 *
3366 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
3367 * safe from off-CPU; we should modify the caches to track this information
3368 * directly so that we don't have to.
3369 */
3370 static void
3371 uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
3372 uint64_t *freesp, uint64_t *sleepsp)
3373 {
3374 uma_cache_t cache;
3375 uint64_t allocs, frees, sleeps;
3376 int cachefree, cpu;
3377
3378 allocs = frees = sleeps = 0;
3379 cachefree = 0;
3380 CPU_FOREACH(cpu) {
3381 cache = &z->uz_cpu[cpu];
3382 if (cache->uc_allocbucket != NULL)
3383 cachefree += cache->uc_allocbucket->ub_cnt;
3384 if (cache->uc_freebucket != NULL)
3385 cachefree += cache->uc_freebucket->ub_cnt;
3386 allocs += cache->uc_allocs;
3387 frees += cache->uc_frees;
3388 }
3389 allocs += z->uz_allocs;
3390 frees += z->uz_frees;
3391 sleeps += z->uz_sleeps;
3392 if (cachefreep != NULL)
3393 *cachefreep = cachefree;
3394 if (allocsp != NULL)
3395 *allocsp = allocs;
3396 if (freesp != NULL)
3397 *freesp = frees;
3398 if (sleepsp != NULL)
3399 *sleepsp = sleeps;
3400 }
3401 #endif /* DDB */
3402
3403 static int
3404 sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
3405 {
3406 uma_keg_t kz;
3407 uma_zone_t z;
3408 int count;
3409
3410 count = 0;
3411 rw_rlock(&uma_rwlock);
3412 LIST_FOREACH(kz, &uma_kegs, uk_link) {
3413 LIST_FOREACH(z, &kz->uk_zones, uz_link)
3414 count++;
3415 }
3416 rw_runlock(&uma_rwlock);
3417 return (sysctl_handle_int(oidp, &count, 0, req));
3418 }
3419
3420 static int
3421 sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
3422 {
3423 struct uma_stream_header ush;
3424 struct uma_type_header uth;
3425 struct uma_percpu_stat *ups;
3426 uma_bucket_t bucket;
3427 struct sbuf sbuf;
3428 uma_cache_t cache;
3429 uma_klink_t kl;
3430 uma_keg_t kz;
3431 uma_zone_t z;
3432 uma_keg_t k;
3433 int count, error, i;
3434
3435 error = sysctl_wire_old_buffer(req, 0);
3436 if (error != 0)
3437 return (error);
3438 sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
3439 sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
3440 ups = malloc((mp_maxid + 1) * sizeof(*ups), M_TEMP, M_WAITOK);
3441
3442 count = 0;
3443 rw_rlock(&uma_rwlock);
3444 LIST_FOREACH(kz, &uma_kegs, uk_link) {
3445 LIST_FOREACH(z, &kz->uk_zones, uz_link)
3446 count++;
3447 }
3448
3449 /*
3450 * Insert stream header.
3451 */
3452 bzero(&ush, sizeof(ush));
3453 ush.ush_version = UMA_STREAM_VERSION;
3454 ush.ush_maxcpus = (mp_maxid + 1);
3455 ush.ush_count = count;
3456 (void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
3457
3458 LIST_FOREACH(kz, &uma_kegs, uk_link) {
3459 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3460 bzero(&uth, sizeof(uth));
3461 ZONE_LOCK(z);
3462 strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
3463 uth.uth_align = kz->uk_align;
3464 uth.uth_size = kz->uk_size;
3465 uth.uth_rsize = kz->uk_rsize;
3466 LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
3467 k = kl->kl_keg;
3468 uth.uth_maxpages += k->uk_maxpages;
3469 uth.uth_pages += k->uk_pages;
3470 uth.uth_keg_free += k->uk_free;
3471 uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
3472 * k->uk_ipers;
3473 }
3474
3475 /*
3476 * A zone is secondary if it is not the first entry
3477 * on the keg's zone list.
3478 */
3479 if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
3480 (LIST_FIRST(&kz->uk_zones) != z))
3481 uth.uth_zone_flags = UTH_ZONE_SECONDARY;
3482
3483 LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3484 uth.uth_zone_free += bucket->ub_cnt;
3485 uth.uth_allocs = z->uz_allocs;
3486 uth.uth_frees = z->uz_frees;
3487 uth.uth_fails = z->uz_fails;
3488 uth.uth_sleeps = z->uz_sleeps;
3489 /*
3490 * While it is not normally safe to access the cache
3491 * bucket pointers while not on the CPU that owns the
3492 * cache, we only allow the pointers to be exchanged
3493 * without the zone lock held, not invalidated, so
3494 * accept the possible race associated with bucket
3495 * exchange during monitoring.
3496 */
3497 for (i = 0; i < mp_maxid + 1; i++) {
3498 bzero(&ups[i], sizeof(*ups));
3499 if (kz->uk_flags & UMA_ZFLAG_INTERNAL ||
3500 CPU_ABSENT(i))
3501 continue;
3502 cache = &z->uz_cpu[i];
3503 if (cache->uc_allocbucket != NULL)
3504 ups[i].ups_cache_free +=
3505 cache->uc_allocbucket->ub_cnt;
3506 if (cache->uc_freebucket != NULL)
3507 ups[i].ups_cache_free +=
3508 cache->uc_freebucket->ub_cnt;
3509 ups[i].ups_allocs = cache->uc_allocs;
3510 ups[i].ups_frees = cache->uc_frees;
3511 }
3512 ZONE_UNLOCK(z);
3513 (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
3514 for (i = 0; i < mp_maxid + 1; i++)
3515 (void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i]));
3516 }
3517 }
3518 rw_runlock(&uma_rwlock);
3519 error = sbuf_finish(&sbuf);
3520 sbuf_delete(&sbuf);
3521 free(ups, M_TEMP);
3522 return (error);
3523 }
3524
3525 int
3526 sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
3527 {
3528 uma_zone_t zone = *(uma_zone_t *)arg1;
3529 int error, max;
3530
3531 max = uma_zone_get_max(zone);
3532 error = sysctl_handle_int(oidp, &max, 0, req);
3533 if (error || !req->newptr)
3534 return (error);
3535
3536 uma_zone_set_max(zone, max);
3537
3538 return (0);
3539 }
3540
3541 int
3542 sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
3543 {
3544 uma_zone_t zone = *(uma_zone_t *)arg1;
3545 int cur;
3546
3547 cur = uma_zone_get_cur(zone);
3548 return (sysctl_handle_int(oidp, &cur, 0, req));
3549 }
3550
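/*
 * Illustrative sketch (not part of uma_core.c): wiring the two handlers
 * above to a subsystem's zone.  arg1 must point at the uma_zone_t variable
 * itself, because the handlers dereference it as "*(uma_zone_t *)arg1".
 * The sysctl names and foo_zone are hypothetical.
 */
#if 0	/* example only */
static SYSCTL_NODE(_vm, OID_AUTO, foo, CTLFLAG_RW, 0, "foo zone knobs");
SYSCTL_PROC(_vm_foo, OID_AUTO, max,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &foo_zone, 0,
    sysctl_handle_uma_zone_max, "I", "Maximum number of foo items");
SYSCTL_PROC(_vm_foo, OID_AUTO, cur,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &foo_zone, 0,
    sysctl_handle_uma_zone_cur, "I", "Current number of foo items");
#endif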
3551 #ifdef INVARIANTS
3552 static uma_slab_t
3553 uma_dbg_getslab(uma_zone_t zone, void *item)
3554 {
3555 uma_slab_t slab;
3556 uma_keg_t keg;
3557 uint8_t *mem;
3558
3559 mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
3560 if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
3561 slab = vtoslab((vm_offset_t)mem);
3562 } else {
3563 /*
3564 * It is safe to return the slab here even though the
3565 * zone is unlocked because the item's allocation state
3566 * essentially holds a reference.
3567 */
3568 ZONE_LOCK(zone);
3569 keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
3570 if (keg->uk_flags & UMA_ZONE_HASH)
3571 slab = hash_sfind(&keg->uk_hash, mem);
3572 else
3573 slab = (uma_slab_t)(mem + keg->uk_pgoff);
3574 ZONE_UNLOCK(zone);
3575 }
3576
3577 return (slab);
3578 }
3579
3580 /*
3581 * Set up the slab's freei data such that uma_dbg_free can function.
3582 *
3583 */
3584 static void
3585 uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
3586 {
3587 uma_keg_t keg;
3588 int freei;
3589
3590 if (zone_first_keg(zone) == NULL)
3591 return;
3592 if (slab == NULL) {
3593 slab = uma_dbg_getslab(zone, item);
3594 if (slab == NULL)
3595 panic("uma: item %p did not belong to zone %s\n",
3596 item, zone->uz_name);
3597 }
3598 keg = slab->us_keg;
3599 freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
3600
3601 if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
3602 panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
3603 item, zone, zone->uz_name, slab, freei);
3604 BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
3605
3606 return;
3607 }
3608
3609 /*
3610 * Verifies freed addresses. Checks for alignment, valid slab membership
3611 * and duplicate frees.
3612 *
3613 */
3614 static void
3615 uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
3616 {
3617 uma_keg_t keg;
3618 int freei;
3619
3620 if (zone_first_keg(zone) == NULL)
3621 return;
3622 if (slab == NULL) {
3623 slab = uma_dbg_getslab(zone, item);
3624 if (slab == NULL)
3625 panic("uma: Freed item %p did not belong to zone %s\n",
3626 item, zone->uz_name);
3627 }
3628 keg = slab->us_keg;
3629 freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
3630
3631 if (freei >= keg->uk_ipers)
3632 panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
3633 item, zone, zone->uz_name, slab, freei);
3634
3635 if (((freei * keg->uk_rsize) + slab->us_data) != item)
3636 panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
3637 item, zone, zone->uz_name, slab, freei);
3638
3639 if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
3640 panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
3641 item, zone, zone->uz_name, slab, freei);
3642
3643 BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
3644 }
3645 #endif /* INVARIANTS */
3646
3647 #ifdef DDB
3648 DB_SHOW_COMMAND(uma, db_show_uma)
3649 {
3650 uint64_t allocs, frees, sleeps;
3651 uma_bucket_t bucket;
3652 uma_keg_t kz;
3653 uma_zone_t z;
3654 int cachefree;
3655
3656 db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
3657 "Free", "Requests", "Sleeps", "Bucket");
3658 LIST_FOREACH(kz, &uma_kegs, uk_link) {
3659 LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3660 if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
3661 allocs = z->uz_allocs;
3662 frees = z->uz_frees;
3663 sleeps = z->uz_sleeps;
3664 cachefree = 0;
3665 } else
3666 uma_zone_sumstat(z, &cachefree, &allocs,
3667 &frees, &sleeps);
3668 if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
3669 (LIST_FIRST(&kz->uk_zones) != z)))
3670 cachefree += kz->uk_free;
3671 LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3672 cachefree += bucket->ub_cnt;
3673 db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
3674 z->uz_name, (uintmax_t)kz->uk_size,
3675 (intmax_t)(allocs - frees), cachefree,
3676 (uintmax_t)allocs, sleeps, z->uz_count);
3677 if (db_pager_quit)
3678 return;
3679 }
3680 }
3681 }
3682
3683 DB_SHOW_COMMAND(umacache, db_show_umacache)
3684 {
3685 uint64_t allocs, frees;
3686 uma_bucket_t bucket;
3687 uma_zone_t z;
3688 int cachefree;
3689
3690 db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
3691 "Requests", "Bucket");
3692 LIST_FOREACH(z, &uma_cachezones, uz_link) {
3693 uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
3694 LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3695 cachefree += bucket->ub_cnt;
3696 db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
3697 z->uz_name, (uintmax_t)z->uz_size,
3698 (intmax_t)(allocs - frees), cachefree,
3699 (uintmax_t)allocs, z->uz_count);
3700 if (db_pager_quit)
3701 return;
3702 }
3703 }
3704 #endif /* DDB */