FreeBSD/Linux Kernel Cross Reference
sys/vm/uma_core.c
1 /*
2 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice unmodified, this list of conditions, and the following
10 * disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * $FreeBSD: releng/5.0/sys/vm/uma_core.c 107048 2002-11-18 08:27:14Z jeff $
27 *
28 */
29
30 /*
31 * uma_core.c Implementation of the Universal Memory allocator
32 *
33 * This allocator is intended to replace the multitude of similar object caches
34 * in the standard FreeBSD kernel. The intent is to be flexible as well as
35  * efficient. A primary design goal is to return unused memory to the rest of
36 * the system. This will make the system as a whole more flexible due to the
37 * ability to move memory to subsystems which most need it instead of leaving
38 * pools of reserved memory unused.
39 *
40 * The basic ideas stem from similar slab/zone based allocators whose algorithms
41 * are well known.
42 *
43 */
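
/*
 * A minimal usage sketch (not part of the original file): how a client
 * subsystem might use the public interface implemented below.  The
 * "foo" structure and functions are hypothetical; uma_zcreate(),
 * uma_zalloc(), uma_zfree() and UMA_ALIGN_PTR are the real entry
 * points declared in <vm/uma.h>.
 */
#if 0
struct foo {
	int	f_refs;
	char	f_name[16];
};

static uma_zone_t foo_zone;

static void
foo_init(void)
{
	/* One zone per object type; no ctor/dtor/init/fini needed here. */
	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

static struct foo *
foo_alloc(int flags)
{
	/* M_ZERO makes uma_zalloc_arg() bzero the item before returning. */
	return (uma_zalloc(foo_zone, flags | M_ZERO));
}

static void
foo_free(struct foo *fp)
{
	/* On the fast path the item lands in this cpu's free bucket. */
	uma_zfree(foo_zone, fp);
}
#endif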
44
45 /*
46 * TODO:
47 * - Improve memory usage for large allocations
48 * - Investigate cache size adjustments
49 */
50
51 /* I should really use ktr.. */
52 /*
53 #define UMA_DEBUG 1
54 #define UMA_DEBUG_ALLOC 1
55 #define UMA_DEBUG_ALLOC_1 1
56 */
57
58
59 #include "opt_param.h"
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/kernel.h>
63 #include <sys/types.h>
64 #include <sys/queue.h>
65 #include <sys/malloc.h>
66 #include <sys/lock.h>
67 #include <sys/sysctl.h>
68 #include <sys/mutex.h>
69 #include <sys/proc.h>
70 #include <sys/smp.h>
71 #include <sys/vmmeter.h>
72
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_param.h>
77 #include <vm/vm_map.h>
78 #include <vm/vm_kern.h>
79 #include <vm/vm_extern.h>
80 #include <vm/uma.h>
81 #include <vm/uma_int.h>
82 #include <vm/uma_dbg.h>
83
84 #include <machine/vmparam.h>
85
86 /*
87 * This is the zone from which all zones are spawned. The idea is that even
88 * the zone heads are allocated from the allocator, so we use the bss section
89 * to bootstrap us.
90 */
91 static struct uma_zone masterzone;
92 static uma_zone_t zones = &masterzone;
93
94 /* This is the zone from which all of uma_slab_t's are allocated. */
95 static uma_zone_t slabzone;
96
97 /*
98 * The initial hash tables come out of this zone so they can be allocated
99 * prior to malloc coming up.
100 */
101 static uma_zone_t hashzone;
102
103 /*
104 * Zone that buckets come from.
105 */
106 static uma_zone_t bucketzone;
107
108 /*
109 * Are we allowed to allocate buckets?
110 */
111 static int bucketdisable = 1;
112
113 /* Linked list of all zones in the system */
114 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
115
116 /* This mutex protects the zone list */
117 static struct mtx uma_mtx;
118
119 /* Linked list of boot time pages */
120 static LIST_HEAD(,uma_slab) uma_boot_pages =
121 LIST_HEAD_INITIALIZER(&uma_boot_pages);
122
123 /* Count of free boottime pages */
124 static int uma_boot_free = 0;
125
126 /* Is the VM done starting up? */
127 static int booted = 0;
128
129 /* This is the handle used to schedule our working set calculator */
130 static struct callout uma_callout;
131
132 /* This is mp_maxid + 1, for use while looping over each cpu */
133 static int maxcpu;
134
135 /*
136 * This structure is passed as the zone ctor arg so that I don't have to create
137 * a special allocation function just for zones.
138 */
139 struct uma_zctor_args {
140 char *name;
141 size_t size;
142 uma_ctor ctor;
143 uma_dtor dtor;
144 uma_init uminit;
145 uma_fini fini;
146 int align;
147 u_int16_t flags;
148 };
149
150 /* Prototypes.. */
151
152 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
153 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
154 static void page_free(void *, int, u_int8_t);
155 static uma_slab_t slab_zalloc(uma_zone_t, int);
156 static void cache_drain(uma_zone_t);
157 static void bucket_drain(uma_zone_t, uma_bucket_t);
158 static void zone_drain(uma_zone_t);
159 static void zone_ctor(void *, int, void *);
160 static void zone_dtor(void *, int, void *);
161 static void zero_init(void *, int);
162 static void zone_small_init(uma_zone_t zone);
163 static void zone_large_init(uma_zone_t zone);
164 static void zone_foreach(void (*zfunc)(uma_zone_t));
165 static void zone_timeout(uma_zone_t zone);
166 static int hash_alloc(struct uma_hash *);
167 static int hash_expand(struct uma_hash *, struct uma_hash *);
168 static void hash_free(struct uma_hash *hash);
169 static void uma_timeout(void *);
170 static void uma_startup3(void);
171 static void *uma_zalloc_internal(uma_zone_t, void *, int);
172 static void uma_zfree_internal(uma_zone_t, void *, void *, int);
173 static void bucket_enable(void);
174 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
175 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
176 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
177
178 void uma_print_zone(uma_zone_t);
179 void uma_print_stats(void);
180 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
181
182 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
183 NULL, 0, sysctl_vm_zone, "A", "Zone Info");
184 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
185
186 /*
187 * This routine checks to see whether or not it's safe to enable buckets.
188 */
189
190 static void
191 bucket_enable(void)
192 {
193 if (cnt.v_free_count < cnt.v_free_min)
194 bucketdisable = 1;
195 else
196 bucketdisable = 0;
197 }
198
199
200 /*
201  * Routine called by timeout to fire off some time interval based
202  * calculations (working set, stats, etc.).
203 *
204 * Arguments:
205 * arg Unused
206 *
207 * Returns:
208 * Nothing
209 */
210 static void
211 uma_timeout(void *unused)
212 {
213 bucket_enable();
214 zone_foreach(zone_timeout);
215
216 /* Reschedule this event */
217 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
218 }
219
220 /*
221  * Routine to perform timeout driven calculations. This does the working set
222  * computation as well as hash expansion and per cpu statistics aggregation.
223 *
224 * Arguments:
225 * zone The zone to operate on
226 *
227 * Returns:
228 * Nothing
229 */
230 static void
231 zone_timeout(uma_zone_t zone)
232 {
233 uma_cache_t cache;
234 u_int64_t alloc;
235 int free;
236 int cpu;
237
238 alloc = 0;
239 free = 0;
240
241 /*
242 * Aggregate per cpu cache statistics back to the zone.
243 *
244 * I may rewrite this to set a flag in the per cpu cache instead of
245 * locking. If the flag is not cleared on the next round I will have
246 * to lock and do it here instead so that the statistics don't get too
247 * far out of sync.
248 */
249 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
250 for (cpu = 0; cpu < maxcpu; cpu++) {
251 if (CPU_ABSENT(cpu))
252 continue;
253 CPU_LOCK(zone, cpu);
254 cache = &zone->uz_cpu[cpu];
255 /* Add them up, and reset */
256 alloc += cache->uc_allocs;
257 cache->uc_allocs = 0;
258 if (cache->uc_allocbucket)
259 free += cache->uc_allocbucket->ub_ptr + 1;
260 if (cache->uc_freebucket)
261 free += cache->uc_freebucket->ub_ptr + 1;
262 CPU_UNLOCK(zone, cpu);
263 }
264 }
265
266 /* Now push these stats back into the zone.. */
267 ZONE_LOCK(zone);
268 zone->uz_allocs += alloc;
269
270 /*
271 	 * cachefree is an instantaneous snapshot of what is in the per cpu
272 	 * caches, not an accurate counter.
273 */
274 zone->uz_cachefree = free;
275
276 /*
277 * Expand the zone hash table.
278 *
279 * This is done if the number of slabs is larger than the hash size.
280 	 * What I'm trying to do here is completely eliminate collisions. This
281 * may be a little aggressive. Should I allow for two collisions max?
282 */
283
284 if (zone->uz_flags & UMA_ZFLAG_HASH &&
285 zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
286 struct uma_hash newhash;
287 struct uma_hash oldhash;
288 int ret;
289
290 /*
291 * This is so involved because allocating and freeing
292 * while the zone lock is held will lead to deadlock.
293 * I have to do everything in stages and check for
294 * races.
295 */
296 newhash = zone->uz_hash;
297 ZONE_UNLOCK(zone);
298 ret = hash_alloc(&newhash);
299 ZONE_LOCK(zone);
300 if (ret) {
301 if (hash_expand(&zone->uz_hash, &newhash)) {
302 oldhash = zone->uz_hash;
303 zone->uz_hash = newhash;
304 } else
305 oldhash = newhash;
306
307 ZONE_UNLOCK(zone);
308 hash_free(&oldhash);
309 ZONE_LOCK(zone);
310 }
311 }
312
313 /*
314 * Here we compute the working set size as the total number of items
315 * left outstanding since the last time interval. This is slightly
316 * suboptimal. What we really want is the highest number of outstanding
317 * items during the last time quantum. This should be close enough.
318 *
319 * The working set size is used to throttle the zone_drain function.
320 * We don't want to return memory that we may need again immediately.
321 */
322 alloc = zone->uz_allocs - zone->uz_oallocs;
323 zone->uz_oallocs = zone->uz_allocs;
324 zone->uz_wssize = alloc;
325
326 ZONE_UNLOCK(zone);
327 }
328
329 /*
330 * Allocate and zero fill the next sized hash table from the appropriate
331 * backing store.
332 *
333 * Arguments:
334 * hash A new hash structure with the old hash size in uh_hashsize
335 *
336 * Returns:
337  *	1 on success and 0 on failure.
338 */
339 static int
340 hash_alloc(struct uma_hash *hash)
341 {
342 int oldsize;
343 int alloc;
344
345 oldsize = hash->uh_hashsize;
346
347 	/* We're just going to go to the next greater power of two */
348 if (oldsize) {
349 hash->uh_hashsize = oldsize * 2;
350 alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
351 /* XXX Shouldn't be abusing DEVBUF here */
352 hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
353 M_DEVBUF, M_NOWAIT);
354 } else {
355 alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
356 hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
357 M_WAITOK);
358 hash->uh_hashsize = UMA_HASH_SIZE_INIT;
359 }
360 if (hash->uh_slab_hash) {
361 bzero(hash->uh_slab_hash, alloc);
362 hash->uh_hashmask = hash->uh_hashsize - 1;
363 return (1);
364 }
365
366 return (0);
367 }
368
369 /*
370 * Expands the hash table for OFFPAGE zones. This is done from zone_timeout
371 * to reduce collisions. This must not be done in the regular allocation path,
372 * otherwise, we can recurse on the vm while allocating pages.
373 *
374 * Arguments:
375 * oldhash The hash you want to expand
376 * newhash The hash structure for the new table
377 *
378 * Returns:
379  *	1 if the entries were rehashed into the new table, 0 otherwise.
380 *
381  * Discussion: the caller frees whichever table is left unused afterwards.
382 */
383 static int
384 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
385 {
386 uma_slab_t slab;
387 int hval;
388 int i;
389
390 if (!newhash->uh_slab_hash)
391 return (0);
392
393 if (oldhash->uh_hashsize >= newhash->uh_hashsize)
394 return (0);
395
396 /*
397 * I need to investigate hash algorithms for resizing without a
398 * full rehash.
399 */
400
401 for (i = 0; i < oldhash->uh_hashsize; i++)
402 while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
403 slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
404 SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
405 hval = UMA_HASH(newhash, slab->us_data);
406 SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
407 slab, us_hlink);
408 }
409
410 return (1);
411 }
412
413 /*
414 * Free the hash bucket to the appropriate backing store.
415 *
416 * Arguments:
417  *	hash  The hash structure whose slab_hash array we're freeing; its
418  *	      uh_hashsize tells us which backing store it came from.
419 *
420 * Returns:
421 * Nothing
422 */
423 static void
424 hash_free(struct uma_hash *hash)
425 {
426 if (hash->uh_slab_hash == NULL)
427 return;
428 if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
429 uma_zfree_internal(hashzone,
430 hash->uh_slab_hash, NULL, 0);
431 else
432 free(hash->uh_slab_hash, M_DEVBUF);
433 }
434
435 /*
436 * Frees all outstanding items in a bucket
437 *
438 * Arguments:
439 * zone The zone to free to, must be unlocked.
440 * bucket The free/alloc bucket with items, cpu queue must be locked.
441 *
442 * Returns:
443 * Nothing
444 */
445
446 static void
447 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
448 {
449 uma_slab_t slab;
450 int mzone;
451 void *item;
452
453 if (bucket == NULL)
454 return;
455
456 slab = NULL;
457 mzone = 0;
458
459 	/* We have to look up the slab again for malloc zones */
460 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
461 mzone = 1;
462
463 while (bucket->ub_ptr > -1) {
464 item = bucket->ub_bucket[bucket->ub_ptr];
465 #ifdef INVARIANTS
466 bucket->ub_bucket[bucket->ub_ptr] = NULL;
467 KASSERT(item != NULL,
468 ("bucket_drain: botched ptr, item is NULL"));
469 #endif
470 bucket->ub_ptr--;
471 /*
472 * This is extremely inefficient. The slab pointer was passed
473 * to uma_zfree_arg, but we lost it because the buckets don't
474 * hold them. This will go away when free() gets a size passed
475 * to it.
476 */
477 if (mzone)
478 slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
479 uma_zfree_internal(zone, item, slab, 1);
480 }
481 }
482
483 /*
484 * Drains the per cpu caches for a zone.
485 *
486 * Arguments:
487 * zone The zone to drain, must be unlocked.
488 *
489 * Returns:
490 * Nothing
491 *
492  * This function returns with the zone locked so that the per cpu queues
493  * cannot be filled until zone_drain is finished.
494 *
495 */
496 static void
497 cache_drain(uma_zone_t zone)
498 {
499 uma_bucket_t bucket;
500 uma_cache_t cache;
501 int cpu;
502
503 /*
504 * Flush out the per cpu queues.
505 *
506 * XXX This causes unnecessary thrashing due to immediately having
507 * empty per cpu queues. I need to improve this.
508 */
509
510 /*
511 * We have to lock each cpu cache before locking the zone
512 */
513 ZONE_UNLOCK(zone);
514
515 for (cpu = 0; cpu < maxcpu; cpu++) {
516 if (CPU_ABSENT(cpu))
517 continue;
518 CPU_LOCK(zone, cpu);
519 cache = &zone->uz_cpu[cpu];
520 bucket_drain(zone, cache->uc_allocbucket);
521 bucket_drain(zone, cache->uc_freebucket);
522 }
523
524 /*
525 	 * Drain the bucket queues and free the buckets; we just keep two per
526 	 * cpu (alloc/free).
527 */
528 ZONE_LOCK(zone);
529 while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
530 LIST_REMOVE(bucket, ub_link);
531 ZONE_UNLOCK(zone);
532 bucket_drain(zone, bucket);
533 uma_zfree_internal(bucketzone, bucket, NULL, 0);
534 ZONE_LOCK(zone);
535 }
536
537 /* Now we do the free queue.. */
538 while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
539 LIST_REMOVE(bucket, ub_link);
540 uma_zfree_internal(bucketzone, bucket, NULL, 0);
541 }
542
543 /* We unlock here, but they will all block until the zone is unlocked */
544 for (cpu = 0; cpu < maxcpu; cpu++) {
545 if (CPU_ABSENT(cpu))
546 continue;
547 CPU_UNLOCK(zone, cpu);
548 }
549
550 zone->uz_cachefree = 0;
551 }
552
553 /*
554 * Frees pages from a zone back to the system. This is done on demand from
555 * the pageout daemon.
556 *
557 * Arguments:
558 * zone The zone to free pages from
559  *
560 *
561 * Returns:
562 * Nothing.
563 */
564 static void
565 zone_drain(uma_zone_t zone)
566 {
567 struct slabhead freeslabs = {};
568 uma_slab_t slab;
569 uma_slab_t n;
570 u_int64_t extra;
571 u_int8_t flags;
572 u_int8_t *mem;
573 int i;
574
575 /*
576 	 * We don't want to take pages from statically allocated zones at this
577 	 * time.
578 */
579 if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
580 return;
581
582 ZONE_LOCK(zone);
583
584 if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
585 cache_drain(zone);
586
587 if (zone->uz_free < zone->uz_wssize)
588 goto finished;
589 #ifdef UMA_DEBUG
590 printf("%s working set size: %llu free items: %u\n",
591 zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
592 #endif
593 extra = zone->uz_free - zone->uz_wssize;
594 extra /= zone->uz_ipers;
595
596 /* extra is now the number of extra slabs that we can free */
597
598 if (extra == 0)
599 goto finished;
600
601 slab = LIST_FIRST(&zone->uz_free_slab);
602 while (slab && extra) {
603 n = LIST_NEXT(slab, us_link);
604
605 		/* We have nowhere to free these to */
606 if (slab->us_flags & UMA_SLAB_BOOT) {
607 slab = n;
608 continue;
609 }
610
611 LIST_REMOVE(slab, us_link);
612 zone->uz_pages -= zone->uz_ppera;
613 zone->uz_free -= zone->uz_ipers;
614
615 if (zone->uz_flags & UMA_ZFLAG_HASH)
616 UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
617
618 SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
619
620 slab = n;
621 extra--;
622 }
623 finished:
624 ZONE_UNLOCK(zone);
625
626 while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
627 SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
628 if (zone->uz_fini)
629 for (i = 0; i < zone->uz_ipers; i++)
630 zone->uz_fini(
631 slab->us_data + (zone->uz_rsize * i),
632 zone->uz_size);
633 flags = slab->us_flags;
634 mem = slab->us_data;
635
636 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
637 uma_zfree_internal(slabzone, slab, NULL, 0);
638 if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
639 vm_object_t obj;
640
641 if (flags & UMA_SLAB_KMEM)
642 obj = kmem_object;
643 else
644 obj = NULL;
645 for (i = 0; i < zone->uz_ppera; i++)
646 vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
647 obj);
648 }
649 #ifdef UMA_DEBUG
650 printf("%s: Returning %d bytes.\n",
651 zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
652 #endif
653 zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
654 }
655
656 }
657
658 /*
659 * Allocate a new slab for a zone. This does not insert the slab onto a list.
660 *
661 * Arguments:
662 * zone The zone to allocate slabs for
663 * wait Shall we wait?
664 *
665 * Returns:
666 * The slab that was allocated or NULL if there is no memory and the
667 * caller specified M_NOWAIT.
668 *
669 */
670 static uma_slab_t
671 slab_zalloc(uma_zone_t zone, int wait)
672 {
673 uma_slab_t slab; /* Starting slab */
674 u_int8_t *mem;
675 u_int8_t flags;
676 int i;
677
678 slab = NULL;
679
680 #ifdef UMA_DEBUG
681 printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name);
682 #endif
683 ZONE_UNLOCK(zone);
684
685 if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
686 slab = uma_zalloc_internal(slabzone, NULL, wait);
687 if (slab == NULL) {
688 ZONE_LOCK(zone);
689 return NULL;
690 }
691 }
692
693 /*
694 * This reproduces the old vm_zone behavior of zero filling pages the
695 * first time they are added to a zone.
696 *
697 * Malloced items are zeroed in uma_zalloc.
698 */
699
700 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
701 wait |= M_ZERO;
702 else
703 wait &= ~M_ZERO;
704
705 if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
706 mtx_lock(&Giant);
707 mem = zone->uz_allocf(zone,
708 zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
709 mtx_unlock(&Giant);
710 if (mem == NULL) {
711 ZONE_LOCK(zone);
712 return (NULL);
713 }
714 } else {
715 uma_slab_t tmps;
716
717 if (zone->uz_ppera > 1)
718 			panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
719 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
720 panic("Mallocing before uma_startup2 has been called.\n");
721 if (uma_boot_free == 0)
722 panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
723 tmps = LIST_FIRST(&uma_boot_pages);
724 LIST_REMOVE(tmps, us_link);
725 uma_boot_free--;
726 mem = tmps->us_data;
727 flags = tmps->us_flags;
728 }
729
730 /* Point the slab into the allocated memory */
731 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
732 slab = (uma_slab_t )(mem + zone->uz_pgoff);
733
734 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
735 for (i = 0; i < zone->uz_ppera; i++)
736 vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
737
738 slab->us_zone = zone;
739 slab->us_data = mem;
740
741 /*
742 * This is intended to spread data out across cache lines.
743 *
744 * This code doesn't seem to work properly on x86, and on alpha
745 * it makes absolutely no performance difference. I'm sure it could
746 	 * use some tuning, but Sun makes outrageous claims about its
747 * performance.
748 */
749 #if 0
750 if (zone->uz_cachemax) {
751 slab->us_data += zone->uz_cacheoff;
752 zone->uz_cacheoff += UMA_CACHE_INC;
753 if (zone->uz_cacheoff > zone->uz_cachemax)
754 zone->uz_cacheoff = 0;
755 }
756 #endif
757
758 slab->us_freecount = zone->uz_ipers;
759 slab->us_firstfree = 0;
760 slab->us_flags = flags;
761 for (i = 0; i < zone->uz_ipers; i++)
762 slab->us_freelist[i] = i+1;
763
764 if (zone->uz_init)
765 for (i = 0; i < zone->uz_ipers; i++)
766 zone->uz_init(slab->us_data + (zone->uz_rsize * i),
767 zone->uz_size);
768 ZONE_LOCK(zone);
769
770 if (zone->uz_flags & UMA_ZFLAG_HASH)
771 UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
772
773 zone->uz_pages += zone->uz_ppera;
774 zone->uz_free += zone->uz_ipers;
775
776
777 return (slab);
778 }
779
780 /*
781 * Allocates a number of pages from the system
782 *
783 * Arguments:
784 * zone Unused
785 * bytes The number of bytes requested
786 * wait Shall we wait?
787 *
788 * Returns:
789 * A pointer to the alloced memory or possibly
790 * NULL if M_NOWAIT is set.
791 */
792 static void *
793 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
794 {
795 void *p; /* Returned page */
796
797 *pflag = UMA_SLAB_KMEM;
798 p = (void *) kmem_malloc(kmem_map, bytes, wait);
799
800 return (p);
801 }
802
803 /*
804 * Allocates a number of pages from within an object
805 *
806 * Arguments:
807 * zone Unused
808 * bytes The number of bytes requested
809 * wait Shall we wait?
810 *
811 * Returns:
812 * A pointer to the alloced memory or possibly
813 * NULL if M_NOWAIT is set.
814 *
815  * TODO: If we fail during a multi-page allocation, release the pages that have
816 * already been allocated.
817 */
818 static void *
819 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
820 {
821 vm_offset_t zkva;
822 vm_offset_t retkva;
823 vm_page_t p;
824 int pages;
825
826 retkva = 0;
827 pages = zone->uz_pages;
828
829 /*
830 * This looks a little weird since we're getting one page at a time
831 */
832 while (bytes > 0) {
833 p = vm_page_alloc(zone->uz_obj, pages,
834 VM_ALLOC_INTERRUPT);
835 if (p == NULL)
836 return (NULL);
837
838 zkva = zone->uz_kva + pages * PAGE_SIZE;
839 if (retkva == 0)
840 retkva = zkva;
841 pmap_qenter(zkva, &p, 1);
842 bytes -= PAGE_SIZE;
843 pages += 1;
844 }
845
846 *flags = UMA_SLAB_PRIV;
847
848 return ((void *)retkva);
849 }
850
851 /*
852 * Frees a number of pages to the system
853 *
854 * Arguments:
855 * mem A pointer to the memory to be freed
856 * size The size of the memory being freed
857 * flags The original p->us_flags field
858 *
859 * Returns:
860 * Nothing
861 *
862 */
863 static void
864 page_free(void *mem, int size, u_int8_t flags)
865 {
866 vm_map_t map;
867
868 if (flags & UMA_SLAB_KMEM)
869 map = kmem_map;
870 else
871 panic("UMA: page_free used with invalid flags %d\n", flags);
872
873 kmem_free(map, (vm_offset_t)mem, size);
874 }
875
876 /*
877 * Zero fill initializer
878 *
879 * Arguments/Returns follow uma_init specifications
880 *
881 */
882 static void
883 zero_init(void *mem, int size)
884 {
885 bzero(mem, size);
886 }
887
888 /*
889 * Finish creating a small uma zone. This calculates ipers, and the zone size.
890 *
891 * Arguments
892 * zone The zone we should initialize
893 *
894 * Returns
895 * Nothing
896 */
897 static void
898 zone_small_init(uma_zone_t zone)
899 {
900 int rsize;
901 int memused;
902 int ipers;
903
904 rsize = zone->uz_size;
905
906 if (rsize < UMA_SMALLEST_UNIT)
907 rsize = UMA_SMALLEST_UNIT;
908
909 if (rsize & zone->uz_align)
910 rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
911
912 zone->uz_rsize = rsize;
913
914 rsize += 1; /* Account for the byte of linkage */
915 zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
916 zone->uz_ppera = 1;
917
918 memused = zone->uz_ipers * zone->uz_rsize;
919
920 /* Can we do any better? */
921 if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
922 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
923 return;
924 ipers = UMA_SLAB_SIZE / zone->uz_rsize;
925 if (ipers > zone->uz_ipers) {
926 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
927 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
928 zone->uz_flags |= UMA_ZFLAG_HASH;
929 zone->uz_ipers = ipers;
930 }
931 }
932
933 }
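
/*
 * Worked example for zone_small_init() above.  The numbers are purely
 * illustrative and assume UMA_SLAB_SIZE is a 4096 byte page, a struct
 * uma_slab header of roughly 64 bytes and an alignment mask (uz_align)
 * of 7:
 *
 *	uz_size = 100
 *	rsize   = (100 & ~7) + 8          = 104	(aligned item size)
 *	ipers   = (4096 - 64) / (104 + 1) = 38	(one linkage byte per item)
 *	memused = 38 * 104                = 3952
 *	waste   = 4096 - 3952             = 144
 *
 * If that waste is >= UMA_MAX_WASTE, a non-INTERNAL zone retries the
 * computation without the in-slab header: 4096 / 104 = 39 items, so the
 * zone goes OFFPAGE (and HASH, unless it is a malloc zone) to gain one
 * item per slab.
 */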
934
935 /*
936 * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do
937 * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
938 * more complicated.
939 *
940 * Arguments
941 * zone The zone we should initialize
942 *
943 * Returns
944 * Nothing
945 */
946 static void
947 zone_large_init(uma_zone_t zone)
948 {
949 int pages;
950
951 pages = zone->uz_size / UMA_SLAB_SIZE;
952
953 /* Account for remainder */
954 if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
955 pages++;
956
957 zone->uz_ppera = pages;
958 zone->uz_ipers = 1;
959
960 zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
961 if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
962 zone->uz_flags |= UMA_ZFLAG_HASH;
963
964 zone->uz_rsize = zone->uz_size;
965 }
966
967 /*
968 * Zone header ctor. This initializes all fields, locks, etc. And inserts
969 * the zone onto the global zone list.
970 *
971 * Arguments/Returns follow uma_ctor specifications
972  *	udata  Actually uma_zctor_args
973 *
974 */
975
976 static void
977 zone_ctor(void *mem, int size, void *udata)
978 {
979 struct uma_zctor_args *arg = udata;
980 uma_zone_t zone = mem;
981 int privlc;
982 int cplen;
983 int cpu;
984
985 bzero(zone, size);
986 zone->uz_name = arg->name;
987 zone->uz_size = arg->size;
988 zone->uz_ctor = arg->ctor;
989 zone->uz_dtor = arg->dtor;
990 zone->uz_init = arg->uminit;
991 zone->uz_fini = arg->fini;
992 zone->uz_align = arg->align;
993 zone->uz_free = 0;
994 zone->uz_pages = 0;
995 zone->uz_flags = 0;
996 zone->uz_allocf = page_alloc;
997 zone->uz_freef = page_free;
998
999 if (arg->flags & UMA_ZONE_ZINIT)
1000 zone->uz_init = zero_init;
1001
1002 if (arg->flags & UMA_ZONE_INTERNAL)
1003 zone->uz_flags |= UMA_ZFLAG_INTERNAL;
1004
1005 if (arg->flags & UMA_ZONE_MALLOC)
1006 zone->uz_flags |= UMA_ZFLAG_MALLOC;
1007
1008 if (arg->flags & UMA_ZONE_NOFREE)
1009 zone->uz_flags |= UMA_ZFLAG_NOFREE;
1010
1011 if (arg->flags & UMA_ZONE_VM)
1012 zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
1013
1014 if (zone->uz_size > UMA_SLAB_SIZE)
1015 zone_large_init(zone);
1016 else
1017 zone_small_init(zone);
1018 #ifdef UMA_MD_SMALL_ALLOC
1019 if (zone->uz_ppera == 1) {
1020 zone->uz_allocf = uma_small_alloc;
1021 zone->uz_freef = uma_small_free;
1022 }
1023 #endif /* UMA_MD_SMALL_ALLOC */
1024
1025 if (arg->flags & UMA_ZONE_MTXCLASS)
1026 privlc = 1;
1027 else
1028 privlc = 0;
1029
1030 /* We do this so that the per cpu lock name is unique for each zone */
1031 memcpy(zone->uz_lname, "PCPU ", 5);
1032 cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
1033 memcpy(zone->uz_lname+5, zone->uz_name, cplen);
1034 zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
1035
1036 /*
1037 * If we're putting the slab header in the actual page we need to
1038 * figure out where in each page it goes. This calculates a right
1039 	 * justified offset into the memory on a UMA_ALIGN_PTR boundary.
1040 */
1041 if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1042 int totsize;
1043 int waste;
1044
1045 /* Size of the slab struct and free list */
1046 totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1047 if (totsize & UMA_ALIGN_PTR)
1048 totsize = (totsize & ~UMA_ALIGN_PTR) +
1049 (UMA_ALIGN_PTR + 1);
1050 zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1051
1052 waste = zone->uz_pgoff;
1053 waste -= (zone->uz_ipers * zone->uz_rsize);
1054
1055 /*
1056 * This calculates how much space we have for cache line size
1057 		 * optimizations. It works by offsetting each slab slightly.
1058 * Currently it breaks on x86, and so it is disabled.
1059 */
1060
1061 if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1062 zone->uz_cachemax = waste - UMA_CACHE_INC;
1063 zone->uz_cacheoff = 0;
1064 }
1065
1066 totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1067 + zone->uz_ipers;
1068 /* I don't think it's possible, but I'll make sure anyway */
1069 if (totsize > UMA_SLAB_SIZE) {
1070 printf("zone %s ipers %d rsize %d size %d\n",
1071 zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1072 zone->uz_size);
1073 panic("UMA slab won't fit.\n");
1074 }
1075 }
1076
1077 if (zone->uz_flags & UMA_ZFLAG_HASH)
1078 hash_alloc(&zone->uz_hash);
1079
1080 #ifdef UMA_DEBUG
1081 printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1082 zone->uz_name, zone,
1083 zone->uz_size, zone->uz_ipers,
1084 zone->uz_ppera, zone->uz_pgoff);
1085 #endif
1086 ZONE_LOCK_INIT(zone, privlc);
1087
1088 mtx_lock(&uma_mtx);
1089 LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1090 mtx_unlock(&uma_mtx);
1091
1092 /*
1093 * Some internal zones don't have room allocated for the per cpu
1094 * caches. If we're internal, bail out here.
1095 */
1096
1097 if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1098 return;
1099
1100 if (zone->uz_ipers < UMA_BUCKET_SIZE)
1101 zone->uz_count = zone->uz_ipers - 1;
1102 else
1103 zone->uz_count = UMA_BUCKET_SIZE - 1;
1104
1105 for (cpu = 0; cpu < maxcpu; cpu++)
1106 CPU_LOCK_INIT(zone, cpu, privlc);
1107 }
1108
1109 /*
1110 * Zone header dtor. This frees all data, destroys locks, frees the hash table
1111 * and removes the zone from the global list.
1112 *
1113 * Arguments/Returns follow uma_dtor specifications
1114 * udata unused
1115 */
1116
1117 static void
1118 zone_dtor(void *arg, int size, void *udata)
1119 {
1120 uma_zone_t zone;
1121 int cpu;
1122
1123 zone = (uma_zone_t)arg;
1124
1125 ZONE_LOCK(zone);
1126 zone->uz_wssize = 0;
1127 ZONE_UNLOCK(zone);
1128
1129 mtx_lock(&uma_mtx);
1130 LIST_REMOVE(zone, uz_link);
1131 zone_drain(zone);
1132 mtx_unlock(&uma_mtx);
1133
1134 ZONE_LOCK(zone);
1135 if (zone->uz_free != 0)
1136 printf("Zone %s was not empty. Lost %d pages of memory.\n",
1137 zone->uz_name, zone->uz_pages);
1138
1139 if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
1140 for (cpu = 0; cpu < maxcpu; cpu++)
1141 CPU_LOCK_FINI(zone, cpu);
1142
1143 ZONE_UNLOCK(zone);
1144 if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1145 hash_free(&zone->uz_hash);
1146
1147 ZONE_LOCK_FINI(zone);
1148 }
1149 /*
1150 * Traverses every zone in the system and calls a callback
1151 *
1152 * Arguments:
1153 * zfunc A pointer to a function which accepts a zone
1154 * as an argument.
1155 *
1156 * Returns:
1157 * Nothing
1158 */
1159 static void
1160 zone_foreach(void (*zfunc)(uma_zone_t))
1161 {
1162 uma_zone_t zone;
1163
1164 mtx_lock(&uma_mtx);
1165 LIST_FOREACH(zone, &uma_zones, uz_link) {
1166 zfunc(zone);
1167 }
1168 mtx_unlock(&uma_mtx);
1169 }
1170
1171 /* Public functions */
1172 /* See uma.h */
1173 void
1174 uma_startup(void *bootmem)
1175 {
1176 struct uma_zctor_args args;
1177 uma_slab_t slab;
1178 int slabsize;
1179 int i;
1180
1181 #ifdef UMA_DEBUG
1182 printf("Creating uma zone headers zone.\n");
1183 #endif
1184 #ifdef SMP
1185 maxcpu = mp_maxid + 1;
1186 #else
1187 maxcpu = 1;
1188 #endif
1189 #ifdef UMA_DEBUG
1190 printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1191 Debugger("stop");
1192 #endif
1193 mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1194 /* "manually" Create the initial zone */
1195 args.name = "UMA Zones";
1196 args.size = sizeof(struct uma_zone) +
1197 (sizeof(struct uma_cache) * (maxcpu - 1));
1198 args.ctor = zone_ctor;
1199 args.dtor = zone_dtor;
1200 args.uminit = zero_init;
1201 args.fini = NULL;
1202 args.align = 32 - 1;
1203 args.flags = UMA_ZONE_INTERNAL;
1204 /* The initial zone has no Per cpu queues so it's smaller */
1205 zone_ctor(zones, sizeof(struct uma_zone), &args);
1206
1207 #ifdef UMA_DEBUG
1208 printf("Filling boot free list.\n");
1209 #endif
1210 for (i = 0; i < UMA_BOOT_PAGES; i++) {
1211 slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1212 slab->us_data = (u_int8_t *)slab;
1213 slab->us_flags = UMA_SLAB_BOOT;
1214 LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1215 uma_boot_free++;
1216 }
1217
1218 #ifdef UMA_DEBUG
1219 printf("Creating slab zone.\n");
1220 #endif
1221
1222 /*
1223 * This is the max number of free list items we'll have with
1224 * offpage slabs.
1225 */
1226
1227 slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1228 slabsize /= UMA_MAX_WASTE;
1229 	slabsize++;	/* In case the division rounded down */
1230 slabsize += sizeof(struct uma_slab);
1231
1232 /* Now make a zone for slab headers */
1233 slabzone = uma_zcreate("UMA Slabs",
1234 slabsize,
1235 NULL, NULL, NULL, NULL,
1236 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1237
1238 hashzone = uma_zcreate("UMA Hash",
1239 sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1240 NULL, NULL, NULL, NULL,
1241 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1242
1243 bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1244 NULL, NULL, NULL, NULL,
1245 UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1246
1247 #ifdef UMA_MD_SMALL_ALLOC
1248 booted = 1;
1249 #endif
1250
1251 #ifdef UMA_DEBUG
1252 printf("UMA startup complete.\n");
1253 #endif
1254 }
1255
1256 /* see uma.h */
1257 void
1258 uma_startup2(void)
1259 {
1260 booted = 1;
1261 bucket_enable();
1262 #ifdef UMA_DEBUG
1263 printf("UMA startup2 complete.\n");
1264 #endif
1265 }
1266
1267 /*
1268 * Initialize our callout handle
1269 *
1270 */
1271
1272 static void
1273 uma_startup3(void)
1274 {
1275 #ifdef UMA_DEBUG
1276 printf("Starting callout.\n");
1277 #endif
1278 callout_init(&uma_callout, 0);
1279 callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1280 #ifdef UMA_DEBUG
1281 printf("UMA startup3 complete.\n");
1282 #endif
1283 }
1284
1285 /* See uma.h */
1286 uma_zone_t
1287 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1288 uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1289
1290 {
1291 struct uma_zctor_args args;
1292
1293 /* This stuff is essential for the zone ctor */
1294 args.name = name;
1295 args.size = size;
1296 args.ctor = ctor;
1297 args.dtor = dtor;
1298 args.uminit = uminit;
1299 args.fini = fini;
1300 args.align = align;
1301 args.flags = flags;
1302
1303 return (uma_zalloc_internal(zones, &args, M_WAITOK));
1304 }
1305
1306 /* See uma.h */
1307 void
1308 uma_zdestroy(uma_zone_t zone)
1309 {
1310 uma_zfree_internal(zones, zone, NULL, 0);
1311 }
1312
1313 /* See uma.h */
1314 void *
1315 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1316 {
1317 void *item;
1318 uma_cache_t cache;
1319 uma_bucket_t bucket;
1320 int cpu;
1321
1322 /* This is the fast path allocation */
1323 #ifdef UMA_DEBUG_ALLOC_1
1324 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1325 #endif
1326
1327 if (!(flags & M_NOWAIT)) {
1328 KASSERT(curthread->td_intr_nesting_level == 0,
1329 ("malloc(M_WAITOK) in interrupt context"));
1330 WITNESS_SLEEP(1, NULL);
1331 }
1332
1333 zalloc_restart:
1334 cpu = PCPU_GET(cpuid);
1335 CPU_LOCK(zone, cpu);
1336 cache = &zone->uz_cpu[cpu];
1337
1338 zalloc_start:
1339 bucket = cache->uc_allocbucket;
1340
1341 if (bucket) {
1342 if (bucket->ub_ptr > -1) {
1343 item = bucket->ub_bucket[bucket->ub_ptr];
1344 #ifdef INVARIANTS
1345 bucket->ub_bucket[bucket->ub_ptr] = NULL;
1346 #endif
1347 bucket->ub_ptr--;
1348 KASSERT(item != NULL,
1349 ("uma_zalloc: Bucket pointer mangled."));
1350 cache->uc_allocs++;
1351 #ifdef INVARIANTS
1352 ZONE_LOCK(zone);
1353 uma_dbg_alloc(zone, NULL, item);
1354 ZONE_UNLOCK(zone);
1355 #endif
1356 CPU_UNLOCK(zone, cpu);
1357 if (zone->uz_ctor)
1358 zone->uz_ctor(item, zone->uz_size, udata);
1359 if (flags & M_ZERO)
1360 bzero(item, zone->uz_size);
1361 return (item);
1362 } else if (cache->uc_freebucket) {
1363 /*
1364 * We have run out of items in our allocbucket.
1365 * See if we can switch with our free bucket.
1366 */
1367 if (cache->uc_freebucket->ub_ptr > -1) {
1368 uma_bucket_t swap;
1369
1370 #ifdef UMA_DEBUG_ALLOC
1371 printf("uma_zalloc: Swapping empty with alloc.\n");
1372 #endif
1373 swap = cache->uc_freebucket;
1374 cache->uc_freebucket = cache->uc_allocbucket;
1375 cache->uc_allocbucket = swap;
1376
1377 goto zalloc_start;
1378 }
1379 }
1380 }
1381 ZONE_LOCK(zone);
1382 /* Since we have locked the zone we may as well send back our stats */
1383 zone->uz_allocs += cache->uc_allocs;
1384 cache->uc_allocs = 0;
1385
1386 /* Our old one is now a free bucket */
1387 if (cache->uc_allocbucket) {
1388 KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1389 ("uma_zalloc_arg: Freeing a non free bucket."));
1390 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1391 cache->uc_allocbucket, ub_link);
1392 cache->uc_allocbucket = NULL;
1393 }
1394
1395 /* Check the free list for a new alloc bucket */
1396 if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1397 KASSERT(bucket->ub_ptr != -1,
1398 ("uma_zalloc_arg: Returning an empty bucket."));
1399
1400 LIST_REMOVE(bucket, ub_link);
1401 cache->uc_allocbucket = bucket;
1402 ZONE_UNLOCK(zone);
1403 goto zalloc_start;
1404 }
1405 /* We are no longer associated with this cpu!!! */
1406 CPU_UNLOCK(zone, cpu);
1407
1408 /* Bump up our uz_count so we get here less */
1409 if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1410 zone->uz_count++;
1411
1412 /*
1413 	 * Now let's just fill a bucket and put it on the free list. If that
1414 	 * works we'll restart the allocation from the beginning.
1415 */
1416
1417 if (uma_zalloc_bucket(zone, flags)) {
1418 ZONE_UNLOCK(zone);
1419 goto zalloc_restart;
1420 }
1421 ZONE_UNLOCK(zone);
1422 /*
1423 * We may not be able to get a bucket so return an actual item.
1424 */
1425 #ifdef UMA_DEBUG
1426 printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1427 #endif
1428
1429 return (uma_zalloc_internal(zone, udata, flags));
1430 }
1431
1432 static uma_slab_t
1433 uma_zone_slab(uma_zone_t zone, int flags)
1434 {
1435 uma_slab_t slab;
1436
1437 /*
1438 * This is to prevent us from recursively trying to allocate
1439 * buckets. The problem is that if an allocation forces us to
1440 * grab a new bucket we will call page_alloc, which will go off
1441 * and cause the vm to allocate vm_map_entries. If we need new
1442 * buckets there too we will recurse in kmem_alloc and bad
1443 * things happen. So instead we return a NULL bucket, and make
1444 * the code that allocates buckets smart enough to deal with it
1445 */
1446 if (zone == bucketzone && zone->uz_recurse != 0)
1447 return (NULL);
1448
1449 slab = NULL;
1450
1451 for (;;) {
1452 /*
1453 * Find a slab with some space. Prefer slabs that are partially
1454 * used over those that are totally full. This helps to reduce
1455 * fragmentation.
1456 */
1457 if (zone->uz_free != 0) {
1458 if (!LIST_EMPTY(&zone->uz_part_slab)) {
1459 slab = LIST_FIRST(&zone->uz_part_slab);
1460 } else {
1461 slab = LIST_FIRST(&zone->uz_free_slab);
1462 LIST_REMOVE(slab, us_link);
1463 LIST_INSERT_HEAD(&zone->uz_part_slab, slab,
1464 us_link);
1465 }
1466 return (slab);
1467 }
1468
1469 /*
1470 * M_NOVM means don't ask at all!
1471 */
1472 if (flags & M_NOVM)
1473 break;
1474
1475 if (zone->uz_maxpages &&
1476 zone->uz_pages >= zone->uz_maxpages) {
1477 zone->uz_flags |= UMA_ZFLAG_FULL;
1478
1479 if (flags & M_WAITOK)
1480 msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1481 else
1482 break;
1483 continue;
1484 }
1485 zone->uz_recurse++;
1486 slab = slab_zalloc(zone, flags);
1487 zone->uz_recurse--;
1488 /*
1489 * If we got a slab here it's safe to mark it partially used
1490 * and return. We assume that the caller is going to remove
1491 * at least one item.
1492 */
1493 if (slab) {
1494 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1495 return (slab);
1496 }
1497 /*
1498 * We might not have been able to get a slab but another cpu
1499 * could have while we were unlocked. Check again before we
1500 * fail.
1501 */
1502 if ((flags & M_WAITOK) == 0)
1503 flags |= M_NOVM;
1504 }
1505 return (slab);
1506 }
1507
1508 static __inline void *
1509 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
1510 {
1511 void *item;
1512 u_int8_t freei;
1513
1514 freei = slab->us_firstfree;
1515 slab->us_firstfree = slab->us_freelist[freei];
1516 item = slab->us_data + (zone->uz_rsize * freei);
1517
1518 slab->us_freecount--;
1519 zone->uz_free--;
1520 #ifdef INVARIANTS
1521 uma_dbg_alloc(zone, slab, item);
1522 #endif
1523 /* Move this slab to the full list */
1524 if (slab->us_freecount == 0) {
1525 LIST_REMOVE(slab, us_link);
1526 LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1527 }
1528
1529 return (item);
1530 }
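
/*
 * Illustration of the embedded free list manipulated above, for a
 * hypothetical four item slab.  Right after slab_zalloc() we have
 *
 *	us_firstfree = 0,  us_freelist[] = { 1, 2, 3, 4 }
 *
 * uma_slab_alloc() hands out item 0 and advances us_firstfree to
 * us_freelist[0] == 1.  When uma_zfree_internal() later returns item 0
 * it sets us_freelist[0] = us_firstfree and us_firstfree = 0 again, so
 * the array acts as a LIFO stack of item indices at a cost of one byte
 * per item.
 */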
1531
1532 static int
1533 uma_zalloc_bucket(uma_zone_t zone, int flags)
1534 {
1535 uma_bucket_t bucket;
1536 uma_slab_t slab;
1537
1538 /*
1539 * Try this zone's free list first so we don't allocate extra buckets.
1540 */
1541
1542 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1543 KASSERT(bucket->ub_ptr == -1,
1544 ("uma_zalloc_bucket: Bucket on free list is not empty."));
1545 LIST_REMOVE(bucket, ub_link);
1546 } else {
1547 int bflags;
1548
1549 bflags = flags;
1550 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1551 bflags |= M_NOVM;
1552
1553 ZONE_UNLOCK(zone);
1554 bucket = uma_zalloc_internal(bucketzone,
1555 NULL, bflags);
1556 ZONE_LOCK(zone);
1557 if (bucket != NULL) {
1558 #ifdef INVARIANTS
1559 bzero(bucket, bucketzone->uz_size);
1560 #endif
1561 bucket->ub_ptr = -1;
1562 }
1563 }
1564
1565 if (bucket == NULL)
1566 return (0);
1567
1568 #ifdef SMP
1569 /*
1570 * This code is here to limit the number of simultaneous bucket fills
1571 * for any given zone to the number of per cpu caches in this zone. This
1572 * is done so that we don't allocate more memory than we really need.
1573 */
1574 if (zone->uz_fills >= mp_ncpus)
1575 goto done;
1576
1577 #endif
1578 zone->uz_fills++;
1579
1580 /* Try to keep the buckets totally full */
1581 while ((slab = uma_zone_slab(zone, flags)) != NULL &&
1582 bucket->ub_ptr < zone->uz_count) {
1583 while (slab->us_freecount &&
1584 bucket->ub_ptr < zone->uz_count) {
1585 bucket->ub_bucket[++bucket->ub_ptr] =
1586 uma_slab_alloc(zone, slab);
1587 }
1588 /* Don't block on the next fill */
1589 flags |= M_NOWAIT;
1590 flags &= ~M_WAITOK;
1591 }
1592
1593 zone->uz_fills--;
1594
1595 if (bucket->ub_ptr != -1) {
1596 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1597 bucket, ub_link);
1598 return (1);
1599 }
1600 #ifdef SMP
1601 done:
1602 #endif
1603 uma_zfree_internal(bucketzone, bucket, NULL, 0);
1604
1605 return (0);
1606 }
1607 /*
1608 * Allocates an item for an internal zone
1609 *
1610 * Arguments
1611 * zone The zone to alloc for.
1612 * udata The data to be passed to the constructor.
1613 * flags M_WAITOK, M_NOWAIT, M_ZERO.
1614 *
1615 * Returns
1616 * NULL if there is no memory and M_NOWAIT is set
1617 * An item if successful
1618 */
1619
1620 static void *
1621 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
1622 {
1623 uma_slab_t slab;
1624 void *item;
1625
1626 item = NULL;
1627
1628 /*
1629 * This is to stop us from allocating per cpu buckets while we're
1630 * running out of UMA_BOOT_PAGES. Otherwise, we would exhaust the
1631 * boot pages.
1632 */
1633
1634 if (bucketdisable && zone == bucketzone)
1635 return (NULL);
1636
1637 #ifdef UMA_DEBUG_ALLOC
1638 printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1639 #endif
1640 ZONE_LOCK(zone);
1641
1642 slab = uma_zone_slab(zone, flags);
1643 if (slab == NULL) {
1644 ZONE_UNLOCK(zone);
1645 return (NULL);
1646 }
1647
1648 item = uma_slab_alloc(zone, slab);
1649
1650 ZONE_UNLOCK(zone);
1651
1652 if (zone->uz_ctor != NULL)
1653 zone->uz_ctor(item, zone->uz_size, udata);
1654 if (flags & M_ZERO)
1655 bzero(item, zone->uz_size);
1656
1657 return (item);
1658 }
1659
1660 /* See uma.h */
1661 void
1662 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1663 {
1664 uma_cache_t cache;
1665 uma_bucket_t bucket;
1666 int bflags;
1667 int cpu;
1668
1669 /* This is the fast path free */
1670 #ifdef UMA_DEBUG_ALLOC_1
1671 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1672 #endif
1673 /*
1674 * The race here is acceptable. If we miss it we'll just have to wait
1675 * a little longer for the limits to be reset.
1676 */
1677
1678 if (zone->uz_flags & UMA_ZFLAG_FULL)
1679 goto zfree_internal;
1680
1681 if (zone->uz_dtor)
1682 zone->uz_dtor(item, zone->uz_size, udata);
1683
1684 zfree_restart:
1685 cpu = PCPU_GET(cpuid);
1686 CPU_LOCK(zone, cpu);
1687 cache = &zone->uz_cpu[cpu];
1688
1689 zfree_start:
1690 bucket = cache->uc_freebucket;
1691
1692 if (bucket) {
1693 /*
1694 * Do we have room in our bucket? It is OK for this uz count
1695 * check to be slightly out of sync.
1696 */
1697
1698 if (bucket->ub_ptr < zone->uz_count) {
1699 bucket->ub_ptr++;
1700 KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1701 ("uma_zfree: Freeing to non free bucket index."));
1702 bucket->ub_bucket[bucket->ub_ptr] = item;
1703 #ifdef INVARIANTS
1704 ZONE_LOCK(zone);
1705 if (zone->uz_flags & UMA_ZFLAG_MALLOC)
1706 uma_dbg_free(zone, udata, item);
1707 else
1708 uma_dbg_free(zone, NULL, item);
1709 ZONE_UNLOCK(zone);
1710 #endif
1711 CPU_UNLOCK(zone, cpu);
1712 return;
1713 } else if (cache->uc_allocbucket) {
1714 #ifdef UMA_DEBUG_ALLOC
1715 printf("uma_zfree: Swapping buckets.\n");
1716 #endif
1717 /*
1718 * We have run out of space in our freebucket.
1719 * See if we can switch with our alloc bucket.
1720 */
1721 if (cache->uc_allocbucket->ub_ptr <
1722 cache->uc_freebucket->ub_ptr) {
1723 uma_bucket_t swap;
1724
1725 swap = cache->uc_freebucket;
1726 cache->uc_freebucket = cache->uc_allocbucket;
1727 cache->uc_allocbucket = swap;
1728
1729 goto zfree_start;
1730 }
1731 }
1732 }
1733
1734 /*
1735 * We can get here for two reasons:
1736 *
1737 * 1) The buckets are NULL
1738 * 2) The alloc and free buckets are both somewhat full.
1739 *
1740 */
1741
1742 ZONE_LOCK(zone);
1743
1744 bucket = cache->uc_freebucket;
1745 cache->uc_freebucket = NULL;
1746
1747 /* Can we throw this on the zone full list? */
1748 if (bucket != NULL) {
1749 #ifdef UMA_DEBUG_ALLOC
1750 printf("uma_zfree: Putting old bucket on the free list.\n");
1751 #endif
1752 /* ub_ptr is pointing to the last free item */
1753 KASSERT(bucket->ub_ptr != -1,
1754 ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1755 LIST_INSERT_HEAD(&zone->uz_full_bucket,
1756 bucket, ub_link);
1757 }
1758 if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1759 LIST_REMOVE(bucket, ub_link);
1760 ZONE_UNLOCK(zone);
1761 cache->uc_freebucket = bucket;
1762 goto zfree_start;
1763 }
1764 /* We're done with this CPU now */
1765 CPU_UNLOCK(zone, cpu);
1766
1767 /* And the zone.. */
1768 ZONE_UNLOCK(zone);
1769
1770 #ifdef UMA_DEBUG_ALLOC
1771 printf("uma_zfree: Allocating new free bucket.\n");
1772 #endif
1773 bflags = M_NOWAIT;
1774
1775 if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1776 bflags |= M_NOVM;
1777 #ifdef INVARIANTS
1778 bflags |= M_ZERO;
1779 #endif
1780 bucket = uma_zalloc_internal(bucketzone,
1781 NULL, bflags);
1782 if (bucket) {
1783 bucket->ub_ptr = -1;
1784 ZONE_LOCK(zone);
1785 LIST_INSERT_HEAD(&zone->uz_free_bucket,
1786 bucket, ub_link);
1787 ZONE_UNLOCK(zone);
1788 goto zfree_restart;
1789 }
1790
1791 /*
1792 * If nothing else caught this, we'll just do an internal free.
1793 */
1794
1795 zfree_internal:
1796
1797 uma_zfree_internal(zone, item, udata, 0);
1798
1799 return;
1800
1801 }
1802
1803 /*
1804 * Frees an item to an INTERNAL zone or allocates a free bucket
1805 *
1806 * Arguments:
1807 * zone The zone to free to
1808 * item The item we're freeing
1809 * udata User supplied data for the dtor
1810 * skip Skip the dtor, it was done in uma_zfree_arg
1811 */
1812
1813 static void
1814 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1815 {
1816 uma_slab_t slab;
1817 u_int8_t *mem;
1818 u_int8_t freei;
1819
1820 if (!skip && zone->uz_dtor)
1821 zone->uz_dtor(item, zone->uz_size, udata);
1822
1823 ZONE_LOCK(zone);
1824
1825 if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1826 mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1827 if (zone->uz_flags & UMA_ZFLAG_HASH)
1828 slab = hash_sfind(&zone->uz_hash, mem);
1829 else {
1830 mem += zone->uz_pgoff;
1831 slab = (uma_slab_t)mem;
1832 }
1833 } else {
1834 slab = (uma_slab_t)udata;
1835 }
1836
1837 /* Do we need to remove from any lists? */
1838 if (slab->us_freecount+1 == zone->uz_ipers) {
1839 LIST_REMOVE(slab, us_link);
1840 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1841 } else if (slab->us_freecount == 0) {
1842 LIST_REMOVE(slab, us_link);
1843 LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1844 }
1845
1846 /* Slab management stuff */
1847 freei = ((unsigned long)item - (unsigned long)slab->us_data)
1848 / zone->uz_rsize;
1849
1850 #ifdef INVARIANTS
1851 if (!skip)
1852 uma_dbg_free(zone, slab, item);
1853 #endif
1854
1855 slab->us_freelist[freei] = slab->us_firstfree;
1856 slab->us_firstfree = freei;
1857 slab->us_freecount++;
1858
1859 /* Zone statistics */
1860 zone->uz_free++;
1861
1862 if (zone->uz_flags & UMA_ZFLAG_FULL) {
1863 if (zone->uz_pages < zone->uz_maxpages)
1864 zone->uz_flags &= ~UMA_ZFLAG_FULL;
1865
1866 /* We can handle one more allocation */
1867 wakeup_one(zone);
1868 }
1869
1870 ZONE_UNLOCK(zone);
1871 }
1872
1873 /* See uma.h */
1874 void
1875 uma_zone_set_max(uma_zone_t zone, int nitems)
1876 {
1877 ZONE_LOCK(zone);
1878 if (zone->uz_ppera > 1)
1879 zone->uz_maxpages = nitems * zone->uz_ppera;
1880 else
1881 zone->uz_maxpages = nitems / zone->uz_ipers;
1882
1883 if (zone->uz_maxpages * zone->uz_ipers < nitems)
1884 zone->uz_maxpages++;
1885
1886 ZONE_UNLOCK(zone);
1887 }
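
/*
 * Example of the rounding above, with made-up numbers: for a single
 * page zone with uz_ipers = 38,
 *
 *	uma_zone_set_max(zone, 100);
 *
 * first computes uz_maxpages = 100 / 38 = 2, then sees that
 * 2 * 38 = 76 < 100 and bumps it to 3, so the zone is effectively
 * capped at 3 slabs, i.e. 3 * 38 = 114 items.
 */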
1888
1889 /* See uma.h */
1890 void
1891 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1892 {
1893 ZONE_LOCK(zone);
1894
1895 zone->uz_freef = freef;
1896
1897 ZONE_UNLOCK(zone);
1898 }
1899
1900 /* See uma.h */
1901 void
1902 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1903 {
1904 ZONE_LOCK(zone);
1905
1906 zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1907 zone->uz_allocf = allocf;
1908
1909 ZONE_UNLOCK(zone);
1910 }
1911
1912 /* See uma.h */
1913 int
1914 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1915 {
1916 int pages;
1917 vm_offset_t kva;
1918
1919 mtx_lock(&Giant);
1920
1921 pages = count / zone->uz_ipers;
1922
1923 if (pages * zone->uz_ipers < count)
1924 pages++;
1925
1926 kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1927
1928 if (kva == 0) {
1929 mtx_unlock(&Giant);
1930 return (0);
1931 }
1932
1933
1934 if (obj == NULL)
1935 obj = vm_object_allocate(OBJT_DEFAULT,
1936 pages);
1937 else
1938 _vm_object_allocate(OBJT_DEFAULT,
1939 pages, obj);
1940
1941 ZONE_LOCK(zone);
1942 zone->uz_kva = kva;
1943 zone->uz_obj = obj;
1944 zone->uz_maxpages = pages;
1945
1946 zone->uz_allocf = obj_alloc;
1947 zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1948
1949 ZONE_UNLOCK(zone);
1950 mtx_unlock(&Giant);
1951
1952 return (1);
1953 }
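
/*
 * A sketch of a hypothetical caller of uma_zone_set_obj() (names made
 * up, not taken from the tree): the zone gets a preallocated KVA range
 * and a backing VM object so that obj_alloc() can later fill slabs with
 * VM_ALLOC_INTERRUPT pages, at the price of a hard cap of foo_max items
 * and the UMA_ZFLAG_NOFREE behavior.
 */
#if 0
	foo_zone = uma_zcreate("foo descriptors", sizeof(struct foo_desc),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	if (uma_zone_set_obj(foo_zone, NULL, foo_max) == 0)
		panic("foo: unable to back zone with a VM object");
#endif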
1954
1955 /* See uma.h */
1956 void
1957 uma_prealloc(uma_zone_t zone, int items)
1958 {
1959 int slabs;
1960 uma_slab_t slab;
1961
1962 ZONE_LOCK(zone);
1963 slabs = items / zone->uz_ipers;
1964 if (slabs * zone->uz_ipers < items)
1965 slabs++;
1966
1967 while (slabs > 0) {
1968 slab = slab_zalloc(zone, M_WAITOK);
1969 LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1970 slabs--;
1971 }
1972 ZONE_UNLOCK(zone);
1973 }
1974
1975 /* See uma.h */
1976 void
1977 uma_reclaim(void)
1978 {
1979 /*
1980 * You might think that the delay below would improve performance since
1981 * the allocator will give away memory that it may ask for immediately.
1982 * Really, it makes things worse, since cpu cycles are so much cheaper
1983 * than disk activity.
1984 */
1985 #if 0
1986 static struct timeval tv = {0};
1987 struct timeval now;
1988 getmicrouptime(&now);
1989 if (now.tv_sec > tv.tv_sec + 30)
1990 tv = now;
1991 else
1992 return;
1993 #endif
1994 #ifdef UMA_DEBUG
1995 printf("UMA: vm asked us to release pages!\n");
1996 #endif
1997 bucket_enable();
1998 zone_foreach(zone_drain);
1999
2000 /*
2001 	 * Some slabs may have been freed but this zone will be visited early;
2002 	 * we visit it again so that we can free pages that are empty once other
2003 	 * zones are drained. We have to do the same for buckets.
2004 */
2005 zone_drain(slabzone);
2006 zone_drain(bucketzone);
2007 }
2008
2009 void *
2010 uma_large_malloc(int size, int wait)
2011 {
2012 void *mem;
2013 uma_slab_t slab;
2014 u_int8_t flags;
2015
2016 slab = uma_zalloc_internal(slabzone, NULL, wait);
2017 if (slab == NULL)
2018 return (NULL);
2019
2020 mem = page_alloc(NULL, size, &flags, wait);
2021 if (mem) {
2022 vsetslab((vm_offset_t)mem, slab);
2023 slab->us_data = mem;
2024 slab->us_flags = flags | UMA_SLAB_MALLOC;
2025 slab->us_size = size;
2026 } else {
2027 uma_zfree_internal(slabzone, slab, NULL, 0);
2028 }
2029
2030
2031 return (mem);
2032 }
2033
2034 void
2035 uma_large_free(uma_slab_t slab)
2036 {
2037 vsetobj((vm_offset_t)slab->us_data, kmem_object);
2038 page_free(slab->us_data, slab->us_size, slab->us_flags);
2039 uma_zfree_internal(slabzone, slab, NULL, 0);
2040 }
2041
2042 void
2043 uma_print_stats(void)
2044 {
2045 zone_foreach(uma_print_zone);
2046 }
2047
2048 void
2049 uma_print_zone(uma_zone_t zone)
2050 {
2051 printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2052 zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2053 zone->uz_ipers, zone->uz_ppera,
2054 (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2055 }
2056
2057 /*
2058 * Sysctl handler for vm.zone
2059 *
2060 * stolen from vm_zone.c
2061 */
2062 static int
2063 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2064 {
2065 int error, len, cnt;
2066 const int linesize = 128; /* conservative */
2067 int totalfree;
2068 char *tmpbuf, *offset;
2069 uma_zone_t z;
2070 char *p;
2071
2072 cnt = 0;
2073 mtx_lock(&uma_mtx);
2074 LIST_FOREACH(z, &uma_zones, uz_link)
2075 cnt++;
2076 mtx_unlock(&uma_mtx);
2077 MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2078 M_TEMP, M_WAITOK);
2079 len = snprintf(tmpbuf, linesize,
2080 "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n");
2081 if (cnt == 0)
2082 tmpbuf[len - 1] = '\0';
2083 error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2084 if (error || cnt == 0)
2085 goto out;
2086 offset = tmpbuf;
2087 mtx_lock(&uma_mtx);
2088 LIST_FOREACH(z, &uma_zones, uz_link) {
2089 if (cnt == 0) /* list may have changed size */
2090 break;
2091 ZONE_LOCK(z);
2092 totalfree = z->uz_free + z->uz_cachefree;
2093 len = snprintf(offset, linesize,
2094 "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2095 z->uz_name, z->uz_size,
2096 z->uz_maxpages * z->uz_ipers,
2097 (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2098 totalfree,
2099 (unsigned long long)z->uz_allocs);
2100 ZONE_UNLOCK(z);
2101 for (p = offset + 12; p > offset && *p == ' '; --p)
2102 /* nothing */ ;
2103 p[1] = ':';
2104 cnt--;
2105 offset += len;
2106 }
2107 mtx_unlock(&uma_mtx);
2108 *offset++ = '\0';
2109 error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2110 out:
2111 FREE(tmpbuf, M_TEMP);
2112 return (error);
2113 }