/*
 * Copyright (c) 1987, 1991, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_malloc.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD: releng/5.1/sys/kern/kern_malloc.c 116190 2003-06-11 05:50:09Z ps $
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/vmmeter.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/time.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>

#if defined(INVARIANTS) && defined(__i386__)
#include <machine/cpu.h>
#endif

/*
 * When realloc() is called, if the new size is sufficiently smaller than
 * the old size, realloc() will allocate a new, smaller block to avoid
 * wasting memory. 'Sufficiently smaller' is defined as: newsize <=
 * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'.
 */
#ifndef REALLOC_FRACTION
#define REALLOC_FRACTION 1 /* new block if <= half the size */
#endif
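
/*
 * Worked example with the default REALLOC_FRACTION of 1: shrinking a
 * 1024-byte allocation to 600 bytes reuses the original block
 * (600 > 1024 / 2), while shrinking it to 512 bytes or less allocates
 * a new, smaller block and copies the old contents into it.
 */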

MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches");
MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");

MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");

static void kmeminit(void *);
SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL)

static MALLOC_DEFINE(M_FREE, "free", "should be on free list");

static struct malloc_type *kmemstatistics;
static char *kmembase;
static char *kmemlimit;

#define KMEM_ZSHIFT	4
#define KMEM_ZBASE	16
#define KMEM_ZMASK	(KMEM_ZBASE - 1)

#define KMEM_ZMAX	65536
#define KMEM_ZSIZE	(KMEM_ZMAX >> KMEM_ZSHIFT)
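/*
 * kmemsize[] maps a request size, in KMEM_ZBASE-byte steps, to the index
 * of the kmemzones[] bucket that services it; the table is filled in by
 * kmeminit() below.
 */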
static u_int8_t kmemsize[KMEM_ZSIZE + 1];

/* These won't be powers of two for long */
struct {
        int kz_size;
        char *kz_name;
        uma_zone_t kz_zone;
} kmemzones[] = {
        {16, "16", NULL},
        {32, "32", NULL},
        {64, "64", NULL},
        {128, "128", NULL},
        {256, "256", NULL},
        {512, "512", NULL},
        {1024, "1024", NULL},
        {2048, "2048", NULL},
        {4096, "4096", NULL},
        {8192, "8192", NULL},
        {16384, "16384", NULL},
        {32768, "32768", NULL},
        {65536, "65536", NULL},
        {0, NULL},
};

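/*
 * Size, in bytes, budgeted for kernel malloc(9) memory when kmem_map is
 * sized in kmeminit(); it may be overridden with the kern.vm.kmem.size
 * tunable.
 */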
u_int vm_kmem_size;

/*
 * The malloc_mtx protects the kmemstatistics linked list.
 */

struct mtx malloc_mtx;

#ifdef MALLOC_PROFILE
uint64_t krequests[KMEM_ZSIZE + 1];

static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS);
#endif

static int sysctl_kern_malloc(SYSCTL_HANDLER_ARGS);

/* time_uptime of last malloc(9) failure */
static time_t t_malloc_fail;

#ifdef MALLOC_MAKE_FAILURES
/*
 * Impose an artificial malloc(9) failure on every (n)th allocation made
 * with M_NOWAIT.  A rate of 0 (the default) disables the imposed failures.
 */
SYSCTL_NODE(_debug, OID_AUTO, malloc, CTLFLAG_RD, 0,
    "Kernel malloc debugging options");

static int malloc_failure_rate;
static int malloc_nowait_count;
static int malloc_failure_count;
SYSCTL_INT(_debug_malloc, OID_AUTO, failure_rate, CTLFLAG_RW,
    &malloc_failure_rate, 0, "Every (n) mallocs with M_NOWAIT will fail");
TUNABLE_INT("debug.malloc.failure_rate", &malloc_failure_rate);
SYSCTL_INT(_debug_malloc, OID_AUTO, failure_count, CTLFLAG_RD,
    &malloc_failure_count, 0, "Number of imposed M_NOWAIT malloc failures");
#endif

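/*
 * malloc_last_fail:  Return the number of seconds of system uptime that
 * have elapsed since the most recent recorded malloc(9) failure.
 */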
int
malloc_last_fail(void)
{

        return (time_uptime - t_malloc_fail);
}

/*
 * malloc:
 *
 * Allocate a block of memory.
 *
 * If M_NOWAIT is set, this routine will not block; it returns NULL if
 * the allocation fails.
 */
void *
malloc(size, type, flags)
        unsigned long size;
        struct malloc_type *type;
        int flags;
{
        int indx;
        caddr_t va;
        uma_zone_t zone;
#ifdef DIAGNOSTIC
        unsigned long osize = size;
#endif
        register struct malloc_type *ksp = type;

#ifdef INVARIANTS
        /*
         * Make sure that exactly one of M_WAITOK and M_NOWAIT is
         * specified, and catch the common API botches.
         */
        indx = flags & (M_WAITOK | M_NOWAIT | M_DONTWAIT | M_TRYWAIT);
        if (indx != M_NOWAIT && indx != M_WAITOK) {
                static struct timeval lasterr;
                static int curerr, once;
                if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) {
                        printf("Bad malloc flags: %x\n", indx);
                        backtrace();
                        flags |= M_WAITOK;
                        once++;
                }
        }
#endif
#if 0
        if (size == 0)
                Debugger("zero size malloc");
#endif
#ifdef MALLOC_MAKE_FAILURES
        if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) {
                atomic_add_int(&malloc_nowait_count, 1);
                if ((malloc_nowait_count % malloc_failure_rate) == 0) {
                        atomic_add_int(&malloc_failure_count, 1);
                        t_malloc_fail = time_uptime;
                        return (NULL);
                }
        }
#endif
        if (flags & M_WAITOK)
                KASSERT(curthread->td_intr_nesting_level == 0,
                    ("malloc(M_WAITOK) in interrupt context"));
        if (size <= KMEM_ZMAX) {
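                /*
                 * Round the request up to the next KMEM_ZBASE (16-byte)
                 * boundary and look up the smallest power-of-two zone
                 * that can hold it.
                 */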
                if (size & KMEM_ZMASK)
                        size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
                indx = kmemsize[size >> KMEM_ZSHIFT];
                zone = kmemzones[indx].kz_zone;
#ifdef MALLOC_PROFILE
                krequests[size >> KMEM_ZSHIFT]++;
#endif
                va = uma_zalloc(zone, flags);
                mtx_lock(&ksp->ks_mtx);
                if (va == NULL)
                        goto out;

                ksp->ks_size |= 1 << indx;
                size = zone->uz_size;
        } else {
                size = roundup(size, PAGE_SIZE);
                zone = NULL;
                va = uma_large_malloc(size, flags);
                mtx_lock(&ksp->ks_mtx);
                if (va == NULL)
                        goto out;
        }
        ksp->ks_memuse += size;
        ksp->ks_inuse++;
out:
        ksp->ks_calls++;
        if (ksp->ks_memuse > ksp->ks_maxused)
                ksp->ks_maxused = ksp->ks_memuse;

        mtx_unlock(&ksp->ks_mtx);
        if (flags & M_WAITOK)
                KASSERT(va != NULL, ("malloc(M_WAITOK) returned NULL"));
        else if (va == NULL)
                t_malloc_fail = time_uptime;
#ifdef DIAGNOSTIC
        if (va != NULL && !(flags & M_ZERO)) {
                memset(va, 0x70, osize);
        }
#endif
        return ((void *) va);
}

/*
 * free:
 *
 * Free a block of memory allocated by malloc.
 *
 * This routine may not block.
 */
void
free(addr, type)
        void *addr;
        struct malloc_type *type;
{
        register struct malloc_type *ksp = type;
        uma_slab_t slab;
        u_long size;

        /* free(NULL, ...) does nothing */
        if (addr == NULL)
                return;

        KASSERT(ksp->ks_memuse > 0,
            ("malloc(9)/free(9) confusion.\n%s",
             "Probably freeing with wrong type, but maybe not here."));
        size = 0;

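        /*
         * Look up the slab backing this address; its flags tell us whether
         * the memory came from one of the power-of-two zones or from
         * uma_large_malloc().
         */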
        slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK));

        if (slab == NULL)
                panic("free: address %p(%p) has not been allocated.\n",
                    addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));


        if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
#ifdef INVARIANTS
                struct malloc_type **mtp = addr;
#endif
                size = slab->us_zone->uz_size;
#ifdef INVARIANTS
                /*
                 * Stash a pointer to the malloc_type that most recently
                 * freed this memory in the last pointer-sized slot of the
                 * block.  If the freed memory is stepped on later, this
                 * records who most likely did it.
                 *
                 * This code assumes that the allocation size is a multiple
                 * of the pointer size (8 bytes on 64-bit machines).
                 */
                mtp = (struct malloc_type **)
                    ((unsigned long)mtp & ~UMA_ALIGN_PTR);
                mtp += (size - sizeof(struct malloc_type *)) /
                    sizeof(struct malloc_type *);
                *mtp = type;
#endif
                uma_zfree_arg(slab->us_zone, addr, slab);
        } else {
                size = slab->us_size;
                uma_large_free(slab);
        }
        mtx_lock(&ksp->ks_mtx);
        KASSERT(size <= ksp->ks_memuse,
            ("malloc(9)/free(9) confusion.\n%s",
             "Probably freeing with wrong type, but maybe not here."));
        ksp->ks_memuse -= size;
        ksp->ks_inuse--;
        mtx_unlock(&ksp->ks_mtx);
}

/*
 * realloc: change the size of a memory block
 */
void *
realloc(addr, size, type, flags)
        void *addr;
        unsigned long size;
        struct malloc_type *type;
        int flags;
{
        uma_slab_t slab;
        unsigned long alloc;
        void *newaddr;

        /* realloc(NULL, ...) is equivalent to malloc(...) */
        if (addr == NULL)
                return (malloc(size, type, flags));

        slab = vtoslab((vm_offset_t)addr & ~(UMA_SLAB_MASK));

        /* Sanity check */
        KASSERT(slab != NULL,
            ("realloc: address %p out of range", (void *)addr));

        /* Get the size of the original block */
        if (slab->us_zone)
                alloc = slab->us_zone->uz_size;
        else
                alloc = slab->us_size;

        /* Reuse the original block if appropriate */
        if (size <= alloc
            && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE))
                return (addr);

        /* Allocate a new, bigger (or smaller) block */
        if ((newaddr = malloc(size, type, flags)) == NULL)
                return (NULL);

        /* Copy over original contents */
        bcopy(addr, newaddr, min(size, alloc));
        free(addr, type);
        return (newaddr);
}

/*
 * reallocf: same as realloc() but free memory on failure.
 */
void *
reallocf(addr, size, type, flags)
        void *addr;
        unsigned long size;
        struct malloc_type *type;
        int flags;
{
        void *mem;

        if ((mem = realloc(addr, size, type, flags)) == NULL)
                free(addr, type);
        return (mem);
}

/*
 * Initialize the kernel memory allocator
 */
/* ARGSUSED*/
static void
kmeminit(dummy)
        void *dummy;
{
        u_int8_t indx;
        u_long npg;
        u_long mem_size;
        int i;

        mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);

        /*
         * Try to auto-tune the kernel memory size, so that it is
         * appropriate for a wider range of machine sizes.  On an x86,
         * a VM_KMEM_SIZE_SCALE value of 4 is good, while a VM_KMEM_SIZE
         * of 12MB is a fair compromise.  VM_KMEM_SIZE_MAX depends on the
         * maximum KVA space available; on an x86 with a total KVA space
         * of 256MB, try to keep VM_KMEM_SIZE_MAX at 80MB or below.
         *
         * Note that the kmem_map is also used by the zone allocator,
         * so make sure that there is enough space.
         */
        vm_kmem_size = VM_KMEM_SIZE;
        mem_size = cnt.v_page_count;

#if defined(VM_KMEM_SIZE_SCALE)
        if ((mem_size / VM_KMEM_SIZE_SCALE) > (vm_kmem_size / PAGE_SIZE))
                vm_kmem_size = (mem_size / VM_KMEM_SIZE_SCALE) * PAGE_SIZE;
#endif

#if defined(VM_KMEM_SIZE_MAX)
        if (vm_kmem_size >= VM_KMEM_SIZE_MAX)
                vm_kmem_size = VM_KMEM_SIZE_MAX;
#endif

        /* Allow final override from the kernel environment */
        TUNABLE_INT_FETCH("kern.vm.kmem.size", &vm_kmem_size);

        /*
         * Limit kmem virtual size to twice the physical memory.
         * This allows for kmem map sparseness, but limits the size
         * to something sane.  Be careful not to overflow the 32-bit
         * ints while doing the check.
         */
        if (((vm_kmem_size / 2) / PAGE_SIZE) > cnt.v_page_count)
                vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE;

        /*
         * In mbuf_init(), we set up submaps for mbufs and clusters, in which
         * case we rounddown() (nmbufs * MSIZE) and (nmbclusters * MCLBYTES),
         * respectively. Mathematically, this means that what we do here may
         * amount to slightly more address space than we need for the submaps,
         * but it never hurts to have an extra page in kmem_map.
         */
        npg = (nmbufs*MSIZE + nmbclusters*MCLBYTES + vm_kmem_size) / PAGE_SIZE;

        kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
            (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE));
        kmem_map->system_map = 1;

        uma_startup2();

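        /*
         * Create a UMA zone for each of the power-of-two buckets and, for
         * every KMEM_ZBASE-sized step up to that bucket's size, record the
         * bucket's index in the kmemsize[] lookup table.
         */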
        for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) {
                int size = kmemzones[indx].kz_size;
                char *name = kmemzones[indx].kz_name;

                kmemzones[indx].kz_zone = uma_zcreate(name, size,
#ifdef INVARIANTS
                    mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini,
#else
                    NULL, NULL, NULL, NULL,
#endif
                    UMA_ALIGN_PTR, UMA_ZONE_MALLOC);

                for (; i <= size; i += KMEM_ZBASE)
                        kmemsize[i >> KMEM_ZSHIFT] = indx;

        }
}

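/*
 * malloc_init:  Register a malloc type, normally from the SYSINIT that
 * MALLOC_DEFINE() sets up, by linking it onto the kmemstatistics list
 * and initializing its statistics mutex.
 */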
void
malloc_init(data)
        void *data;
{
        struct malloc_type *type = (struct malloc_type *)data;

        mtx_lock(&malloc_mtx);
        if (type->ks_magic != M_MAGIC)
                panic("malloc type lacks magic");

        if (cnt.v_page_count == 0)
                panic("malloc_init not allowed before vm init");

        if (type->ks_next != NULL) {
                /* Already registered; don't leak malloc_mtx. */
                mtx_unlock(&malloc_mtx);
                return;
        }

        type->ks_next = kmemstatistics;
        kmemstatistics = type;
        mtx_init(&type->ks_mtx, type->ks_shortdesc, "Malloc Stats", MTX_DEF);
        mtx_unlock(&malloc_mtx);
}

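/*
 * malloc_uninit:  Unregister a malloc type, e.g. when the module that
 * defined it is unloaded: unlink it from the kmemstatistics list and
 * destroy its statistics mutex.
 */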
void
malloc_uninit(data)
        void *data;
{
        struct malloc_type *type = (struct malloc_type *)data;
        struct malloc_type *t;

        mtx_lock(&malloc_mtx);
        mtx_lock(&type->ks_mtx);
        if (type->ks_magic != M_MAGIC)
                panic("malloc type lacks magic");

        if (cnt.v_page_count == 0)
                panic("malloc_uninit not allowed before vm init");

        if (type == kmemstatistics)
                kmemstatistics = type->ks_next;
        else {
                for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) {
                        if (t->ks_next == type) {
                                t->ks_next = type->ks_next;
                                break;
                        }
                }
        }
        type->ks_next = NULL;
        mtx_destroy(&type->ks_mtx);
        mtx_unlock(&malloc_mtx);
}

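/*
 * sysctl_kern_malloc:  Format the per-type allocation statistics as a
 * text table, one line per malloc type plus a header, and copy it out
 * to the requesting process.
 */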
static int
sysctl_kern_malloc(SYSCTL_HANDLER_ARGS)
{
        struct malloc_type *type;
        int linesize = 128;
        int curline;
        int bufsize;
        int first;
        int error;
        char *buf;
        char *p;
        int cnt;
        int len;
        int i;

        cnt = 0;

        mtx_lock(&malloc_mtx);
        for (type = kmemstatistics; type != NULL; type = type->ks_next)
                cnt++;

        mtx_unlock(&malloc_mtx);
        bufsize = linesize * (cnt + 1);
        p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
        mtx_lock(&malloc_mtx);

        len = snprintf(p, linesize,
            "\n Type InUse MemUse HighUse Requests Size(s)\n");
        p += len;

        for (type = kmemstatistics; cnt != 0 && type != NULL;
            type = type->ks_next, cnt--) {
                if (type->ks_calls == 0)
                        continue;

                curline = linesize - 2; /* Leave room for the \n */
                len = snprintf(p, curline, "%13s%6lu%6luK%7luK%9llu",
                    type->ks_shortdesc,
                    type->ks_inuse,
                    (type->ks_memuse + 1023) / 1024,
                    (type->ks_maxused + 1023) / 1024,
                    (long long unsigned)type->ks_calls);
                curline -= len;
                p += len;

                first = 1;
                for (i = 0; i < sizeof(kmemzones) / sizeof(kmemzones[0]) - 1;
                    i++) {
                        if (type->ks_size & (1 << i)) {
                                if (first)
                                        len = snprintf(p, curline, " ");
                                else
                                        len = snprintf(p, curline, ",");
                                curline -= len;
                                p += len;

                                len = snprintf(p, curline,
                                    "%s", kmemzones[i].kz_name);
                                curline -= len;
                                p += len;

                                first = 0;
                        }
                }

                len = snprintf(p, 2, "\n");
                p += len;
        }

        mtx_unlock(&malloc_mtx);
        error = SYSCTL_OUT(req, buf, p - buf);

        free(buf, M_TEMP);
        return (error);
}

SYSCTL_OID(_kern, OID_AUTO, malloc, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_kern_malloc, "A", "Malloc Stats");

#ifdef MALLOC_PROFILE

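/*
 * sysctl_kern_mprof:  Report, for each request-size bucket, how many
 * allocations were made and how much memory the rounding up to the
 * bucket size has wasted.
 */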
static int
sysctl_kern_mprof(SYSCTL_HANDLER_ARGS)
{
        int linesize = 64;
        uint64_t count;
        uint64_t waste;
        uint64_t mem;
        int bufsize;
        int error;
        char *buf;
        int rsize;
        int size;
        char *p;
        int len;
        int i;

        bufsize = linesize * (KMEM_ZSIZE + 1);
        bufsize += 128; /* For the stats line */
        bufsize += 128; /* For the banner line */
        waste = 0;
        mem = 0;

        p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
        len = snprintf(p, bufsize,
            "\n Size Requests Real Size\n");
        bufsize -= len;
        p += len;

        for (i = 0; i < KMEM_ZSIZE; i++) {
                size = i << KMEM_ZSHIFT;
                rsize = kmemzones[kmemsize[i]].kz_size;
                count = (long long unsigned)krequests[i];

                len = snprintf(p, bufsize, "%6d%28llu%11d\n",
                    size, (unsigned long long)count, rsize);
                bufsize -= len;
                p += len;

                if ((rsize * count) > (size * count))
                        waste += (rsize * count) - (size * count);
                mem += (rsize * count);
        }

        len = snprintf(p, bufsize,
            "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n",
            (unsigned long long)mem, (unsigned long long)waste);
        p += len;

        error = SYSCTL_OUT(req, buf, p - buf);

        free(buf, M_TEMP);
        return (error);
}

SYSCTL_OID(_kern, OID_AUTO, mprof, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_kern_mprof, "A", "Malloc Profiling");
#endif /* MALLOC_PROFILE */