FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_malloc.c


    1 /*-
    2  * Copyright (c) 1987, 1991, 1993
    3  *      The Regents of the University of California.
    4  * Copyright (c) 2005-2009 Robert N. M. Watson
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 4. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)kern_malloc.c       8.3 (Berkeley) 1/4/94
   32  */
   33 
   34 /*
   35  * Kernel malloc(9) implementation -- general purpose kernel memory allocator
   36  * based on memory types.  Back end is implemented using the UMA(9) zone
    37  * allocator.  A set of fixed-size buckets is used for smaller allocations,
   38  * and a special UMA allocation interface is used for larger allocations.
   39  * Callers declare memory types, and statistics are maintained independently
   40  * for each memory type.  Statistics are maintained per-CPU for performance
   41  * reasons.  See malloc(9) and comments in malloc.h for a detailed
   42  * description.
   43  */
   44 
   45 #include <sys/cdefs.h>
   46 __FBSDID("$FreeBSD: releng/10.0/sys/kern/kern_malloc.c 256068 2013-10-05 18:53:03Z alc $");
   47 
   48 #include "opt_ddb.h"
   49 #include "opt_kdtrace.h"
   50 #include "opt_vm.h"
   51 
   52 #include <sys/param.h>
   53 #include <sys/systm.h>
   54 #include <sys/kdb.h>
   55 #include <sys/kernel.h>
   56 #include <sys/lock.h>
   57 #include <sys/malloc.h>
   58 #include <sys/mutex.h>
   59 #include <sys/vmmeter.h>
   60 #include <sys/proc.h>
   61 #include <sys/sbuf.h>
   62 #include <sys/sysctl.h>
   63 #include <sys/time.h>
   64 #include <sys/vmem.h>
   65 
   66 #include <vm/vm.h>
   67 #include <vm/pmap.h>
   68 #include <vm/vm_pageout.h>
   69 #include <vm/vm_param.h>
   70 #include <vm/vm_kern.h>
   71 #include <vm/vm_extern.h>
   72 #include <vm/vm_map.h>
   73 #include <vm/vm_page.h>
   74 #include <vm/uma.h>
   75 #include <vm/uma_int.h>
   76 #include <vm/uma_dbg.h>
   77 
   78 #ifdef DEBUG_MEMGUARD
   79 #include <vm/memguard.h>
   80 #endif
   81 #ifdef DEBUG_REDZONE
   82 #include <vm/redzone.h>
   83 #endif
   84 
   85 #if defined(INVARIANTS) && defined(__i386__)
   86 #include <machine/cpu.h>
   87 #endif
   88 
   89 #include <ddb/ddb.h>
   90 
   91 #ifdef KDTRACE_HOOKS
   92 #include <sys/dtrace_bsd.h>
   93 
   94 dtrace_malloc_probe_func_t      dtrace_malloc_probe;
   95 #endif
   96 
   97 /*
   98  * When realloc() is called, if the new size is sufficiently smaller than
   99  * the old size, realloc() will allocate a new, smaller block to avoid
  100  * wasting memory. 'Sufficiently smaller' is defined as: newsize <=
  101  * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'.
  102  */
  103 #ifndef REALLOC_FRACTION
  104 #define REALLOC_FRACTION        1       /* new block if <= half the size */
  105 #endif
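/*
 * Illustrative sketch of the rule above: with the default REALLOC_FRACTION
 * of 1, realloc() copies into a new, smaller block only once the requested
 * size drops to half of the old block size or less.  Shrinking a
 * hypothetical 1024-byte block:
 *
 *	realloc(p, 600, ...)	600 > 1024 >> 1, so the block is reused
 *	realloc(p, 512, ...)	512 <= 1024 >> 1, so a new 512-byte block
 *				is allocated and the contents are copied
 */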
  106 
  107 /*
  108  * Centrally define some common malloc types.
  109  */
  110 MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches");
  111 MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
  112 MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");
  113 
  114 MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
  115 MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
  116 
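/*
 * Illustrative sketch of declaring and using a private malloc type;
 * M_MYDEV, mydev_softc, and the surrounding code are invented names:
 *
 *	MALLOC_DEFINE(M_MYDEV, "mydev", "mydev driver state");
 *
 *	struct mydev_softc *sc;
 *
 *	sc = malloc(sizeof(*sc), M_MYDEV, M_WAITOK | M_ZERO);
 *	...
 *	free(sc, M_MYDEV);
 *
 * Statistics for the type then appear under its own name in the
 * vmstat -m output.
 */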
  117 static struct malloc_type *kmemstatistics;
  118 static int kmemcount;
  119 
  120 #define KMEM_ZSHIFT     4
  121 #define KMEM_ZBASE      16
  122 #define KMEM_ZMASK      (KMEM_ZBASE - 1)
  123 
  124 #define KMEM_ZMAX       PAGE_SIZE
  125 #define KMEM_ZSIZE      (KMEM_ZMAX >> KMEM_ZSHIFT)
  126 static uint8_t kmemsize[KMEM_ZSIZE + 1];
  127 
  128 #ifndef MALLOC_DEBUG_MAXZONES
  129 #define MALLOC_DEBUG_MAXZONES   1
  130 #endif
  131 static int numzones = MALLOC_DEBUG_MAXZONES;
  132 
  133 /*
  134  * Small malloc(9) memory allocations are allocated from a set of UMA buckets
  135  * of various sizes.
  136  *
  137  * XXX: The comment here used to read "These won't be powers of two for
  138  * long."  It's possible that a significant amount of wasted memory could be
  139  * recovered by tuning the sizes of these buckets.
  140  */
  141 struct {
  142         int kz_size;
  143         char *kz_name;
  144         uma_zone_t kz_zone[MALLOC_DEBUG_MAXZONES];
  145 } kmemzones[] = {
  146         {16, "16", },
  147         {32, "32", },
  148         {64, "64", },
  149         {128, "128", },
  150         {256, "256", },
  151         {512, "512", },
  152         {1024, "1024", },
  153         {2048, "2048", },
  154         {4096, "4096", },
  155 #if PAGE_SIZE > 4096
  156         {8192, "8192", },
  157 #if PAGE_SIZE > 8192
  158         {16384, "16384", },
  159 #if PAGE_SIZE > 16384
  160         {32768, "32768", },
  161 #if PAGE_SIZE > 32768
  162         {65536, "65536", },
  163 #if PAGE_SIZE > 65536
  164 #error  "Unsupported PAGE_SIZE"
  165 #endif  /* 65536 */
  166 #endif  /* 32768 */
  167 #endif  /* 16384 */
  168 #endif  /* 8192 */
  169 #endif  /* 4096 */
  170         {0, NULL},
  171 };
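/*
 * Illustrative sketch of the size-to-bucket mapping built from this
 * table: malloc() first rounds the request up to a multiple of
 * KMEM_ZBASE (16), then indexes kmemsize[] (filled in by mallocinit())
 * with the rounded size.  For a hypothetical 100-byte request:
 *
 *	size = (100 & ~KMEM_ZMASK) + KMEM_ZBASE;	= 112
 *	indx = kmemsize[112 >> KMEM_ZSHIFT];		= the "128" zone
 *
 * so the allocation is satisfied from the 128-byte bucket and 28 of the
 * returned bytes go unused.
 */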
  172 
  173 /*
  174  * Zone to allocate malloc type descriptions from.  For ABI reasons, memory
  175  * types are described by a data structure passed by the declaring code, but
  176  * the malloc(9) implementation has its own data structure describing the
  177  * type and statistics.  This permits the malloc(9)-internal data structures
  178  * to be modified without breaking binary-compiled kernel modules that
  179  * declare malloc types.
  180  */
  181 static uma_zone_t mt_zone;
  182 
  183 u_long vm_kmem_size;
  184 SYSCTL_ULONG(_vm, OID_AUTO, kmem_size, CTLFLAG_RDTUN, &vm_kmem_size, 0,
  185     "Size of kernel memory");
  186 
  187 static u_long vm_kmem_size_min;
  188 SYSCTL_ULONG(_vm, OID_AUTO, kmem_size_min, CTLFLAG_RDTUN, &vm_kmem_size_min, 0,
  189     "Minimum size of kernel memory");
  190 
  191 static u_long vm_kmem_size_max;
  192 SYSCTL_ULONG(_vm, OID_AUTO, kmem_size_max, CTLFLAG_RDTUN, &vm_kmem_size_max, 0,
  193     "Maximum size of kernel memory");
  194 
  195 static u_int vm_kmem_size_scale;
  196 SYSCTL_UINT(_vm, OID_AUTO, kmem_size_scale, CTLFLAG_RDTUN, &vm_kmem_size_scale, 0,
  197     "Scale factor for kernel memory size");
  198 
  199 static int sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS);
  200 SYSCTL_PROC(_vm, OID_AUTO, kmem_map_size,
  201     CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0,
  202     sysctl_kmem_map_size, "LU", "Current kmem allocation size");
  203 
  204 static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS);
  205 SYSCTL_PROC(_vm, OID_AUTO, kmem_map_free,
  206     CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0,
  207     sysctl_kmem_map_free, "LU", "Free space in kmem");
  208 
  209 /*
  210  * The malloc_mtx protects the kmemstatistics linked list.
  211  */
  212 struct mtx malloc_mtx;
  213 
  214 #ifdef MALLOC_PROFILE
  215 uint64_t krequests[KMEM_ZSIZE + 1];
  216 
  217 static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS);
  218 #endif
  219 
  220 static int sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS);
  221 
  222 /*
  223  * time_uptime of the last malloc(9) failure (induced or real).
  224  */
  225 static time_t t_malloc_fail;
  226 
  227 #if defined(MALLOC_MAKE_FAILURES) || (MALLOC_DEBUG_MAXZONES > 1)
  228 static SYSCTL_NODE(_debug, OID_AUTO, malloc, CTLFLAG_RD, 0,
  229     "Kernel malloc debugging options");
  230 #endif
  231 
  232 /*
  233  * malloc(9) fault injection -- cause malloc failures every (n) mallocs when
  234  * the caller specifies M_NOWAIT.  If set to 0, no failures are caused.
  235  */
  236 #ifdef MALLOC_MAKE_FAILURES
  237 static int malloc_failure_rate;
  238 static int malloc_nowait_count;
  239 static int malloc_failure_count;
  240 SYSCTL_INT(_debug_malloc, OID_AUTO, failure_rate, CTLFLAG_RW,
  241     &malloc_failure_rate, 0, "Every (n) mallocs with M_NOWAIT will fail");
  242 TUNABLE_INT("debug.malloc.failure_rate", &malloc_failure_rate);
  243 SYSCTL_INT(_debug_malloc, OID_AUTO, failure_count, CTLFLAG_RD,
  244     &malloc_failure_count, 0, "Number of imposed M_NOWAIT malloc failures");
  245 #endif
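/*
 * Illustrative usage on a kernel built with MALLOC_MAKE_FAILURES: to
 * fail every 100th M_NOWAIT allocation,
 *
 *	sysctl debug.malloc.failure_rate=100
 *
 * The number of failures injected so far can be read back from
 * debug.malloc.failure_count.
 */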
  246 
  247 static int
  248 sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS)
  249 {
  250         u_long size;
  251 
  252         size = vmem_size(kmem_arena, VMEM_ALLOC);
  253         return (sysctl_handle_long(oidp, &size, 0, req));
  254 }
  255 
  256 static int
  257 sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS)
  258 {
  259         u_long size;
  260 
  261         size = vmem_size(kmem_arena, VMEM_FREE);
  262         return (sysctl_handle_long(oidp, &size, 0, req));
  263 }
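/*
 * Illustrative usage: the two read-only sysctls defined above can be
 * inspected from userland with
 *
 *	sysctl vm.kmem_map_size vm.kmem_map_free
 */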
  264 
  265 /*
  266  * malloc(9) uma zone separation -- sub-page buffer overruns in one
  267  * malloc type will affect only a subset of other malloc types.
  268  */
  269 #if MALLOC_DEBUG_MAXZONES > 1
  270 static void
  271 tunable_set_numzones(void)
  272 {
  273 
  274         TUNABLE_INT_FETCH("debug.malloc.numzones",
  275             &numzones);
  276 
  277         /* Sanity check the number of malloc uma zones. */
  278         if (numzones <= 0)
  279                 numzones = 1;
  280         if (numzones > MALLOC_DEBUG_MAXZONES)
  281                 numzones = MALLOC_DEBUG_MAXZONES;
  282 }
  283 SYSINIT(numzones, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_set_numzones, NULL);
  284 SYSCTL_INT(_debug_malloc, OID_AUTO, numzones, CTLFLAG_RDTUN,
  285     &numzones, 0, "Number of malloc uma subzones");
  286 
  287 /*
  288  * Any number that changes regularly is an okay choice for the
   289  * offset.  Build numbers are pretty good if you have them.
  290  */
  291 static u_int zone_offset = __FreeBSD_version;
  292 TUNABLE_INT("debug.malloc.zone_offset", &zone_offset);
  293 SYSCTL_UINT(_debug_malloc, OID_AUTO, zone_offset, CTLFLAG_RDTUN,
  294     &zone_offset, 0, "Separate malloc types by examining the "
  295     "Nth character in the malloc type short description.");
  296 
  297 static u_int
  298 mtp_get_subzone(const char *desc)
  299 {
  300         size_t len;
  301         u_int val;
  302 
  303         if (desc == NULL || (len = strlen(desc)) == 0)
  304                 return (0);
  305         val = desc[zone_offset % len];
  306         return (val % numzones);
  307 }
  308 #elif MALLOC_DEBUG_MAXZONES == 0
  309 #error "MALLOC_DEBUG_MAXZONES must be positive."
  310 #else
  311 static inline u_int
  312 mtp_get_subzone(const char *desc)
  313 {
  314 
  315         return (0);
  316 }
  317 #endif /* MALLOC_DEBUG_MAXZONES > 1 */
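/*
 * Illustrative sketch of the subzone selection above: the hash is just
 * one character of the type's short description, so a given type lands
 * in the same subzone on every boot as long as zone_offset is unchanged.
 * Assuming numzones = 8 and zone_offset = 3, the type "devbuf" maps to:
 *
 *	val = "devbuf"[3 % strlen("devbuf")];	= 'b' (0x62)
 *	subzone = 0x62 % 8;			= 2
 */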
  318 
  319 int
  320 malloc_last_fail(void)
  321 {
  322 
  323         return (time_uptime - t_malloc_fail);
  324 }
  325 
   326 /*
   327  * An allocation has succeeded -- update malloc type statistics for the
   328  * amount of the bucket size.  Occurs within a critical section so that
   329  * the thread isn't preempted and doesn't migrate while updating per-CPU
   330  * statistics.
   331  */
  332 static void
  333 malloc_type_zone_allocated(struct malloc_type *mtp, unsigned long size,
  334     int zindx)
  335 {
  336         struct malloc_type_internal *mtip;
  337         struct malloc_type_stats *mtsp;
  338 
  339         critical_enter();
  340         mtip = mtp->ks_handle;
  341         mtsp = &mtip->mti_stats[curcpu];
  342         if (size > 0) {
  343                 mtsp->mts_memalloced += size;
  344                 mtsp->mts_numallocs++;
  345         }
  346         if (zindx != -1)
  347                 mtsp->mts_size |= 1 << zindx;
  348 
  349 #ifdef KDTRACE_HOOKS
  350         if (dtrace_malloc_probe != NULL) {
  351                 uint32_t probe_id = mtip->mti_probes[DTMALLOC_PROBE_MALLOC];
  352                 if (probe_id != 0)
  353                         (dtrace_malloc_probe)(probe_id,
  354                             (uintptr_t) mtp, (uintptr_t) mtip,
  355                             (uintptr_t) mtsp, size, zindx);
  356         }
  357 #endif
  358 
  359         critical_exit();
  360 }
  361 
  362 void
  363 malloc_type_allocated(struct malloc_type *mtp, unsigned long size)
  364 {
  365 
  366         if (size > 0)
  367                 malloc_type_zone_allocated(mtp, size, -1);
  368 }
  369 
  370 /*
  371  * A free operation has occurred -- update malloc type statistics for the
  372  * amount of the bucket size.  Occurs within a critical section so that the
  373  * thread isn't preempted and doesn't migrate while updating per-CPU
  374  * statistics.
  375  */
  376 void
  377 malloc_type_freed(struct malloc_type *mtp, unsigned long size)
  378 {
  379         struct malloc_type_internal *mtip;
  380         struct malloc_type_stats *mtsp;
  381 
  382         critical_enter();
  383         mtip = mtp->ks_handle;
  384         mtsp = &mtip->mti_stats[curcpu];
  385         mtsp->mts_memfreed += size;
  386         mtsp->mts_numfrees++;
  387 
  388 #ifdef KDTRACE_HOOKS
  389         if (dtrace_malloc_probe != NULL) {
  390                 uint32_t probe_id = mtip->mti_probes[DTMALLOC_PROBE_FREE];
  391                 if (probe_id != 0)
  392                         (dtrace_malloc_probe)(probe_id,
  393                             (uintptr_t) mtp, (uintptr_t) mtip,
  394                             (uintptr_t) mtsp, size, 0);
  395         }
  396 #endif
  397 
  398         critical_exit();
  399 }
  400 
  401 /*
  402  *      contigmalloc:
  403  *
  404  *      Allocate a block of physically contiguous memory.
  405  *
  406  *      If M_NOWAIT is set, this routine will not block and return NULL if
  407  *      the allocation fails.
  408  */
  409 void *
  410 contigmalloc(unsigned long size, struct malloc_type *type, int flags,
  411     vm_paddr_t low, vm_paddr_t high, unsigned long alignment,
  412     vm_paddr_t boundary)
  413 {
  414         void *ret;
  415 
  416         ret = (void *)kmem_alloc_contig(kernel_arena, size, flags, low, high,
  417             alignment, boundary, VM_MEMATTR_DEFAULT);
  418         if (ret != NULL)
  419                 malloc_type_allocated(type, round_page(size));
  420         return (ret);
  421 }
  422 
  423 /*
  424  *      contigfree:
  425  *
  426  *      Free a block of memory allocated by contigmalloc.
  427  *
  428  *      This routine may not block.
  429  */
  430 void
  431 contigfree(void *addr, unsigned long size, struct malloc_type *type)
  432 {
  433 
  434         kmem_free(kernel_arena, (vm_offset_t)addr, size);
  435         malloc_type_freed(type, round_page(size));
  436 }
  437 
  438 /*
  439  *      malloc:
  440  *
  441  *      Allocate a block of memory.
  442  *
  443  *      If M_NOWAIT is set, this routine will not block and return NULL if
  444  *      the allocation fails.
  445  */
  446 void *
  447 malloc(unsigned long size, struct malloc_type *mtp, int flags)
  448 {
  449         int indx;
  450         struct malloc_type_internal *mtip;
  451         caddr_t va;
  452         uma_zone_t zone;
  453 #if defined(DIAGNOSTIC) || defined(DEBUG_REDZONE)
  454         unsigned long osize = size;
  455 #endif
  456 
  457 #ifdef INVARIANTS
  458         KASSERT(mtp->ks_magic == M_MAGIC, ("malloc: bad malloc type magic"));
  459         /*
  460          * Check that exactly one of M_WAITOK or M_NOWAIT is specified.
  461          */
  462         indx = flags & (M_WAITOK | M_NOWAIT);
  463         if (indx != M_NOWAIT && indx != M_WAITOK) {
  464                 static  struct timeval lasterr;
  465                 static  int curerr, once;
  466                 if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) {
  467                         printf("Bad malloc flags: %x\n", indx);
  468                         kdb_backtrace();
  469                         flags |= M_WAITOK;
  470                         once++;
  471                 }
  472         }
  473 #endif
  474 #ifdef MALLOC_MAKE_FAILURES
  475         if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) {
  476                 atomic_add_int(&malloc_nowait_count, 1);
  477                 if ((malloc_nowait_count % malloc_failure_rate) == 0) {
  478                         atomic_add_int(&malloc_failure_count, 1);
  479                         t_malloc_fail = time_uptime;
  480                         return (NULL);
  481                 }
  482         }
  483 #endif
  484         if (flags & M_WAITOK)
  485                 KASSERT(curthread->td_intr_nesting_level == 0,
  486                    ("malloc(M_WAITOK) in interrupt context"));
  487 
  488 #ifdef DEBUG_MEMGUARD
  489         if (memguard_cmp_mtp(mtp, size)) {
  490                 va = memguard_alloc(size, flags);
  491                 if (va != NULL)
  492                         return (va);
  493                 /* This is unfortunate but should not be fatal. */
  494         }
  495 #endif
  496 
  497 #ifdef DEBUG_REDZONE
  498         size = redzone_size_ntor(size);
  499 #endif
  500 
  501         if (size <= KMEM_ZMAX) {
  502                 mtip = mtp->ks_handle;
  503                 if (size & KMEM_ZMASK)
  504                         size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
  505                 indx = kmemsize[size >> KMEM_ZSHIFT];
  506                 KASSERT(mtip->mti_zone < numzones,
  507                     ("mti_zone %u out of range %d",
  508                     mtip->mti_zone, numzones));
  509                 zone = kmemzones[indx].kz_zone[mtip->mti_zone];
  510 #ifdef MALLOC_PROFILE
  511                 krequests[size >> KMEM_ZSHIFT]++;
  512 #endif
  513                 va = uma_zalloc(zone, flags);
  514                 if (va != NULL)
  515                         size = zone->uz_size;
  516                 malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx);
  517         } else {
  518                 size = roundup(size, PAGE_SIZE);
  519                 zone = NULL;
  520                 va = uma_large_malloc(size, flags);
  521                 malloc_type_allocated(mtp, va == NULL ? 0 : size);
  522         }
  523         if (flags & M_WAITOK)
  524                 KASSERT(va != NULL, ("malloc(M_WAITOK) returned NULL"));
  525         else if (va == NULL)
  526                 t_malloc_fail = time_uptime;
  527 #ifdef DIAGNOSTIC
  528         if (va != NULL && !(flags & M_ZERO)) {
  529                 memset(va, 0x70, osize);
  530         }
  531 #endif
  532 #ifdef DEBUG_REDZONE
  533         if (va != NULL)
  534                 va = redzone_setup(va, osize);
  535 #endif
  536         return ((void *) va);
  537 }
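/*
 * Illustrative caller-side sketch (buf, len, and the error path are
 * invented): M_WAITOK requests may sleep, are forbidden in interrupt
 * context, and never return NULL; M_NOWAIT requests never sleep but can
 * fail, so the result must be checked:
 *
 *	buf = malloc(len, M_DEVBUF, M_NOWAIT);
 *	if (buf == NULL)
 *		return (ENOMEM);
 */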
  538 
  539 /*
  540  *      free:
  541  *
  542  *      Free a block of memory allocated by malloc.
  543  *
  544  *      This routine may not block.
  545  */
  546 void
  547 free(void *addr, struct malloc_type *mtp)
  548 {
  549         uma_slab_t slab;
  550         u_long size;
  551 
  552         KASSERT(mtp->ks_magic == M_MAGIC, ("free: bad malloc type magic"));
  553 
  554         /* free(NULL, ...) does nothing */
  555         if (addr == NULL)
  556                 return;
  557 
  558 #ifdef DEBUG_MEMGUARD
  559         if (is_memguard_addr(addr)) {
  560                 memguard_free(addr);
  561                 return;
  562         }
  563 #endif
  564 
  565 #ifdef DEBUG_REDZONE
  566         redzone_check(addr);
  567         addr = redzone_addr_ntor(addr);
  568 #endif
  569 
  570         slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK));
  571 
  572         if (slab == NULL)
  573                 panic("free: address %p(%p) has not been allocated.\n",
  574                     addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));
  575 
  576         if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
  577 #ifdef INVARIANTS
  578                 struct malloc_type **mtpp = addr;
  579 #endif
  580                 size = slab->us_keg->uk_size;
  581 #ifdef INVARIANTS
  582                 /*
  583                  * Cache a pointer to the malloc_type that most recently freed
  584                  * this memory here.  This way we know who is most likely to
  585                  * have stepped on it later.
  586                  *
  587                  * This code assumes that size is a multiple of 8 bytes for
   588                  * 64-bit machines.
  589                  */
  590                 mtpp = (struct malloc_type **)
  591                     ((unsigned long)mtpp & ~UMA_ALIGN_PTR);
  592                 mtpp += (size - sizeof(struct malloc_type *)) /
  593                     sizeof(struct malloc_type *);
  594                 *mtpp = mtp;
  595 #endif
  596                 uma_zfree_arg(LIST_FIRST(&slab->us_keg->uk_zones), addr, slab);
  597         } else {
  598                 size = slab->us_size;
  599                 uma_large_free(slab);
  600         }
  601         malloc_type_freed(mtp, size);
  602 }
  603 
  604 /*
  605  *      realloc: change the size of a memory block
  606  */
  607 void *
  608 realloc(void *addr, unsigned long size, struct malloc_type *mtp, int flags)
  609 {
  610         uma_slab_t slab;
  611         unsigned long alloc;
  612         void *newaddr;
  613 
  614         KASSERT(mtp->ks_magic == M_MAGIC,
  615             ("realloc: bad malloc type magic"));
  616 
  617         /* realloc(NULL, ...) is equivalent to malloc(...) */
  618         if (addr == NULL)
  619                 return (malloc(size, mtp, flags));
  620 
  621         /*
  622          * XXX: Should report free of old memory and alloc of new memory to
  623          * per-CPU stats.
  624          */
  625 
  626 #ifdef DEBUG_MEMGUARD
  627         if (is_memguard_addr(addr))
  628                 return (memguard_realloc(addr, size, mtp, flags));
  629 #endif
  630 
  631 #ifdef DEBUG_REDZONE
  632         slab = NULL;
  633         alloc = redzone_get_size(addr);
  634 #else
  635         slab = vtoslab((vm_offset_t)addr & ~(UMA_SLAB_MASK));
  636 
  637         /* Sanity check */
  638         KASSERT(slab != NULL,
  639             ("realloc: address %p out of range", (void *)addr));
  640 
  641         /* Get the size of the original block */
  642         if (!(slab->us_flags & UMA_SLAB_MALLOC))
  643                 alloc = slab->us_keg->uk_size;
  644         else
  645                 alloc = slab->us_size;
  646 
  647         /* Reuse the original block if appropriate */
  648         if (size <= alloc
  649             && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE))
  650                 return (addr);
  651 #endif /* !DEBUG_REDZONE */
  652 
  653         /* Allocate a new, bigger (or smaller) block */
  654         if ((newaddr = malloc(size, mtp, flags)) == NULL)
  655                 return (NULL);
  656 
  657         /* Copy over original contents */
  658         bcopy(addr, newaddr, min(size, alloc));
  659         free(addr, mtp);
  660         return (newaddr);
  661 }
  662 
  663 /*
  664  *      reallocf: same as realloc() but free memory on failure.
  665  */
  666 void *
  667 reallocf(void *addr, unsigned long size, struct malloc_type *mtp, int flags)
  668 {
  669         void *mem;
  670 
  671         if ((mem = realloc(addr, size, mtp, flags)) == NULL)
  672                 free(addr, mtp);
  673         return (mem);
  674 }
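/*
 * Illustrative sketch of why reallocf() exists (buf and newlen are
 * invented names): the common idiom
 *
 *	buf = realloc(buf, newlen, M_TEMP, M_NOWAIT);
 *
 * leaks the original block when realloc() fails, because the only
 * pointer to it is overwritten with NULL.  reallocf() frees the old
 * block on failure, so the same assignment is safe:
 *
 *	buf = reallocf(buf, newlen, M_TEMP, M_NOWAIT);
 *	if (buf == NULL)
 *		return (ENOMEM);
 */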
  675 
  676 /*
  677  * Wake the page daemon when we exhaust KVA.  It will call the lowmem handler
  678  * and uma_reclaim() callbacks in a context that is safe.
  679  */
  680 static void
  681 kmem_reclaim(vmem_t *vm, int flags)
  682 {
  683 
  684         pagedaemon_wakeup();
  685 }
  686 
  687 /*
  688  * Initialize the kernel memory arena.
  689  */
  690 void
  691 kmeminit(void)
  692 {
  693         u_long mem_size, tmp;
  694  
  695         /*
  696          * Try to auto-tune the kernel memory size, so that it is
  697          * more applicable for a wider range of machine sizes.  The
  698          * VM_KMEM_SIZE_MAX is dependent on the maximum KVA space
  699          * available.
  700          *
  701          * Note that the kmem arena is also used by the zone allocator,
  702          * so make sure that there is enough space.
  703          */
  704         vm_kmem_size = VM_KMEM_SIZE;
  705         mem_size = cnt.v_page_count;
  706 
  707 #if defined(VM_KMEM_SIZE_SCALE)
  708         vm_kmem_size_scale = VM_KMEM_SIZE_SCALE;
  709 #endif
  710         TUNABLE_INT_FETCH("vm.kmem_size_scale", &vm_kmem_size_scale);
  711         if (vm_kmem_size_scale > 0 &&
  712             (mem_size / vm_kmem_size_scale) > (vm_kmem_size / PAGE_SIZE))
  713                 vm_kmem_size = (mem_size / vm_kmem_size_scale) * PAGE_SIZE;
  714 
  715 #if defined(VM_KMEM_SIZE_MIN)
  716         vm_kmem_size_min = VM_KMEM_SIZE_MIN;
  717 #endif
  718         TUNABLE_ULONG_FETCH("vm.kmem_size_min", &vm_kmem_size_min);
  719         if (vm_kmem_size_min > 0 && vm_kmem_size < vm_kmem_size_min) {
  720                 vm_kmem_size = vm_kmem_size_min;
  721         }
  722 
  723 #if defined(VM_KMEM_SIZE_MAX)
  724         vm_kmem_size_max = VM_KMEM_SIZE_MAX;
  725 #endif
  726         TUNABLE_ULONG_FETCH("vm.kmem_size_max", &vm_kmem_size_max);
  727         if (vm_kmem_size_max > 0 && vm_kmem_size >= vm_kmem_size_max)
  728                 vm_kmem_size = vm_kmem_size_max;
  729 
  730         /* Allow final override from the kernel environment */
  731         TUNABLE_ULONG_FETCH("vm.kmem_size", &vm_kmem_size);
  732 
  733         /*
  734          * Limit kmem virtual size to twice the physical memory.
  735          * This allows for kmem map sparseness, but limits the size
   736          * to something sane.  Be careful not to overflow the 32-bit
  737          * ints while doing the check or the adjustment.
  738          */
  739         if (vm_kmem_size / 2 / PAGE_SIZE > mem_size)
  740                 vm_kmem_size = 2 * mem_size * PAGE_SIZE;
  741 
  742         vm_kmem_size = round_page(vm_kmem_size);
  743 #ifdef DEBUG_MEMGUARD
  744         tmp = memguard_fudge(vm_kmem_size, kernel_map);
  745 #else
  746         tmp = vm_kmem_size;
  747 #endif
  748         vmem_init(kmem_arena, "kmem arena", kva_alloc(tmp), tmp, PAGE_SIZE,
  749             0, 0);
  750         vmem_set_reclaim(kmem_arena, kmem_reclaim);
  751 
  752 #ifdef DEBUG_MEMGUARD
  753         /*
  754          * Initialize MemGuard if support compiled in.  MemGuard is a
  755          * replacement allocator used for detecting tamper-after-free
  756          * scenarios as they occur.  It is only used for debugging.
  757          */
  758         memguard_init(kmem_arena);
  759 #endif
  760 }
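/*
 * Illustrative arithmetic for the auto-tuning above, assuming 4 KB pages,
 * 1 GB of physical memory (mem_size = 262144 pages), and a
 * vm.kmem_size_scale of 1: the scaled size is
 *
 *	(262144 / 1) * PAGE_SIZE = 1 GB,
 *
 * which is then clamped by vm.kmem_size_min/max and by the
 * twice-physical-memory cap before the arena is created.
 */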
  761 
  762 /*
  763  * Initialize the kernel memory allocator
  764  */
  765 /* ARGSUSED*/
  766 static void
  767 mallocinit(void *dummy)
  768 {
  769         int i;
  770         uint8_t indx;
  771 
  772         mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
  773 
  774         kmeminit();
  775 
  776         uma_startup2();
  777 
  778         mt_zone = uma_zcreate("mt_zone", sizeof(struct malloc_type_internal),
  779 #ifdef INVARIANTS
  780             mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini,
  781 #else
  782             NULL, NULL, NULL, NULL,
  783 #endif
  784             UMA_ALIGN_PTR, UMA_ZONE_MALLOC);
  785         for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) {
  786                 int size = kmemzones[indx].kz_size;
  787                 char *name = kmemzones[indx].kz_name;
  788                 int subzone;
  789 
  790                 for (subzone = 0; subzone < numzones; subzone++) {
  791                         kmemzones[indx].kz_zone[subzone] =
  792                             uma_zcreate(name, size,
  793 #ifdef INVARIANTS
  794                             mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini,
  795 #else
  796                             NULL, NULL, NULL, NULL,
  797 #endif
  798                             UMA_ALIGN_PTR, UMA_ZONE_MALLOC);
   799                 }
   800                 for (; i <= size; i += KMEM_ZBASE)
   801                         kmemsize[i >> KMEM_ZSHIFT] = indx;
   802 
  803         }
  804 }
  805 SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, mallocinit, NULL);
  806 
  807 void
  808 malloc_init(void *data)
  809 {
  810         struct malloc_type_internal *mtip;
  811         struct malloc_type *mtp;
  812 
  813         KASSERT(cnt.v_page_count != 0, ("malloc_register before vm_init"));
  814 
  815         mtp = data;
  816         if (mtp->ks_magic != M_MAGIC)
  817                 panic("malloc_init: bad malloc type magic");
  818 
  819         mtip = uma_zalloc(mt_zone, M_WAITOK | M_ZERO);
  820         mtp->ks_handle = mtip;
  821         mtip->mti_zone = mtp_get_subzone(mtp->ks_shortdesc);
  822 
  823         mtx_lock(&malloc_mtx);
  824         mtp->ks_next = kmemstatistics;
  825         kmemstatistics = mtp;
  826         kmemcount++;
  827         mtx_unlock(&malloc_mtx);
  828 }
  829 
  830 void
  831 malloc_uninit(void *data)
  832 {
  833         struct malloc_type_internal *mtip;
  834         struct malloc_type_stats *mtsp;
  835         struct malloc_type *mtp, *temp;
  836         uma_slab_t slab;
  837         long temp_allocs, temp_bytes;
  838         int i;
  839 
  840         mtp = data;
  841         KASSERT(mtp->ks_magic == M_MAGIC,
  842             ("malloc_uninit: bad malloc type magic"));
  843         KASSERT(mtp->ks_handle != NULL, ("malloc_deregister: cookie NULL"));
  844 
  845         mtx_lock(&malloc_mtx);
  846         mtip = mtp->ks_handle;
  847         mtp->ks_handle = NULL;
  848         if (mtp != kmemstatistics) {
  849                 for (temp = kmemstatistics; temp != NULL;
  850                     temp = temp->ks_next) {
  851                         if (temp->ks_next == mtp) {
  852                                 temp->ks_next = mtp->ks_next;
  853                                 break;
  854                         }
  855                 }
  856                 KASSERT(temp,
  857                     ("malloc_uninit: type '%s' not found", mtp->ks_shortdesc));
  858         } else
  859                 kmemstatistics = mtp->ks_next;
  860         kmemcount--;
  861         mtx_unlock(&malloc_mtx);
  862 
  863         /*
  864          * Look for memory leaks.
  865          */
  866         temp_allocs = temp_bytes = 0;
  867         for (i = 0; i < MAXCPU; i++) {
  868                 mtsp = &mtip->mti_stats[i];
  869                 temp_allocs += mtsp->mts_numallocs;
  870                 temp_allocs -= mtsp->mts_numfrees;
  871                 temp_bytes += mtsp->mts_memalloced;
  872                 temp_bytes -= mtsp->mts_memfreed;
  873         }
  874         if (temp_allocs > 0 || temp_bytes > 0) {
  875                 printf("Warning: memory type %s leaked memory on destroy "
  876                     "(%ld allocations, %ld bytes leaked).\n", mtp->ks_shortdesc,
  877                     temp_allocs, temp_bytes);
  878         }
  879 
  880         slab = vtoslab((vm_offset_t) mtip & (~UMA_SLAB_MASK));
  881         uma_zfree_arg(mt_zone, mtip, slab);
  882 }
  883 
  884 struct malloc_type *
  885 malloc_desc2type(const char *desc)
  886 {
  887         struct malloc_type *mtp;
  888 
  889         mtx_assert(&malloc_mtx, MA_OWNED);
  890         for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) {
  891                 if (strcmp(mtp->ks_shortdesc, desc) == 0)
  892                         return (mtp);
  893         }
  894         return (NULL);
  895 }
  896 
  897 static int
  898 sysctl_kern_malloc_stats(SYSCTL_HANDLER_ARGS)
  899 {
  900         struct malloc_type_stream_header mtsh;
  901         struct malloc_type_internal *mtip;
  902         struct malloc_type_header mth;
  903         struct malloc_type *mtp;
  904         int error, i;
  905         struct sbuf sbuf;
  906 
  907         error = sysctl_wire_old_buffer(req, 0);
  908         if (error != 0)
  909                 return (error);
  910         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
  911         mtx_lock(&malloc_mtx);
  912 
  913         /*
  914          * Insert stream header.
  915          */
  916         bzero(&mtsh, sizeof(mtsh));
  917         mtsh.mtsh_version = MALLOC_TYPE_STREAM_VERSION;
  918         mtsh.mtsh_maxcpus = MAXCPU;
  919         mtsh.mtsh_count = kmemcount;
  920         (void)sbuf_bcat(&sbuf, &mtsh, sizeof(mtsh));
  921 
  922         /*
  923          * Insert alternating sequence of type headers and type statistics.
  924          */
  925         for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) {
  926                 mtip = (struct malloc_type_internal *)mtp->ks_handle;
  927 
  928                 /*
  929                  * Insert type header.
  930                  */
  931                 bzero(&mth, sizeof(mth));
  932                 strlcpy(mth.mth_name, mtp->ks_shortdesc, MALLOC_MAX_NAME);
  933                 (void)sbuf_bcat(&sbuf, &mth, sizeof(mth));
  934 
  935                 /*
  936                  * Insert type statistics for each CPU.
  937                  */
  938                 for (i = 0; i < MAXCPU; i++) {
  939                         (void)sbuf_bcat(&sbuf, &mtip->mti_stats[i],
  940                             sizeof(mtip->mti_stats[i]));
  941                 }
  942         }
  943         mtx_unlock(&malloc_mtx);
  944         error = sbuf_finish(&sbuf);
  945         sbuf_delete(&sbuf);
  946         return (error);
  947 }
  948 
  949 SYSCTL_PROC(_kern, OID_AUTO, malloc_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
  950     0, 0, sysctl_kern_malloc_stats, "s,malloc_type_ustats",
  951     "Return malloc types");
  952 
  953 SYSCTL_INT(_kern, OID_AUTO, malloc_count, CTLFLAG_RD, &kmemcount, 0,
  954     "Count of kernel malloc types");
  955 
  956 void
  957 malloc_type_list(malloc_type_list_func_t *func, void *arg)
  958 {
  959         struct malloc_type *mtp, **bufmtp;
  960         int count, i;
  961         size_t buflen;
  962 
  963         mtx_lock(&malloc_mtx);
  964 restart:
  965         mtx_assert(&malloc_mtx, MA_OWNED);
  966         count = kmemcount;
  967         mtx_unlock(&malloc_mtx);
  968 
  969         buflen = sizeof(struct malloc_type *) * count;
  970         bufmtp = malloc(buflen, M_TEMP, M_WAITOK);
  971 
  972         mtx_lock(&malloc_mtx);
  973 
  974         if (count < kmemcount) {
  975                 free(bufmtp, M_TEMP);
  976                 goto restart;
  977         }
  978 
  979         for (mtp = kmemstatistics, i = 0; mtp != NULL; mtp = mtp->ks_next, i++)
  980                 bufmtp[i] = mtp;
  981 
  982         mtx_unlock(&malloc_mtx);
  983 
  984         for (i = 0; i < count; i++)
  985                 (func)(bufmtp[i], arg);
  986 
  987         free(bufmtp, M_TEMP);
  988 }
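/*
 * Illustrative sketch of a malloc_type_list() consumer; print_type is an
 * invented name.  The callback runs after malloc_mtx has been dropped,
 * so it may itself call malloc():
 *
 *	static void
 *	print_type(struct malloc_type *mtp, void *arg)
 *	{
 *
 *		printf("%s\n", mtp->ks_shortdesc);
 *	}
 *
 *	malloc_type_list(print_type, NULL);
 */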
  989 
  990 #ifdef DDB
  991 DB_SHOW_COMMAND(malloc, db_show_malloc)
  992 {
  993         struct malloc_type_internal *mtip;
  994         struct malloc_type *mtp;
  995         uint64_t allocs, frees;
  996         uint64_t alloced, freed;
  997         int i;
  998 
  999         db_printf("%18s %12s  %12s %12s\n", "Type", "InUse", "MemUse",
 1000             "Requests");
 1001         for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) {
 1002                 mtip = (struct malloc_type_internal *)mtp->ks_handle;
 1003                 allocs = 0;
 1004                 frees = 0;
 1005                 alloced = 0;
 1006                 freed = 0;
 1007                 for (i = 0; i < MAXCPU; i++) {
 1008                         allocs += mtip->mti_stats[i].mts_numallocs;
 1009                         frees += mtip->mti_stats[i].mts_numfrees;
 1010                         alloced += mtip->mti_stats[i].mts_memalloced;
 1011                         freed += mtip->mti_stats[i].mts_memfreed;
 1012                 }
 1013                 db_printf("%18s %12ju %12juK %12ju\n",
 1014                     mtp->ks_shortdesc, allocs - frees,
 1015                     (alloced - freed + 1023) / 1024, allocs);
 1016                 if (db_pager_quit)
 1017                         break;
 1018         }
 1019 }
 1020 
 1021 #if MALLOC_DEBUG_MAXZONES > 1
 1022 DB_SHOW_COMMAND(multizone_matches, db_show_multizone_matches)
 1023 {
 1024         struct malloc_type_internal *mtip;
 1025         struct malloc_type *mtp;
 1026         u_int subzone;
 1027 
 1028         if (!have_addr) {
 1029                 db_printf("Usage: show multizone_matches <malloc type/addr>\n");
 1030                 return;
 1031         }
 1032         mtp = (void *)addr;
 1033         if (mtp->ks_magic != M_MAGIC) {
 1034                 db_printf("Magic %lx does not match expected %x\n",
 1035                     mtp->ks_magic, M_MAGIC);
 1036                 return;
 1037         }
 1038 
 1039         mtip = mtp->ks_handle;
 1040         subzone = mtip->mti_zone;
 1041 
 1042         for (mtp = kmemstatistics; mtp != NULL; mtp = mtp->ks_next) {
 1043                 mtip = mtp->ks_handle;
 1044                 if (mtip->mti_zone != subzone)
 1045                         continue;
 1046                 db_printf("%s\n", mtp->ks_shortdesc);
 1047                 if (db_pager_quit)
 1048                         break;
 1049         }
 1050 }
 1051 #endif /* MALLOC_DEBUG_MAXZONES > 1 */
 1052 #endif /* DDB */
 1053 
 1054 #ifdef MALLOC_PROFILE
 1055 
 1056 static int
 1057 sysctl_kern_mprof(SYSCTL_HANDLER_ARGS)
 1058 {
 1059         struct sbuf sbuf;
 1060         uint64_t count;
 1061         uint64_t waste;
 1062         uint64_t mem;
 1063         int error;
 1064         int rsize;
 1065         int size;
 1066         int i;
 1067 
 1068         waste = 0;
 1069         mem = 0;
 1070 
 1071         error = sysctl_wire_old_buffer(req, 0);
 1072         if (error != 0)
 1073                 return (error);
 1074         sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 1075         sbuf_printf(&sbuf, 
 1076             "\n  Size                    Requests  Real Size\n");
 1077         for (i = 0; i < KMEM_ZSIZE; i++) {
 1078                 size = i << KMEM_ZSHIFT;
 1079                 rsize = kmemzones[kmemsize[i]].kz_size;
 1080                 count = (long long unsigned)krequests[i];
 1081 
 1082                 sbuf_printf(&sbuf, "%6d%28llu%11d\n", size,
 1083                     (unsigned long long)count, rsize);
 1084 
 1085                 if ((rsize * count) > (size * count))
 1086                         waste += (rsize * count) - (size * count);
 1087                 mem += (rsize * count);
 1088         }
 1089         sbuf_printf(&sbuf,
 1090             "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n",
 1091             (unsigned long long)mem, (unsigned long long)waste);
 1092         error = sbuf_finish(&sbuf);
 1093         sbuf_delete(&sbuf);
 1094         return (error);
 1095 }
 1096 
 1097 SYSCTL_OID(_kern, OID_AUTO, mprof, CTLTYPE_STRING|CTLFLAG_RD,
 1098     NULL, 0, sysctl_kern_mprof, "A", "Malloc Profiling");
 1099 #endif /* MALLOC_PROFILE */



This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.