FreeBSD/Linux Kernel Cross Reference
sys/kern/zalloc.c


    1 /* 
    2  * Mach Operating System
    3  * Copyright (c) 1987-1993 Carnegie Mellon University
    4  * All Rights Reserved.
    5  * 
    6  * Permission to use, copy, modify and distribute this software and its
    7  * documentation is hereby granted, provided that both the copyright
    8  * notice and this permission notice appear in all copies of the
    9  * software, derivative works or modified versions, and any portions
   10  * thereof, and that both notices appear in supporting documentation.
   11  * 
   12  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   13  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
   14  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   15  * 
   16  * Carnegie Mellon requests users of this software to return to
   17  * 
   18  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   19  *  School of Computer Science
   20  *  Carnegie Mellon University
   21  *  Pittsburgh PA 15213-3890
   22  * 
   23  * any improvements or extensions that they make and grant Carnegie Mellon
   24  * the rights to redistribute these changes.
   25  */
   26 /*
   27  * HISTORY
   28  * $Log:        zalloc.c,v $
   29  * Revision 2.21  93/11/17  17:34:10  dbg
   30  *      Fixed type of null continuation argument to thread_block.
   31  *      [93/06/03            dbg]
   32  * 
   33  *      Add ANSI function prototypes.
   34  *      [93/05/21            dbg]
   35  * 
   36  * Revision 2.20  93/05/15  18:56:28  mrt
   37  *      machparam.h -> machspl.h
   38  * 
   39  * Revision 2.19  93/01/14  17:37:34  danner
   40  *      Fixed casts of assert_wait and thread_wakeup arguments.
   41  *      [93/01/12            danner]
   42  *      64bit cleanup. Proper spl typing.
   43  *      [92/12/01            af]
   44  * 
   45  * Revision 2.18  92/08/03  17:40:37  jfriedl
   46  *      removed silly prototypes
   47  *      [92/08/02            jfriedl]
   48  * 
   49  * Revision 2.17  92/05/21  17:17:28  jfriedl
   50  *      Added stuff to quiet some gcc warnings.
   51  *      [92/05/16            jfriedl]
   52  * 
   53  * Revision 2.16  92/02/23  19:49:58  elf
   54  *      Eliminate keep_wired argument from vm_map_copyin().
   55  *      [92/02/21  10:13:57  dlb]
   56  * 
   57  * Revision 2.14.7.1  92/02/18  19:07:08  jeffreyh
   58  *      Increased zone_map_size for 2 servers
   59  *      [91/08/30            bernadat]
   60  * 
   61  * Revision 2.15  92/01/14  16:45:03  rpd
   62  *      Changed host_zone_info for CountInOut.
   63  *      [92/01/14            rpd]
   64  * 
   65  * Revision 2.14  91/05/18  14:34:46  rpd
   66  *      Added check_simple_locks.
   67  *      Moved ADD_TO_ZONE, REMOVE_FROM_ZONE here.
   68  *      Moved extraneous zone GC declarations here.
   69  *      [91/03/31            rpd]
   70  * 
   71  *      Minor cleanup in zget_space.
   72  *      [91/03/28            rpd]
   73  *      Changed to use zdata to initialize zalloc_next_space.
   74  *      [91/03/22            rpd]
   75  * 
   76  * Revision 2.13  91/05/14  16:50:36  mrt
   77  *      Correcting copyright
   78  * 
   79  * Revision 2.12  91/03/16  14:53:32  rpd
   80  *      Updated for new kmem_alloc interface.
   81  *      [91/03/03            rpd]
   82  *      Added continuation argument to thread_block.
   83  *      [91/02/16            rpd]
   84  * 
   85  * Revision 2.11  91/02/05  17:31:25  mrt
   86  *      Changed to new Mach copyright
   87  *      [91/02/01  16:21:52  mrt]
   88  * 
   89  * Revision 2.10  91/01/08  15:18:28  rpd
   90  *      Added zalloc_wasted_space.
   91  *      [91/01/06            rpd]
   92  *      Removed COLLECT_ZONE_GARBAGE.
   93  *      [91/01/03            rpd]
   94  * 
   95  *      Changed zinit to make zones by default *not* collectable.
   96  *      [90/12/29            rpd]
   97  *      Added consider_zone_gc.
   98  *      [90/11/11            rpd]
   99  * 
  100  * Revision 2.9  90/12/20  16:39:11  jeffreyh
  101  *      [90/12/19  10:36:55  jeffreyh]
  102  * 
  103  *      10-Dec-90  Jeffrey Heller  (jeffreyh)  at OSF
  104  *      Merge in changes from OSF/1 done by jvs@osf
   105  *      Zones are now collectable by default,
  106  *      zchange now takes a collectable argument
  107  *      include machine/machparam.h for splhigh
  108  * 
  109  * Revision 2.8  90/11/05  14:32:08  rpd
  110  *      Added zone_check option to zfree.
  111  *      [90/10/29            rpd]
  112  * 
  113  * Revision 2.7  90/06/19  22:59:49  rpd
  114  *      Added zi_collectable field to zone_info structure.
  115  *      [90/06/05            rpd]
  116  * 
  117  * Revision 2.6  90/06/02  14:57:28  rpd
  118  *      Made zone_ignore_overflow TRUE by default.
  119  *      When a zone overflows, increase its max_size.
  120  *      [90/05/11  17:00:24  rpd]
  121  * 
  122  *      Added host_zone_info.
  123  *      [90/03/26  22:28:05  rpd]
  124  * 
  125  * Revision 2.5  90/05/03  15:47:04  dbg
  126  *      Add host_zone_info.
  127  *      [90/04/06            dbg]
  128  * 
  129  * Revision 2.4  90/02/22  20:04:23  dbg
  130  *      Initialize zone_page_table_lock before using it.
  131  *      [90/02/16            dbg]
  132  * 
  133  * Revision 2.3  89/11/29  14:09:25  af
  134  *      Nullify zone_page_alloc/init if 'garbage' not here.
  135  *      [89/10/29  14:23:56  af]
  136  * 
   137  *      Could not compile without the 'garbage' thing because a definition was missing.
  138  *      [89/10/29  09:35:22  af]
  139  * 
  140  * Revision 2.2  89/08/11  17:56:21  rwd
  141  *      Added collectible zones.  Collectible zones allow storage to be
  142  *      returned to system via kmem_free when pages are no longer used.
  143  *      This option should only be used when zone memory is virtual
  144  *      (rather than physical as in a MIPS architecture).
  145  *      [89/07/22            rfr]
  146  * 
  147  * Revision 2.11  89/05/30  10:38:40  rvb
  148  *      Make zalloc storage pointers external, so they can be initialized from
  149  *      the outside.
  150  *      [89/05/30  08:28:14  rvb]
  151  * 
  152  * Revision 2.10  89/05/11  14:41:30  gm0w
  153  *      Keep all zones on a list that host_zone_info can traverse.
  154  *      This fixes a bug in host_zone_info: it would try to lock
  155  *      uninitialized zones.  Fixed zinit to round elem_size up
  156  *      to a multiple of four.  This prevents zget_space from handing
  157  *      out improperly aligned objects.
  158  *      [89/05/08  21:34:17  rpd]
  159  * 
  160  * Revision 2.9  89/05/06  15:47:11  rpd
  161  *      From jsb: Added missing argument to kmem_free in zget_space.
  162  * 
  163  * Revision 2.8  89/05/06  02:57:35  rpd
  164  *      Added host_zone_info (under MACH_DEBUG).
  165  *      Fixed zget to increase cur_size when the space comes from zget_space.
  166  *      Use MACRO_BEGIN/MACRO_END, decl_simple_lock_data where appropriate.
  167  *      [89/05/06  02:43:29  rpd]
  168  * 
  169  * Revision 2.7  89/04/18  16:43:20  mwyoung
  170  *      Document zget_space.  Eliminate MACH_XP conditional.
  171  *      [89/03/26            mwyoung]
  172  *      Make handling of zero allocation size unconditional.  Clean up
  173  *      allocation code.
  174  *      [89/03/16            mwyoung]
  175  * 
  176  * Revision 2.6  89/03/15  15:04:46  gm0w
  177  *      Picked up code from rfr to allocate data from non pageable zones
  178  *      from a single pool.
  179  *      [89/03/09            mrt]
  180  * 
  181  * Revision 2.5  89/03/09  20:17:50  rpd
  182  *      More cleanup.
  183  * 
  184  * Revision 2.4  89/02/25  18:11:15  gm0w
  185  *      Changes for cleanup.
  186  * 
  187  * Revision 2.3  89/01/18  00:50:51  jsb
  188  *      Vnode support: interpret allocation size of zero in zinit as meaning
  189  *      PAGE_SIZE.
  190  *      [89/01/17  20:57:39  jsb]
  191  * 
  192  * Revision 2.2  88/12/19  02:48:41  mwyoung
  193  *      Fix include file references.
  194  *      [88/12/19  00:33:03  mwyoung]
  195  *      
  196  *      Add and use zone_ignore_overflow.
  197  *      [88/12/14            mwyoung]
  198  * 
  199  *  8-Jan-88  Rick Rashid (rfr) at Carnegie-Mellon University
  200  *      Made pageable zones really pageable.  Turned spin locks to sleep
  201  *      locks for pageable zones.
  202  *
  203  * 30-Dec-87  David Golub (dbg) at Carnegie-Mellon University
  204  *      Delinted.
  205  *
  206  * 20-Oct-87  Michael Young (mwyoung) at Carnegie-Mellon University
  207  *      Allocate zone memory from a separate kernel submap, to avoid
  208  *      sleeping with the kernel_map locked.
  209  *
  210  *  1-Oct-87  Michael Young (mwyoung) at Carnegie-Mellon University
  211  *      Added zchange().
  212  *
  213  * 30-Sep-87  Richard Sanzi (sanzi) at Carnegie-Mellon University
  214  *      Deleted the printf() in zinit() which is called when zinit is
  215  *      creating a pageable zone.
  216  *
  217  * 12-Sep-87  Avadis Tevanian (avie) at Carnegie-Mellon University
  218  *      Modified to use list of elements instead of queues.  Actually,
  219  *      this package now uses macros defined in zalloc.h which define
  220  *      the list semantics.
  221  *
  222  * 30-Mar-87  Avadis Tevanian (avie) at Carnegie-Mellon University
  223  *      Update zone's cur_size field when it is crammed (zcram).
  224  *
  225  * 23-Mar-87  Avadis Tevanian (avie) at Carnegie-Mellon University
  226  *      Only define zfree if there is no macro version.
  227  *
  228  * 17-Mar-87  David Golub (dbg) at Carnegie-Mellon University
  229  *      De-linted.
  230  *
  231  * 12-Feb-87  Robert Sansom (rds) at Carnegie Mellon University
  232  *      Added zget - no waiting version of zalloc.
  233  *
  234  * 22-Jan-87  Michael Young (mwyoung) at Carnegie-Mellon University
  235  *      De-linted.
  236  *
  237  * 12-Jan-87  Michael Young (mwyoung) at Carnegie-Mellon University
  238  *      Eliminated use of the old interlocked queuing package;
  239  *      random other cleanups.
  240  *
  241  *  9-Jun-85  Avadis Tevanian (avie) at Carnegie-Mellon University
  242  *      Created.
  243  */
  244 /*
  245  *      File:   kern/zalloc.c
  246  *      Author: Avadis Tevanian, Jr.
  247  *
  248  *      Zone-based memory allocator.  A zone is a collection of fixed size
  249  *      data blocks for which quick allocation/deallocation is possible.
  250  */
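
/*
 *      Illustrative sketch: a typical client of this allocator creates a
 *      zone once at initialization time and then allocates and frees
 *      fixed-size elements from it.  The names "struct example_item",
 *      example_item_zone, example_init and example_use below are
 *      hypothetical, used only for illustration.
 */
#if 0   /* example only, not compiled */
struct example_item {
        int     value;
};

zone_t  example_item_zone;

void example_init(void)
{
        example_item_zone = zinit(sizeof(struct example_item),
                                  1024 * sizeof(struct example_item),
                                  PAGE_SIZE, FALSE, "example items");
}

void example_use(void)
{
        struct example_item *item;

        item = (struct example_item *) zalloc(example_item_zone);
        item->value = 42;
        /* ... use the element ... */
        zfree(example_item_zone, (vm_offset_t) item);
}
#endif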
  251 
  252 #include <kern/assert.h>
  253 #include <kern/macro_help.h>
  254 #include <kern/memory.h>
  255 #include <kern/sched.h>                 /* sched_tick */
  256 #include <kern/sched_prim.h>
  257 #include <kern/strings.h>
  258 #include <kern/zalloc.h>
  259 #include <mach/vm_param.h>
  260 #include <vm/vm_kern.h>
  261 #include <machine/machspl.h>
  262 
  263 #include <mach_debug.h>
  264 #if     MACH_DEBUG
  265 #include <mach/kern_return.h>
  266 #include <mach/machine/vm_types.h>
  267 #include <mach_debug/zone_info.h>
  268 #include <kern/host.h>
  269 #include <vm/vm_map.h>
  270 #include <vm/vm_user.h>
  271 #include <vm/vm_kern.h>
  272 #endif
  273 
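/*
 *      ADD_TO_ZONE and REMOVE_FROM_ZONE thread the zone's free list
 *      through the free elements themselves: the first word of each free
 *      element holds the address of the next free element (0 terminates
 *      the list).  This is why zinit rounds elem_size up to a multiple of
 *      sizeof(z->free_elements).  Note that zone->count tracks elements
 *      currently allocated out of the zone, so freeing (ADD_TO_ZONE)
 *      decrements it and allocating (REMOVE_FROM_ZONE) increments it.
 *      Callers are expected to hold the zone lock.
 */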
  274 #define ADD_TO_ZONE(zone, element)                                      \
  275 MACRO_BEGIN                                                             \
  276                 *((vm_offset_t *)(element)) = (zone)->free_elements;    \
  277                 (zone)->free_elements = (vm_offset_t) (element);        \
  278                 (zone)->count--;                                        \
  279 MACRO_END
  280 
  281 #define REMOVE_FROM_ZONE(zone, ret, type)                               \
  282 MACRO_BEGIN                                                             \
  283         (ret) = (type) (zone)->free_elements;                           \
  284         if ((ret) != (type) 0) {                                        \
  285                 (zone)->count++;                                        \
  286                 (zone)->free_elements = *((vm_offset_t *)(ret));        \
  287         }                                                               \
  288 MACRO_END
  289 
  290 /*
  291  * Support for garbage collection of unused zone pages:
  292  */
  293 
  294 struct zone_page_table_entry {
  295         struct  zone_page_table_entry   *next;
  296         short   in_free_list;
  297         short   alloc_count;
  298 };
  299 
  300 extern struct zone_page_table_entry * zone_page_table;
  301 extern vm_offset_t zone_map_min_address;
  302 
  303 #define lock_zone_page_table()   simple_lock(&zone_page_table_lock)
  304 #define unlock_zone_page_table() simple_unlock(&zone_page_table_lock)
  305 
  306 #define zone_page(addr) \
  307     (&zone_page_table[atop(((vm_offset_t)addr) - zone_map_min_address)])
  308 
  309 
  310 void            zone_page_alloc(
  311         vm_offset_t     addr,
  312         vm_size_t       size);          /* forward */
  313 void            zone_page_dealloc(
  314         vm_offset_t     addr,
  315         vm_size_t       size);          /* forward */
  316 void            zone_page_in_use(
  317         vm_offset_t     addr,
  318         vm_size_t       size);          /* forward */
  319 void            zone_page_free(
  320         vm_offset_t     addr,
  321         vm_size_t       size);          /* forward */
  322 
  323 zone_t          zone_zone;      /* this is the zone containing other zones */
  324 
  325 boolean_t       zone_ignore_overflow = TRUE;
  326 
  327 vm_map_t        zone_map = VM_MAP_NULL;
  328 vm_size_t       zone_map_size = 12 * 1024 * 1024;
  329 
  330 /*
  331  *      The VM system gives us an initial chunk of memory.
  332  *      It has to be big enough to allocate the zone_zone
  333  *      and some initial kernel data structures, like kernel maps.
  334  *      It is advantageous to make it bigger than really necessary,
  335  *      because this memory is more efficient than normal kernel
  336  *      virtual memory.  (It doesn't have vm_page structures backing it
  337  *      and it may have other machine-dependent advantages.)
  338  *      So for best performance, zdata_size should approximate
  339  *      the amount of memory you expect the zone system to consume.
  340  */
  341 
  342 vm_offset_t     zdata;
  343 vm_size_t       zdata_size = 420 * 1024;
  344 
  345 #define lock_zone(zone)                                 \
  346 MACRO_BEGIN                                             \
  347         if (zone->pageable) {                           \
  348                 lock_write(&zone->complex_lock);        \
  349         } else {                                        \
  350                 simple_lock(&zone->lock);               \
  351         }                                               \
  352 MACRO_END
  353 
  354 #define unlock_zone(zone)                               \
  355 MACRO_BEGIN                                             \
  356         if (zone->pageable) {                           \
  357                 lock_done(&zone->complex_lock);         \
  358         } else {                                        \
  359                 simple_unlock(&zone->lock);             \
  360         }                                               \
  361 MACRO_END
  362 
  363 #define lock_zone_init(zone)                            \
  364 MACRO_BEGIN                                             \
  365         if (zone->pageable) {                           \
  366                 lock_init(&zone->complex_lock, TRUE);   \
  367         } else {                                        \
  368                 simple_lock_init(&zone->lock);          \
  369         }                                               \
  370 MACRO_END
  371 
  372 vm_offset_t zget_space(vm_offset_t size);
  373 
  374 decl_simple_lock_data(,zget_space_lock)
  375 vm_offset_t zalloc_next_space;
  376 vm_offset_t zalloc_end_of_space;
  377 vm_size_t zalloc_wasted_space;
  378 
  379 /*
  380  *      Garbage collection map information
  381  */
  382 decl_simple_lock_data(,zone_page_table_lock)
  383 struct zone_page_table_entry *  zone_page_table;
  384 vm_offset_t                     zone_map_min_address;
  385 vm_offset_t                     zone_map_max_address;
  386 int                             zone_pages;
  387 
  388 void zone_page_init(
  389         vm_offset_t     addr,
  390         vm_size_t       size,
  391         int             value);         /* forward */
  392 
  393 #define ZONE_PAGE_USED  0
  394 #define ZONE_PAGE_UNUSED -1
  395 
  396 
  397 /*
  398  *      Protects first_zone, last_zone, num_zones,
  399  *      and the next_zone field of zones.
  400  */
  401 decl_simple_lock_data(,all_zones_lock)
  402 zone_t                  first_zone;
  403 zone_t                  *last_zone;
  404 int                     num_zones;
  405 
  406 /*
  407  *      zinit initializes a new zone.  The zone data structures themselves
  408  *      are stored in a zone, which is initially a static structure that
  409  *      is initialized by zone_init.
  410  */
  411 zone_t zinit(
  412         vm_size_t       size,           /* the size of an element */
  413         vm_size_t       max,            /* maximum memory to use */
  414         vm_size_t       alloc,          /* allocation size */
  415         boolean_t       pageable,       /* is this zone pageable? */
  416         char            *name)          /* a name for the zone */
  417 {
  418         register zone_t         z;
  419 
  420         if (zone_zone == ZONE_NULL)
  421                 z = (zone_t) zget_space(sizeof(struct zone));
  422         else
  423                 z = (zone_t) zalloc(zone_zone);
  424         if (z == ZONE_NULL)
  425                 panic("zinit");
  426 
  427         if (alloc == 0)
  428                 alloc = PAGE_SIZE;
  429 
  430         if (size == 0)
  431                 size = sizeof(z->free_elements);
  432         /*
  433          *      Round off all the parameters appropriately.
  434          */
  435 
  436         if ((max = round_page(max)) < (alloc = round_page(alloc)))
  437                 max = alloc;
  438 
  439         z->free_elements = 0;
  440         z->cur_size = 0;
  441         z->max_size = max;
  442         z->elem_size = ((size-1) + sizeof(z->free_elements)) -
  443                         ((size-1) % sizeof(z->free_elements));
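        /*
         *      The expression above rounds the element size up to a
         *      multiple of sizeof(z->free_elements), so every element can
         *      hold the free-list link used by ADD_TO_ZONE and
         *      REMOVE_FROM_ZONE.  For example, with 4-byte pointers a
         *      10-byte request yields elem_size == 12.
         */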
  444 
  445         z->alloc_size = alloc;
  446         z->pageable = pageable;
  447         z->zone_name = name;
  448         z->count = 0;
  449         z->doing_alloc = FALSE;
  450         z->exhaustible = z->sleepable = FALSE;
  451         z->collectable = FALSE;
  452         z->expandable  = TRUE;
  453         lock_zone_init(z);
  454 
  455         /*
  456          *      Add the zone to the all-zones list.
  457          */
  458 
  459         z->next_zone = ZONE_NULL;
  460         simple_lock(&all_zones_lock);
  461         *last_zone = z;
  462         last_zone = &z->next_zone;
  463         num_zones++;
  464         simple_unlock(&all_zones_lock);
  465 
  466         return z;
  467 }
  468 
  469 /*
  470  *      Cram the given memory into the specified zone.
  471  */
  472 void zcram(
  473         register zone_t         zone,
  474         vm_offset_t             newmem,
  475         vm_size_t               size)
  476 {
  477         register vm_size_t      elem_size;
  478 
  479         if (newmem == (vm_offset_t) 0) {
  480                 panic("zcram - memory at zero");
  481         }
  482         elem_size = zone->elem_size;
  483 
  484         lock_zone(zone);
  485         while (size >= elem_size) {
  486                 ADD_TO_ZONE(zone, newmem);
  487                 zone_page_alloc(newmem, elem_size);
  488                 zone->count++;  /* compensate for ADD_TO_ZONE */
  489                 size -= elem_size;
  490                 newmem += elem_size;
  491                 zone->cur_size += elem_size;
  492         }
  493         unlock_zone(zone);
  494 }
  495 
  496 /*
  497  * Contiguous space allocator for non-paged zones. Allocates "size" amount
  498  * of memory from zone_map.
  499  */
  500 
  501 vm_offset_t zget_space(
  502         vm_offset_t size)
  503 {
  504         vm_offset_t     new_space = 0;
  505         vm_offset_t     result;
  506         vm_size_t       space_to_add = 0; /*'=0' to quiet gcc warnings */
  507 
  508         simple_lock(&zget_space_lock);
  509         while ((zalloc_next_space + size) > zalloc_end_of_space) {
  510                 /*
  511                  *      Add at least one page to allocation area.
  512                  */
  513 
  514                 space_to_add = round_page(size);
  515 
  516                 if (new_space == 0) {
  517                         /*
  518                          *      Memory cannot be wired down while holding
  519                          *      any locks that the pageout daemon might
  520                          *      need to free up pages.  [Making the zget_space
  521                          *      lock a complex lock does not help in this
  522                          *      regard.]
  523                          *
  524                          *      Unlock and allocate memory.  Because several
  525                          *      threads might try to do this at once, don't
  526                          *      use the memory before checking for available
  527                          *      space again.
  528                          */
  529 
  530                         simple_unlock(&zget_space_lock);
  531 
  532                         if (kmem_alloc_wired(zone_map,
  533                                              &new_space, space_to_add)
  534                                                         != KERN_SUCCESS)
  535                                 return 0;
  536                         zone_page_init(new_space, space_to_add,
  537                                                         ZONE_PAGE_USED);
  538                         simple_lock(&zget_space_lock);
  539                         continue;
  540                 }
  541 
  542                 
  543                 /*
  544                  *      Memory was allocated in a previous iteration.
  545                  *
  546                  *      Check whether the new region is contiguous
  547                  *      with the old one.
  548                  */
  549 
  550                 if (new_space != zalloc_end_of_space) {
  551                         /*
  552                          *      Throw away the remainder of the
  553                          *      old space, and start a new one.
  554                          */
  555                         zalloc_wasted_space +=
  556                                 zalloc_end_of_space - zalloc_next_space;
  557                         zalloc_next_space = new_space;
  558                 }
  559 
  560                 zalloc_end_of_space = new_space + space_to_add;
  561 
  562                 new_space = 0;
  563         }
  564         result = zalloc_next_space;
  565         zalloc_next_space += size;              
  566         simple_unlock(&zget_space_lock);
  567 
  568         if (new_space != 0)
  569                 kmem_free(zone_map, new_space, space_to_add);
  570 
  571         return result;
  572 }
  573 
  574 
  575 /*
  576  *      Initialize the "zone of zones" which uses fixed memory allocated
  577  *      earlier in memory initialization.  zone_bootstrap is called
  578  *      before zone_init.
  579  */
  580 void zone_bootstrap(void)
  581 {
  582         simple_lock_init(&all_zones_lock);
  583         first_zone = ZONE_NULL;
  584         last_zone = &first_zone;
  585         num_zones = 0;
  586 
  587         simple_lock_init(&zget_space_lock);
  588         zalloc_next_space = zdata;
  589         zalloc_end_of_space = zdata + zdata_size;
  590         zalloc_wasted_space = 0;
  591 
  592         zone_zone = ZONE_NULL;
  593         zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone),
  594                           sizeof(struct zone), FALSE, "zones");
  595 }
  596 
  597 void zone_init(void)
  598 {
  599         vm_offset_t     zone_min;
  600         vm_offset_t     zone_max;
  601 
  602         vm_size_t       zone_table_size;
  603 
  604         zone_map = kmem_suballoc(kernel_map, &zone_min, &zone_max,
  605                                  zone_map_size, FALSE);
  606 
  607         /*
  608          * Setup garbage collection information:
  609          */
  610 
  611         zone_table_size = atop(zone_max - zone_min) * 
  612                                 sizeof(struct zone_page_table_entry);
  613         if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
  614                              zone_table_size) != KERN_SUCCESS)
  615                 panic("zone_init");
  616         zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
  617         zone_pages = atop(zone_max - zone_min);
  618         zone_map_min_address = zone_min;
  619         zone_map_max_address = zone_max;
  620         simple_lock_init(&zone_page_table_lock);
  621         zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
  622 }
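
/*
 *      Illustrative startup sketch (hypothetical function and argument
 *      names): machine-dependent startup code is expected to reserve
 *      zdata_size bytes of permanent memory for zdata, call
 *      zone_bootstrap() before the first zinit/zalloc, and call
 *      zone_init() once kernel_map and the kmem allocator are available.
 */
#if 0   /* example only, not compiled */
void example_startup(vm_offset_t permanent_memory)
{
        zdata = permanent_memory;       /* zdata_size bytes, reserved earlier */
        zone_bootstrap();               /* sets up the zone of zones */
        /* ... remaining VM initialization ... */
        zone_init();                    /* creates zone_map and the GC page table */
}
#endif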
  623 
  624 
  625 /*
  626  *      zalloc returns an element from the specified zone.
  627  */
  628 vm_offset_t zalloc(
  629         register zone_t zone)
  630 {
  631         vm_offset_t     addr;
  632 
  633         if (zone == ZONE_NULL)
  634                 panic("zalloc: null zone");
  635 
  636         check_simple_locks();
  637 
  638         lock_zone(zone);
  639         REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
  640         while (addr == 0) {
  641                 /*
  642                  *      If nothing was there, try to get more
  643                  */
  644                 if (zone->doing_alloc) {
  645                         /*
  646                          *      Someone is allocating memory for this zone.
  647                          *      Wait for it to show up, then try again.
  648                          */
  649                         assert_wait((event_t)&zone->doing_alloc, TRUE);
  650                         /* XXX say wakeup needed */
  651                         unlock_zone(zone);
  652                         thread_block(CONTINUE_NULL);
  653                         lock_zone(zone);
  654                 }
  655                 else {
  656                         if ((zone->cur_size + (zone->pageable ?
  657                                 zone->alloc_size : zone->elem_size)) >
  658                             zone->max_size) {
  659                                 if (zone->exhaustible)
  660                                         break;
  661                                 /*
  662                                  * Printf calls logwakeup, which calls
   663                                  * select_wakeup, which will do a zfree
   664                                  * (which tries to take the select_zone
   665                                  * lock), causing a hang.  Release the lock now
  666                                  * so it can be taken again later.
  667                                  * NOTE: this used to be specific to
  668                                  * the select_zone, but for
  669                                  * cleanliness, we just unlock all
  670                                  * zones before this.
  671                                  */
  672                                 if (zone->expandable) {
  673                                         /*
  674                                          * We're willing to overflow certain
  675                                          * zones, but not without complaining.
  676                                          *
  677                                          * This is best used in conjunction
   678                                          * with the collectable flag. What we
  679                                          * want is an assurance we can get the
  680                                          * memory back, assuming there's no
  681                                          * leak. 
  682                                          */
  683                                         zone->max_size += (zone->max_size >> 1);
  684                                 } else if (!zone_ignore_overflow) {
  685                                         unlock_zone(zone);
  686                                         panic("zalloc: zone \"%s\" empty.\n",
  687                                                 zone->zone_name);
  688                                 }
  689                         }
  690 
  691                         if (zone->pageable)
  692                                 zone->doing_alloc = TRUE;
  693                         unlock_zone(zone);
  694 
  695                         if (zone->pageable) {
  696                                 if (kmem_alloc_pageable(zone_map, &addr,
  697                                                         zone->alloc_size)
  698                                                         != KERN_SUCCESS)
  699                                         panic("zalloc");
  700                                 zcram(zone, addr, zone->alloc_size);
  701                                 lock_zone(zone);
  702                                 zone->doing_alloc = FALSE; 
  703                                 /* XXX check before doing this */
  704                                 thread_wakeup((event_t)&zone->doing_alloc);
  705 
  706                                 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
  707                         } else  if (zone->collectable) {
  708                                 if (kmem_alloc_wired(zone_map,
  709                                                      &addr, zone->alloc_size)
  710                                                         != KERN_SUCCESS)
  711                                         panic("zalloc");
  712                                 zone_page_init(addr, zone->alloc_size,
  713                                                         ZONE_PAGE_USED);
  714                                 zcram(zone, addr, zone->alloc_size);
  715                                 lock_zone(zone);
  716                                 REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
  717                         } else {
  718                                 addr = zget_space(zone->elem_size);
  719                                 if (addr == 0)
  720                                         panic("zalloc");
  721 
  722                                 lock_zone(zone);
  723                                 zone->count++;
  724                                 zone->cur_size += zone->elem_size;
  725                                 unlock_zone(zone);
  726                                 zone_page_alloc(addr, zone->elem_size);
  727                                 return addr;
  728                         }
  729                 }
  730         }
  731 
  732         unlock_zone(zone);
  733         return addr;
  734 }
  735 
  736 
  737 /*
  738  *      zget returns an element from the specified zone
   739  *      and returns 0 immediately if no element is available.
  740  *
   741  *      This form should be used when you cannot block (for example, when
  742  *      processing an interrupt).
  743  */
  744 vm_offset_t zget(
  745         register zone_t zone)
  746 {
  747         register vm_offset_t    addr;
  748 
  749         if (zone == ZONE_NULL)
   750                 panic("zget: null zone");
  751 
  752         lock_zone(zone);
  753         REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
  754         unlock_zone(zone);
  755 
  756         return addr;
  757 }
  758 
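/*
 *      Illustrative sketch (hypothetical names, reusing the example zone
 *      from the sketch near the top of the file): because zget never
 *      blocks, callers running at interrupt level check the result and
 *      fall back gracefully when no free element is available.
 */
#if 0   /* example only, not compiled */
void example_interrupt_handler(void)
{
        struct example_item *item;

        item = (struct example_item *) zget(example_item_zone);
        if (item == (struct example_item *) 0) {
                /* free list empty; drop the work or retry from thread context */
                return;
        }
        item->value = 0;
        /* ... queue the item for later processing ... */
}
#endif
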
  759 boolean_t zone_check = FALSE;
  760 
  761 void zfree(
  762         register zone_t zone,
  763         vm_offset_t     elem)
  764 {
  765         lock_zone(zone);
  766         if (zone_check) {
  767                 vm_offset_t this;
  768 
   769                 /* consistency check: panic if elem is already on the free list */
  770 
  771                 for (this = zone->free_elements;
  772                      this != 0;
  773                      this = * (vm_offset_t *) this)
  774                         if (this == elem)
  775                                 panic("zfree");
  776         }
  777         ADD_TO_ZONE(zone, elem);
  778         unlock_zone(zone);
  779 }
  780 
  781 void zcollectable(
  782         zone_t          zone)
  783 {
  784         zone->collectable = TRUE;
  785 }
  786 
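/*
 *      zchange adjusts a zone's pageable, sleepable, exhaustible and
 *      collectable attributes after creation, and reinitializes the
 *      zone's lock so that it matches the new pageable setting.
 */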
  787 void zchange(
  788         zone_t          zone,
  789         boolean_t       pageable,
  790         boolean_t       sleepable,
  791         boolean_t       exhaustible,
  792         boolean_t       collectable)
  793 {
  794         zone->pageable = pageable;
  795         zone->sleepable = sleepable;
  796         zone->exhaustible = exhaustible;
  797         zone->collectable = collectable;
  798         lock_zone_init(zone);
  799 }
  800 
  801 /*
  802  *  Zone garbage collection subroutines
  803  *
  804  *  These routines have in common the modification of entries in the
  805  *  zone_page_table.  The latter contains one entry for every page
  806  *  in the zone_map.  
  807  *
  808  *  For each page table entry in the given range:
  809  *
  810  *      zone_page_in_use        - decrements in_free_list
  811  *      zone_page_free          - increments in_free_list
  812  *      zone_page_init          - initializes in_free_list and alloc_count
  813  *      zone_page_alloc         - increments alloc_count
  814  *      zone_page_dealloc       - decrements alloc_count
  815  *      zone_add_free_page_list - adds the page to the free list
  816  *   
  817  *  Two counts are maintained for each page, the in_free_list count and
  818  *  alloc_count.  The alloc_count is how many zone elements have been
  819  *  allocated from a page.  (Note that the page could contain elements
  820  *  that span page boundaries.  The count includes these elements so
  821  *  one element may be counted in two pages.) In_free_list is a count
  822  *  of how many zone elements are currently free.  If in_free_list is
  823  *  equal to alloc_count then the page is eligible for garbage
  824  *  collection.
  825  *
  826  *  Alloc_count and in_free_list are initialized to the correct values
  827  *  for a particular zone when a page is zcram'ed into a zone.  Subsequent
  828  *  gets and frees of zone elements will call zone_page_in_use and 
  829  *  zone_page_free which modify the in_free_list count.  When the zones
  830  *  garbage collector runs it will walk through a zones free element list,
  831  *  remove the elements that reside on collectable pages, and use 
  832  *  zone_add_free_page_list to create a list of pages to be collected.
  833  */
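
/*
 *      Concretely: if a page was crammed with 8 elements of some zone
 *      (alloc_count == 8) and all 8 of those elements are sitting on the
 *      zone's free list when zone_gc runs (in_free_list == 8), the counts
 *      match, so zone_gc may unlink the elements and return the page to
 *      the system with kmem_free.
 */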
  834 
  835 void zone_page_in_use(
  836         vm_offset_t     addr,
  837         vm_size_t       size)
  838 {
  839         int i, j;
  840         if ((addr < zone_map_min_address) ||
  841             (addr+size > zone_map_max_address)) return;
  842         i = atop(addr-zone_map_min_address);
  843         j = atop((addr+size-1) - zone_map_min_address);
  844         lock_zone_page_table();
  845         for (; i <= j; i++) {
  846                 zone_page_table[i].in_free_list--;
  847         }
  848         unlock_zone_page_table();
  849 }
  850 
  851 void zone_page_free(
  852         vm_offset_t     addr,
  853         vm_size_t       size)
  854 {
  855         int i, j;
  856         if ((addr < zone_map_min_address) ||
  857             (addr+size > zone_map_max_address)) return;
  858         i = atop(addr-zone_map_min_address);
  859         j = atop((addr+size-1) - zone_map_min_address);
  860         lock_zone_page_table();
  861         for (; i <= j; i++) {
  862                 /* Set in_free_list to (ZONE_PAGE_USED + 1) if
  863                  * it was previously set to ZONE_PAGE_UNUSED.
  864                  */
  865                 if (zone_page_table[i].in_free_list == ZONE_PAGE_UNUSED) {
  866                         zone_page_table[i].in_free_list = 1;
  867                 } else {
  868                         zone_page_table[i].in_free_list++;
  869                 }
  870         }
  871         unlock_zone_page_table();
  872 }
  873 
  874 void zone_page_init(
  875         vm_offset_t     addr,
  876         vm_size_t       size,
  877         int             value)
  878 {
  879         int i, j;
  880         if ((addr < zone_map_min_address) ||
  881             (addr+size > zone_map_max_address)) return;
  882         i = atop(addr-zone_map_min_address);
  883         j = atop((addr+size-1) - zone_map_min_address);
  884         lock_zone_page_table();
  885         for (; i <= j; i++) {
  886                 zone_page_table[i].alloc_count = value;
  887                 zone_page_table[i].in_free_list = 0;
  888         }
  889         unlock_zone_page_table();
  890 }
  891 
  892 void zone_page_alloc(
  893         vm_offset_t     addr,
  894         vm_size_t       size)
  895 {
  896         int i, j;
  897         if ((addr < zone_map_min_address) ||
  898             (addr+size > zone_map_max_address)) return;
  899         i = atop(addr-zone_map_min_address);
  900         j = atop((addr+size-1) - zone_map_min_address);
  901         lock_zone_page_table();
  902         for (; i <= j; i++) {
  903                 /* Set alloc_count to (ZONE_PAGE_USED + 1) if
  904                  * it was previously set to ZONE_PAGE_UNUSED.
  905                  */
  906                 if (zone_page_table[i].alloc_count == ZONE_PAGE_UNUSED) {
  907                         zone_page_table[i].alloc_count = 1;
  908                 } else {
  909                         zone_page_table[i].alloc_count++;
  910                 }
  911         }
  912         unlock_zone_page_table();
  913 }
  914 
  915 void zone_page_dealloc(
  916         vm_offset_t     addr,
  917         vm_size_t       size)
  918 {
  919         int i, j;
  920         if ((addr < zone_map_min_address) ||
  921             (addr+size > zone_map_max_address)) return;
  922         i = atop(addr-zone_map_min_address);
  923         j = atop((addr+size-1) - zone_map_min_address);
  924         lock_zone_page_table();
  925         for (; i <= j; i++) {
  926                 zone_page_table[i].alloc_count--;
  927         }
  928         unlock_zone_page_table();
  929 }
  930 
  931 void
  932 zone_add_free_page_list(
  933         struct zone_page_table_entry    **free_list,
  934         vm_offset_t     addr,
  935         vm_size_t       size)
  936 {
  937         int i, j;
  938         if ((addr < zone_map_min_address) ||
  939             (addr+size > zone_map_max_address)) return;
  940         i = atop(addr-zone_map_min_address);
  941         j = atop((addr+size-1) - zone_map_min_address);
  942         lock_zone_page_table();
  943         for (; i <= j; i++) {
  944                 if (zone_page_table[i].alloc_count == 0) {
  945                         zone_page_table[i].next = *free_list;
  946                         *free_list = &zone_page_table[i];
  947                         zone_page_table[i].alloc_count  = ZONE_PAGE_UNUSED;
  948                         zone_page_table[i].in_free_list = 0;
  949                 }
  950         }
  951         unlock_zone_page_table();
  952 }
  953 
  954 
  955 /* This is used for walking through a zone's free element list.
  956  */
  957 struct zone_free_entry {
  958         struct zone_free_entry * next;
  959 };
  960 
  961 
  962 /*      Zone garbage collection
  963  *
  964  *      zone_gc will walk through all the free elements in all the
  965  *      zones that are marked collectable looking for reclaimable
  966  *      pages.  zone_gc is called by consider_zone_gc when the system
  967  *      begins to run out of memory.
  968  */
  969 void
  970 zone_gc(void)
  971 {
  972         int             max_zones;
  973         zone_t          z;
  974         int             i;
  975         register spl_t  s;
  976         struct zone_page_table_entry    *freep;
  977         struct zone_page_table_entry    *zone_free_page_list;
  978 
  979         simple_lock(&all_zones_lock);
  980         max_zones = num_zones;
  981         z = first_zone;
  982         simple_unlock(&all_zones_lock);
  983 
  984         zone_free_page_list = (struct zone_page_table_entry *) 0;
  985 
  986         for (i = 0; i < max_zones; i++) {
  987                 struct zone_free_entry * last;
  988                 struct zone_free_entry * elt;
  989                 assert(z != ZONE_NULL);
  990         /* run this at splhigh so that interrupt routines that use zones
  991            cannot interrupt while their zone is locked */
  992                 s = splhigh();
  993                 lock_zone(z);
  994 
  995                 if (!z->pageable && z->collectable) {
  996 
  997                     /* Count the free elements in each page.  This loop
  998                      * requires that all in_free_list entries are zero.
  999                      */
 1000                     elt = (struct zone_free_entry *)(z->free_elements);
 1001                     while ((elt != (struct zone_free_entry *)0)) {
 1002                            zone_page_free((vm_offset_t)elt, z->elem_size);
 1003                            elt = elt->next;
 1004                     }
 1005 
 1006                     /* Now determine which elements should be removed
 1007                      * from the free list and, after all the elements
 1008                      * on a page have been removed, add the element's
 1009                      * page to a list of pages to be freed.
 1010                      */
 1011                     elt = (struct zone_free_entry *)(z->free_elements);
 1012                     last = elt;
 1013                     while ((elt != (struct zone_free_entry *)0)) {
 1014                         if (((vm_offset_t)elt>=zone_map_min_address)&&
 1015                             ((vm_offset_t)elt<=zone_map_max_address)&&
 1016                             (zone_page(elt)->in_free_list ==
 1017                              zone_page(elt)->alloc_count)) {
 1018 
 1019                             z->cur_size -= z->elem_size;
 1020                             zone_page_in_use((vm_offset_t)elt, z->elem_size);
 1021                             zone_page_dealloc((vm_offset_t)elt, z->elem_size);
 1022                             if (zone_page(elt)->alloc_count == 0 ||
 1023                               zone_page(elt+(z->elem_size-1))->alloc_count==0) {
 1024                                     zone_add_free_page_list(
 1025                                             &zone_free_page_list, 
 1026                                             (vm_offset_t)elt, z->elem_size);
 1027                             }
 1028 
 1029 
 1030                             if (elt == last) {
 1031                                 elt = elt->next;
 1032                                 z->free_elements =(vm_offset_t)elt;
 1033                                 last = elt;
 1034                             } else {
 1035                                 last->next = elt->next;
 1036                                 elt = elt->next;
 1037                             }
 1038                         } else {
 1039                             /* This element is not eligible for collection
 1040                              * so clear in_free_list in preparation for a
 1041                              * subsequent garbage collection pass.
 1042                              */
 1043                             if (((vm_offset_t)elt>=zone_map_min_address)&&
 1044                                 ((vm_offset_t)elt<=zone_map_max_address)) {
 1045                                 zone_page(elt)->in_free_list = 0;
 1046                             }
 1047                             last = elt;
 1048                             elt = elt->next;
 1049                         }
 1050                     }
 1051                 }
 1052                 unlock_zone(z);         
 1053                 splx(s);
 1054                 simple_lock(&all_zones_lock);
 1055                 z = z->next_zone;
 1056                 simple_unlock(&all_zones_lock);
 1057         }
 1058 
 1059         for (freep = zone_free_page_list; freep != 0; freep = freep->next) {
 1060                 vm_offset_t     free_addr;
 1061 
 1062                 free_addr = zone_map_min_address + 
 1063                         PAGE_SIZE * (freep - zone_page_table);
 1064                 kmem_free(zone_map, free_addr, PAGE_SIZE);
 1065         }
 1066 }
 1067 
 1068 boolean_t zone_gc_allowed = TRUE;
 1069 unsigned zone_gc_last_tick = 0;
 1070 unsigned zone_gc_max_rate = 0;          /* in ticks */
 1071 
 1072 /*
 1073  *      consider_zone_gc:
 1074  *
 1075  *      Called by the pageout daemon when the system needs more free pages.
 1076  */
 1077 
 1078 void
 1079 consider_zone_gc(void)
 1080 {
 1081         /*
 1082          *      By default, don't attempt zone GC more frequently
 1083          *      than once a minute.
 1084          */
 1085 
 1086         if (zone_gc_max_rate == 0)
 1087                 zone_gc_max_rate = 60;
 1088 
 1089         if (zone_gc_allowed &&
 1090             (sched_tick > (zone_gc_last_tick + zone_gc_max_rate))) {
 1091                 zone_gc_last_tick = sched_tick;
 1092                 zone_gc();
 1093         }
 1094 }
 1095 
 1096 #if     MACH_DEBUG
 1097 kern_return_t host_zone_info(
 1098         host_t          host,
 1099         zone_name_array_t *namesp,
 1100         unsigned int    *namesCntp,
 1101         zone_info_array_t *infop,
 1102         unsigned int    *infoCntp)
 1103 {
 1104         zone_name_t     *names;
 1105         vm_offset_t     names_addr;
 1106         vm_size_t       names_size = 0; /*'=0' to quiet gcc warnings */
 1107         zone_info_t     *info;
 1108         vm_offset_t     info_addr;
 1109         vm_size_t       info_size = 0; /*'=0' to quiet gcc warnings */
 1110         unsigned int    max_zones, i;
 1111         zone_t          z;
 1112         kern_return_t   kr;
 1113 
 1114         if (host == HOST_NULL)
 1115                 return KERN_INVALID_HOST;
 1116 
 1117         /*
 1118          *      We assume that zones aren't freed once allocated.
 1119          *      We won't pick up any zones that are allocated later.
 1120          */
 1121 
 1122         simple_lock(&all_zones_lock);
 1123         max_zones = num_zones;
 1124         z = first_zone;
 1125         simple_unlock(&all_zones_lock);
 1126 
 1127         if (max_zones <= *namesCntp) {
 1128                 /* use in-line memory */
 1129 
 1130                 names = *namesp;
 1131         } else {
 1132                 names_size = round_page(max_zones * sizeof *names);
 1133                 kr = kmem_alloc_pageable(ipc_kernel_map,
 1134                                          &names_addr, names_size);
 1135                 if (kr != KERN_SUCCESS)
 1136                         return kr;
 1137 
 1138                 names = (zone_name_t *) names_addr;
 1139         }
 1140 
 1141         if (max_zones <= *infoCntp) {
 1142                 /* use in-line memory */
 1143 
 1144                 info = *infop;
 1145         } else {
 1146                 info_size = round_page(max_zones * sizeof *info);
 1147                 kr = kmem_alloc_pageable(ipc_kernel_map,
 1148                                          &info_addr, info_size);
 1149                 if (kr != KERN_SUCCESS) {
 1150                         if (names != *namesp)
 1151                                 kmem_free(ipc_kernel_map,
 1152                                           names_addr, names_size);
 1153                         return kr;
 1154                 }
 1155 
 1156                 info = (zone_info_t *) info_addr;
 1157         }
 1158 
 1159         for (i = 0; i < max_zones; i++) {
 1160                 zone_name_t *zn = &names[i];
 1161                 zone_info_t *zi = &info[i];
 1162                 struct zone zcopy;
 1163 
 1164                 assert(z != ZONE_NULL);
 1165 
 1166                 lock_zone(z);
 1167                 zcopy = *z;
 1168                 unlock_zone(z);
 1169 
 1170                 simple_lock(&all_zones_lock);
 1171                 z = z->next_zone;
 1172                 simple_unlock(&all_zones_lock);
 1173 
 1174                 /* assuming here the name data is static */
 1175                 (void) strncpy(zn->zn_name, zcopy.zone_name,
 1176                                sizeof zn->zn_name);
 1177 
 1178                 zi->zi_count = zcopy.count;
 1179                 zi->zi_cur_size = zcopy.cur_size;
 1180                 zi->zi_max_size = zcopy.max_size;
 1181                 zi->zi_elem_size = zcopy.elem_size;
 1182                 zi->zi_alloc_size = zcopy.alloc_size;
 1183                 zi->zi_pageable = zcopy.pageable;
 1184                 zi->zi_sleepable = zcopy.sleepable;
 1185                 zi->zi_exhaustible = zcopy.exhaustible;
 1186                 zi->zi_collectable = zcopy.collectable;
 1187         }
 1188 
 1189         if (names != *namesp) {
 1190                 vm_size_t used;
 1191                 vm_map_copy_t copy;
 1192 
 1193                 used = max_zones * sizeof *names;
 1194 
 1195                 if (used != names_size)
 1196                         bzero((char *) (names_addr + used), names_size - used);
 1197 
 1198                 kr = vm_map_copyin(ipc_kernel_map, names_addr, names_size,
 1199                                    TRUE, &copy);
 1200                 assert(kr == KERN_SUCCESS);
 1201 
 1202                 *namesp = (zone_name_t *) copy;
 1203         }
 1204         *namesCntp = max_zones;
 1205 
 1206         if (info != *infop) {
 1207                 vm_size_t used;
 1208                 vm_map_copy_t copy;
 1209 
 1210                 used = max_zones * sizeof *info;
 1211 
 1212                 if (used != info_size)
 1213                         bzero((char *) (info_addr + used), info_size - used);
 1214 
 1215                 kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size,
 1216                                    TRUE, &copy);
 1217                 assert(kr == KERN_SUCCESS);
 1218 
 1219                 *infop = (zone_info_t *) copy;
 1220         }
 1221         *infoCntp = max_zones;
 1222 
 1223         return KERN_SUCCESS;
 1224 }
 1225 #endif  /* MACH_DEBUG */
