The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_kern.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * (MPSAFE)
    3  *
    4  * Copyright (c) 1991, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * This code is derived from software contributed to Berkeley by
    8  * The Mach Operating System project at Carnegie-Mellon University.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      from: @(#)vm_kern.c     8.3 (Berkeley) 1/12/94
   35  *
   36  *
   37  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   38  * All rights reserved.
   39  *
   40  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   41  *
   42  * Permission to use, copy, modify and distribute this software and
   43  * its documentation is hereby granted, provided that both the copyright
   44  * notice and this permission notice appear in all copies of the
   45  * software, derivative works or modified versions, and any portions
   46  * thereof, and that both notices appear in supporting documentation.
   47  *
   48  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   49  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   50  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   51  *
   52  * Carnegie Mellon requests users of this software to return to
   53  *
   54  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   55  *  School of Computer Science
   56  *  Carnegie Mellon University
   57  *  Pittsburgh PA 15213-3890
   58  *
   59  * any improvements or extensions that they make and grant Carnegie the
   60  * rights to redistribute these changes.
   61  *
   62  * $FreeBSD: src/sys/vm/vm_kern.c,v 1.61.2.2 2002/03/12 18:25:26 tegge Exp $
   63  */
   64 
   65 /*
   66  *      Kernel memory management.
   67  */
   68 
   69 #include <sys/param.h>
   70 #include <sys/systm.h>
   71 #include <sys/proc.h>
   72 #include <sys/malloc.h>
   73 #include <sys/kernel.h>
   74 #include <sys/sysctl.h>
   75 
   76 #include <vm/vm.h>
   77 #include <vm/vm_param.h>
   78 #include <sys/lock.h>
   79 #include <vm/pmap.h>
   80 #include <vm/vm_map.h>
   81 #include <vm/vm_object.h>
   82 #include <vm/vm_page.h>
   83 #include <vm/vm_pageout.h>
   84 #include <vm/vm_kern.h>
   85 #include <vm/vm_extern.h>
   86 
      /*
       * Statically allocated system address maps.  kernel_map is set up by
       * kmem_init() in this file via vm_map_create(); clean_map and
       * buffer_map are only defined here and are not initialized in this file.
       */
   87 struct vm_map kernel_map;
   88 struct vm_map clean_map;
   89 struct vm_map buffer_map;
   90 
   91 /*
   92  * Allocate pageable memory to the kernel's address map.  "map" must
   93  * be kernel_map or a submap of kernel_map.
   94  *
   95  * No requirements.
   96  */
   97 vm_offset_t
   98 kmem_alloc_pageable(vm_map_t map, vm_size_t size)
   99 {
  100         vm_offset_t addr;
  101         int result;
  102 
  103         size = round_page(size);
  104         addr = vm_map_min(map);
  105         result = vm_map_find(map, NULL, (vm_offset_t) 0,
  106                              &addr, size, PAGE_SIZE,
  107                              TRUE, VM_MAPTYPE_NORMAL,
  108                              VM_PROT_ALL, VM_PROT_ALL,
  109                              0);
  110         if (result != KERN_SUCCESS)
  111                 return (0);
  112         return (addr);
  113 }
  114 
  115 /*
  116  * Same as kmem_alloc_pageable, except that it create a nofault entry.
  117  *
  118  * No requirements.
  119  */
  120 vm_offset_t
  121 kmem_alloc_nofault(vm_map_t map, vm_size_t size, vm_size_t align)
  122 {
  123         vm_offset_t addr;
  124         int result;
  125 
  126         size = round_page(size);
  127         addr = vm_map_min(map);
  128         result = vm_map_find(map, NULL, (vm_offset_t) 0,
  129                              &addr, size, align,
  130                              TRUE, VM_MAPTYPE_NORMAL,
  131                              VM_PROT_ALL, VM_PROT_ALL,
  132                              MAP_NOFAULT);
  133         if (result != KERN_SUCCESS)
  134                 return (0);
  135         return (addr);
  136 }
  137 
  138 /*
  139  * Allocate wired-down memory in the kernel's address map or a submap.
  140  *
  141  * No requirements.
  142  */
  143 vm_offset_t
  144 kmem_alloc3(vm_map_t map, vm_size_t size, int kmflags)
  145 {
  146         vm_offset_t addr;
  147         vm_offset_t gstart;
  148         vm_offset_t i;
  149         int count;
  150         int cow;
  151 
  152         size = round_page(size);
  153 
  154         if (kmflags & KM_KRESERVE)
  155                 count = vm_map_entry_kreserve(MAP_RESERVE_COUNT);
  156         else
  157                 count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
  158 
  159         if (kmflags & KM_STACK) {
  160                 cow = MAP_IS_KSTACK;
  161                 gstart = PAGE_SIZE;
  162         } else {
  163                 cow = 0;
  164                 gstart = 0;
  165         }
  166 
  167         /*
  168          * Use the kernel object for wired-down kernel pages. Assume that no
  169          * region of the kernel object is referenced more than once.
  170          *
  171          * Locate sufficient space in the map.  This will give us the final
  172          * virtual address for the new memory, and thus will tell us the
  173          * offset within the kernel map.
  174          */
  175         vm_map_lock(map);
  176         if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE, 0, &addr)) {
  177                 vm_map_unlock(map);
  178                 if (kmflags & KM_KRESERVE)
  179                         vm_map_entry_krelease(count);
  180                 else
  181                         vm_map_entry_release(count);
  182                 return (0);
  183         }
  184         vm_object_hold(&kernel_object);
  185         vm_object_reference_locked(&kernel_object);
  186         vm_map_insert(map, &count,
  187                       &kernel_object, addr, addr, addr + size,
  188                       VM_MAPTYPE_NORMAL,
  189                       VM_PROT_ALL, VM_PROT_ALL,
  190                       cow);
  191         vm_object_drop(&kernel_object);
  192 
  193         vm_map_unlock(map);
  194         if (kmflags & KM_KRESERVE)
  195                 vm_map_entry_krelease(count);
  196         else
  197                 vm_map_entry_release(count);
  198 
  199         /*
  200          * Guarantee that there are pages already in this object before
  201          * calling vm_map_wire.  This is to prevent the following
  202          * scenario:
  203          *
  204          * 1) Threads have swapped out, so that there is a pager for the
  205          * kernel_object. 2) The kmsg zone is empty, and so we are
  206          * kmem_allocing a new page for it. 3) vm_map_wire calls vm_fault;
  207          * there is no page, but there is a pager, so we call
  208          * pager_data_request.  But the kmsg zone is empty, so we must
  209          * kmem_alloc. 4) goto 1 5) Even if the kmsg zone is not empty: when
  210          * we get the data back from the pager, it will be (very stale)
  211          * non-zero data.  kmem_alloc is defined to return zero-filled memory.
  212          *
  213          * We're intentionally not activating the pages we allocate to prevent a
  214          * race with page-out.  vm_map_wire will wire the pages.
  215          */
  216         vm_object_hold(&kernel_object);
  217         for (i = gstart; i < size; i += PAGE_SIZE) {
  218                 vm_page_t mem;
  219 
  220                 mem = vm_page_grab(&kernel_object, OFF_TO_IDX(addr + i),
  221                                    VM_ALLOC_FORCE_ZERO | VM_ALLOC_NORMAL |
  222                                    VM_ALLOC_RETRY);
  223                 vm_page_unqueue_nowakeup(mem);
  224                 vm_page_wakeup(mem);
  225         }
  226         vm_object_drop(&kernel_object);
  227 
  228         /*
  229          * And finally, mark the data as non-pageable.
  230          *
  231          * NOTE: vm_map_wire() handles any kstack guard.
  232          */
  233         vm_map_wire(map, addr, addr + size, kmflags);
  234 
  235         return (addr);
  236 }
  237 
  238 /*
  239  * Release a region of kernel virtual memory allocated with kmem_alloc,
  240  * and return the physical pages associated with that region.
  241  *
  242  * WARNING!  If the caller entered pages into the region using pmap_kenter()
  243  * it must remove the pages using pmap_kremove[_quick]() before freeing the
  244  * underlying kmem, otherwise resident_count will be mistabulated.
  245  *
  246  * No requirements.
  247  */
  248 void
  249 kmem_free(vm_map_t map, vm_offset_t addr, vm_size_t size)
  250 {
  251         vm_map_remove(map, trunc_page(addr), round_page(addr + size));
  252 }
  253 
  254 /*
  255  * Used to break a system map into smaller maps, usually to reduce
  256  * contention and to provide large KVA spaces for subsystems like the
  257  * buffer cache.
  258  *
  259  *      parent          Map to take range from
  260  *      result  
  261  *      size            Size of range to find
  262  *      min, max        Returned endpoints of map
  263  *      pageable        Can the region be paged
  264  *
  265  * No requirements.
  266  */
  267 void
  268 kmem_suballoc(vm_map_t parent, vm_map_t result,
  269               vm_offset_t *min, vm_offset_t *max, vm_size_t size)
  270 {
  271         int ret;
  272 
  273         size = round_page(size);
  274 
  275         *min = (vm_offset_t) vm_map_min(parent);
  276         ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
  277                           min, size, PAGE_SIZE,
  278                           TRUE, VM_MAPTYPE_UNSPECIFIED,
  279                           VM_PROT_ALL, VM_PROT_ALL,
  280                           0);
  281         if (ret != KERN_SUCCESS) {
  282                 kprintf("kmem_suballoc: bad status return of %d.\n", ret);
  283                 panic("kmem_suballoc");
  284         }
  285         *max = *min + size;
  286         pmap_reference(vm_map_pmap(parent));
  287         vm_map_init(result, *min, *max, vm_map_pmap(parent));
  288         if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
  289                 panic("kmem_suballoc: unable to change range to submap");
  290 }
  291 
  292 /*
  293  * Allocates pageable memory from a sub-map of the kernel.  If the submap
  294  * has no room, the caller sleeps waiting for more memory in the submap.
  295  *
  296  * No requirements.
  297  */
  298 vm_offset_t
  299 kmem_alloc_wait(vm_map_t map, vm_size_t size)
  300 {
  301         vm_offset_t addr;
  302         int count;
  303 
  304         size = round_page(size);
  305 
  306         count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
  307 
  308         for (;;) {
  309                 /*
  310                  * To make this work for more than one map, use the map's lock
  311                  * to lock out sleepers/wakers.
  312                  */
  313                 vm_map_lock(map);
  314                 if (vm_map_findspace(map, vm_map_min(map),
  315                                      size, PAGE_SIZE, 0, &addr) == 0) {
  316                         break;
  317                 }
  318                 /* no space now; see if we can ever get space */
  319                 if (vm_map_max(map) - vm_map_min(map) < size) {
  320                         vm_map_entry_release(count);
  321                         vm_map_unlock(map);
  322                         return (0);
  323                 }
  324                 vm_map_unlock(map);
  325                 tsleep(map, 0, "kmaw", 0);
  326         }
  327         vm_map_insert(map, &count,
  328                       NULL, (vm_offset_t) 0,
  329                       addr, addr + size,
  330                       VM_MAPTYPE_NORMAL,
  331                       VM_PROT_ALL, VM_PROT_ALL,
  332                       0);
  333         vm_map_unlock(map);
  334         vm_map_entry_release(count);
  335 
  336         return (addr);
  337 }
  338 
  339 /*
  340  *  Allocates a region from the kernel address map and physical pages
  341  *  within the specified address range to the kernel object.  Creates a
  342  *  wired mapping from this region to these pages, and returns the
  343  *  region's starting virtual address.  The allocated pages are not
  344  *  necessarily physically contiguous.  If M_ZERO is specified through the
  345  *  given flags, then the pages are zeroed before they are mapped.
  346  */
  347 vm_offset_t
  348 kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
  349     vm_paddr_t high, vm_memattr_t memattr)
  350 {
  351         vm_offset_t addr, i, offset;
  352         vm_page_t m;
  353         int count;
  354 
  355         size = round_page(size);
  356         count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
  357         vm_map_lock(map);
  358         if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE,
  359                                 flags, &addr)) {
  360                 vm_map_unlock(map);
  361                 vm_map_entry_release(count);
  362                 return (0);
  363         }
  364         offset = addr - vm_map_min(&kernel_map);
  365         vm_object_hold(&kernel_object);
  366         vm_object_reference_locked(&kernel_object);
  367         vm_map_insert(map, &count, &kernel_object, offset, addr, addr + size,
  368                 VM_MAPTYPE_NORMAL, VM_PROT_ALL, VM_PROT_ALL, 0);
  369         vm_map_unlock(map);
  370         vm_map_entry_release(count);
  371         vm_object_drop(&kernel_object);
  372         for (i = 0; i < size; i += PAGE_SIZE) {
  373                 m = vm_page_alloc_contig(low, high, PAGE_SIZE, 0, PAGE_SIZE, memattr);
  374                 if (!m) {
  375                         return (0);
  376                 }
  377                 vm_object_hold(&kernel_object);
  378                 vm_page_insert(m, &kernel_object, OFF_TO_IDX(offset + i));
  379                 vm_object_drop(&kernel_object);
  380                 if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
  381                         pmap_zero_page(VM_PAGE_TO_PHYS(m));
  382                 m->valid = VM_PAGE_BITS_ALL;
  383         }
  384         vm_map_wire(map, addr, addr + size, 0);
  385         return (addr);
  386 }
  387 
  388 
  389 /*
  390  * Returns memory to a submap of the kernel, and wakes up any processes
  391  * waiting for memory in that map.
  392  *
  393  * No requirements.
  394  */
  395 void
  396 kmem_free_wakeup(vm_map_t map, vm_offset_t addr, vm_size_t size)
  397 {
  398         int count;
  399 
  400         count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
  401         vm_map_lock(map);
  402         vm_map_delete(map, trunc_page(addr), round_page(addr + size), &count);
  403         wakeup(map);
  404         vm_map_unlock(map);
  405         vm_map_entry_release(count);
  406 }
  407 
  408 /*
  409  * Create the kernel_ma for (KvaStart,KvaEnd) and insert mappings to
  410  * cover areas already allocated or reserved thus far.
  411  *
  412  * The areas (virtual_start, virtual_end) and (virtual2_start, virtual2_end)
  413  * are available so the cutouts are the areas around these ranges between
  414  * KvaStart and KvaEnd.
  415  *
  416  * Depend on the zalloc bootstrap cache to get our vm_map_entry_t.
  417  * Called from the low level boot code only.
  418  */
  419 void
  420 kmem_init(void)
  421 {
  422         vm_offset_t addr;
  423         vm_map_t m;
  424         int count;
  425 
  426         m = vm_map_create(&kernel_map, &kernel_pmap, KvaStart, KvaEnd);
  427         vm_map_lock(m);
  428         /* N.B.: cannot use kgdb to debug, starting with this assignment ... */
  429         m->system_map = 1;
  430         count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
  431         addr = KvaStart;
  432         if (virtual2_start) {
  433                 if (addr < virtual2_start) {
  434                         vm_map_insert(m, &count, NULL, (vm_offset_t) 0,
  435                                       addr, virtual2_start,
  436                                       VM_MAPTYPE_NORMAL,
  437                                       VM_PROT_ALL, VM_PROT_ALL,
  438                                       0);
  439                 }
  440                 addr = virtual2_end;
  441         }
  442         if (addr < virtual_start) {
  443                 vm_map_insert(m, &count, NULL, (vm_offset_t) 0,
  444                               addr, virtual_start,
  445                               VM_MAPTYPE_NORMAL,
  446                               VM_PROT_ALL, VM_PROT_ALL,
  447                               0);
  448         }
  449         addr = virtual_end;
  450         if (addr < KvaEnd) {
  451                 vm_map_insert(m, &count, NULL, (vm_offset_t) 0,
  452                               addr, KvaEnd,
  453                               VM_MAPTYPE_NORMAL,
  454                               VM_PROT_ALL, VM_PROT_ALL,
  455                               0);
  456         }
  457         /* ... and ending with the completion of the above `insert' */
  458         vm_map_unlock(m);
  459         vm_map_entry_release(count);
  460 }
  461 
  462 /*
  463  * No requirements.
  464  */
  465 static int
  466 kvm_size(SYSCTL_HANDLER_ARGS)
  467 {
  468         unsigned long ksize = KvaSize;
  469 
  470         return sysctl_handle_long(oidp, &ksize, 0, req);
  471 }
  472 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_ULONG|CTLFLAG_RD,
  473     0, 0, kvm_size, "LU", "Size of KVM");
  474  
  475 /*
  476  * No requirements.
  477  */
  478 static int
  479 kvm_free(SYSCTL_HANDLER_ARGS)
  480 {
  481         unsigned long kfree = virtual_end - kernel_vm_end;
  482 
  483         return sysctl_handle_long(oidp, &kfree, 0, req);
  484 }
  485 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_ULONG|CTLFLAG_RD,
  486     0, 0, kvm_free, "LU", "Amount of KVM free");
  487 

Cache object: 0e00129817426e7d19a0a015f5be4cf1


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.