FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_km.c
1 /* $OpenBSD: uvm_km.c,v 1.151 2022/08/01 14:15:46 mpi Exp $ */
2 /* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */
3
4 /*
5 * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 * Copyright (c) 1991, 1993, The Regents of the University of California.
7 *
8 * All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * The Mach Operating System project at Carnegie-Mellon University.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)vm_kern.c 8.3 (Berkeley) 1/12/94
38 * from: Id: uvm_km.c,v 1.1.2.14 1998/02/06 05:19:27 chs Exp
39 *
40 *
41 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
42 * All rights reserved.
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /*
66 * uvm_km.c: handle kernel memory allocation and management
67 */
68
69 /*
70 * overview of kernel memory management:
71 *
72 * the kernel virtual address space is mapped by "kernel_map." kernel_map
73 * starts at a machine-dependent address and is VM_KERNEL_SPACE_SIZE bytes
74 * large.
75 *
76 * the kernel_map has several "submaps." submaps can only appear in
77 * the kernel_map (user processes can't use them). submaps "take over"
78 * the management of a sub-range of the kernel's address space. submaps
79 * are typically allocated at boot time and are never released. kernel
80 * virtual address space that is mapped by a submap is locked by the
81 * submap's lock -- not the kernel_map's lock.
82 *
83 * thus, the useful feature of submaps is that they allow us to break
84 * up the locking and protection of the kernel address space into smaller
85 * chunks.
86 *
87 * The VM system has several standard kernel submaps:
88 * kmem_map: Contains only wired kernel memory for malloc(9).
89 * Note: All access to this map must be protected by splvm as
90 * calls to malloc(9) are allowed in interrupt handlers.
 91  *	exec_map: Memory to hold arguments to system calls is allocated from
92 * this map.
 93  *	     XXX: This is primarily used to artificially limit the number
94 * of concurrent processes doing an exec.
95 * phys_map: Buffers for vmapbuf (physio) are allocated from this map.
96 *
97 * the kernel allocates its private memory out of special uvm_objects whose
98 * reference count is set to UVM_OBJ_KERN (thus indicating that the objects
99 * are "special" and never die). all kernel objects should be thought of
100 * as large, fixed-sized, sparsely populated uvm_objects. each kernel
101  * object is the size of the kernel virtual address space (i.e.
102 * VM_KERNEL_SPACE_SIZE).
103 *
104 * most kernel private memory lives in kernel_object. the only exception
105 * to this is for memory that belongs to submaps that must be protected
106  * by splvm().  each of these submaps manages its own pages.
107 *
108 * note that just because a kernel object spans the entire kernel virtual
109 * address space doesn't mean that it has to be mapped into the entire space.
110 * large chunks of a kernel object's space go unused either because
111 * that area of kernel VM is unmapped, or there is some other type of
112  * object mapped into that range (e.g. a vnode).  for a submap's kernel
113  * object, the only parts of the object that can ever be populated are
114  * the offsets that are managed by the submap.
115 *
116 * note that the "offset" in a kernel object is always the kernel virtual
117 * address minus the vm_map_min(kernel_map).
118 * example:
119 * suppose kernel_map starts at 0xf8000000 and the kernel does a
120 * uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the
121 * kernel map]. if uvm_km_alloc returns virtual address 0xf8235000,
122 * then that means that the page at offset 0x235000 in kernel_object is
123 * mapped at 0xf8235000.
124 *
125 * kernel objects have one other special property: when the kernel virtual
126 * memory mapping them is unmapped, the backing memory in the object is
127 * freed right away. this is done with the uvm_km_pgremove() function.
128 * this has to be done because there is no backing store for kernel pages
129 * and no need to save them after they are no longer referenced.
130 */
131
132 #include <sys/param.h>
133 #include <sys/systm.h>
134 #include <sys/proc.h>
135 #include <sys/kthread.h>
136 #include <uvm/uvm.h>
137
138 /*
139 * global data structures
140 */
141
142 struct vm_map *kernel_map = NULL;
143
144 /* Unconstrained range. */
145 struct uvm_constraint_range no_constraint = { 0x0, (paddr_t)-1 };
146
147 /*
148 * local data structures
149 */
150 static struct vm_map kernel_map_store;
151
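/*
 * Editor's illustrative sketch, not part of the original file: the
 * kernel-object offset rule from the overview comment above, written
 * out as a helper.  The function name is hypothetical; the same
 * expression appears inline in uvm_km_kmemalloc_pla() and
 * uvm_km_alloc1() below.
 */
static inline voff_t
uvm_km_example_offset(vaddr_t va)
{
	/* offset into kernel_object = VA minus the start of kernel_map */
	return (voff_t)(va - vm_map_min(kernel_map));
}
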
152 /*
153 * uvm_km_init: init kernel maps and objects to reflect reality (i.e.
154 * KVM already allocated for text, data, bss, and static data structures).
155 *
156 * => KVM is defined by [base.. base + VM_KERNEL_SPACE_SIZE].
157 * we assume that [base -> start] has already been allocated and that
158 * "end" is the end of the kernel image span.
159 */
160 void
161 uvm_km_init(vaddr_t base, vaddr_t start, vaddr_t end)
162 {
163 /* kernel_object: for pageable anonymous kernel memory */
164 uao_init();
165 uvm.kernel_object = uao_create(VM_KERNEL_SPACE_SIZE, UAO_FLAG_KERNOBJ);
166
167 /*
168 * init the map and reserve already allocated kernel space
169 * before installing.
170 */
171
172 uvm_map_setup(&kernel_map_store, pmap_kernel(), base, end,
173 #ifdef KVA_GUARDPAGES
174 VM_MAP_PAGEABLE | VM_MAP_GUARDPAGES
175 #else
176 VM_MAP_PAGEABLE
177 #endif
178 );
179 if (base != start && uvm_map(&kernel_map_store, &base, start - base,
180 NULL, UVM_UNKNOWN_OFFSET, 0,
181 UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
182 MAP_INHERIT_NONE, MADV_RANDOM, UVM_FLAG_FIXED)) != 0)
183 panic("uvm_km_init: could not reserve space for kernel");
184
185 kernel_map = &kernel_map_store;
186 }
187
188 /*
189 * uvm_km_suballoc: allocate a submap in the kernel map. once a submap
190 * is allocated all references to that area of VM must go through it. this
191 * allows the locking of VAs in kernel_map to be broken up into regions.
192 *
193 * => if `fixed' is true, *min specifies where the region described
194 * by the submap must start
195 * => if submap is non NULL we use that as the submap, otherwise we
196 * alloc a new map
197 */
198 struct vm_map *
199 uvm_km_suballoc(struct vm_map *map, vaddr_t *min, vaddr_t *max, vsize_t size,
200 int flags, boolean_t fixed, struct vm_map *submap)
201 {
202 int mapflags = UVM_FLAG_NOMERGE | (fixed ? UVM_FLAG_FIXED : 0);
203
204 size = round_page(size); /* round up to pagesize */
205
206 /* first allocate a blank spot in the parent map */
207 if (uvm_map(map, min, size, NULL, UVM_UNKNOWN_OFFSET, 0,
208 UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
209 MAP_INHERIT_NONE, MADV_RANDOM, mapflags)) != 0) {
210 panic("uvm_km_suballoc: unable to allocate space in parent map");
211 }
212
213 /* set VM bounds (min is filled in by uvm_map) */
214 *max = *min + size;
215
216 /* add references to pmap and create or init the submap */
217 pmap_reference(vm_map_pmap(map));
218 if (submap == NULL) {
219 submap = uvm_map_create(vm_map_pmap(map), *min, *max, flags);
220 if (submap == NULL)
221 panic("uvm_km_suballoc: unable to create submap");
222 } else {
223 uvm_map_setup(submap, vm_map_pmap(map), *min, *max, flags);
224 }
225
226 /*
227 	 * now let uvm_map_submap plug it in...
228 */
229 if (uvm_map_submap(map, *min, *max, submap) != 0)
230 panic("uvm_km_suballoc: submap allocation failed");
231
232 return(submap);
233 }
234
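/*
 * Editor's illustrative sketch, not part of the original file: how a
 * machine-dependent startup routine might carve an interrupt-safe
 * submap such as kmem_map out of kernel_map.  The function name, the
 * submap size, and the use of VM_MAP_INTRSAFE here are assumptions for
 * illustration; the real callers live in per-architecture startup code.
 */
static struct vm_map example_kmem_map_store;

static void
uvm_km_example_suballoc(void)
{
	vaddr_t minva = 0, maxva = 0;
	vsize_t sz = 1024 * PAGE_SIZE;		/* assumed submap size */

	/* let uvm_km_suballoc() pick the placement (fixed == FALSE) */
	kmem_map = uvm_km_suballoc(kernel_map, &minva, &maxva, sz,
	    VM_MAP_INTRSAFE, FALSE, &example_kmem_map_store);
}
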
235 /*
236 * uvm_km_pgremove: remove pages from a kernel uvm_object.
237 *
238 * => when you unmap a part of anonymous kernel memory you want to toss
239 * the pages right away. (this gets called from uvm_unmap_...).
240 */
241 void
242 uvm_km_pgremove(struct uvm_object *uobj, vaddr_t startva, vaddr_t endva)
243 {
244 const voff_t start = startva - vm_map_min(kernel_map);
245 const voff_t end = endva - vm_map_min(kernel_map);
246 struct vm_page *pp;
247 voff_t curoff;
248 int slot;
249 int swpgonlydelta = 0;
250
251 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
252 KASSERT(rw_write_held(uobj->vmobjlock));
253
254 pmap_remove(pmap_kernel(), startva, endva);
255 for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) {
256 pp = uvm_pagelookup(uobj, curoff);
257 if (pp && pp->pg_flags & PG_BUSY) {
258 uvm_pagewait(pp, uobj->vmobjlock, "km_pgrm");
259 rw_enter(uobj->vmobjlock, RW_WRITE);
260 curoff -= PAGE_SIZE; /* loop back to us */
261 continue;
262 }
263
264 /* free the swap slot, then the page */
265 slot = uao_dropswap(uobj, curoff >> PAGE_SHIFT);
266
267 if (pp != NULL) {
268 uvm_lock_pageq();
269 uvm_pagefree(pp);
270 uvm_unlock_pageq();
271 } else if (slot != 0) {
272 swpgonlydelta++;
273 }
274 }
275
276 if (swpgonlydelta > 0) {
277 KASSERT(uvmexp.swpgonly >= swpgonlydelta);
278 atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
279 }
280 }
281
282
283 /*
284 * uvm_km_pgremove_intrsafe: like uvm_km_pgremove(), but for "intrsafe"
285 * objects
286 *
287 * => when you unmap a part of anonymous kernel memory you want to toss
288 * the pages right away. (this gets called from uvm_unmap_...).
289 * => none of the pages will ever be busy, and none of them will ever
290 * be on the active or inactive queues (because these objects are
291 * never allowed to "page").
292 */
293 void
294 uvm_km_pgremove_intrsafe(vaddr_t start, vaddr_t end)
295 {
296 struct vm_page *pg;
297 vaddr_t va;
298 paddr_t pa;
299
300 for (va = start; va < end; va += PAGE_SIZE) {
301 if (!pmap_extract(pmap_kernel(), va, &pa))
302 continue;
303 pg = PHYS_TO_VM_PAGE(pa);
304 if (pg == NULL)
305 panic("uvm_km_pgremove_intrsafe: no page");
306 uvm_pagefree(pg);
307 }
308 pmap_kremove(start, end - start);
309 }
310
311 /*
312 * uvm_km_kmemalloc: lower level kernel memory allocator for malloc()
313 *
314 * => we map wired memory into the specified map using the obj passed in
315 * => NOTE: we can return NULL even if we can wait if there is not enough
316 * free VM space in the map... caller should be prepared to handle
317 * this case.
318 * => we return KVA of memory allocated
319 * => flags: NOWAIT, VALLOC - just allocate VA, TRYLOCK - fail if we can't
320 * lock the map
321 * => low, high, alignment, boundary, nsegs are the corresponding parameters
322 * to uvm_pglistalloc
323 * => flags: ZERO - correspond to uvm_pglistalloc flags
324 */
325 vaddr_t
326 uvm_km_kmemalloc_pla(struct vm_map *map, struct uvm_object *obj, vsize_t size,
327 vsize_t valign, int flags, paddr_t low, paddr_t high, paddr_t alignment,
328 paddr_t boundary, int nsegs)
329 {
330 vaddr_t kva, loopva;
331 voff_t offset;
332 struct vm_page *pg;
333 struct pglist pgl;
334 int pla_flags;
335
336 KASSERT(vm_map_pmap(map) == pmap_kernel());
337 /* UVM_KMF_VALLOC => !UVM_KMF_ZERO */
338 KASSERT(!(flags & UVM_KMF_VALLOC) ||
339 !(flags & UVM_KMF_ZERO));
340
341 /* setup for call */
342 size = round_page(size);
343 kva = vm_map_min(map); /* hint */
344 if (nsegs == 0)
345 nsegs = atop(size);
346
347 /* allocate some virtual space */
348 if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET,
349 valign, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
350 MAP_INHERIT_NONE, MADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) != 0)) {
351 return 0;
352 }
353
354 /* if all we wanted was VA, return now */
355 if (flags & UVM_KMF_VALLOC) {
356 return kva;
357 }
358
359 /* recover object offset from virtual address */
360 if (obj != NULL)
361 offset = kva - vm_map_min(kernel_map);
362 else
363 offset = 0;
364
365 /*
366 * now allocate and map in the memory... note that we are the only ones
367 	 * who should ever get a handle on this area of VM.
368 */
369 TAILQ_INIT(&pgl);
370 pla_flags = 0;
371 KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
372 if ((flags & UVM_KMF_NOWAIT) ||
373 ((flags & UVM_KMF_CANFAIL) &&
374 uvmexp.swpages - uvmexp.swpgonly <= atop(size)))
375 pla_flags |= UVM_PLA_NOWAIT;
376 else
377 pla_flags |= UVM_PLA_WAITOK;
378 if (flags & UVM_KMF_ZERO)
379 pla_flags |= UVM_PLA_ZERO;
380 if (uvm_pglistalloc(size, low, high, alignment, boundary, &pgl, nsegs,
381 pla_flags) != 0) {
382 /* Failed. */
383 uvm_unmap(map, kva, kva + size);
384 return (0);
385 }
386
387 if (obj != NULL)
388 rw_enter(obj->vmobjlock, RW_WRITE);
389
390 loopva = kva;
391 while (loopva != kva + size) {
392 pg = TAILQ_FIRST(&pgl);
393 TAILQ_REMOVE(&pgl, pg, pageq);
394 uvm_pagealloc_pg(pg, obj, offset, NULL);
395 atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
396 UVM_PAGE_OWN(pg, NULL);
397
398 /*
399 * map it in: note that we call pmap_enter with the map and
400 * object unlocked in case we are kmem_map.
401 */
402 if (obj == NULL) {
403 pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg),
404 PROT_READ | PROT_WRITE);
405 } else {
406 pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
407 PROT_READ | PROT_WRITE,
408 PROT_READ | PROT_WRITE | PMAP_WIRED);
409 }
410 loopva += PAGE_SIZE;
411 offset += PAGE_SIZE;
412 }
413 KASSERT(TAILQ_EMPTY(&pgl));
414 pmap_update(pmap_kernel());
415
416 if (obj != NULL)
417 rw_exit(obj->vmobjlock);
418
419 return kva;
420 }
421
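/*
 * Editor's illustrative sketch, not part of the original file: a direct
 * call to uvm_km_kmemalloc_pla() asking for one wired, zeroed page with
 * no physical-address constraint, roughly what a malloc(9)-style backend
 * would issue when it must not sleep.  The wrapper name is hypothetical.
 */
static vaddr_t
uvm_km_example_kmemalloc(void)
{
	/* obj == NULL: pages are entered with pmap_kenter_pa(), as for kmem_map */
	return uvm_km_kmemalloc_pla(kmem_map, NULL, PAGE_SIZE, 0,
	    UVM_KMF_NOWAIT | UVM_KMF_ZERO,
	    no_constraint.ucr_low, no_constraint.ucr_high, 0, 0, 0);
}
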
422 /*
423 * uvm_km_free: free an area of kernel memory
424 */
425 void
426 uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size)
427 {
428 uvm_unmap(map, trunc_page(addr), round_page(addr+size));
429 }
430
431 /*
432 * uvm_km_alloc1: allocate wired down memory in the kernel map.
433 *
434 * => we can sleep if needed
435 */
436 vaddr_t
437 uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit)
438 {
439 vaddr_t kva, loopva;
440 voff_t offset;
441 struct vm_page *pg;
442
443 KASSERT(vm_map_pmap(map) == pmap_kernel());
444
445 size = round_page(size);
446 kva = vm_map_min(map); /* hint */
447
448 /* allocate some virtual space */
449 if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object,
450 UVM_UNKNOWN_OFFSET, align,
451 UVM_MAPFLAG(PROT_READ | PROT_WRITE,
452 PROT_READ | PROT_WRITE | PROT_EXEC,
453 MAP_INHERIT_NONE, MADV_RANDOM, 0)) != 0)) {
454 return 0;
455 }
456
457 /* recover object offset from virtual address */
458 offset = kva - vm_map_min(kernel_map);
459
460 /* now allocate the memory. we must be careful about released pages. */
461 loopva = kva;
462 while (size) {
463 rw_enter(uvm.kernel_object->vmobjlock, RW_WRITE);
464 /* allocate ram */
465 pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0);
466 if (pg) {
467 atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
468 UVM_PAGE_OWN(pg, NULL);
469 }
470 rw_exit(uvm.kernel_object->vmobjlock);
471 if (__predict_false(pg == NULL)) {
472 if (curproc == uvm.pagedaemon_proc) {
473 /*
474 				 * It is infeasible for the page daemon to
475 * sleep for memory, so free what we have
476 * allocated and fail.
477 */
478 uvm_unmap(map, kva, loopva - kva);
479 return (0);
480 } else {
481 uvm_wait("km_alloc1w"); /* wait for memory */
482 continue;
483 }
484 }
485
486 /*
487 * map it in; note we're never called with an intrsafe
488 * object, so we always use regular old pmap_enter().
489 */
490 pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
491 PROT_READ | PROT_WRITE,
492 PROT_READ | PROT_WRITE | PMAP_WIRED);
493
494 loopva += PAGE_SIZE;
495 offset += PAGE_SIZE;
496 size -= PAGE_SIZE;
497 }
498 pmap_update(map->pmap);
499
500 /*
501 * zero on request (note that "size" is now zero due to the above loop
502 * so we need to subtract kva from loopva to reconstruct the size).
503 */
504 if (zeroit)
505 memset((caddr_t)kva, 0, loopva - kva);
506
507 return kva;
508 }
509
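/*
 * Editor's illustrative sketch, not part of the original file: a caller
 * that wants wired, zero-filled kernel memory backed by kernel_object
 * could use uvm_km_alloc1() like this.  The wrapper name is hypothetical.
 */
static vaddr_t
uvm_km_example_alloc1(vsize_t size)
{
	/* no extra alignment; sleep for pages if needed, zero before return */
	return uvm_km_alloc1(kernel_map, size, 0, TRUE);
}
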
510 #if defined(__HAVE_PMAP_DIRECT)
511 /*
512 * uvm_km_page allocator, __HAVE_PMAP_DIRECT arch
513 * On architectures with machine memory direct mapped into a portion
514 * of KVM, we have very little work to do. Just get a physical page,
515 * and find and return its VA.
516 */
517 void
518 uvm_km_page_init(void)
519 {
520 /* nothing */
521 }
522
523 void
524 uvm_km_page_lateinit(void)
525 {
526 /* nothing */
527 }
528
529 #else
530 /*
531 * uvm_km_page allocator, non __HAVE_PMAP_DIRECT archs
532 * This is a special allocator that uses a reserve of free pages
533 * to fulfill requests. It is fast and interrupt safe, but can only
534 * return page sized regions. Its primary use is as a backend for pool.
535 *
536 * The memory returned is allocated from the larger kernel_map, sparing
537 * pressure on the small interrupt-safe kmem_map. It is wired, but
538 * not zero filled.
539 */
540
541 struct uvm_km_pages uvm_km_pages;
542
543 void uvm_km_createthread(void *);
544 void uvm_km_thread(void *);
545 struct uvm_km_free_page *uvm_km_doputpage(struct uvm_km_free_page *);
546
547 /*
548 * Allocate the initial reserve, and create the thread which will
549 * keep the reserve full. For bootstrapping, we allocate more than
550 * the lowat amount, because it may be a while before the thread is
551 * running.
552 */
553 void
554 uvm_km_page_init(void)
555 {
556 int lowat_min;
557 int i;
558 int len, bulk;
559 vaddr_t addr;
560
561 mtx_init(&uvm_km_pages.mtx, IPL_VM);
562 if (!uvm_km_pages.lowat) {
563 /* based on physmem, calculate a good value here */
564 uvm_km_pages.lowat = physmem / 256;
565 lowat_min = physmem < atop(16 * 1024 * 1024) ? 32 : 128;
566 if (uvm_km_pages.lowat < lowat_min)
567 uvm_km_pages.lowat = lowat_min;
568 }
569 if (uvm_km_pages.lowat > UVM_KM_PAGES_LOWAT_MAX)
570 uvm_km_pages.lowat = UVM_KM_PAGES_LOWAT_MAX;
571 uvm_km_pages.hiwat = 4 * uvm_km_pages.lowat;
572 if (uvm_km_pages.hiwat > UVM_KM_PAGES_HIWAT_MAX)
573 uvm_km_pages.hiwat = UVM_KM_PAGES_HIWAT_MAX;
574
575 /* Allocate all pages in as few allocations as possible. */
576 len = 0;
577 bulk = uvm_km_pages.hiwat;
578 while (len < uvm_km_pages.hiwat && bulk > 0) {
579 bulk = MIN(bulk, uvm_km_pages.hiwat - len);
580 addr = vm_map_min(kernel_map);
581 if (uvm_map(kernel_map, &addr, (vsize_t)bulk << PAGE_SHIFT,
582 NULL, UVM_UNKNOWN_OFFSET, 0,
583 UVM_MAPFLAG(PROT_READ | PROT_WRITE,
584 PROT_READ | PROT_WRITE, MAP_INHERIT_NONE,
585 MADV_RANDOM, UVM_KMF_TRYLOCK)) != 0) {
586 bulk /= 2;
587 continue;
588 }
589
590 for (i = len; i < len + bulk; i++, addr += PAGE_SIZE)
591 uvm_km_pages.page[i] = addr;
592 len += bulk;
593 }
594
595 uvm_km_pages.free = len;
596 for (i = len; i < UVM_KM_PAGES_HIWAT_MAX; i++)
597 uvm_km_pages.page[i] = 0;
598
599 /* tone down if really high */
600 if (uvm_km_pages.lowat > 512)
601 uvm_km_pages.lowat = 512;
602 }
603
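/*
 * Editor's note, a worked example of the sizing above (illustrative;
 * assumes 4 KB pages and that neither UVM_KM_PAGES_LOWAT_MAX nor
 * UVM_KM_PAGES_HIWAT_MAX caps the result): with 128 MB of RAM,
 * physmem = 32768 pages, so lowat = 32768 / 256 = 128, which already
 * meets the 128-page minimum used for machines with at least 16 MB;
 * hiwat = 4 * 128 = 512 pages, so the boot-time loop above tries to
 * reserve up to 2 MB of KVA in as few uvm_map() calls as it can.
 */
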
604 void
605 uvm_km_page_lateinit(void)
606 {
607 kthread_create_deferred(uvm_km_createthread, NULL);
608 }
609
610 void
611 uvm_km_createthread(void *arg)
612 {
613 kthread_create(uvm_km_thread, NULL, &uvm_km_pages.km_proc, "kmthread");
614 }
615
616 /*
617 * Endless loop. We grab pages in increments of 16 pages, then
618 * quickly swap them into the list.
619 */
620 void
621 uvm_km_thread(void *arg)
622 {
623 vaddr_t pg[16];
624 int i;
625 int allocmore = 0;
626 int flags;
627 struct uvm_km_free_page *fp = NULL;
628
629 KERNEL_UNLOCK();
630
631 for (;;) {
632 mtx_enter(&uvm_km_pages.mtx);
633 if (uvm_km_pages.free >= uvm_km_pages.lowat &&
634 uvm_km_pages.freelist == NULL) {
635 msleep_nsec(&uvm_km_pages.km_proc, &uvm_km_pages.mtx,
636 PVM, "kmalloc", INFSLP);
637 }
638 allocmore = uvm_km_pages.free < uvm_km_pages.lowat;
639 fp = uvm_km_pages.freelist;
640 uvm_km_pages.freelist = NULL;
641 uvm_km_pages.freelistlen = 0;
642 mtx_leave(&uvm_km_pages.mtx);
643
644 if (allocmore) {
645 /*
646 * If there was nothing on the freelist, then we
647 * must obtain at least one page to make progress.
648 * So, only use UVM_KMF_TRYLOCK for the first page
649 * if fp != NULL
650 			 * if fp != NULL.
651 flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
652 PROT_READ | PROT_WRITE, MAP_INHERIT_NONE,
653 MADV_RANDOM, fp != NULL ? UVM_KMF_TRYLOCK : 0);
654 memset(pg, 0, sizeof(pg));
655 for (i = 0; i < nitems(pg); i++) {
656 pg[i] = vm_map_min(kernel_map);
657 if (uvm_map(kernel_map, &pg[i], PAGE_SIZE,
658 NULL, UVM_UNKNOWN_OFFSET, 0, flags) != 0) {
659 pg[i] = 0;
660 break;
661 }
662
663 /* made progress, so don't sleep for more */
664 flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
665 PROT_READ | PROT_WRITE, MAP_INHERIT_NONE,
666 MADV_RANDOM, UVM_KMF_TRYLOCK);
667 }
668
669 mtx_enter(&uvm_km_pages.mtx);
670 for (i = 0; i < nitems(pg); i++) {
671 if (uvm_km_pages.free ==
672 nitems(uvm_km_pages.page))
673 break;
674 else if (pg[i] != 0)
675 uvm_km_pages.page[uvm_km_pages.free++]
676 = pg[i];
677 }
678 wakeup(&uvm_km_pages.free);
679 mtx_leave(&uvm_km_pages.mtx);
680
681 /* Cleanup left-over pages (if any). */
682 for (; i < nitems(pg); i++) {
683 if (pg[i] != 0) {
684 uvm_unmap(kernel_map,
685 pg[i], pg[i] + PAGE_SIZE);
686 }
687 }
688 }
689 while (fp) {
690 fp = uvm_km_doputpage(fp);
691 }
692 }
693 }
694
695 struct uvm_km_free_page *
696 uvm_km_doputpage(struct uvm_km_free_page *fp)
697 {
698 vaddr_t va = (vaddr_t)fp;
699 struct vm_page *pg;
700 int freeva = 1;
701 struct uvm_km_free_page *nextfp = fp->next;
702
703 pg = uvm_atopg(va);
704
705 pmap_kremove(va, PAGE_SIZE);
706 pmap_update(kernel_map->pmap);
707
708 mtx_enter(&uvm_km_pages.mtx);
709 if (uvm_km_pages.free < uvm_km_pages.hiwat) {
710 uvm_km_pages.page[uvm_km_pages.free++] = va;
711 freeva = 0;
712 }
713 mtx_leave(&uvm_km_pages.mtx);
714
715 if (freeva)
716 uvm_unmap(kernel_map, va, va + PAGE_SIZE);
717
718 uvm_pagefree(pg);
719 return (nextfp);
720 }
721 #endif /* !__HAVE_PMAP_DIRECT */
722
723 void *
724 km_alloc(size_t sz, const struct kmem_va_mode *kv,
725 const struct kmem_pa_mode *kp, const struct kmem_dyn_mode *kd)
726 {
727 struct vm_map *map;
728 struct vm_page *pg;
729 struct pglist pgl;
730 int mapflags = 0;
731 vm_prot_t prot;
732 paddr_t pla_align;
733 int pla_flags;
734 int pla_maxseg;
735 vaddr_t va, sva = 0;
736
737 KASSERT(sz == round_page(sz));
738
739 TAILQ_INIT(&pgl);
740
741 if (kp->kp_nomem || kp->kp_pageable)
742 goto alloc_va;
743
744 pla_flags = kd->kd_waitok ? UVM_PLA_WAITOK : UVM_PLA_NOWAIT;
745 pla_flags |= UVM_PLA_TRYCONTIG;
746 if (kp->kp_zero)
747 pla_flags |= UVM_PLA_ZERO;
748
749 pla_align = kp->kp_align;
750 #ifdef __HAVE_PMAP_DIRECT
751 if (pla_align < kv->kv_align)
752 pla_align = kv->kv_align;
753 #endif
754 pla_maxseg = kp->kp_maxseg;
755 if (pla_maxseg == 0)
756 pla_maxseg = sz / PAGE_SIZE;
757
758 if (uvm_pglistalloc(sz, kp->kp_constraint->ucr_low,
759 kp->kp_constraint->ucr_high, pla_align, kp->kp_boundary,
760 &pgl, pla_maxseg, pla_flags)) {
761 return (NULL);
762 }
763
764 #ifdef __HAVE_PMAP_DIRECT
765 /*
766 * Only use direct mappings for single page or single segment
767 * allocations.
768 */
769 if (kv->kv_singlepage || kp->kp_maxseg == 1) {
770 TAILQ_FOREACH(pg, &pgl, pageq) {
771 va = pmap_map_direct(pg);
772 if (pg == TAILQ_FIRST(&pgl))
773 sva = va;
774 }
775 return ((void *)sva);
776 }
777 #endif
778 alloc_va:
779 prot = PROT_READ | PROT_WRITE;
780
781 if (kp->kp_pageable) {
782 KASSERT(kp->kp_object);
783 KASSERT(!kv->kv_singlepage);
784 } else {
785 KASSERT(kp->kp_object == NULL);
786 }
787
788 if (kv->kv_singlepage) {
789 KASSERT(sz == PAGE_SIZE);
790 #ifdef __HAVE_PMAP_DIRECT
791 panic("km_alloc: DIRECT single page");
792 #else
793 mtx_enter(&uvm_km_pages.mtx);
794 while (uvm_km_pages.free == 0) {
795 if (kd->kd_waitok == 0) {
796 mtx_leave(&uvm_km_pages.mtx);
797 uvm_pglistfree(&pgl);
798 return NULL;
799 }
800 msleep_nsec(&uvm_km_pages.free, &uvm_km_pages.mtx,
801 PVM, "getpage", INFSLP);
802 }
803 va = uvm_km_pages.page[--uvm_km_pages.free];
804 if (uvm_km_pages.free < uvm_km_pages.lowat &&
805 curproc != uvm_km_pages.km_proc) {
806 if (kd->kd_slowdown)
807 *kd->kd_slowdown = 1;
808 wakeup(&uvm_km_pages.km_proc);
809 }
810 mtx_leave(&uvm_km_pages.mtx);
811 #endif
812 } else {
813 struct uvm_object *uobj = NULL;
814
815 if (kd->kd_trylock)
816 mapflags |= UVM_KMF_TRYLOCK;
817
818 if (kp->kp_object)
819 uobj = *kp->kp_object;
820 try_map:
821 map = *kv->kv_map;
822 va = vm_map_min(map);
823 if (uvm_map(map, &va, sz, uobj, kd->kd_prefer,
824 kv->kv_align, UVM_MAPFLAG(prot, prot, MAP_INHERIT_NONE,
825 MADV_RANDOM, mapflags))) {
826 if (kv->kv_wait && kd->kd_waitok) {
827 tsleep_nsec(map, PVM, "km_allocva", INFSLP);
828 goto try_map;
829 }
830 uvm_pglistfree(&pgl);
831 return (NULL);
832 }
833 }
834 sva = va;
835 TAILQ_FOREACH(pg, &pgl, pageq) {
836 if (kp->kp_pageable)
837 pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(pg),
838 prot, prot | PMAP_WIRED);
839 else
840 pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), prot);
841 va += PAGE_SIZE;
842 }
843 pmap_update(pmap_kernel());
844 return ((void *)sva);
845 }
846
847 void
848 km_free(void *v, size_t sz, const struct kmem_va_mode *kv,
849 const struct kmem_pa_mode *kp)
850 {
851 vaddr_t sva, eva, va;
852 struct vm_page *pg;
853 struct pglist pgl;
854
855 sva = (vaddr_t)v;
856 eva = sva + sz;
857
858 if (kp->kp_nomem)
859 goto free_va;
860
861 #ifdef __HAVE_PMAP_DIRECT
862 if (kv->kv_singlepage || kp->kp_maxseg == 1) {
863 TAILQ_INIT(&pgl);
864 for (va = sva; va < eva; va += PAGE_SIZE) {
865 pg = pmap_unmap_direct(va);
866 TAILQ_INSERT_TAIL(&pgl, pg, pageq);
867 }
868 uvm_pglistfree(&pgl);
869 return;
870 }
871 #else
872 if (kv->kv_singlepage) {
873 struct uvm_km_free_page *fp = v;
874
875 mtx_enter(&uvm_km_pages.mtx);
876 fp->next = uvm_km_pages.freelist;
877 uvm_km_pages.freelist = fp;
878 if (uvm_km_pages.freelistlen++ > 16)
879 wakeup(&uvm_km_pages.km_proc);
880 mtx_leave(&uvm_km_pages.mtx);
881 return;
882 }
883 #endif
884
885 if (kp->kp_pageable) {
886 pmap_remove(pmap_kernel(), sva, eva);
887 pmap_update(pmap_kernel());
888 } else {
889 TAILQ_INIT(&pgl);
890 for (va = sva; va < eva; va += PAGE_SIZE) {
891 paddr_t pa;
892
893 if (!pmap_extract(pmap_kernel(), va, &pa))
894 continue;
895
896 pg = PHYS_TO_VM_PAGE(pa);
897 if (pg == NULL) {
898 panic("km_free: unmanaged page 0x%lx", pa);
899 }
900 TAILQ_INSERT_TAIL(&pgl, pg, pageq);
901 }
902 pmap_kremove(sva, sz);
903 pmap_update(pmap_kernel());
904 uvm_pglistfree(&pgl);
905 }
906 free_va:
907 uvm_unmap(*kv->kv_map, sva, eva);
908 if (kv->kv_wait)
909 wakeup(*kv->kv_map);
910 }
911
912 const struct kmem_va_mode kv_any = {
913 .kv_map = &kernel_map,
914 };
915
916 const struct kmem_va_mode kv_intrsafe = {
917 .kv_map = &kmem_map,
918 };
919
920 const struct kmem_va_mode kv_page = {
921 .kv_singlepage = 1
922 };
923
924 const struct kmem_pa_mode kp_dirty = {
925 .kp_constraint = &no_constraint
926 };
927
928 const struct kmem_pa_mode kp_dma = {
929 .kp_constraint = &dma_constraint
930 };
931
932 const struct kmem_pa_mode kp_dma_contig = {
933 .kp_constraint = &dma_constraint,
934 .kp_maxseg = 1
935 };
936
937 const struct kmem_pa_mode kp_dma_zero = {
938 .kp_constraint = &dma_constraint,
939 .kp_zero = 1
940 };
941
942 const struct kmem_pa_mode kp_zero = {
943 .kp_constraint = &no_constraint,
944 .kp_zero = 1
945 };
946
947 const struct kmem_pa_mode kp_pageable = {
948 .kp_object = &uvm.kernel_object,
949 .kp_pageable = 1
950 /* XXX - kp_nomem, maybe, but we'll need to fix km_free. */
951 };
952
953 const struct kmem_pa_mode kp_none = {
954 .kp_nomem = 1
955 };
956
957 const struct kmem_dyn_mode kd_waitok = {
958 .kd_waitok = 1,
959 .kd_prefer = UVM_UNKNOWN_OFFSET
960 };
961
962 const struct kmem_dyn_mode kd_nowait = {
963 .kd_prefer = UVM_UNKNOWN_OFFSET
964 };
965
966 const struct kmem_dyn_mode kd_trylock = {
967 .kd_trylock = 1,
968 .kd_prefer = UVM_UNKNOWN_OFFSET
969 };
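
/*
 * Editor's illustrative sketch, not part of the original file: a typical
 * km_alloc()/km_free() pairing using the modes defined above -- one
 * zeroed, DMA-reachable, wired page.  The function name is hypothetical;
 * km_free() must be passed the same va/pa modes as the allocation.
 */
static void
uvm_km_example_alloc_free(void)
{
	void *p;

	/* may sleep for physical pages; can still fail to find KVA */
	p = km_alloc(PAGE_SIZE, &kv_any, &kp_dma_zero, &kd_waitok);
	if (p == NULL)
		return;

	/* ... use the page ... */

	km_free(p, PAGE_SIZE, &kv_any, &kp_dma_zero);
}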