The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_page.c

Version: -  FREEBSD  -  FREEBSD11  -  FREEBSD10  -  FREEBSD9  -  FREEBSD92  -  FREEBSD91  -  FREEBSD90  -  FREEBSD8  -  FREEBSD82  -  FREEBSD81  -  FREEBSD80  -  FREEBSD7  -  FREEBSD74  -  FREEBSD73  -  FREEBSD72  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: uvm_page.c,v 1.101 2004/10/23 21:29:27 yamt Exp $      */
    2 
    3 /*
    4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
    5  * Copyright (c) 1991, 1993, The Regents of the University of California.
    6  *
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * The Mach Operating System project at Carnegie-Mellon University.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. All advertising materials mentioning features or use of this software
   21  *    must display the following acknowledgement:
   22  *      This product includes software developed by Charles D. Cranor,
   23  *      Washington University, the University of California, Berkeley and
   24  *      its contributors.
   25  * 4. Neither the name of the University nor the names of its contributors
   26  *    may be used to endorse or promote products derived from this software
   27  *    without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39  * SUCH DAMAGE.
   40  *
   41  *      @(#)vm_page.c   8.3 (Berkeley) 3/21/94
   42  * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
   43  *
   44  *
   45  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   46  * All rights reserved.
   47  *
   48  * Permission to use, copy, modify and distribute this software and
   49  * its documentation is hereby granted, provided that both the copyright
   50  * notice and this permission notice appear in all copies of the
   51  * software, derivative works or modified versions, and any portions
   52  * thereof, and that both notices appear in supporting documentation.
   53  *
   54  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   55  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   56  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   57  *
   58  * Carnegie Mellon requests users of this software to return to
   59  *
   60  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   61  *  School of Computer Science
   62  *  Carnegie Mellon University
   63  *  Pittsburgh PA 15213-3890
   64  *
   65  * any improvements or extensions that they make and grant Carnegie the
   66  * rights to redistribute these changes.
   67  */
   68 
   69 /*
   70  * uvm_page.c: page ops.
   71  */
   72 
   73 #include <sys/cdefs.h>
   74 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.101 2004/10/23 21:29:27 yamt Exp $");
   75 
   76 #include "opt_uvmhist.h"
   77 
   78 #include <sys/param.h>
   79 #include <sys/systm.h>
   80 #include <sys/malloc.h>
   81 #include <sys/sched.h>
   82 #include <sys/kernel.h>
   83 #include <sys/vnode.h>
   84 #include <sys/proc.h>
   85 
   86 #define UVM_PAGE                /* pull in uvm_page.h functions */
   87 #include <uvm/uvm.h>
   88 
   89 /*
   90  * global vars... XXXCDC: move to uvm. structure.
   91  */
   92 
   93 /*
   94  * physical memory config is stored in vm_physmem.
   95  */
   96 
   97 struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];   /* table of registered physical segments; XXXCDC: uvm.physmem */
   98 int vm_nphysseg = 0;                            /* number of vm_physmem[] entries in use; XXXCDC: uvm.nphysseg */
   99 
  100 /*
  101  * Some supported CPUs in a given architecture don't support all
  102  * of the things necessary to do idle page zero'ing efficiently.
  103  * We therefore provide a way to disable it from machdep code here.
  104  */
  105 /*
  106  * XXX disabled until we can find a way to do this without causing
  107  * problems for either CPU caches or DMA latency.
  108  */
  109 boolean_t vm_page_zero_enable = FALSE;  /* copied into uvm.page_idle_zero by uvm_page_init() */
  110 
  111 /*
  112  * local variables
  113  */
  114 
  115 /*
  116  * these variables record the values returned by vm_page_bootstrap,
  117  * for debugging purposes.  The implementation of uvm_pageboot_alloc
  118  * and pmap_startup here also uses them internally.
  119  */
  120 
  121 static vaddr_t      virtual_space_start;  /* start of bootstrap kernel VA still unallocated; first set from pmap_virtual_space() */
  122 static vaddr_t      virtual_space_end;  /* end of that kernel VA range */
  123 
  124 /*
  125  * we use a hash table with only one bucket during bootup.  we will
  126  * later rehash (resize) the hash table once the allocator is ready.
  127  * we static allocate the one bootstrap bucket below...
  128  */
  129 
  130 static struct pglist uvm_bootbucket;  /* installed as uvm.page_hash (the single boot-time bucket) by uvm_page_init() */
  131 
  132 /*
  133  * we allocate an initial number of page colors in uvm_page_init(),
  134  * and remember them.  We may re-color pages as cache sizes are
  135  * discovered during the autoconfiguration phase.  But we can never
  136  * free the initial set of buckets, since they are allocated using
  137  * uvm_pageboot_alloc().
  138  */
  139 
  140 static boolean_t have_recolored_pages /* = FALSE */;  /* NOTE(review): no use is visible in this part of the file; presumably set once pages have been re-colored -- confirm */
  141 
  142 MALLOC_DEFINE(M_VMPAGE, "VM page", "VM page");  /* malloc type used by uvm_page_physload() for vm_page arrays added after boot */
  143 
  144 #ifdef DEBUG
  145 vaddr_t uvm_zerocheckkva;  /* one page of kernel VA set aside in uvm_page_init() for uvm_pagezerocheck() */
  146 #endif /* DEBUG */
  147 
  148 /*
  149  * local prototypes
  150  */
  151 
  152 static void uvm_pageinsert(struct vm_page *);
  153 static void uvm_pageinsert_after(struct vm_page *, struct vm_page *);
  154 static void uvm_pageremove(struct vm_page *);
  155 
  156 /*
  157  * inline functions
  158  */
  159 
  160 /*
  161  * uvm_pageinsert: insert a page in the object and the hash table
  162  * uvm_pageinsert_after: insert a page into the specified place in listq
  163  *
  164  * => caller must lock object
  165  * => caller must lock page queues
  166  * => caller should have already set pg's object and offset pointers
  167  *    and bumped the version counter
  168  */
  169 
  170 __inline static void
  171 uvm_pageinsert_after(pg, where)
  172         struct vm_page *pg;
  173         struct vm_page *where;
  174 {
  175         struct pglist *buck;
  176         struct uvm_object *uobj = pg->uobject;
  177 
  178         KASSERT((pg->flags & PG_TABLED) == 0);
  179         KASSERT(where == NULL || (where->flags & PG_TABLED));
  180         KASSERT(where == NULL || (where->uobject == uobj));
  181         buck = &uvm.page_hash[uvm_pagehash(uobj, pg->offset)];
  182         simple_lock(&uvm.hashlock);
  183         TAILQ_INSERT_TAIL(buck, pg, hashq);
  184         simple_unlock(&uvm.hashlock);
  185 
  186         if (UVM_OBJ_IS_VNODE(uobj)) {
  187                 if (uobj->uo_npages == 0) {
  188                         struct vnode *vp = (struct vnode *)uobj;
  189 
  190                         vholdl(vp);
  191                 }
  192                 if (UVM_OBJ_IS_VTEXT(uobj)) {
  193                         uvmexp.execpages++;
  194                 } else {
  195                         uvmexp.filepages++;
  196                 }
  197         } else if (UVM_OBJ_IS_AOBJ(uobj)) {
  198                 uvmexp.anonpages++;
  199         }
  200 
  201         if (where)
  202                 TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq);
  203         else
  204                 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq);
  205         pg->flags |= PG_TABLED;
  206         uobj->uo_npages++;
  207 }
  208 
  209 __inline static void
  210 uvm_pageinsert(pg)
  211         struct vm_page *pg;
  212 {
  213 
  214         uvm_pageinsert_after(pg, NULL);
  215 }
  216 
  217 /*
  218  * uvm_pageremove: remove page from object and hash
  219  *
  220  * => caller must lock object
  221  * => caller must lock page queues
  222  */
  223 
  224 static __inline void
  225 uvm_pageremove(pg)
  226         struct vm_page *pg;
  227 {
  228         struct pglist *buck;
  229         struct uvm_object *uobj = pg->uobject;
  230 
  231         KASSERT(pg->flags & PG_TABLED);
  232         buck = &uvm.page_hash[uvm_pagehash(uobj, pg->offset)];
  233         simple_lock(&uvm.hashlock);
  234         TAILQ_REMOVE(buck, pg, hashq);
  235         simple_unlock(&uvm.hashlock);
  236 
  237         if (UVM_OBJ_IS_VNODE(uobj)) {
  238                 if (uobj->uo_npages == 1) {
  239                         struct vnode *vp = (struct vnode *)uobj;
  240 
  241                         holdrelel(vp);
  242                 }
  243                 if (UVM_OBJ_IS_VTEXT(uobj)) {
  244                         uvmexp.execpages--;
  245                 } else {
  246                         uvmexp.filepages--;
  247                 }
  248         } else if (UVM_OBJ_IS_AOBJ(uobj)) {
  249                 uvmexp.anonpages--;
  250         }
  251 
  252         /* object should be locked */
  253         uobj->uo_npages--;
  254         TAILQ_REMOVE(&uobj->memq, pg, listq);
  255         pg->flags &= ~PG_TABLED;
  256         pg->uobject = NULL;
  257 }
  258 
  259 static void
  260 uvm_page_init_buckets(struct pgfreelist *pgfl)
  261 {
  262         int color, i;
  263 
  264         for (color = 0; color < uvmexp.ncolors; color++) {
  265                 for (i = 0; i < PGFL_NQUEUES; i++) {
  266                         TAILQ_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]);
  267                 }
  268         }
  269 }
  270 
  271 /*
  272  * uvm_page_init: init the page system.   called from uvm_init().
  273  *
  274  * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
  275  */
  276 
  277 void
  278 uvm_page_init(kvm_startp, kvm_endp)
  279         vaddr_t *kvm_startp, *kvm_endp;
  280 {
  281         vsize_t freepages, pagecount, bucketcount, n;
  282         struct pgflbucket *bucketarray;
  283         struct vm_page *pagearray;
  284         int lcv;
  285         u_int i;
  286         paddr_t paddr;
  287 
  288         /*
  289          * init the page queues and page queue locks, except the free
  290          * list; we allocate that later (with the initial vm_page
  291          * structures).
  292          */
  293 
  294         TAILQ_INIT(&uvm.page_active);
  295         TAILQ_INIT(&uvm.page_inactive);
  296         simple_lock_init(&uvm.pageqlock);
  297         simple_lock_init(&uvm.fpageqlock);
  298 
  299         /*
  300          * init the <obj,offset> => <page> hash table.  for now
  301          * we just have one bucket (the bootstrap bucket).  later on we
  302          * will allocate new buckets as we dynamically resize the hash table.
  303          */
  304 
  305         uvm.page_nhash = 1;                     /* 1 bucket */
  306         uvm.page_hashmask = 0;                  /* mask for hash function */
  307         uvm.page_hash = &uvm_bootbucket;        /* install bootstrap bucket */
  308         TAILQ_INIT(uvm.page_hash);              /* init hash table */
  309         simple_lock_init(&uvm.hashlock);        /* init hash table lock */
  310 
  311         /*
  312          * allocate vm_page structures.
  313          */
  314 
  315         /*
  316          * sanity check:
  317          * before calling this function the MD code is expected to register
  318          * some free RAM with the uvm_page_physload() function.   our job
  319          * now is to allocate vm_page structures for this memory.
  320          */
  321 
  322         if (vm_nphysseg == 0)
  323                 panic("uvm_page_bootstrap: no memory pre-allocated");
  324 
  325         /*
  326          * first calculate the number of free pages...
  327          *
  328          * note that we use start/end rather than avail_start/avail_end.
  329          * this allows us to allocate extra vm_page structures in case we
  330          * want to return some memory to the pool after booting.
  331          */
  332 
  333         freepages = 0;
  334         for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
  335                 freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
  336 
  337         /*
  338          * Let MD code initialize the number of colors, or default
  339          * to 1 color if MD code doesn't care.
  340          */
  341         if (uvmexp.ncolors == 0)
  342                 uvmexp.ncolors = 1;
  343         uvmexp.colormask = uvmexp.ncolors - 1;
  344 
  345         /*
  346          * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
  347          * use.   for each page of memory we use we need a vm_page structure.
  348          * thus, the total number of pages we can use is the total size of
  349          * the memory divided by the PAGE_SIZE plus the size of the vm_page
  350          * structure.   we add one to freepages as a fudge factor to avoid
  351          * truncation errors (since we can only allocate in terms of whole
  352          * pages).
  353          */
  354 
  355         bucketcount = uvmexp.ncolors * VM_NFREELIST;
  356         pagecount = ((freepages + 1) << PAGE_SHIFT) /
  357             (PAGE_SIZE + sizeof(struct vm_page));
  358 
  359         bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
  360             sizeof(struct pgflbucket)) + (pagecount *
  361             sizeof(struct vm_page)));
  362         pagearray = (struct vm_page *)(bucketarray + bucketcount);
  363 
  364         for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
  365                 uvm.page_free[lcv].pgfl_buckets =
  366                     (bucketarray + (lcv * uvmexp.ncolors));
  367                 uvm_page_init_buckets(&uvm.page_free[lcv]);
  368         }
  369         memset(pagearray, 0, pagecount * sizeof(struct vm_page));
  370 
  371         /*
  372          * init the vm_page structures and put them in the correct place.
  373          */
  374 
  375         for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
  376                 n = vm_physmem[lcv].end - vm_physmem[lcv].start;
  377 
  378                 /* set up page array pointers */
  379                 vm_physmem[lcv].pgs = pagearray;
  380                 pagearray += n;
  381                 pagecount -= n;
  382                 vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);
  383 
  384                 /* init and free vm_pages (we've already zeroed them) */
  385                 paddr = ptoa(vm_physmem[lcv].start);
  386                 for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
  387                         vm_physmem[lcv].pgs[i].phys_addr = paddr;
  388 #ifdef __HAVE_VM_PAGE_MD
  389                         VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
  390 #endif
  391                         if (atop(paddr) >= vm_physmem[lcv].avail_start &&
  392                             atop(paddr) <= vm_physmem[lcv].avail_end) {
  393                                 uvmexp.npages++;
  394                                 /* add page to free pool */
  395                                 uvm_pagefree(&vm_physmem[lcv].pgs[i]);
  396                         }
  397                 }
  398         }
  399 
  400         /*
  401          * pass up the values of virtual_space_start and
  402          * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
  403          * layers of the VM.
  404          */
  405 
  406         *kvm_startp = round_page(virtual_space_start);
  407         *kvm_endp = trunc_page(virtual_space_end);
  408 #ifdef DEBUG
  409         /*
  410          * steal kva for uvm_pagezerocheck().
  411          */
  412         uvm_zerocheckkva = *kvm_startp;
  413         *kvm_startp += PAGE_SIZE;
  414 #endif /* DEBUG */
  415 
  416         /*
  417          * init locks for kernel threads
  418          */
  419 
  420         simple_lock_init(&uvm.pagedaemon_lock);
  421         simple_lock_init(&uvm.aiodoned_lock);
  422 
  423         /*
  424          * init various thresholds.
  425          */
  426 
  427         uvmexp.reserve_pagedaemon = 1;
  428         uvmexp.reserve_kernel = 5;
  429         uvmexp.anonminpct = 10;
  430         uvmexp.fileminpct = 10;
  431         uvmexp.execminpct = 5;
  432         uvmexp.anonmaxpct = 80;
  433         uvmexp.filemaxpct = 50;
  434         uvmexp.execmaxpct = 30;
  435         uvmexp.anonmin = uvmexp.anonminpct * 256 / 100;
  436         uvmexp.filemin = uvmexp.fileminpct * 256 / 100;
  437         uvmexp.execmin = uvmexp.execminpct * 256 / 100;
  438         uvmexp.anonmax = uvmexp.anonmaxpct * 256 / 100;
  439         uvmexp.filemax = uvmexp.filemaxpct * 256 / 100;
  440         uvmexp.execmax = uvmexp.execmaxpct * 256 / 100;
  441 
  442         /*
  443          * determine if we should zero pages in the idle loop.
  444          */
  445 
  446         uvm.page_idle_zero = vm_page_zero_enable;
  447 
  448         /*
  449          * done!
  450          */
  451 
  452         uvm.page_init_done = TRUE;
  453 }
  454 
  455 /*
  456  * uvm_setpagesize: set the page size
  457  *
  458  * => sets uvmexp.pageshift and uvmexp.pagemask from uvmexp.pagesize.
  459  */
  460 
  461 void
  462 uvm_setpagesize()
  463 {
  464 
  465         /*
  466          * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
  467          * to be a constant (indicated by being a non-zero value).
  468          */
  469         if (uvmexp.pagesize == 0) {
  470                 if (PAGE_SIZE == 0)
  471                         panic("uvm_setpagesize: uvmexp.pagesize not set");
  472                 uvmexp.pagesize = PAGE_SIZE;
  473         }
  474         uvmexp.pagemask = uvmexp.pagesize - 1;
  475         if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
  476                 panic("uvm_setpagesize: page size not a power of two");
  477         for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
  478                 if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
  479                         break;
  480 }
  481 
  482 /*
  483  * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
  484  */
  485 
  486 vaddr_t
  487 uvm_pageboot_alloc(size)
  488         vsize_t size;
  489 {
  490         static boolean_t initialized = FALSE;
  491         vaddr_t addr;
  492 #if !defined(PMAP_STEAL_MEMORY)
  493         vaddr_t vaddr;
  494         paddr_t paddr;
  495 #endif
  496 
  497         /*
  498          * on first call to this function, initialize ourselves.
  499          */
  500         if (initialized == FALSE) {
  501                 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
  502 
  503                 /* round it the way we like it */
  504                 virtual_space_start = round_page(virtual_space_start);
  505                 virtual_space_end = trunc_page(virtual_space_end);
  506 
  507                 initialized = TRUE;
  508         }
  509 
  510         /* round to page size */
  511         size = round_page(size);
  512 
  513 #if defined(PMAP_STEAL_MEMORY)
  514 
  515         /*
  516          * defer bootstrap allocation to MD code (it may want to allocate
  517          * from a direct-mapped segment).  pmap_steal_memory should adjust
  518          * virtual_space_start/virtual_space_end if necessary.
  519          */
  520 
  521         addr = pmap_steal_memory(size, &virtual_space_start,
  522             &virtual_space_end);
  523 
  524         return(addr);
  525 
  526 #else /* !PMAP_STEAL_MEMORY */
  527 
  528         /*
  529          * allocate virtual memory for this request
  530          */
  531         if (virtual_space_start == virtual_space_end ||
  532             (virtual_space_end - virtual_space_start) < size)
  533                 panic("uvm_pageboot_alloc: out of virtual space");
  534 
  535         addr = virtual_space_start;
  536 
  537 #ifdef PMAP_GROWKERNEL
  538         /*
  539          * If the kernel pmap can't map the requested space,
  540          * then allocate more resources for it.
  541          */
  542         if (uvm_maxkaddr < (addr + size)) {
  543                 uvm_maxkaddr = pmap_growkernel(addr + size);
  544                 if (uvm_maxkaddr < (addr + size))
  545                         panic("uvm_pageboot_alloc: pmap_growkernel() failed");
  546         }
  547 #endif
  548 
  549         virtual_space_start += size;
  550 
  551         /*
  552          * allocate and mapin physical pages to back new virtual pages
  553          */
  554 
  555         for (vaddr = round_page(addr) ; vaddr < addr + size ;
  556             vaddr += PAGE_SIZE) {
  557 
  558                 if (!uvm_page_physget(&paddr))
  559                         panic("uvm_pageboot_alloc: out of memory");
  560 
  561                 /*
  562                  * Note this memory is no longer managed, so using
  563                  * pmap_kenter is safe.
  564                  */
  565                 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
  566         }
  567         pmap_update(pmap_kernel());
  568         return(addr);
  569 #endif  /* PMAP_STEAL_MEMORY */
  570 }
  571 
  572 #if !defined(PMAP_STEAL_MEMORY)
  573 /*
  574  * uvm_page_physget: "steal" one page from the vm_physmem structure.
  575  *
  576  * => attempt to allocate it off the end of a segment in which the "avail"
  577  *    values match the start/end values.   if we can't do that, then we
  578  *    will advance both values (making them equal, and removing some
  579  *    vm_page structures from the non-avail area).
  580  * => return false if out of memory.
  581  */
  582 
  583 /* subroutine: try to allocate from memory chunks on the specified freelist */
  584 static boolean_t uvm_page_physget_freelist(paddr_t *, int);
  585 
  586 static boolean_t
  587 uvm_page_physget_freelist(paddrp, freelist)
  588         paddr_t *paddrp;
  589         int freelist;
  590 {
  591         int lcv, x;
  592 
  593         /* pass 1: try allocating from a matching end */
  594 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
  595         for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
  596 #else
  597         for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
  598 #endif
  599         {
  600 
  601                 if (uvm.page_init_done == TRUE)
  602                         panic("uvm_page_physget: called _after_ bootstrap");
  603 
  604                 if (vm_physmem[lcv].free_list != freelist)
  605                         continue;
  606 
  607                 /* try from front */
  608                 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
  609                     vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
  610                         *paddrp = ptoa(vm_physmem[lcv].avail_start);
  611                         vm_physmem[lcv].avail_start++;
  612                         vm_physmem[lcv].start++;
  613                         /* nothing left?   nuke it */
  614                         if (vm_physmem[lcv].avail_start ==
  615                             vm_physmem[lcv].end) {
  616                                 if (vm_nphysseg == 1)
  617                                     panic("uvm_page_physget: out of memory!");
  618                                 vm_nphysseg--;
  619                                 for (x = lcv ; x < vm_nphysseg ; x++)
  620                                         /* structure copy */
  621                                         vm_physmem[x] = vm_physmem[x+1];
  622                         }
  623                         return (TRUE);
  624                 }
  625 
  626                 /* try from rear */
  627                 if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
  628                     vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
  629                         *paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
  630                         vm_physmem[lcv].avail_end--;
  631                         vm_physmem[lcv].end--;
  632                         /* nothing left?   nuke it */
  633                         if (vm_physmem[lcv].avail_end ==
  634                             vm_physmem[lcv].start) {
  635                                 if (vm_nphysseg == 1)
  636                                     panic("uvm_page_physget: out of memory!");
  637                                 vm_nphysseg--;
  638                                 for (x = lcv ; x < vm_nphysseg ; x++)
  639                                         /* structure copy */
  640                                         vm_physmem[x] = vm_physmem[x+1];
  641                         }
  642                         return (TRUE);
  643                 }
  644         }
  645 
  646         /* pass2: forget about matching ends, just allocate something */
  647 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
  648         for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
  649 #else
  650         for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
  651 #endif
  652         {
  653 
  654                 /* any room in this bank? */
  655                 if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
  656                         continue;  /* nope */
  657 
  658                 *paddrp = ptoa(vm_physmem[lcv].avail_start);
  659                 vm_physmem[lcv].avail_start++;
  660                 /* truncate! */
  661                 vm_physmem[lcv].start = vm_physmem[lcv].avail_start;
  662 
  663                 /* nothing left?   nuke it */
  664                 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
  665                         if (vm_nphysseg == 1)
  666                                 panic("uvm_page_physget: out of memory!");
  667                         vm_nphysseg--;
  668                         for (x = lcv ; x < vm_nphysseg ; x++)
  669                                 /* structure copy */
  670                                 vm_physmem[x] = vm_physmem[x+1];
  671                 }
  672                 return (TRUE);
  673         }
  674 
  675         return (FALSE);        /* whoops! */
  676 }
  677 
  678 boolean_t
  679 uvm_page_physget(paddrp)
  680         paddr_t *paddrp;
  681 {
  682         int i;
  683 
  684         /* try in the order of freelist preference */
  685         for (i = 0; i < VM_NFREELIST; i++)
  686                 if (uvm_page_physget_freelist(paddrp, i) == TRUE)
  687                         return (TRUE);
  688         return (FALSE);
  689 }
  690 #endif /* PMAP_STEAL_MEMORY */
  691 
  692 /*
  693  * uvm_page_physload: load physical memory into VM system
  694  *
  695  * => start, end, avail_start and avail_end are page frame numbers (PFNs), not byte addresses
  696  * => all pages in start/end get vm_page structures
  697  * => areas marked by avail_start/avail_end get added to the free page pool
  698  * => we are limited to VM_PHYSSEG_MAX physical memory segments
  699  */
  700 
  701 void
  702 uvm_page_physload(start, end, avail_start, avail_end, free_list)
  703         paddr_t start, end, avail_start, avail_end;
  704         int free_list;
  705 {
  706         int preload, lcv;
  707         psize_t npages;
  708         struct vm_page *pgs;
  709         struct vm_physseg *ps;
  710 
  711         if (uvmexp.pagesize == 0)
  712                 panic("uvm_page_physload: page size not set!");
  713         if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
  714                 panic("uvm_page_physload: bad free list %d", free_list);
  715         if (start >= end)
  716                 panic("uvm_page_physload: start >= end");
  717 
  718         /*
  719          * do we have room?
  720          */
  721 
  722         if (vm_nphysseg == VM_PHYSSEG_MAX) {
  723                 printf("uvm_page_physload: unable to load physical memory "
  724                     "segment\n");
  725                 printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
  726                     VM_PHYSSEG_MAX, (long long)start, (long long)end);
  727                 printf("\tincrease VM_PHYSSEG_MAX\n");
  728                 return;
  729         }
  730 
  731         /*
  732          * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
  733          * called yet, so malloc is not available).
  734          */
  735 
  736         for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
  737                 if (vm_physmem[lcv].pgs)
  738                         break;
  739         }
  740         preload = (lcv == vm_nphysseg);
  741 
  742         /*
  743          * if VM is already running, attempt to malloc() vm_page structures
  744          */
  745 
  746         if (!preload) {
  747 #if defined(VM_PHYSSEG_NOADD)
  748                 panic("uvm_page_physload: tried to add RAM after vm_mem_init");
  749 #else
  750                 /* XXXCDC: need some sort of lockout for this case */
  751                 paddr_t paddr;
  752                 npages = end - start;  /* # of pages */
  753                 pgs = malloc(sizeof(struct vm_page) * npages,
  754                     M_VMPAGE, M_NOWAIT);
  755                 if (pgs == NULL) {
  756                         printf("uvm_page_physload: can not malloc vm_page "
  757                             "structs for segment\n");
  758                         printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
  759                         return;
  760                 }
  761                 /* zero data, init phys_addr and free_list, and free pages */
  762                 memset(pgs, 0, sizeof(struct vm_page) * npages);
  763                 for (lcv = 0, paddr = ptoa(start) ;
  764                                  lcv < npages ; lcv++, paddr += PAGE_SIZE) {
  765                         pgs[lcv].phys_addr = paddr;
  766                         pgs[lcv].free_list = free_list;
  767                         if (atop(paddr) >= avail_start &&
  768                             atop(paddr) <= avail_end)
  769                                 uvm_pagefree(&pgs[lcv]);
  770                 }
  771                 /* XXXCDC: incomplete: need to update uvmexp.free, what else? */
  772                 /* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
  773 #endif
  774         } else {
  775                 pgs = NULL;
  776                 npages = 0;
  777         }
  778 
  779         /*
  780          * now insert us in the proper place in vm_physmem[]
  781          */
  782 
  783 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
  784         /* random: put it at the end (easy!) */
  785         ps = &vm_physmem[vm_nphysseg];
  786 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
  787         {
  788                 int x;
  789                 /* sort by address for binary search */
  790                 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
  791                         if (start < vm_physmem[lcv].start)
  792                                 break;
  793                 ps = &vm_physmem[lcv];
  794                 /* move back other entries, if necessary ... */
  795                 for (x = vm_nphysseg ; x > lcv ; x--)
  796                         /* structure copy */
  797                         vm_physmem[x] = vm_physmem[x - 1];
  798         }
  799 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
  800         {
  801                 int x;
  802                 /* sort by largest segment first */
  803                 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
  804                         if ((end - start) >
  805                             (vm_physmem[lcv].end - vm_physmem[lcv].start))
  806                                 break;
  807                 ps = &vm_physmem[lcv];
  808                 /* move back other entries, if necessary ... */
  809                 for (x = vm_nphysseg ; x > lcv ; x--)
  810                         /* structure copy */
  811                         vm_physmem[x] = vm_physmem[x - 1];
  812         }
  813 #else
  814         panic("uvm_page_physload: unknown physseg strategy selected!");
  815 #endif
  816 
  817         ps->start = start;
  818         ps->end = end;
  819         ps->avail_start = avail_start;
  820         ps->avail_end = avail_end;
  821         if (preload) {
  822                 ps->pgs = NULL;
  823         } else {
  824                 ps->pgs = pgs;
  825                 ps->lastpg = pgs + npages - 1;
  826         }
  827         ps->free_list = free_list;
  828         vm_nphysseg++;
  829 
  830         if (!preload)
  831                 uvm_page_rehash();
  832 }
  833 
  834 /*
  835  * uvm_page_rehash: reallocate hash table based on number of free pages.
  836  */
  837 
  838 void
  839 uvm_page_rehash()
  840 {
  841         int freepages, lcv, bucketcount, oldcount;
  842         struct pglist *newbuckets, *oldbuckets;
  843         struct vm_page *pg;
  844         size_t newsize, oldsize;
  845 
  846         /*
  847          * compute number of pages that can go in the free pool
  848          */
  849 
  850         freepages = 0;
  851         for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
  852                 freepages +=
  853                     (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
  854 
  855         /*
  856          * compute number of buckets needed for this number of pages
  857          */
  858 
  859         bucketcount = 1;
  860         while (bucketcount < freepages)
  861                 bucketcount = bucketcount * 2;
  862 
  863         /*
  864          * compute the size of the current table and new table.
  865          */
  866 
  867         oldbuckets = uvm.page_hash;
  868         oldcount = uvm.page_nhash;
  869         oldsize = round_page(sizeof(struct pglist) * oldcount);
  870         newsize = round_page(sizeof(struct pglist) * bucketcount);
  871 
  872         /*
  873          * allocate the new buckets
  874          */
  875 
  876         newbuckets = (struct pglist *) uvm_km_alloc(kernel_map, newsize);
  877         if (newbuckets == NULL) {
  878                 printf("uvm_page_physrehash: WARNING: could not grow page "
  879                     "hash table\n");
  880                 return;
  881         }
  882         for (lcv = 0 ; lcv < bucketcount ; lcv++)
  883                 TAILQ_INIT(&newbuckets[lcv]);
  884 
  885         /*
  886          * now replace the old buckets with the new ones and rehash everything
  887          */
  888 
  889         simple_lock(&uvm.hashlock);
  890         uvm.page_hash = newbuckets;
  891         uvm.page_nhash = bucketcount;
  892         uvm.page_hashmask = bucketcount - 1;  /* power of 2 */
  893 
  894         /* ... and rehash */
  895         for (lcv = 0 ; lcv < oldcount ; lcv++) {
  896                 while ((pg = oldbuckets[lcv].tqh_first) != NULL) {
  897                         TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
  898                         TAILQ_INSERT_TAIL(
  899                           &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
  900                           pg, hashq);
  901                 }
  902         }
  903         simple_unlock(&uvm.hashlock);
  904 
  905         /*
  906          * free old bucket array if is not the boot-time table
  907          */
  908 
  909         if (oldbuckets != &uvm_bootbucket)
  910                 uvm_km_free(kernel_map, (vaddr_t) oldbuckets, oldsize);
  911 }
  912 
  913 /*
  914  * uvm_page_recolor: Recolor the pages if the new bucket count is
  915  * larger than the old one.
  916  */
  917 
  918 void
  919 uvm_page_recolor(int newncolors)
  920 {
  921         struct pgflbucket *bucketarray, *oldbucketarray;
  922         struct pgfreelist pgfl;
  923         struct vm_page *pg;
  924         vsize_t bucketcount;
  925         int s, lcv, color, i, ocolors;
  926 
  927         if (newncolors <= uvmexp.ncolors)
  928                 return;
  929 
  930         if (uvm.page_init_done == FALSE) {
  931                 uvmexp.ncolors = newncolors;
  932                 return;
  933         }
  934 
  935         bucketcount = newncolors * VM_NFREELIST;
  936         bucketarray = malloc(bucketcount * sizeof(struct pgflbucket),
  937             M_VMPAGE, M_NOWAIT);
  938         if (bucketarray == NULL) {
  939                 printf("WARNING: unable to allocate %ld page color buckets\n",
  940                     (long) bucketcount);
  941                 return;
  942         }
  943 
  944         s = uvm_lock_fpageq();
  945 
  946         /* Make sure we should still do this. */
  947         if (newncolors <= uvmexp.ncolors) {
  948                 uvm_unlock_fpageq(s);
  949                 free(bucketarray, M_VMPAGE);
  950                 return;
  951         }
  952 
  953         oldbucketarray = uvm.page_free[0].pgfl_buckets;
  954         ocolors = uvmexp.ncolors;
  955 
  956         uvmexp.ncolors = newncolors;
  957         uvmexp.colormask = uvmexp.ncolors - 1;
  958 
  959         for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
  960                 pgfl.pgfl_buckets = (bucketarray + (lcv * newncolors));
  961                 uvm_page_init_buckets(&pgfl);
  962                 for (color = 0; color < ocolors; color++) {
  963                         for (i = 0; i < PGFL_NQUEUES; i++) {
  964                                 while ((pg = TAILQ_FIRST(&uvm.page_free[
  965                                     lcv].pgfl_buckets[color].pgfl_queues[i]))
  966                                     != NULL) {
  967                                         TAILQ_REMOVE(&uvm.page_free[
  968                                             lcv].pgfl_buckets[
  969                                             color].pgfl_queues[i], pg, pageq);
  970                                         TAILQ_INSERT_TAIL(&pgfl.pgfl_buckets[
  971                                             VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
  972                                             i], pg, pageq);
  973                                 }
  974                         }
  975                 }
  976                 uvm.page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
  977         }
  978 
  979         if (have_recolored_pages) {
  980                 uvm_unlock_fpageq(s);
  981                 free(oldbucketarray, M_VMPAGE);
  982                 return;
  983         }
  984 
  985         have_recolored_pages = TRUE;
  986         uvm_unlock_fpageq(s);
  987 }
  988 
  989 /*
  990  * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat
  991  */
  992 
  993 static __inline struct vm_page *
  994 uvm_pagealloc_pgfl(struct pgfreelist *pgfl, int try1, int try2,
  995     int *trycolorp)
  996 {
  997         struct pglist *freeq;
  998         struct vm_page *pg;
  999         int color, trycolor = *trycolorp;
 1000 
 1001         color = trycolor;
 1002         do {
 1003                 if ((pg = TAILQ_FIRST((freeq =
 1004                     &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL)
 1005                         goto gotit;
 1006                 if ((pg = TAILQ_FIRST((freeq =
 1007                     &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL)
 1008                         goto gotit;
 1009                 color = (color + 1) & uvmexp.colormask;
 1010         } while (color != trycolor);
 1011 
 1012         return (NULL);
 1013 
 1014  gotit:
 1015         TAILQ_REMOVE(freeq, pg, pageq);
 1016         uvmexp.free--;
 1017 
 1018         /* update zero'd page count */
 1019         if (pg->flags & PG_ZERO)
 1020                 uvmexp.zeropages--;
 1021 
 1022         if (color == trycolor)
 1023                 uvmexp.colorhit++;
 1024         else {
 1025                 uvmexp.colormiss++;
 1026                 *trycolorp = color;
 1027         }
 1028 
 1029         return (pg);
 1030 }
 1031 
 1032 /*
 1033  * uvm_pagealloc_strat: allocate vm_page from a particular free list.
 1034  *
 1035  * => return null if no pages free
 1036  * => wake up pagedaemon if number of free pages drops below low water mark
 1037  * => if obj != NULL, obj must be locked (to put in hash)
 1038  * => if anon != NULL, anon must be locked (to put in anon)
 1039  * => only one of obj or anon can be non-null
 1040  * => caller must activate/deactivate page if it is not wired.
 1041  * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
 1042  * => policy decision: it is more important to pull a page off of the
 1043  *      appropriate priority free list than it is to get a zero'd or
 1044  *      unknown contents page.  This is because we live with the
 1045  *      consequences of a bad free list decision for the entire
 1046  *      lifetime of the page, e.g. if the page comes from memory that
 1047  *      is slower to access.
 1048  */
 1049 
 1050 struct vm_page *
 1051 uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
 1052         struct uvm_object *obj;
 1053         voff_t off;
 1054         int flags;
 1055         struct vm_anon *anon;
 1056         int strat, free_list;
 1057 {
 1058         int lcv, try1, try2, s, zeroit = 0, color;
 1059         struct vm_page *pg;
 1060         boolean_t use_reserve;
 1061 
 1062         KASSERT(obj == NULL || anon == NULL);
 1063         KASSERT(off == trunc_page(off));
 1064         LOCK_ASSERT(obj == NULL || simple_lock_held(&obj->vmobjlock));
 1065         LOCK_ASSERT(anon == NULL || simple_lock_held(&anon->an_lock));
 1066 
 1067         s = uvm_lock_fpageq();
 1068 
 1069         /*
 1070          * This implements a global round-robin page coloring
 1071          * algorithm.
 1072          *
 1073          * XXXJRT: Should we make the `nextcolor' per-CPU?
 1074          * XXXJRT: What about virtually-indexed caches?
 1075          */
 1076 
 1077         color = uvm.page_free_nextcolor;
 1078 
 1079         /*
 1080          * check to see if we need to generate some free pages waking
 1081          * the pagedaemon.
 1082          */
 1083 
 1084         UVM_KICK_PDAEMON();
 1085 
 1086         /*
 1087          * fail if any of these conditions is true:
 1088          * [1]  there really are no free pages, or
 1089          * [2]  only kernel "reserved" pages remain and
 1090          *        the page isn't being allocated to a kernel object.
 1091          * [3]  only pagedaemon "reserved" pages remain and
 1092          *        the requestor isn't the pagedaemon.
 1093          */
 1094 
 1095         use_reserve = (flags & UVM_PGA_USERESERVE) ||
 1096                 (obj && UVM_OBJ_IS_KERN_OBJECT(obj));
 1097         if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
 1098             (uvmexp.free <= uvmexp.reserve_pagedaemon &&
 1099              !(use_reserve && curproc == uvm.pagedaemon_proc)))
 1100                 goto fail;
 1101 
 1102 #if PGFL_NQUEUES != 2
 1103 #error uvm_pagealloc_strat needs to be updated
 1104 #endif
 1105 
 1106         /*
 1107          * If we want a zero'd page, try the ZEROS queue first, otherwise
 1108          * we try the UNKNOWN queue first.
 1109          */
 1110         if (flags & UVM_PGA_ZERO) {
 1111                 try1 = PGFL_ZEROS;
 1112                 try2 = PGFL_UNKNOWN;
 1113         } else {
 1114                 try1 = PGFL_UNKNOWN;
 1115                 try2 = PGFL_ZEROS;
 1116         }
 1117 
 1118  again:
 1119         switch (strat) {
 1120         case UVM_PGA_STRAT_NORMAL:
 1121                 /* Check all freelists in descending priority order. */
 1122                 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
 1123                         pg = uvm_pagealloc_pgfl(&uvm.page_free[lcv],
 1124                             try1, try2, &color);
 1125                         if (pg != NULL)
 1126                                 goto gotit;
 1127                 }
 1128 
 1129                 /* No pages free! */
 1130                 goto fail;
 1131 
 1132         case UVM_PGA_STRAT_ONLY:
 1133         case UVM_PGA_STRAT_FALLBACK:
 1134                 /* Attempt to allocate from the specified free list. */
 1135                 KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
 1136                 pg = uvm_pagealloc_pgfl(&uvm.page_free[free_list],
 1137                     try1, try2, &color);
 1138                 if (pg != NULL)
 1139                         goto gotit;
 1140 
 1141                 /* Fall back, if possible. */
 1142                 if (strat == UVM_PGA_STRAT_FALLBACK) {
 1143                         strat = UVM_PGA_STRAT_NORMAL;
 1144                         goto again;
 1145                 }
 1146 
 1147                 /* No pages free! */
 1148                 goto fail;
 1149 
 1150         default:
 1151                 panic("uvm_pagealloc_strat: bad strat %d", strat);
 1152                 /* NOTREACHED */
 1153         }
 1154 
 1155  gotit:
 1156         /*
 1157          * We now know which color we actually allocated from; set
 1158          * the next color accordingly.
 1159          */
 1160 
 1161         uvm.page_free_nextcolor = (color + 1) & uvmexp.colormask;
 1162 
 1163         /*
 1164          * update allocation statistics and remember if we have to
 1165          * zero the page
 1166          */
 1167 
 1168         if (flags & UVM_PGA_ZERO) {
 1169                 if (pg->flags & PG_ZERO) {
 1170                         uvmexp.pga_zerohit++;
 1171                         zeroit = 0;
 1172                 } else {
 1173                         uvmexp.pga_zeromiss++;
 1174                         zeroit = 1;
 1175                 }
 1176         }
 1177         uvm_unlock_fpageq(s);
 1178 
 1179         pg->offset = off;
 1180         pg->uobject = obj;
 1181         pg->uanon = anon;
 1182         pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
 1183         if (anon) {
 1184                 anon->u.an_page = pg;
 1185                 pg->pqflags = PQ_ANON;
 1186                 uvmexp.anonpages++;
 1187         } else {
 1188                 if (obj) {
 1189                         uvm_pageinsert(pg);
 1190                 }
 1191                 pg->pqflags = 0;
 1192         }
 1193 #if defined(UVM_PAGE_TRKOWN)
 1194         pg->owner_tag = NULL;
 1195 #endif
 1196         UVM_PAGE_OWN(pg, "new alloc");
 1197 
 1198         if (flags & UVM_PGA_ZERO) {
 1199                 /*
 1200                  * A zero'd page is not clean.  If we got a page not already
 1201                  * zero'd, then we have to zero it ourselves.
 1202                  */
 1203                 pg->flags &= ~PG_CLEAN;
 1204                 if (zeroit)
 1205                         pmap_zero_page(VM_PAGE_TO_PHYS(pg));
 1206         }
 1207 
 1208         return(pg);
 1209 
 1210  fail:
 1211         uvm_unlock_fpageq(s);
 1212         return (NULL);
 1213 }
 1214 
 1215 /*
 1216  * uvm_pagereplace: replace a page with another
 1217  *
 1218  * => object must be locked
 1219  */
 1220 
 1221 void
 1222 uvm_pagereplace(oldpg, newpg)
 1223         struct vm_page *oldpg;
 1224         struct vm_page *newpg;
 1225 {
 1226 
 1227         KASSERT((oldpg->flags & PG_TABLED) != 0);
 1228         KASSERT(oldpg->uobject != NULL);
 1229         KASSERT((newpg->flags & PG_TABLED) == 0);
 1230         KASSERT(newpg->uobject == NULL);
 1231         LOCK_ASSERT(simple_lock_held(&oldpg->uobject->vmobjlock));
 1232 
 1233         newpg->uobject = oldpg->uobject;
 1234         newpg->offset = oldpg->offset;
 1235 
 1236         uvm_pageinsert_after(newpg, oldpg);
 1237         uvm_pageremove(oldpg);
 1238 }
 1239 
 1240 /*
 1241  * uvm_pagerealloc: reallocate a page from one object to another
 1242  *
 1243  * => both objects must be locked
 1244  */
 1245 
 1246 void
 1247 uvm_pagerealloc(pg, newobj, newoff)
 1248         struct vm_page *pg;
 1249         struct uvm_object *newobj;
 1250         voff_t newoff;
 1251 {
 1252         /*
 1253          * remove it from the old object
 1254          */
 1255 
 1256         if (pg->uobject) {
 1257                 uvm_pageremove(pg);
 1258         }
 1259 
 1260         /*
 1261          * put it in the new object
 1262          */
 1263 
 1264         if (newobj) {
 1265                 pg->uobject = newobj;
 1266                 pg->offset = newoff;
 1267                 uvm_pageinsert(pg);
 1268         }
 1269 }
 1270 
 1271 #ifdef DEBUG
 1272 /*
 1273  * check if page is zero-filled
 1274  *
 1275  *  - called with free page queue lock held.
 1276  */
 1277 void
 1278 uvm_pagezerocheck(struct vm_page *pg)
 1279 {
 1280         int *p, *ep;
 1281 
 1282         KASSERT(uvm_zerocheckkva != 0);
 1283         LOCK_ASSERT(simple_lock_held(&uvm.fpageqlock));
 1284 
 1285         /*
 1286          * XXX assuming pmap_kenter_pa and pmap_kremove never call
 1287          * uvm page allocator.
 1288          *
 1289          * it might be better to have "CPU-local temporary map" pmap interface.
 1290          */
 1291         pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ);
 1292         p = (int *)uvm_zerocheckkva;
 1293         ep = (int *)((char *)p + PAGE_SIZE);
 1294         pmap_update(pmap_kernel());
 1295         while (p < ep) {
 1296                 if (*p != 0)
 1297                         panic("PG_ZERO page isn't zero-filled");
 1298                 p++;
 1299         }
 1300         pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
 1301 }
 1302 #endif /* DEBUG */
 1303 
 1304 /*
 1305  * uvm_pagefree: free page
 1306  *
 1307  * => erase page's identity (i.e. remove from hash/object)
 1308  * => put page on free list
 1309  * => caller must lock owning object (either anon or uvm_object)
 1310  * => caller must lock page queues
 1311  * => assumes all valid mappings of pg are gone
 1312  */
 1313 
 1314 void
 1315 uvm_pagefree(pg)
 1316         struct vm_page *pg;
 1317 {
 1318         int s;
 1319         struct pglist *pgfl;
 1320         boolean_t iszero;
 1321 
 1322         KASSERT((pg->flags & PG_PAGEOUT) == 0);
 1323         LOCK_ASSERT(simple_lock_held(&uvm.pageqlock) ||
 1324                     (pg->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) == 0);
 1325         LOCK_ASSERT(pg->uobject == NULL ||
 1326                     simple_lock_held(&pg->uobject->vmobjlock));
 1327         LOCK_ASSERT(pg->uobject != NULL || pg->uanon == NULL ||
 1328                     simple_lock_held(&pg->uanon->an_lock));
 1329 
 1330 #ifdef DEBUG
 1331         if (pg->uobject == (void *)0xdeadbeef &&
 1332             pg->uanon == (void *)0xdeadbeef) {
 1333                 panic("uvm_pagefree: freeing free page %p", pg);
 1334         }
 1335 #endif /* DEBUG */
 1336 
 1337         /*
 1338          * if the page is loaned, resolve the loan instead of freeing.
 1339          */
 1340 
 1341         if (pg->loan_count) {
 1342                 KASSERT(pg->wire_count == 0);
 1343 
 1344                 /*
 1345                  * if the page is owned by an anon then we just want to
 1346                  * drop anon ownership.  the kernel will free the page when
 1347                  * it is done with it.  if the page is owned by an object,
 1348                  * remove it from the object and mark it dirty for the benefit
 1349                  * of possible anon owners.
 1350                  *
 1351                  * regardless of previous ownership, wakeup any waiters,
 1352                  * unbusy the page, and we're done.
 1353                  */
 1354 
 1355                 if (pg->uobject != NULL) {
 1356                         uvm_pageremove(pg);
 1357                         pg->flags &= ~PG_CLEAN;
 1358                 } else if (pg->uanon != NULL) {
 1359                         if ((pg->pqflags & PQ_ANON) == 0) {
 1360                                 pg->loan_count--;
 1361                         } else {
 1362                                 pg->pqflags &= ~PQ_ANON;
 1363                                 uvmexp.anonpages--;
 1364                         }
 1365                         pg->uanon->u.an_page = NULL;
 1366                         pg->uanon = NULL;
 1367                 }
 1368                 if (pg->flags & PG_WANTED) {
 1369                         wakeup(pg);
 1370                 }
 1371                 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
 1372 #ifdef UVM_PAGE_TRKOWN
 1373                 pg->owner_tag = NULL;
 1374 #endif
 1375                 if (pg->loan_count) {
 1376                         uvm_pagedequeue(pg);
 1377                         return;
 1378                 }
 1379         }
 1380 
 1381         /*
 1382          * remove page from its object or anon.
 1383          */
 1384 
 1385         if (pg->uobject != NULL) {
 1386                 uvm_pageremove(pg);
 1387         } else if (pg->uanon != NULL) {
 1388                 pg->uanon->u.an_page = NULL;
 1389                 uvmexp.anonpages--;
 1390         }
 1391 
 1392         /*
 1393          * now remove the page from the queues.
 1394          */
 1395 
 1396         uvm_pagedequeue(pg);
 1397 
 1398         /*
 1399          * if the page was wired, unwire it now.
 1400          */
 1401 
 1402         if (pg->wire_count) {
 1403                 pg->wire_count = 0;
 1404                 uvmexp.wired--;
 1405         }
 1406 
 1407         /*
 1408          * and put on free queue
 1409          */
 1410 
 1411         iszero = (pg->flags & PG_ZERO);
 1412         pgfl = &uvm.page_free[uvm_page_lookup_freelist(pg)].
 1413             pgfl_buckets[VM_PGCOLOR_BUCKET(pg)].
 1414             pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN];
 1415 
 1416         pg->pqflags = PQ_FREE;
 1417 #ifdef DEBUG
 1418         pg->uobject = (void *)0xdeadbeef;
 1419         pg->offset = 0xdeadbeef;
 1420         pg->uanon = (void *)0xdeadbeef;
 1421 #endif
 1422 
 1423         s = uvm_lock_fpageq();
 1424 
 1425 #ifdef DEBUG
 1426         if (iszero)
 1427                 uvm_pagezerocheck(pg);
 1428 #endif /* DEBUG */
 1429 
 1430         TAILQ_INSERT_HEAD(pgfl, pg, pageq);
 1431         uvmexp.free++;
 1432         if (iszero)
 1433                 uvmexp.zeropages++;
 1434 
 1435         if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
 1436                 uvm.page_idle_zero = vm_page_zero_enable;
 1437 
 1438         uvm_unlock_fpageq(s);
 1439 }
 1440 
 1441 /*
 1442  * uvm_page_unbusy: unbusy an array of pages.
 1443  *
 1444  * => pages must either all belong to the same object, or all belong to anons.
 1445  * => if pages are object-owned, object must be locked.
 1446  * => if pages are anon-owned, anons must be locked.
 1447  * => caller must lock page queues if pages may be released.
 1448  * => caller must make sure that anon-owned pages are not PG_RELEASED.
 1449  */
 1450 
 1451 void
 1452 uvm_page_unbusy(pgs, npgs)
 1453         struct vm_page **pgs;
 1454         int npgs;
 1455 {
 1456         struct vm_page *pg;
 1457         int i;
 1458         UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);
 1459 
 1460         for (i = 0; i < npgs; i++) {
 1461                 pg = pgs[i];
 1462                 if (pg == NULL || pg == PGO_DONTCARE) {
 1463                         continue;
 1464                 }
 1465 
 1466                 LOCK_ASSERT(pg->uobject == NULL ||
 1467                     simple_lock_held(&pg->uobject->vmobjlock));
 1468                 LOCK_ASSERT(pg->uobject != NULL ||
 1469                     (pg->uanon != NULL &&
 1470                     simple_lock_held(&pg->uanon->an_lock)));
 1471 
 1472                 KASSERT(pg->flags & PG_BUSY);
 1473                 KASSERT((pg->flags & PG_PAGEOUT) == 0);
 1474                 if (pg->flags & PG_WANTED) {
 1475                         wakeup(pg);
 1476                 }
 1477                 if (pg->flags & PG_RELEASED) {
 1478                         UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
 1479                         KASSERT(pg->uobject != NULL ||
 1480                             (pg->uanon != NULL && pg->uanon->an_ref > 0));
 1481                         pg->flags &= ~PG_RELEASED;
 1482                         uvm_pagefree(pg);
 1483                 } else {
 1484                         UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
 1485                         pg->flags &= ~(PG_WANTED|PG_BUSY);
 1486                         UVM_PAGE_OWN(pg, NULL);
 1487                 }
 1488         }
 1489 }
 1490 
 1491 #if defined(UVM_PAGE_TRKOWN)
 1492 /*
 1493  * uvm_page_own: set or release page ownership
 1494  *
 1495  * => this is a debugging function that keeps track of who sets PG_BUSY
 1496  *      and where they do it.   it can be used to track down problems
 1497  *      such a process setting "PG_BUSY" and never releasing it.
 1498  * => page's object [if any] must be locked
 1499  * => if "tag" is NULL then we are releasing page ownership
 1500  */
 1501 void
 1502 uvm_page_own(pg, tag)
 1503         struct vm_page *pg;
 1504         char *tag;
 1505 {
 1506         KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
 1507 
 1508         /* gain ownership? */
 1509         if (tag) {
 1510                 if (pg->owner_tag) {
 1511                         printf("uvm_page_own: page %p already owned "
 1512                             "by proc %d [%s]\n", pg,
 1513                             pg->owner, pg->owner_tag);
 1514                         panic("uvm_page_own");
 1515                 }
 1516                 pg->owner = (curproc) ? curproc->p_pid :  (pid_t) -1;
 1517                 pg->owner_tag = tag;
 1518                 return;
 1519         }
 1520 
 1521         /* drop ownership */
 1522         if (pg->owner_tag == NULL) {
 1523                 printf("uvm_page_own: dropping ownership of an non-owned "
 1524                     "page (%p)\n", pg);
 1525                 panic("uvm_page_own");
 1526         }
 1527         KASSERT((pg->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) ||
 1528             (pg->uanon == NULL && pg->uobject == NULL) ||
 1529             pg->uobject == uvm.kernel_object ||
 1530             pg->wire_count > 0 ||
 1531             (pg->loan_count == 1 && pg->uanon == NULL) ||
 1532             pg->loan_count > 1);
 1533         pg->owner_tag = NULL;
 1534 }
 1535 #endif
 1536 
 1537 /*
 1538  * uvm_pageidlezero: zero free pages while the system is idle.
 1539  *
 1540  * => try to complete one color bucket at a time, to reduce our impact
 1541  *      on the CPU cache.
 1542  * => we loop until we either reach the target or whichqs indicates that
 1543  *      there is a process ready to run.
 1544  */
 1545 void
 1546 uvm_pageidlezero()
 1547 {
 1548         struct vm_page *pg;
 1549         struct pgfreelist *pgfl;
 1550         int free_list, s, firstbucket;
 1551         static int nextbucket;
 1552 
 1553         KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE);
 1554         s = uvm_lock_fpageq();
 1555         firstbucket = nextbucket;
 1556         do {
 1557                 if (sched_whichqs != 0)
 1558                         goto quit;
 1559                 if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) {
 1560                         uvm.page_idle_zero = FALSE;
 1561                         goto quit;
 1562                 }
 1563                 for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
 1564                         pgfl = &uvm.page_free[free_list];
 1565                         while ((pg = TAILQ_FIRST(&pgfl->pgfl_buckets[
 1566                             nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
 1567                                 if (sched_whichqs != 0)
 1568                                         goto quit;
 1569 
 1570                                 TAILQ_REMOVE(&pgfl->pgfl_buckets[
 1571                                     nextbucket].pgfl_queues[PGFL_UNKNOWN],
 1572                                     pg, pageq);
 1573                                 uvmexp.free--;
 1574                                 uvm_unlock_fpageq(s);
 1575                                 KERNEL_UNLOCK();
 1576 #ifdef PMAP_PAGEIDLEZERO
 1577                                 if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) {
 1578 
 1579                                         /*
 1580                                          * The machine-dependent code detected
 1581                                          * some reason for us to abort zeroing
 1582                                          * pages, probably because there is a
 1583                                          * process now ready to run.
 1584                                          */
 1585 
 1586                                         KERNEL_LOCK(
 1587                                             LK_EXCLUSIVE | LK_CANRECURSE);
 1588                                         s = uvm_lock_fpageq();
 1589                                         TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[
 1590                                             nextbucket].pgfl_queues[
 1591                                             PGFL_UNKNOWN], pg, pageq);
 1592                                         uvmexp.free++;
 1593                                         uvmexp.zeroaborts++;
 1594                                         goto quit;
 1595                                 }
 1596 #else
 1597                                 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
 1598 #endif /* PMAP_PAGEIDLEZERO */
 1599                                 pg->flags |= PG_ZERO;
 1600 
 1601                                 KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE);
 1602                                 s = uvm_lock_fpageq();
 1603                                 TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[
 1604                                     nextbucket].pgfl_queues[PGFL_ZEROS],
 1605                                     pg, pageq);
 1606                                 uvmexp.free++;
 1607                                 uvmexp.zeropages++;
 1608                         }
 1609                 }
 1610                 nextbucket = (nextbucket + 1) & uvmexp.colormask;
 1611         } while (nextbucket != firstbucket);
 1612 quit:
 1613         uvm_unlock_fpageq(s);
 1614         KERNEL_UNLOCK();
 1615 }

Cache object: b561ac830ff85053a2a476f7d555079e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.