FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_page.c


    1 /*      $NetBSD: uvm_page.c,v 1.114 2006/09/27 17:18:50 thorpej Exp $   */
    2 
    3 /*
    4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
    5  * Copyright (c) 1991, 1993, The Regents of the University of California.
    6  *
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * The Mach Operating System project at Carnegie-Mellon University.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. All advertising materials mentioning features or use of this software
   21  *    must display the following acknowledgement:
   22  *      This product includes software developed by Charles D. Cranor,
   23  *      Washington University, the University of California, Berkeley and
   24  *      its contributors.
   25  * 4. Neither the name of the University nor the names of its contributors
   26  *    may be used to endorse or promote products derived from this software
   27  *    without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39  * SUCH DAMAGE.
   40  *
   41  *      @(#)vm_page.c   8.3 (Berkeley) 3/21/94
   42  * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
   43  *
   44  *
   45  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   46  * All rights reserved.
   47  *
   48  * Permission to use, copy, modify and distribute this software and
   49  * its documentation is hereby granted, provided that both the copyright
   50  * notice and this permission notice appear in all copies of the
   51  * software, derivative works or modified versions, and any portions
   52  * thereof, and that both notices appear in supporting documentation.
   53  *
   54  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   55  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   56  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   57  *
   58  * Carnegie Mellon requests users of this software to return to
   59  *
   60  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   61  *  School of Computer Science
   62  *  Carnegie Mellon University
   63  *  Pittsburgh PA 15213-3890
   64  *
   65  * any improvements or extensions that they make and grant Carnegie the
   66  * rights to redistribute these changes.
   67  */
   68 
   69 /*
   70  * uvm_page.c: page ops.
   71  */
   72 
   73 #include <sys/cdefs.h>
   74 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.114 2006/09/27 17:18:50 thorpej Exp $");
   75 
   76 #include "opt_uvmhist.h"
   77 #include "opt_readahead.h"
   78 
   79 #include <sys/param.h>
   80 #include <sys/systm.h>
   81 #include <sys/malloc.h>
   82 #include <sys/sched.h>
   83 #include <sys/kernel.h>
   84 #include <sys/vnode.h>
   85 #include <sys/proc.h>
   86 
   87 #include <uvm/uvm.h>
   88 #include <uvm/uvm_pdpolicy.h>
   89 
   90 /*
   91  * global vars... XXXCDC: move to uvm. structure.
   92  */
   93 
   94 /*
   95  * physical memory config is stored in vm_physmem.
   96  */
   97 
   98 struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];   /* XXXCDC: uvm.physmem */
   99 int vm_nphysseg = 0;                            /* XXXCDC: uvm.nphysseg */
  100 
  101 /*
  102  * Some supported CPUs in a given architecture don't support all
  103  * of the things necessary to do idle page zero'ing efficiently.
  104  * We therefore provide a way to disable it from machdep code here.
  105  */
  106 /*
  107  * XXX disabled until we can find a way to do this without causing
  108  * problems for either CPU caches or DMA latency.
  109  */
  110 boolean_t vm_page_zero_enable = FALSE;
  111 
  112 /*
  113  * local variables
  114  */
  115 
  116 /*
   117  * these variables record the boot-time kernel virtual address range
   118  * obtained from pmap_virtual_space().  uvm_pageboot_alloc() consumes
   119  * this range and uvm_page_init() passes what is left up to the caller.
  120  */
  121 
  122 static vaddr_t      virtual_space_start;
  123 static vaddr_t      virtual_space_end;
  124 
  125 /*
  126  * we use a hash table with only one bucket during bootup.  we will
  127  * later rehash (resize) the hash table once the allocator is ready.
   128  * we statically allocate the one bootstrap bucket below...
  129  */
  130 
  131 static struct pglist uvm_bootbucket;
  132 
  133 /*
  134  * we allocate an initial number of page colors in uvm_page_init(),
  135  * and remember them.  We may re-color pages as cache sizes are
  136  * discovered during the autoconfiguration phase.  But we can never
  137  * free the initial set of buckets, since they are allocated using
  138  * uvm_pageboot_alloc().
  139  */
  140 
  141 static boolean_t have_recolored_pages /* = FALSE */;
  142 
  143 MALLOC_DEFINE(M_VMPAGE, "VM page", "VM page");
  144 
  145 #ifdef DEBUG
  146 vaddr_t uvm_zerocheckkva;
  147 #endif /* DEBUG */
  148 
  149 /*
  150  * local prototypes
  151  */
  152 
  153 static void uvm_pageinsert(struct vm_page *);
  154 static void uvm_pageinsert_after(struct vm_page *, struct vm_page *);
  155 static void uvm_pageremove(struct vm_page *);
  156 
  157 /*
  158  * inline functions
  159  */
  160 
  161 /*
  162  * uvm_pageinsert: insert a page in the object and the hash table
  163  * uvm_pageinsert_after: insert a page into the specified place in listq
  164  *
  165  * => caller must lock object
  166  * => caller must lock page queues
  167  * => call should have already set pg's object and offset pointers
  168  *    and bumped the version counter
  169  */
  170 
  171 inline static void
  172 uvm_pageinsert_after(struct vm_page *pg, struct vm_page *where)
  173 {
  174         struct pglist *buck;
  175         struct uvm_object *uobj = pg->uobject;
  176 
  177         KASSERT((pg->flags & PG_TABLED) == 0);
  178         KASSERT(where == NULL || (where->flags & PG_TABLED));
  179         KASSERT(where == NULL || (where->uobject == uobj));
  180         buck = &uvm.page_hash[uvm_pagehash(uobj, pg->offset)];
  181         simple_lock(&uvm.hashlock);
  182         TAILQ_INSERT_TAIL(buck, pg, hashq);
  183         simple_unlock(&uvm.hashlock);
  184 
  185         if (UVM_OBJ_IS_VNODE(uobj)) {
  186                 if (uobj->uo_npages == 0) {
  187                         struct vnode *vp = (struct vnode *)uobj;
  188 
  189                         vholdl(vp);
  190                 }
  191                 if (UVM_OBJ_IS_VTEXT(uobj)) {
  192                         uvmexp.execpages++;
  193                 } else {
  194                         uvmexp.filepages++;
  195                 }
  196         } else if (UVM_OBJ_IS_AOBJ(uobj)) {
  197                 uvmexp.anonpages++;
  198         }
  199 
  200         if (where)
  201                 TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq);
  202         else
  203                 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq);
  204         pg->flags |= PG_TABLED;
  205         uobj->uo_npages++;
  206 }
  207 
  208 inline static void
  209 uvm_pageinsert(struct vm_page *pg)
  210 {
  211 
  212         uvm_pageinsert_after(pg, NULL);
  213 }
  214 
  215 /*
   216  * uvm_pageremove: remove page from object and hash
  217  *
  218  * => caller must lock object
  219  * => caller must lock page queues
  220  */
  221 
  222 static inline void
  223 uvm_pageremove(struct vm_page *pg)
  224 {
  225         struct pglist *buck;
  226         struct uvm_object *uobj = pg->uobject;
  227 
  228         KASSERT(pg->flags & PG_TABLED);
  229         buck = &uvm.page_hash[uvm_pagehash(uobj, pg->offset)];
  230         simple_lock(&uvm.hashlock);
  231         TAILQ_REMOVE(buck, pg, hashq);
  232         simple_unlock(&uvm.hashlock);
  233 
  234         if (UVM_OBJ_IS_VNODE(uobj)) {
  235                 if (uobj->uo_npages == 1) {
  236                         struct vnode *vp = (struct vnode *)uobj;
  237 
  238                         holdrelel(vp);
  239                 }
  240                 if (UVM_OBJ_IS_VTEXT(uobj)) {
  241                         uvmexp.execpages--;
  242                 } else {
  243                         uvmexp.filepages--;
  244                 }
  245         } else if (UVM_OBJ_IS_AOBJ(uobj)) {
  246                 uvmexp.anonpages--;
  247         }
  248 
  249         /* object should be locked */
  250         uobj->uo_npages--;
  251         TAILQ_REMOVE(&uobj->memq, pg, listq);
  252         pg->flags &= ~PG_TABLED;
  253         pg->uobject = NULL;
  254 }
  255 
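      /*
       * [editor's note]  both routines above locate a page's hash chain via
       * uvm_pagehash(uobj, offset).  as a hedged sketch (the real macro lives
       * in uvm_page.h and may differ in detail), the bucket index is just the
       * object pointer mixed with the page number, masked by the table size:
       *
       *      idx  = ((u_long)uobj + (u_long)atop(offset)) & uvm.page_hashmask;
       *      buck = &uvm.page_hash[idx];
       *
       * uvm.page_hashmask is always "number of buckets - 1", which is why
       * uvm_page_rehash() below keeps the bucket count a power of two.
       */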
  256 static void
  257 uvm_page_init_buckets(struct pgfreelist *pgfl)
  258 {
  259         int color, i;
  260 
  261         for (color = 0; color < uvmexp.ncolors; color++) {
  262                 for (i = 0; i < PGFL_NQUEUES; i++) {
  263                         TAILQ_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]);
  264                 }
  265         }
  266 }
  267 
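      /*
       * [editor's note]  the free-page pool initialized above is indexed on
       * three levels: free list (MD priority), page color, and queue kind
       * (zero-filled vs. unknown contents).  a free page therefore sits on
       * exactly one queue, reachable as (illustrative path, using the field
       * names from this file):
       *
       *      uvm.page_free[free_list]
       *          .pgfl_buckets[VM_PGCOLOR_BUCKET(pg)]
       *          .pgfl_queues[PGFL_ZEROS or PGFL_UNKNOWN]
       *
       * uvm_pagefree() and uvm_pagealloc_pgfl() below walk exactly this path.
       */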
  268 /*
  269  * uvm_page_init: init the page system.   called from uvm_init().
  270  *
  271  * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
  272  */
  273 
  274 void
  275 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
  276 {
  277         vsize_t freepages, pagecount, bucketcount, n;
  278         struct pgflbucket *bucketarray;
  279         struct vm_page *pagearray;
  280         int lcv;
  281         u_int i;
  282         paddr_t paddr;
  283 
  284         /*
  285          * init the page queues and page queue locks, except the free
  286          * list; we allocate that later (with the initial vm_page
  287          * structures).
  288          */
  289 
  290         uvmpdpol_init();
  291         simple_lock_init(&uvm.pageqlock);
  292         simple_lock_init(&uvm.fpageqlock);
  293 
  294         /*
  295          * init the <obj,offset> => <page> hash table.  for now
  296          * we just have one bucket (the bootstrap bucket).  later on we
  297          * will allocate new buckets as we dynamically resize the hash table.
  298          */
  299 
  300         uvm.page_nhash = 1;                     /* 1 bucket */
  301         uvm.page_hashmask = 0;                  /* mask for hash function */
  302         uvm.page_hash = &uvm_bootbucket;        /* install bootstrap bucket */
  303         TAILQ_INIT(uvm.page_hash);              /* init hash table */
  304         simple_lock_init(&uvm.hashlock);        /* init hash table lock */
  305 
  306         /*
  307          * allocate vm_page structures.
  308          */
  309 
  310         /*
  311          * sanity check:
  312          * before calling this function the MD code is expected to register
  313          * some free RAM with the uvm_page_physload() function.   our job
  314          * now is to allocate vm_page structures for this memory.
  315          */
  316 
  317         if (vm_nphysseg == 0)
  318                 panic("uvm_page_bootstrap: no memory pre-allocated");
  319 
  320         /*
  321          * first calculate the number of free pages...
  322          *
  323          * note that we use start/end rather than avail_start/avail_end.
  324          * this allows us to allocate extra vm_page structures in case we
  325          * want to return some memory to the pool after booting.
  326          */
  327 
  328         freepages = 0;
  329         for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
  330                 freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
  331 
  332         /*
  333          * Let MD code initialize the number of colors, or default
  334          * to 1 color if MD code doesn't care.
  335          */
  336         if (uvmexp.ncolors == 0)
  337                 uvmexp.ncolors = 1;
  338         uvmexp.colormask = uvmexp.ncolors - 1;
  339 
  340         /*
  341          * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
  342          * use.   for each page of memory we use we need a vm_page structure.
  343          * thus, the total number of pages we can use is the total size of
   344  * the memory divided by the sum of PAGE_SIZE and the size of a vm_page
  345          * structure.   we add one to freepages as a fudge factor to avoid
  346          * truncation errors (since we can only allocate in terms of whole
  347          * pages).
  348          */
  349 
  350         bucketcount = uvmexp.ncolors * VM_NFREELIST;
  351         pagecount = ((freepages + 1) << PAGE_SHIFT) /
  352             (PAGE_SIZE + sizeof(struct vm_page));
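              /*
               * [editor's note]  worked example of the computation above,
               * assuming 4 KiB pages and a vm_page of roughly 80 bytes
               * (both MD-dependent assumptions, not values from this file):
               *
               *      freepages = 262144               1 GiB of 4 KiB pages
               *      pagecount = (262145 << 12) / (4096 + 80) = 257123
               *
               * i.e. roughly 2% of the loaded RAM ends up holding the
               * vm_page array carved out of pagearray just below.
               */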
  353 
  354         bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
  355             sizeof(struct pgflbucket)) + (pagecount *
  356             sizeof(struct vm_page)));
  357         pagearray = (struct vm_page *)(bucketarray + bucketcount);
  358 
  359         for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
  360                 uvm.page_free[lcv].pgfl_buckets =
  361                     (bucketarray + (lcv * uvmexp.ncolors));
  362                 uvm_page_init_buckets(&uvm.page_free[lcv]);
  363         }
  364         memset(pagearray, 0, pagecount * sizeof(struct vm_page));
  365 
  366         /*
  367          * init the vm_page structures and put them in the correct place.
  368          */
  369 
  370         for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
  371                 n = vm_physmem[lcv].end - vm_physmem[lcv].start;
  372 
  373                 /* set up page array pointers */
  374                 vm_physmem[lcv].pgs = pagearray;
  375                 pagearray += n;
  376                 pagecount -= n;
  377                 vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);
  378 
  379                 /* init and free vm_pages (we've already zeroed them) */
  380                 paddr = ptoa(vm_physmem[lcv].start);
  381                 for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
  382                         vm_physmem[lcv].pgs[i].phys_addr = paddr;
  383 #ifdef __HAVE_VM_PAGE_MD
  384                         VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
  385 #endif
  386                         if (atop(paddr) >= vm_physmem[lcv].avail_start &&
  387                             atop(paddr) <= vm_physmem[lcv].avail_end) {
  388                                 uvmexp.npages++;
  389                                 /* add page to free pool */
  390                                 uvm_pagefree(&vm_physmem[lcv].pgs[i]);
  391                         }
  392                 }
  393         }
  394 
  395         /*
  396          * pass up the values of virtual_space_start and
  397          * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
  398          * layers of the VM.
  399          */
  400 
  401         *kvm_startp = round_page(virtual_space_start);
  402         *kvm_endp = trunc_page(virtual_space_end);
  403 #ifdef DEBUG
  404         /*
  405          * steal kva for uvm_pagezerocheck().
  406          */
  407         uvm_zerocheckkva = *kvm_startp;
  408         *kvm_startp += PAGE_SIZE;
  409 #endif /* DEBUG */
  410 
  411         /*
  412          * init locks for kernel threads
  413          */
  414 
  415         simple_lock_init(&uvm.pagedaemon_lock);
  416         simple_lock_init(&uvm.aiodoned_lock);
  417 
  418         /*
  419          * init various thresholds.
  420          */
  421 
  422         uvmexp.reserve_pagedaemon = 1;
  423         uvmexp.reserve_kernel = 5;
  424 
  425         /*
  426          * determine if we should zero pages in the idle loop.
  427          */
  428 
  429         uvm.page_idle_zero = vm_page_zero_enable;
  430 
  431         /*
  432          * done!
  433          */
  434 
  435         uvm.page_init_done = TRUE;
  436 }
  437 
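      /*
       * [editor's note]  a rough sketch of the boot-time ordering this
       * function assumes, as seen from a typical port (the MD-side names
       * vary per architecture; treat this as illustrative, not a spec):
       *
       *      pmap_bootstrap()                    MD: set up the kernel pmap
       *        uvm_setpagesize()                 fix pagesize/pageshift/mask
       *        uvm_page_physload(...)            register each RAM segment
       *      uvm_init()
       *        uvm_page_init(&kvm_s, &kvm_e)     this function
       *        ...                               kernel_map built from the
       *                                          returned [kvm_s, kvm_e) range
       *
       * the "no memory pre-allocated" panic above fires when the MD code
       * skipped the uvm_page_physload() step.
       */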
  438 /*
  439  * uvm_setpagesize: set the page size
  440  *
  441  * => sets page_shift and page_mask from uvmexp.pagesize.
  442  */
  443 
  444 void
  445 uvm_setpagesize(void)
  446 {
  447 
  448         /*
  449          * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
  450          * to be a constant (indicated by being a non-zero value).
  451          */
  452         if (uvmexp.pagesize == 0) {
  453                 if (PAGE_SIZE == 0)
  454                         panic("uvm_setpagesize: uvmexp.pagesize not set");
  455                 uvmexp.pagesize = PAGE_SIZE;
  456         }
  457         uvmexp.pagemask = uvmexp.pagesize - 1;
  458         if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
  459                 panic("uvm_setpagesize: page size not a power of two");
  460         for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
  461                 if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
  462                         break;
  463 }
  464 
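      /*
       * [editor's note]  worked example of the checks above for the common
       * 4 KiB page size (any power of two behaves the same way):
       *
       *      uvmexp.pagesize  = 4096   (0x1000)
       *      uvmexp.pagemask  = 4095   (0x0fff)
       *      pagemask & pagesize == 0  -> power-of-two test passes
       *      uvmexp.pageshift = 12     (1 << 12 == 4096)
       *
       * a non-power-of-two size such as 4097 yields pagemask 4096 and
       * (4096 & 4097) != 0, so the panic triggers.
       */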
  465 /*
  466  * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
  467  */
  468 
  469 vaddr_t
  470 uvm_pageboot_alloc(vsize_t size)
  471 {
  472         static boolean_t initialized = FALSE;
  473         vaddr_t addr;
  474 #if !defined(PMAP_STEAL_MEMORY)
  475         vaddr_t vaddr;
  476         paddr_t paddr;
  477 #endif
  478 
  479         /*
  480          * on first call to this function, initialize ourselves.
  481          */
  482         if (initialized == FALSE) {
  483                 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
  484 
  485                 /* round it the way we like it */
  486                 virtual_space_start = round_page(virtual_space_start);
  487                 virtual_space_end = trunc_page(virtual_space_end);
  488 
  489                 initialized = TRUE;
  490         }
  491 
  492         /* round to page size */
  493         size = round_page(size);
  494 
  495 #if defined(PMAP_STEAL_MEMORY)
  496 
  497         /*
  498          * defer bootstrap allocation to MD code (it may want to allocate
  499          * from a direct-mapped segment).  pmap_steal_memory should adjust
  500          * virtual_space_start/virtual_space_end if necessary.
  501          */
  502 
  503         addr = pmap_steal_memory(size, &virtual_space_start,
  504             &virtual_space_end);
  505 
  506         return(addr);
  507 
  508 #else /* !PMAP_STEAL_MEMORY */
  509 
  510         /*
  511          * allocate virtual memory for this request
  512          */
  513         if (virtual_space_start == virtual_space_end ||
  514             (virtual_space_end - virtual_space_start) < size)
  515                 panic("uvm_pageboot_alloc: out of virtual space");
  516 
  517         addr = virtual_space_start;
  518 
  519 #ifdef PMAP_GROWKERNEL
  520         /*
  521          * If the kernel pmap can't map the requested space,
  522          * then allocate more resources for it.
  523          */
  524         if (uvm_maxkaddr < (addr + size)) {
  525                 uvm_maxkaddr = pmap_growkernel(addr + size);
  526                 if (uvm_maxkaddr < (addr + size))
  527                         panic("uvm_pageboot_alloc: pmap_growkernel() failed");
  528         }
  529 #endif
  530 
  531         virtual_space_start += size;
  532 
  533         /*
  534          * allocate and mapin physical pages to back new virtual pages
  535          */
  536 
  537         for (vaddr = round_page(addr) ; vaddr < addr + size ;
  538             vaddr += PAGE_SIZE) {
  539 
  540                 if (!uvm_page_physget(&paddr))
  541                         panic("uvm_pageboot_alloc: out of memory");
  542 
  543                 /*
  544                  * Note this memory is no longer managed, so using
  545                  * pmap_kenter is safe.
  546                  */
  547                 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
  548         }
  549         pmap_update(pmap_kernel());
  550         return(addr);
  551 #endif  /* PMAP_STEAL_MEMORY */
  552 }
  553 
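      /*
       * [editor's note]  hedged usage sketch: early kernel or pmap code that
       * needs wired memory before the page allocator is up calls this
       * directly, along the lines of (msgbuf is only an illustration,
       * ports differ):
       *
       *      msgbuf = (void *)uvm_pageboot_alloc(round_page(MSGBUFSIZE));
       *
       * note the memory is permanently stolen: it is entered with
       * pmap_kenter_pa() and never receives vm_page structures, so it can
       * never be returned to the free page pool.
       */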
  554 #if !defined(PMAP_STEAL_MEMORY)
  555 /*
  556  * uvm_page_physget: "steal" one page from the vm_physmem structure.
  557  *
  558  * => attempt to allocate it off the end of a segment in which the "avail"
  559  *    values match the start/end values.   if we can't do that, then we
  560  *    will advance both values (making them equal, and removing some
  561  *    vm_page structures from the non-avail area).
  562  * => return false if out of memory.
  563  */
  564 
  565 /* subroutine: try to allocate from memory chunks on the specified freelist */
  566 static boolean_t uvm_page_physget_freelist(paddr_t *, int);
  567 
  568 static boolean_t
  569 uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
  570 {
  571         int lcv, x;
  572 
  573         /* pass 1: try allocating from a matching end */
  574 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
  575         for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
  576 #else
  577         for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
  578 #endif
  579         {
  580 
  581                 if (uvm.page_init_done == TRUE)
  582                         panic("uvm_page_physget: called _after_ bootstrap");
  583 
  584                 if (vm_physmem[lcv].free_list != freelist)
  585                         continue;
  586 
  587                 /* try from front */
  588                 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
  589                     vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
  590                         *paddrp = ptoa(vm_physmem[lcv].avail_start);
  591                         vm_physmem[lcv].avail_start++;
  592                         vm_physmem[lcv].start++;
  593                         /* nothing left?   nuke it */
  594                         if (vm_physmem[lcv].avail_start ==
  595                             vm_physmem[lcv].end) {
  596                                 if (vm_nphysseg == 1)
  597                                     panic("uvm_page_physget: out of memory!");
  598                                 vm_nphysseg--;
  599                                 for (x = lcv ; x < vm_nphysseg ; x++)
  600                                         /* structure copy */
  601                                         vm_physmem[x] = vm_physmem[x+1];
  602                         }
  603                         return (TRUE);
  604                 }
  605 
  606                 /* try from rear */
  607                 if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
  608                     vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
  609                         *paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
  610                         vm_physmem[lcv].avail_end--;
  611                         vm_physmem[lcv].end--;
  612                         /* nothing left?   nuke it */
  613                         if (vm_physmem[lcv].avail_end ==
  614                             vm_physmem[lcv].start) {
  615                                 if (vm_nphysseg == 1)
  616                                     panic("uvm_page_physget: out of memory!");
  617                                 vm_nphysseg--;
  618                                 for (x = lcv ; x < vm_nphysseg ; x++)
  619                                         /* structure copy */
  620                                         vm_physmem[x] = vm_physmem[x+1];
  621                         }
  622                         return (TRUE);
  623                 }
  624         }
  625 
   626         /* pass 2: forget about matching ends, just allocate something */
  627 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
  628         for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
  629 #else
  630         for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
  631 #endif
  632         {
  633 
  634                 /* any room in this bank? */
  635                 if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
  636                         continue;  /* nope */
  637 
  638                 *paddrp = ptoa(vm_physmem[lcv].avail_start);
  639                 vm_physmem[lcv].avail_start++;
  640                 /* truncate! */
  641                 vm_physmem[lcv].start = vm_physmem[lcv].avail_start;
  642 
  643                 /* nothing left?   nuke it */
  644                 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
  645                         if (vm_nphysseg == 1)
  646                                 panic("uvm_page_physget: out of memory!");
  647                         vm_nphysseg--;
  648                         for (x = lcv ; x < vm_nphysseg ; x++)
  649                                 /* structure copy */
  650                                 vm_physmem[x] = vm_physmem[x+1];
  651                 }
  652                 return (TRUE);
  653         }
  654 
  655         return (FALSE);        /* whoops! */
  656 }
  657 
  658 boolean_t
  659 uvm_page_physget(paddr_t *paddrp)
  660 {
  661         int i;
  662 
  663         /* try in the order of freelist preference */
  664         for (i = 0; i < VM_NFREELIST; i++)
  665                 if (uvm_page_physget_freelist(paddrp, i) == TRUE)
  666                         return (TRUE);
  667         return (FALSE);
  668 }
  669 #endif /* PMAP_STEAL_MEMORY */
  670 
  671 /*
  672  * uvm_page_physload: load physical memory into VM system
  673  *
  674  * => all args are PFs
  675  * => all pages in start/end get vm_page structures
  676  * => areas marked by avail_start/avail_end get added to the free page pool
  677  * => we are limited to VM_PHYSSEG_MAX physical memory segments
  678  */
  679 
  680 void
  681 uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
  682     paddr_t avail_end, int free_list)
  683 {
  684         int preload, lcv;
  685         psize_t npages;
  686         struct vm_page *pgs;
  687         struct vm_physseg *ps;
  688 
  689         if (uvmexp.pagesize == 0)
  690                 panic("uvm_page_physload: page size not set!");
  691         if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
  692                 panic("uvm_page_physload: bad free list %d", free_list);
  693         if (start >= end)
  694                 panic("uvm_page_physload: start >= end");
  695 
  696         /*
  697          * do we have room?
  698          */
  699 
  700         if (vm_nphysseg == VM_PHYSSEG_MAX) {
  701                 printf("uvm_page_physload: unable to load physical memory "
  702                     "segment\n");
  703                 printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
  704                     VM_PHYSSEG_MAX, (long long)start, (long long)end);
  705                 printf("\tincrease VM_PHYSSEG_MAX\n");
  706                 return;
  707         }
  708 
  709         /*
  710          * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
  711          * called yet, so malloc is not available).
  712          */
  713 
  714         for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
  715                 if (vm_physmem[lcv].pgs)
  716                         break;
  717         }
  718         preload = (lcv == vm_nphysseg);
  719 
  720         /*
  721          * if VM is already running, attempt to malloc() vm_page structures
  722          */
  723 
  724         if (!preload) {
  725 #if defined(VM_PHYSSEG_NOADD)
  726                 panic("uvm_page_physload: tried to add RAM after vm_mem_init");
  727 #else
  728                 /* XXXCDC: need some sort of lockout for this case */
  729                 paddr_t paddr;
  730                 npages = end - start;  /* # of pages */
  731                 pgs = malloc(sizeof(struct vm_page) * npages,
  732                     M_VMPAGE, M_NOWAIT);
  733                 if (pgs == NULL) {
  734                         printf("uvm_page_physload: can not malloc vm_page "
  735                             "structs for segment\n");
  736                         printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
  737                         return;
  738                 }
  739                 /* zero data, init phys_addr and free_list, and free pages */
  740                 memset(pgs, 0, sizeof(struct vm_page) * npages);
  741                 for (lcv = 0, paddr = ptoa(start) ;
  742                                  lcv < npages ; lcv++, paddr += PAGE_SIZE) {
  743                         pgs[lcv].phys_addr = paddr;
  744                         pgs[lcv].free_list = free_list;
  745                         if (atop(paddr) >= avail_start &&
  746                             atop(paddr) <= avail_end)
  747                                 uvm_pagefree(&pgs[lcv]);
  748                 }
  749                 /* XXXCDC: incomplete: need to update uvmexp.free, what else? */
  750                 /* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
  751 #endif
  752         } else {
  753                 pgs = NULL;
  754                 npages = 0;
  755         }
  756 
  757         /*
  758          * now insert us in the proper place in vm_physmem[]
  759          */
  760 
  761 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
  762         /* random: put it at the end (easy!) */
  763         ps = &vm_physmem[vm_nphysseg];
  764 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
  765         {
  766                 int x;
  767                 /* sort by address for binary search */
  768                 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
  769                         if (start < vm_physmem[lcv].start)
  770                                 break;
  771                 ps = &vm_physmem[lcv];
  772                 /* move back other entries, if necessary ... */
  773                 for (x = vm_nphysseg ; x > lcv ; x--)
  774                         /* structure copy */
  775                         vm_physmem[x] = vm_physmem[x - 1];
  776         }
  777 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
  778         {
  779                 int x;
  780                 /* sort by largest segment first */
  781                 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
  782                         if ((end - start) >
  783                             (vm_physmem[lcv].end - vm_physmem[lcv].start))
  784                                 break;
  785                 ps = &vm_physmem[lcv];
  786                 /* move back other entries, if necessary ... */
  787                 for (x = vm_nphysseg ; x > lcv ; x--)
  788                         /* structure copy */
  789                         vm_physmem[x] = vm_physmem[x - 1];
  790         }
  791 #else
  792         panic("uvm_page_physload: unknown physseg strategy selected!");
  793 #endif
  794 
  795         ps->start = start;
  796         ps->end = end;
  797         ps->avail_start = avail_start;
  798         ps->avail_end = avail_end;
  799         if (preload) {
  800                 ps->pgs = NULL;
  801         } else {
  802                 ps->pgs = pgs;
  803                 ps->lastpg = pgs + npages - 1;
  804         }
  805         ps->free_list = free_list;
  806         vm_nphysseg++;
  807 
  808         if (!preload) {
  809                 uvm_page_rehash();
  810                 uvmpdpol_reinit();
  811         }
  812 }
  813 
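      /*
       * [editor's note]  hedged example of the call MD code makes for each
       * chunk of RAM it discovers (all arguments are page frame numbers, per
       * the comment above; phys_start/phys_avail_start etc. are placeholder
       * names for byte addresses):
       *
       *      uvm_page_physload(atop(phys_start), atop(phys_end),
       *          atop(phys_avail_start), atop(phys_avail_end),
       *          VM_FREELIST_DEFAULT);
       *
       * start/end bound the pages that get vm_page structures, while
       * avail_start/avail_end bound the subset actually placed on the free
       * lists, so a port can describe pages it must keep reserved.
       */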
  814 /*
  815  * uvm_page_rehash: reallocate hash table based on number of free pages.
  816  */
  817 
  818 void
  819 uvm_page_rehash(void)
  820 {
  821         int freepages, lcv, bucketcount, oldcount;
  822         struct pglist *newbuckets, *oldbuckets;
  823         struct vm_page *pg;
  824         size_t newsize, oldsize;
  825 
  826         /*
  827          * compute number of pages that can go in the free pool
  828          */
  829 
  830         freepages = 0;
  831         for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
  832                 freepages +=
  833                     (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
  834 
  835         /*
  836          * compute number of buckets needed for this number of pages
  837          */
  838 
  839         bucketcount = 1;
  840         while (bucketcount < freepages)
  841                 bucketcount = bucketcount * 2;
  842 
  843         /*
  844          * compute the size of the current table and new table.
  845          */
  846 
  847         oldbuckets = uvm.page_hash;
  848         oldcount = uvm.page_nhash;
  849         oldsize = round_page(sizeof(struct pglist) * oldcount);
  850         newsize = round_page(sizeof(struct pglist) * bucketcount);
  851 
  852         /*
  853          * allocate the new buckets
  854          */
  855 
  856         newbuckets = (struct pglist *) uvm_km_alloc(kernel_map, newsize,
  857             0, UVM_KMF_WIRED);
  858         if (newbuckets == NULL) {
  859                 printf("uvm_page_physrehash: WARNING: could not grow page "
  860                     "hash table\n");
  861                 return;
  862         }
  863         for (lcv = 0 ; lcv < bucketcount ; lcv++)
  864                 TAILQ_INIT(&newbuckets[lcv]);
  865 
  866         /*
  867          * now replace the old buckets with the new ones and rehash everything
  868          */
  869 
  870         simple_lock(&uvm.hashlock);
  871         uvm.page_hash = newbuckets;
  872         uvm.page_nhash = bucketcount;
  873         uvm.page_hashmask = bucketcount - 1;  /* power of 2 */
  874 
  875         /* ... and rehash */
  876         for (lcv = 0 ; lcv < oldcount ; lcv++) {
  877                 while ((pg = oldbuckets[lcv].tqh_first) != NULL) {
  878                         TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
  879                         TAILQ_INSERT_TAIL(
  880                           &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
  881                           pg, hashq);
  882                 }
  883         }
  884         simple_unlock(&uvm.hashlock);
  885 
  886         /*
   887          * free old bucket array if it is not the boot-time table
  888          */
  889 
  890         if (oldbuckets != &uvm_bootbucket)
  891                 uvm_km_free(kernel_map, (vaddr_t) oldbuckets, oldsize,
  892                     UVM_KMF_WIRED);
  893 }
  894 
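      /*
       * [editor's note]  worked example of the sizing loop above: with
       * freepages = 200000 the doubling stops at bucketcount = 262144 (2^18),
       * i.e. at least one bucket per free page, rounded up to a power of two
       * so that page_hashmask can remain the simple mask (bucketcount - 1).
       */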
  895 /*
  896  * uvm_page_recolor: Recolor the pages if the new bucket count is
  897  * larger than the old one.
  898  */
  899 
  900 void
  901 uvm_page_recolor(int newncolors)
  902 {
  903         struct pgflbucket *bucketarray, *oldbucketarray;
  904         struct pgfreelist pgfl;
  905         struct vm_page *pg;
  906         vsize_t bucketcount;
  907         int s, lcv, color, i, ocolors;
  908 
  909         if (newncolors <= uvmexp.ncolors)
  910                 return;
  911 
  912         if (uvm.page_init_done == FALSE) {
  913                 uvmexp.ncolors = newncolors;
  914                 return;
  915         }
  916 
  917         bucketcount = newncolors * VM_NFREELIST;
  918         bucketarray = malloc(bucketcount * sizeof(struct pgflbucket),
  919             M_VMPAGE, M_NOWAIT);
  920         if (bucketarray == NULL) {
  921                 printf("WARNING: unable to allocate %ld page color buckets\n",
  922                     (long) bucketcount);
  923                 return;
  924         }
  925 
  926         s = uvm_lock_fpageq();
  927 
  928         /* Make sure we should still do this. */
  929         if (newncolors <= uvmexp.ncolors) {
  930                 uvm_unlock_fpageq(s);
  931                 free(bucketarray, M_VMPAGE);
  932                 return;
  933         }
  934 
  935         oldbucketarray = uvm.page_free[0].pgfl_buckets;
  936         ocolors = uvmexp.ncolors;
  937 
  938         uvmexp.ncolors = newncolors;
  939         uvmexp.colormask = uvmexp.ncolors - 1;
  940 
  941         for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
  942                 pgfl.pgfl_buckets = (bucketarray + (lcv * newncolors));
  943                 uvm_page_init_buckets(&pgfl);
  944                 for (color = 0; color < ocolors; color++) {
  945                         for (i = 0; i < PGFL_NQUEUES; i++) {
  946                                 while ((pg = TAILQ_FIRST(&uvm.page_free[
  947                                     lcv].pgfl_buckets[color].pgfl_queues[i]))
  948                                     != NULL) {
  949                                         TAILQ_REMOVE(&uvm.page_free[
  950                                             lcv].pgfl_buckets[
  951                                             color].pgfl_queues[i], pg, pageq);
  952                                         TAILQ_INSERT_TAIL(&pgfl.pgfl_buckets[
  953                                             VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
  954                                             i], pg, pageq);
  955                                 }
  956                         }
  957                 }
  958                 uvm.page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
  959         }
  960 
  961         if (have_recolored_pages) {
  962                 uvm_unlock_fpageq(s);
  963                 free(oldbucketarray, M_VMPAGE);
  964                 return;
  965         }
  966 
  967         have_recolored_pages = TRUE;
  968         uvm_unlock_fpageq(s);
  969 }
  970 
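      /*
       * [editor's note]  hedged sketch of where newncolors comes from: MD
       * cache autoconfiguration typically asks for one color per page-sized
       * slice of a way of the outermost physically-indexed cache, e.g.
       * (placeholder names):
       *
       *      ncolors = cache_size / (cache_ways * PAGE_SIZE);
       *      uvm_page_recolor(ncolors);     1 MiB direct-mapped -> 256 colors
       *
       * a page's color is then derived from its physical page number
       * (VM_PGCOLOR_BUCKET() masks it with uvmexp.colormask), so pages that
       * would collide in the cache end up in different buckets.
       */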
  971 /*
  972  * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat
  973  */
  974 
  975 static struct vm_page *
  976 uvm_pagealloc_pgfl(struct pgfreelist *pgfl, int try1, int try2,
  977     int *trycolorp)
  978 {
  979         struct pglist *freeq;
  980         struct vm_page *pg;
  981         int color, trycolor = *trycolorp;
  982 
  983         color = trycolor;
  984         do {
  985                 if ((pg = TAILQ_FIRST((freeq =
  986                     &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL)
  987                         goto gotit;
  988                 if ((pg = TAILQ_FIRST((freeq =
  989                     &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL)
  990                         goto gotit;
  991                 color = (color + 1) & uvmexp.colormask;
  992         } while (color != trycolor);
  993 
  994         return (NULL);
  995 
  996  gotit:
  997         TAILQ_REMOVE(freeq, pg, pageq);
  998         uvmexp.free--;
  999 
 1000         /* update zero'd page count */
 1001         if (pg->flags & PG_ZERO)
 1002                 uvmexp.zeropages--;
 1003 
 1004         if (color == trycolor)
 1005                 uvmexp.colorhit++;
 1006         else {
 1007                 uvmexp.colormiss++;
 1008                 *trycolorp = color;
 1009         }
 1010 
 1011         return (pg);
 1012 }
 1013 
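      /*
       * [editor's note]  trace of the color walk above, assuming
       * uvmexp.ncolors = 4 and *trycolorp = 2: the do/while visits colors
       * 2, 3, 0, 1 (checking the try1 then try2 queue of each) and stops at
       * the first page found.  colorhit is counted only if that page came
       * from color 2; otherwise colormiss is counted and the caller's
       * preferred color is updated to the color that actually had a page.
       */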
 1014 /*
 1015  * uvm_pagealloc_strat: allocate vm_page from a particular free list.
 1016  *
 1017  * => return null if no pages free
 1018  * => wake up pagedaemon if number of free pages drops below low water mark
 1019  * => if obj != NULL, obj must be locked (to put in hash)
 1020  * => if anon != NULL, anon must be locked (to put in anon)
 1021  * => only one of obj or anon can be non-null
 1022  * => caller must activate/deactivate page if it is not wired.
 1023  * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
 1024  * => policy decision: it is more important to pull a page off of the
 1025  *      appropriate priority free list than it is to get a zero'd or
 1026  *      unknown contents page.  This is because we live with the
 1027  *      consequences of a bad free list decision for the entire
 1028  *      lifetime of the page, e.g. if the page comes from memory that
 1029  *      is slower to access.
 1030  */
 1031 
 1032 struct vm_page *
 1033 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
 1034     int flags, int strat, int free_list)
 1035 {
 1036         int lcv, try1, try2, s, zeroit = 0, color;
 1037         struct vm_page *pg;
 1038         boolean_t use_reserve;
 1039 
 1040         KASSERT(obj == NULL || anon == NULL);
 1041         KASSERT(anon == NULL || off == 0);
 1042         KASSERT(off == trunc_page(off));
 1043         LOCK_ASSERT(obj == NULL || simple_lock_held(&obj->vmobjlock));
 1044         LOCK_ASSERT(anon == NULL || simple_lock_held(&anon->an_lock));
 1045 
 1046         s = uvm_lock_fpageq();
 1047 
 1048         /*
 1049          * This implements a global round-robin page coloring
 1050          * algorithm.
 1051          *
 1052          * XXXJRT: Should we make the `nextcolor' per-CPU?
 1053          * XXXJRT: What about virtually-indexed caches?
 1054          */
 1055 
 1056         color = uvm.page_free_nextcolor;
 1057 
 1058         /*
 1059          * check to see if we need to generate some free pages waking
  1060          * check to see if we need to generate some free pages by waking
 1061          */
 1062 
 1063         uvm_kick_pdaemon();
 1064 
 1065         /*
 1066          * fail if any of these conditions is true:
 1067          * [1]  there really are no free pages, or
 1068          * [2]  only kernel "reserved" pages remain and
 1069          *        the page isn't being allocated to a kernel object.
 1070          * [3]  only pagedaemon "reserved" pages remain and
 1071          *        the requestor isn't the pagedaemon.
 1072          */
 1073 
 1074         use_reserve = (flags & UVM_PGA_USERESERVE) ||
 1075                 (obj && UVM_OBJ_IS_KERN_OBJECT(obj));
 1076         if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
 1077             (uvmexp.free <= uvmexp.reserve_pagedaemon &&
 1078              !(use_reserve && curproc == uvm.pagedaemon_proc)))
 1079                 goto fail;
 1080 
 1081 #if PGFL_NQUEUES != 2
 1082 #error uvm_pagealloc_strat needs to be updated
 1083 #endif
 1084 
 1085         /*
 1086          * If we want a zero'd page, try the ZEROS queue first, otherwise
 1087          * we try the UNKNOWN queue first.
 1088          */
 1089         if (flags & UVM_PGA_ZERO) {
 1090                 try1 = PGFL_ZEROS;
 1091                 try2 = PGFL_UNKNOWN;
 1092         } else {
 1093                 try1 = PGFL_UNKNOWN;
 1094                 try2 = PGFL_ZEROS;
 1095         }
 1096 
 1097  again:
 1098         switch (strat) {
 1099         case UVM_PGA_STRAT_NORMAL:
 1100                 /* Check all freelists in descending priority order. */
 1101                 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
 1102                         pg = uvm_pagealloc_pgfl(&uvm.page_free[lcv],
 1103                             try1, try2, &color);
 1104                         if (pg != NULL)
 1105                                 goto gotit;
 1106                 }
 1107 
 1108                 /* No pages free! */
 1109                 goto fail;
 1110 
 1111         case UVM_PGA_STRAT_ONLY:
 1112         case UVM_PGA_STRAT_FALLBACK:
 1113                 /* Attempt to allocate from the specified free list. */
 1114                 KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
 1115                 pg = uvm_pagealloc_pgfl(&uvm.page_free[free_list],
 1116                     try1, try2, &color);
 1117                 if (pg != NULL)
 1118                         goto gotit;
 1119 
 1120                 /* Fall back, if possible. */
 1121                 if (strat == UVM_PGA_STRAT_FALLBACK) {
 1122                         strat = UVM_PGA_STRAT_NORMAL;
 1123                         goto again;
 1124                 }
 1125 
 1126                 /* No pages free! */
 1127                 goto fail;
 1128 
 1129         default:
 1130                 panic("uvm_pagealloc_strat: bad strat %d", strat);
 1131                 /* NOTREACHED */
 1132         }
 1133 
 1134  gotit:
 1135         /*
 1136          * We now know which color we actually allocated from; set
 1137          * the next color accordingly.
 1138          */
 1139 
 1140         uvm.page_free_nextcolor = (color + 1) & uvmexp.colormask;
 1141 
 1142         /*
 1143          * update allocation statistics and remember if we have to
 1144          * zero the page
 1145          */
 1146 
 1147         if (flags & UVM_PGA_ZERO) {
 1148                 if (pg->flags & PG_ZERO) {
 1149                         uvmexp.pga_zerohit++;
 1150                         zeroit = 0;
 1151                 } else {
 1152                         uvmexp.pga_zeromiss++;
 1153                         zeroit = 1;
 1154                 }
 1155         }
 1156         uvm_unlock_fpageq(s);
 1157 
 1158         pg->offset = off;
 1159         pg->uobject = obj;
 1160         pg->uanon = anon;
 1161         pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
 1162         if (anon) {
 1163                 anon->an_page = pg;
 1164                 pg->pqflags = PQ_ANON;
 1165                 uvmexp.anonpages++;
 1166         } else {
 1167                 if (obj) {
 1168                         uvm_pageinsert(pg);
 1169                 }
 1170                 pg->pqflags = 0;
 1171         }
 1172 #if defined(UVM_PAGE_TRKOWN)
 1173         pg->owner_tag = NULL;
 1174 #endif
 1175         UVM_PAGE_OWN(pg, "new alloc");
 1176 
 1177         if (flags & UVM_PGA_ZERO) {
 1178                 /*
 1179                  * A zero'd page is not clean.  If we got a page not already
 1180                  * zero'd, then we have to zero it ourselves.
 1181                  */
 1182                 pg->flags &= ~PG_CLEAN;
 1183                 if (zeroit)
 1184                         pmap_zero_page(VM_PAGE_TO_PHYS(pg));
 1185         }
 1186 
 1187         return(pg);
 1188 
 1189  fail:
 1190         uvm_unlock_fpageq(s);
 1191         return (NULL);
 1192 }
 1193 
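      /*
       * [editor's note]  most callers reach this routine through the
       * uvm_pagealloc() wrapper (the UVM_PGA_STRAT_NORMAL case).  a hedged
       * sketch of a common calling pattern, retrying when the pool is empty
       * ("pgwait" is a placeholder wait message):
       *
       *      struct vm_page *pg;
       *
       *      while ((pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO))
       *          == NULL)
       *              uvm_wait("pgwait");
       *
       * the object must stay locked across the call (the page is inserted
       * into its hash/memq here), and the page comes back PG_BUSY|PG_FAKE,
       * to be unbusied by the caller once it is initialized.
       */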
 1194 /*
 1195  * uvm_pagereplace: replace a page with another
 1196  *
 1197  * => object must be locked
 1198  */
 1199 
 1200 void
 1201 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)
 1202 {
 1203 
 1204         KASSERT((oldpg->flags & PG_TABLED) != 0);
 1205         KASSERT(oldpg->uobject != NULL);
 1206         KASSERT((newpg->flags & PG_TABLED) == 0);
 1207         KASSERT(newpg->uobject == NULL);
 1208         LOCK_ASSERT(simple_lock_held(&oldpg->uobject->vmobjlock));
 1209 
 1210         newpg->uobject = oldpg->uobject;
 1211         newpg->offset = oldpg->offset;
 1212 
 1213         uvm_pageinsert_after(newpg, oldpg);
 1214         uvm_pageremove(oldpg);
 1215 }
 1216 
 1217 /*
 1218  * uvm_pagerealloc: reallocate a page from one object to another
 1219  *
 1220  * => both objects must be locked
 1221  */
 1222 
 1223 void
 1224 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
 1225 {
 1226         /*
 1227          * remove it from the old object
 1228          */
 1229 
 1230         if (pg->uobject) {
 1231                 uvm_pageremove(pg);
 1232         }
 1233 
 1234         /*
 1235          * put it in the new object
 1236          */
 1237 
 1238         if (newobj) {
 1239                 pg->uobject = newobj;
 1240                 pg->offset = newoff;
 1241                 uvm_pageinsert(pg);
 1242         }
 1243 }
 1244 
 1245 #ifdef DEBUG
 1246 /*
 1247  * check if page is zero-filled
 1248  *
 1249  *  - called with free page queue lock held.
 1250  */
 1251 void
 1252 uvm_pagezerocheck(struct vm_page *pg)
 1253 {
 1254         int *p, *ep;
 1255 
 1256         KASSERT(uvm_zerocheckkva != 0);
 1257         LOCK_ASSERT(simple_lock_held(&uvm.fpageqlock));
 1258 
 1259         /*
 1260          * XXX assuming pmap_kenter_pa and pmap_kremove never call
 1261          * uvm page allocator.
 1262          *
 1263          * it might be better to have "CPU-local temporary map" pmap interface.
 1264          */
 1265         pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ);
 1266         p = (int *)uvm_zerocheckkva;
 1267         ep = (int *)((char *)p + PAGE_SIZE);
 1268         pmap_update(pmap_kernel());
 1269         while (p < ep) {
 1270                 if (*p != 0)
 1271                         panic("PG_ZERO page isn't zero-filled");
 1272                 p++;
 1273         }
 1274         pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
 1275 }
 1276 #endif /* DEBUG */
 1277 
 1278 /*
 1279  * uvm_pagefree: free page
 1280  *
 1281  * => erase page's identity (i.e. remove from hash/object)
 1282  * => put page on free list
 1283  * => caller must lock owning object (either anon or uvm_object)
 1284  * => caller must lock page queues
 1285  * => assumes all valid mappings of pg are gone
 1286  */
 1287 
 1288 void
 1289 uvm_pagefree(struct vm_page *pg)
 1290 {
 1291         int s;
 1292         struct pglist *pgfl;
 1293         boolean_t iszero;
 1294 
 1295         KASSERT((pg->flags & PG_PAGEOUT) == 0);
 1296         LOCK_ASSERT(simple_lock_held(&uvm.pageqlock) ||
 1297                     !uvmpdpol_pageisqueued_p(pg));
 1298         LOCK_ASSERT(pg->uobject == NULL ||
 1299                     simple_lock_held(&pg->uobject->vmobjlock));
 1300         LOCK_ASSERT(pg->uobject != NULL || pg->uanon == NULL ||
 1301                     simple_lock_held(&pg->uanon->an_lock));
 1302 
 1303 #ifdef DEBUG
 1304         if (pg->uobject == (void *)0xdeadbeef &&
 1305             pg->uanon == (void *)0xdeadbeef) {
 1306                 panic("uvm_pagefree: freeing free page %p", pg);
 1307         }
 1308 #endif /* DEBUG */
 1309 
 1310         /*
 1311          * if the page is loaned, resolve the loan instead of freeing.
 1312          */
 1313 
 1314         if (pg->loan_count) {
 1315                 KASSERT(pg->wire_count == 0);
 1316 
 1317                 /*
 1318                  * if the page is owned by an anon then we just want to
 1319                  * drop anon ownership.  the kernel will free the page when
 1320                  * it is done with it.  if the page is owned by an object,
 1321                  * remove it from the object and mark it dirty for the benefit
 1322                  * of possible anon owners.
 1323                  *
 1324                  * regardless of previous ownership, wakeup any waiters,
 1325                  * unbusy the page, and we're done.
 1326                  */
 1327 
 1328                 if (pg->uobject != NULL) {
 1329                         uvm_pageremove(pg);
 1330                         pg->flags &= ~PG_CLEAN;
 1331                 } else if (pg->uanon != NULL) {
 1332                         if ((pg->pqflags & PQ_ANON) == 0) {
 1333                                 pg->loan_count--;
 1334                         } else {
 1335                                 pg->pqflags &= ~PQ_ANON;
 1336                                 uvmexp.anonpages--;
 1337                         }
 1338                         pg->uanon->an_page = NULL;
 1339                         pg->uanon = NULL;
 1340                 }
 1341                 if (pg->flags & PG_WANTED) {
 1342                         wakeup(pg);
 1343                 }
 1344                 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
 1345 #ifdef UVM_PAGE_TRKOWN
 1346                 pg->owner_tag = NULL;
 1347 #endif
 1348                 if (pg->loan_count) {
 1349                         uvm_pagedequeue(pg);
 1350                         return;
 1351                 }
 1352         }
 1353 
 1354         /*
 1355          * remove page from its object or anon.
 1356          */
 1357 
 1358         if (pg->uobject != NULL) {
 1359                 uvm_pageremove(pg);
 1360         } else if (pg->uanon != NULL) {
 1361                 pg->uanon->an_page = NULL;
 1362                 uvmexp.anonpages--;
 1363         }
 1364 
 1365         /*
 1366          * now remove the page from the queues.
 1367          */
 1368 
 1369         uvm_pagedequeue(pg);
 1370 
 1371         /*
 1372          * if the page was wired, unwire it now.
 1373          */
 1374 
 1375         if (pg->wire_count) {
 1376                 pg->wire_count = 0;
 1377                 uvmexp.wired--;
 1378         }
 1379 
 1380         /*
 1381          * and put on free queue
 1382          */
 1383 
 1384         iszero = (pg->flags & PG_ZERO);
 1385         pgfl = &uvm.page_free[uvm_page_lookup_freelist(pg)].
 1386             pgfl_buckets[VM_PGCOLOR_BUCKET(pg)].
 1387             pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN];
 1388 
 1389         pg->pqflags = PQ_FREE;
 1390 #ifdef DEBUG
 1391         pg->uobject = (void *)0xdeadbeef;
 1392         pg->offset = 0xdeadbeef;
 1393         pg->uanon = (void *)0xdeadbeef;
 1394 #endif
 1395 
 1396         s = uvm_lock_fpageq();
 1397 
 1398 #ifdef DEBUG
 1399         if (iszero)
 1400                 uvm_pagezerocheck(pg);
 1401 #endif /* DEBUG */
 1402 
 1403         TAILQ_INSERT_HEAD(pgfl, pg, pageq);
 1404         uvmexp.free++;
 1405         if (iszero)
 1406                 uvmexp.zeropages++;
 1407 
 1408         if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
 1409                 uvm.page_idle_zero = vm_page_zero_enable;
 1410 
 1411         uvm_unlock_fpageq(s);
 1412 }
 1413 
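      /*
       * [editor's note]  hedged sketch of the locking a typical caller
       * provides around the free (uvm_lock_pageq()/uvm_unlock_pageq() are
       * the usual wrappers for uvm.pageqlock; real callers vary):
       *
       *      simple_lock(&uobj->vmobjlock);
       *      pmap_page_protect(pg, VM_PROT_NONE);     drop remaining mappings
       *      uvm_lock_pageq();
       *      uvm_pagefree(pg);
       *      uvm_unlock_pageq();
       *      simple_unlock(&uobj->vmobjlock);
       *
       * afterwards the page is poisoned under DEBUG (the 0xdeadbeef fields
       * above) and sits on the ZEROS or UNKNOWN free queue according to its
       * PG_ZERO flag.
       */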
 1414 /*
 1415  * uvm_page_unbusy: unbusy an array of pages.
 1416  *
 1417  * => pages must either all belong to the same object, or all belong to anons.
 1418  * => if pages are object-owned, object must be locked.
 1419  * => if pages are anon-owned, anons must be locked.
 1420  * => caller must lock page queues if pages may be released.
 1421  * => caller must make sure that anon-owned pages are not PG_RELEASED.
 1422  */
 1423 
 1424 void
 1425 uvm_page_unbusy(struct vm_page **pgs, int npgs)
 1426 {
 1427         struct vm_page *pg;
 1428         int i;
 1429         UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);
 1430 
 1431         for (i = 0; i < npgs; i++) {
 1432                 pg = pgs[i];
 1433                 if (pg == NULL || pg == PGO_DONTCARE) {
 1434                         continue;
 1435                 }
 1436 
 1437                 LOCK_ASSERT(pg->uobject == NULL ||
 1438                     simple_lock_held(&pg->uobject->vmobjlock));
 1439                 LOCK_ASSERT(pg->uobject != NULL ||
 1440                     (pg->uanon != NULL &&
 1441                     simple_lock_held(&pg->uanon->an_lock)));
 1442 
 1443                 KASSERT(pg->flags & PG_BUSY);
 1444                 KASSERT((pg->flags & PG_PAGEOUT) == 0);
 1445                 if (pg->flags & PG_WANTED) {
 1446                         wakeup(pg);
 1447                 }
 1448                 if (pg->flags & PG_RELEASED) {
 1449                         UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
 1450                         KASSERT(pg->uobject != NULL ||
 1451                             (pg->uanon != NULL && pg->uanon->an_ref > 0));
 1452                         pg->flags &= ~PG_RELEASED;
 1453                         uvm_pagefree(pg);
 1454                 } else {
 1455                         UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
 1456                         pg->flags &= ~(PG_WANTED|PG_BUSY);
 1457                         UVM_PAGE_OWN(pg, NULL);
 1458                 }
 1459         }
 1460 }
 1461 
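/*
 * Sketch of a typical use of uvm_page_unbusy(): once I/O on a cluster of
 * object-owned pages completes, the object is locked and the whole array is
 * unbusied in one call.  The page queues are taken as well because some of
 * the pages may be PG_RELEASED and get freed, per the rules above.  "uobj",
 * "pgs" and "npages" are hypothetical.
 */
static void
example_iodone_unbusy(struct uvm_object *uobj, struct vm_page **pgs,
    int npages)
{

        simple_lock(&uobj->vmobjlock);
        uvm_lock_pageq();               /* in case PG_RELEASED pages are freed */
        uvm_page_unbusy(pgs, npages);
        uvm_unlock_pageq();
        simple_unlock(&uobj->vmobjlock);
}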
 1462 #if defined(UVM_PAGE_TRKOWN)
 1463 /*
 1464  * uvm_page_own: set or release page ownership
 1465  *
 1466  * => this is a debugging function that keeps track of who sets PG_BUSY
 1467  *      and where they do it.   it can be used to track down problems
 1468  *      such as a process setting "PG_BUSY" and never releasing it.
 1469  * => page's object [if any] must be locked
 1470  * => if "tag" is NULL then we are releasing page ownership
 1471  */
 1472 void
 1473 uvm_page_own(struct vm_page *pg, const char *tag)
 1474 {
 1475         struct uvm_object *uobj;
 1476         struct vm_anon *anon;
 1477 
 1478         KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
 1479 
 1480         uobj = pg->uobject;
 1481         anon = pg->uanon;
 1482         if (uobj != NULL) {
 1483                 LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock));
 1484         } else if (anon != NULL) {
 1485                 LOCK_ASSERT(simple_lock_held(&anon->an_lock));
 1486         }
 1487 
 1488         KASSERT((pg->flags & PG_WANTED) == 0);
 1489 
 1490         /* gain ownership? */
 1491         if (tag) {
 1492                 KASSERT((pg->flags & PG_BUSY) != 0);
 1493                 if (pg->owner_tag) {
 1494                         printf("uvm_page_own: page %p already owned "
 1495                             "by proc %d [%s]\n", pg,
 1496                             pg->owner, pg->owner_tag);
 1497                         panic("uvm_page_own");
 1498                 }
 1499                 pg->owner = (curproc) ? curproc->p_pid :  (pid_t) -1;
 1500                 pg->owner_tag = tag;
 1501                 return;
 1502         }
 1503 
 1504         /* drop ownership */
 1505         KASSERT((pg->flags & PG_BUSY) == 0);
 1506         if (pg->owner_tag == NULL) {
 1507                 printf("uvm_page_own: dropping ownership of a non-owned "
 1508                     "page (%p)\n", pg);
 1509                 panic("uvm_page_own");
 1510         }
 1511         KASSERT(uvmpdpol_pageisqueued_p(pg) ||
 1512             (pg->uanon == NULL && pg->uobject == NULL) ||
 1513             pg->uobject == uvm.kernel_object ||
 1514             pg->wire_count > 0 ||
 1515             (pg->loan_count == 1 && pg->uanon == NULL) ||
 1516             pg->loan_count > 1);
 1517         pg->owner_tag = NULL;
 1518 }
 1519 #endif
 1520 
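/*
 * Sketch of how the ownership tracking above is normally driven: code that
 * sets PG_BUSY (with the object locked) records a tag via the UVM_PAGE_OWN()
 * macro, which is a no-op unless UVM_PAGE_TRKOWN is defined, and clears it
 * again when the page is unbusied.  The function and tag string here are
 * hypothetical.
 */
static void
example_busy_page(struct vm_page *pg)
{

        pg->flags |= PG_BUSY;
        UVM_PAGE_OWN(pg, "example_busy_page");

        /* ... use the page exclusively ... */

        if (pg->flags & PG_WANTED)
                wakeup(pg);
        pg->flags &= ~(PG_WANTED|PG_BUSY);
        UVM_PAGE_OWN(pg, NULL);
}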
 1521 /*
 1522  * uvm_pageidlezero: zero free pages while the system is idle.
 1523  *
 1524  * => try to complete one color bucket at a time, to reduce our impact
 1525  *      on the CPU cache.
 1526  * => we loop until we either reach the target or whichqs indicates that
 1527  *      there is a process ready to run.
 1528  */
 1529 void
 1530 uvm_pageidlezero(void)
 1531 {
 1532         struct vm_page *pg;
 1533         struct pgfreelist *pgfl;
 1534         int free_list, s, firstbucket;
 1535         static int nextbucket;
 1536 
 1537         KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE);
 1538         s = uvm_lock_fpageq();
 1539         firstbucket = nextbucket;
 1540         do {
 1541                 if (sched_whichqs != 0)
 1542                         goto quit;
 1543                 if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) {
 1544                         uvm.page_idle_zero = FALSE;
 1545                         goto quit;
 1546                 }
 1547                 for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
 1548                         pgfl = &uvm.page_free[free_list];
 1549                         while ((pg = TAILQ_FIRST(&pgfl->pgfl_buckets[
 1550                             nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
 1551                                 if (sched_whichqs != 0)
 1552                                         goto quit;
 1553 
 1554                                 TAILQ_REMOVE(&pgfl->pgfl_buckets[
 1555                                     nextbucket].pgfl_queues[PGFL_UNKNOWN],
 1556                                     pg, pageq);
 1557                                 uvmexp.free--;
 1558                                 uvm_unlock_fpageq(s);
 1559                                 KERNEL_UNLOCK();
 1560 #ifdef PMAP_PAGEIDLEZERO
 1561                                 if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) {
 1562 
 1563                                         /*
 1564                                          * The machine-dependent code detected
 1565                                          * some reason for us to abort zeroing
 1566                                          * pages, probably because there is a
 1567                                          * process now ready to run.
 1568                                          */
 1569 
 1570                                         KERNEL_LOCK(
 1571                                             LK_EXCLUSIVE | LK_CANRECURSE);
 1572                                         s = uvm_lock_fpageq();
 1573                                         TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[
 1574                                             nextbucket].pgfl_queues[
 1575                                             PGFL_UNKNOWN], pg, pageq);
 1576                                         uvmexp.free++;
 1577                                         uvmexp.zeroaborts++;
 1578                                         goto quit;
 1579                                 }
 1580 #else
 1581                                 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
 1582 #endif /* PMAP_PAGEIDLEZERO */
 1583                                 pg->flags |= PG_ZERO;
 1584 
 1585                                 KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE);
 1586                                 s = uvm_lock_fpageq();
 1587                                 TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[
 1588                                     nextbucket].pgfl_queues[PGFL_ZEROS],
 1589                                     pg, pageq);
 1590                                 uvmexp.free++;
 1591                                 uvmexp.zeropages++;
 1592                         }
 1593                 }
 1594                 nextbucket = (nextbucket + 1) & uvmexp.colormask;
 1595         } while (nextbucket != firstbucket);
 1596 quit:
 1597         uvm_unlock_fpageq(s);
 1598         KERNEL_UNLOCK();
 1599 }
 1600 
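/*
 * Sketch of the intended call site for uvm_pageidlezero(): the
 * machine-dependent idle loop polls uvm.page_idle_zero and, while nothing is
 * runnable, spends the idle time pre-zeroing free pages.  The loop shown here
 * is hypothetical; the real hook lives in the MD idle code.
 */
static void
example_idle_loop(void)
{

        for (;;) {
                if (sched_whichqs != 0)
                        break;                  /* something became runnable */
                if (uvm.page_idle_zero)
                        uvm_pageidlezero();
        }
}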
 1601 /*
 1602  * uvm_lock_fpageq: lock the free page queue
 1603  *
 1604  * => free page queue can be accessed in interrupt context, so this
 1605  *      blocks all interrupts that can cause memory allocation, and
 1606  *      returns the previous interrupt level.
 1607  */
 1608 
 1609 int
 1610 uvm_lock_fpageq(void)
 1611 {
 1612         int s;
 1613 
 1614         s = splvm();
 1615         simple_lock(&uvm.fpageqlock);
 1616         return (s);
 1617 }
 1618 
 1619 /*
 1620  * uvm_unlock_fpageq: unlock the free page queue
 1621  *
 1622  * => caller must supply interrupt level returned by uvm_lock_fpageq()
 1623  *      so that it may be restored.
 1624  */
 1625 
 1626 void
 1627 uvm_unlock_fpageq(int s)
 1628 {
 1629 
 1630         simple_unlock(&uvm.fpageqlock);
 1631         splx(s);
 1632 }
 1633 
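/*
 * The usual bracketing pattern for the free page queue lock: the interrupt
 * level returned by uvm_lock_fpageq() must be handed back to
 * uvm_unlock_fpageq() so the previous level is restored.  The statistics
 * read shown here is only an illustration.
 */
static int
example_read_free_count(void)
{
        int s, nfree;

        s = uvm_lock_fpageq();
        nfree = uvmexp.free;
        uvm_unlock_fpageq(s);
        return (nfree);
}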
 1634 /*
 1635  * uvm_pagelookup: look up a page
 1636  *
 1637  * => caller should lock object to keep someone from pulling the page
 1638  *      out from under it
 1639  */
 1640 
 1641 struct vm_page *
 1642 uvm_pagelookup(struct uvm_object *obj, voff_t off)
 1643 {
 1644         struct vm_page *pg;
 1645         struct pglist *buck;
 1646 
 1647         buck = &uvm.page_hash[uvm_pagehash(obj,off)];
 1648         simple_lock(&uvm.hashlock);
 1649         TAILQ_FOREACH(pg, buck, hashq) {
 1650                 if (pg->uobject == obj && pg->offset == off) {
 1651                         break;
 1652                 }
 1653         }
 1654         simple_unlock(&uvm.hashlock);
 1655         KASSERT(pg == NULL || obj->uo_npages != 0);
 1656         KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
 1657                 (pg->flags & PG_BUSY) != 0);
 1658         return(pg);
 1659 }
 1660 
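/*
 * Sketch of a lookup: the object is locked around uvm_pagelookup() so the
 * page cannot be freed or moved while it is examined.  Note the answer can
 * go stale once the lock is dropped.  "uobj" and "off" are hypothetical.
 */
static boolean_t
example_page_is_resident(struct uvm_object *uobj, voff_t off)
{
        struct vm_page *pg;

        simple_lock(&uobj->vmobjlock);
        pg = uvm_pagelookup(uobj, off);
        simple_unlock(&uobj->vmobjlock);
        return (pg != NULL);
}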
 1661 /*
 1662  * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
 1663  *
 1664  * => caller must lock page queues
 1665  */
 1666 
 1667 void
 1668 uvm_pagewire(struct vm_page *pg)
 1669 {
 1670         UVM_LOCK_ASSERT_PAGEQ();
 1671 #if defined(READAHEAD_STATS)
 1672         if ((pg->pqflags & PQ_READAHEAD) != 0) {
 1673                 uvm_ra_hit.ev_count++;
 1674                 pg->pqflags &= ~PQ_READAHEAD;
 1675         }
 1676 #endif /* defined(READAHEAD_STATS) */
 1677         if (pg->wire_count == 0) {
 1678                 uvm_pagedequeue(pg);
 1679                 uvmexp.wired++;
 1680         }
 1681         pg->wire_count++;
 1682 }
 1683 
 1684 /*
 1685  * uvm_pageunwire: unwire the page.
 1686  *
 1687  * => activate if wire count goes to zero.
 1688  * => caller must lock page queues
 1689  */
 1690 
 1691 void
 1692 uvm_pageunwire(struct vm_page *pg)
 1693 {
 1694         UVM_LOCK_ASSERT_PAGEQ();
 1695         pg->wire_count--;
 1696         if (pg->wire_count == 0) {
 1697                 uvm_pageactivate(pg);
 1698                 uvmexp.wired--;
 1699         }
 1700 }
 1701 
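/*
 * Sketch of the wire/unwire bracket used when a page must stay resident
 * (e.g. while it is the target of device I/O): both calls are made with the
 * page queues locked, matching the requirements above.  The surrounding
 * function is hypothetical.
 */
static void
example_pin_for_io(struct vm_page *pg)
{

        uvm_lock_pageq();
        uvm_pagewire(pg);               /* take it away from the pagedaemon */
        uvm_unlock_pageq();

        /* ... perform the I/O against the now-resident page ... */

        uvm_lock_pageq();
        uvm_pageunwire(pg);             /* reactivated once wire_count hits 0 */
        uvm_unlock_pageq();
}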
 1702 /*
 1703  * uvm_pagedeactivate: deactivate page
 1704  *
 1705  * => caller must lock page queues
 1706  * => caller must check to make sure page is not wired
 1707  * => object that page belongs to must be locked (so we can adjust pg->flags)
 1708  * => caller must clear the reference on the page before calling
 1709  */
 1710 
 1711 void
 1712 uvm_pagedeactivate(struct vm_page *pg)
 1713 {
 1714 
 1715         UVM_LOCK_ASSERT_PAGEQ();
 1716         KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg));
 1717         uvmpdpol_pagedeactivate(pg);
 1718 }
 1719 
 1720 /*
 1721  * uvm_pageactivate: activate page
 1722  *
 1723  * => caller must lock page queues
 1724  */
 1725 
 1726 void
 1727 uvm_pageactivate(struct vm_page *pg)
 1728 {
 1729 
 1730         UVM_LOCK_ASSERT_PAGEQ();
 1731 #if defined(READAHEAD_STATS)
 1732         if ((pg->pqflags & PQ_READAHEAD) != 0) {
 1733                 uvm_ra_hit.ev_count++;
 1734                 pg->pqflags &= ~PQ_READAHEAD;
 1735         }
 1736 #endif /* defined(READAHEAD_STATS) */
 1737         if (pg->wire_count != 0) {
 1738                 return;
 1739         }
 1740         uvmpdpol_pageactivate(pg);
 1741 }
 1742 
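/*
 * Sketch of how the two queue operations are typically paired: a recently
 * referenced page is (re)activated, an unreferenced one is deactivated so
 * the pagedaemon will consider reclaiming it.  The pmap_clear_reference()
 * call mirrors the "caller must clear the reference" rule above.  This
 * assumes the page is already on a paging queue and its object is locked;
 * the function itself is hypothetical.
 */
static void
example_age_page(struct vm_page *pg)
{

        uvm_lock_pageq();
        if (pg->wire_count == 0) {
                if (pmap_clear_reference(pg))
                        uvm_pageactivate(pg);   /* recently used */
                else
                        uvm_pagedeactivate(pg); /* candidate for reclaim */
        }
        uvm_unlock_pageq();
}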
 1743 /*
 1744  * uvm_pagedequeue: remove a page from any paging queue
 1745  */
 1746 
 1747 void
 1748 uvm_pagedequeue(struct vm_page *pg)
 1749 {
 1750 
 1751 #if defined(LOCKDEBUG)
 1752         if (uvmpdpol_pageisqueued_p(pg)) {
 1753                 UVM_LOCK_ASSERT_PAGEQ();
 1754         }
 1755 #endif /* defined(LOCKDEBUG) */
 1756         uvmpdpol_pagedequeue(pg);
 1757 }
 1758 
 1759 /*
 1760  * uvm_pageenqueue: add a page to a paging queue without activating.
 1761  * used where a page is not really demanded (yet), e.g. read-ahead.
 1762  */
 1763 
 1764 void
 1765 uvm_pageenqueue(struct vm_page *pg)
 1766 {
 1767 
 1768         UVM_LOCK_ASSERT_PAGEQ();
 1769         if (pg->wire_count != 0) {
 1770                 return;
 1771         }
 1772         uvmpdpol_pageenqueue(pg);
 1773 }
 1774 
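/*
 * Sketch of the read-ahead case mentioned above: a speculatively allocated
 * page that nobody has demanded yet is placed on a paging queue without
 * being activated, so it is cheap to reclaim if the speculation was wrong.
 * The page comes back PG_BUSY and would normally be handed to the pager for
 * the read next; "uobj" and "off" are hypothetical and error handling is
 * omitted.
 */
static void
example_enqueue_readahead(struct uvm_object *uobj, voff_t off)
{
        struct vm_page *pg;

        simple_lock(&uobj->vmobjlock);
        pg = uvm_pagealloc(uobj, off, NULL, 0);
        if (pg != NULL) {
                uvm_lock_pageq();
                uvm_pageenqueue(pg);
                uvm_unlock_pageq();
        }
        simple_unlock(&uobj->vmobjlock);
}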
 1775 /*
 1776  * uvm_pagezero: zero fill a page
 1777  *
 1778  * => if page is part of an object then the object should be locked
 1779  *      to protect pg->flags.
 1780  */
 1781 
 1782 void
 1783 uvm_pagezero(struct vm_page *pg)
 1784 {
 1785         pg->flags &= ~PG_CLEAN;
 1786         pmap_zero_page(VM_PAGE_TO_PHYS(pg));
 1787 }
 1788 
 1789 /*
 1790  * uvm_pagecopy: copy a page
 1791  *
 1792  * => if page is part of an object then the object should be locked
 1793  *      to protect pg->flags.
 1794  */
 1795 
 1796 void
 1797 uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
 1798 {
 1799 
 1800         dst->flags &= ~PG_CLEAN;
 1801         pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
 1802 }
 1803 
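/*
 * Sketch of a copy-on-write style use of uvm_pagecopy(): a new page is
 * allocated and filled from the original; the destination is left dirty
 * (PG_CLEAN cleared by uvm_pagecopy()).  "uobj" and "off" are hypothetical
 * and error handling is omitted.
 */
static struct vm_page *
example_clone_page(struct uvm_object *uobj, voff_t off, struct vm_page *src)
{
        struct vm_page *dst;

        simple_lock(&uobj->vmobjlock);
        dst = uvm_pagealloc(uobj, off, NULL, 0);
        if (dst != NULL) {
                uvm_pagecopy(src, dst);         /* clears PG_CLEAN on dst */
                dst->flags &= ~(PG_BUSY|PG_FAKE);
                UVM_PAGE_OWN(dst, NULL);
        }
        simple_unlock(&uobj->vmobjlock);
        return (dst);
}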
 1804 /*
 1805  * uvm_page_lookup_freelist: look up the free list for the specified page
 1806  */
 1807 
 1808 int
 1809 uvm_page_lookup_freelist(struct vm_page *pg)
 1810 {
 1811         int lcv;
 1812 
 1813         lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
 1814         KASSERT(lcv != -1);
 1815         return (vm_physmem[lcv].free_list);
 1816 }
