FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/pmap.c

    1 /*-
    2  * Copyright (c) 1991 Regents of the University of California.
    3  * All rights reserved.
    4  * Copyright (c) 1994 John S. Dyson
    5  * All rights reserved.
    6  * Copyright (c) 1994 David Greenman
    7  * All rights reserved.
    8  * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
    9  * All rights reserved.
   10  *
   11  * This code is derived from software contributed to Berkeley by
   12  * the Systems Programming Group of the University of Utah Computer
   13  * Science Department and William Jolitz of UUNET Technologies Inc.
   14  *
   15  * Redistribution and use in source and binary forms, with or without
   16  * modification, are permitted provided that the following conditions
   17  * are met:
   18  * 1. Redistributions of source code must retain the above copyright
   19  *    notice, this list of conditions and the following disclaimer.
   20  * 2. Redistributions in binary form must reproduce the above copyright
   21  *    notice, this list of conditions and the following disclaimer in the
   22  *    documentation and/or other materials provided with the distribution.
   23  * 3. All advertising materials mentioning features or use of this software
   24  *    must display the following acknowledgement:
   25  *      This product includes software developed by the University of
   26  *      California, Berkeley and its contributors.
   27  * 4. Neither the name of the University nor the names of its contributors
   28  *    may be used to endorse or promote products derived from this software
   29  *    without specific prior written permission.
   30  *
   31  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   32  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   33  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   34  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   35  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   36  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   37  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   38  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   39  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   40  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   41  * SUCH DAMAGE.
   42  *
   43  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
   44  */
   45 /*-
   46  * Copyright (c) 2003 Networks Associates Technology, Inc.
   47  * All rights reserved.
   48  *
   49  * This software was developed for the FreeBSD Project by Jake Burkholder,
   50  * Safeport Network Services, and Network Associates Laboratories, the
   51  * Security Research Division of Network Associates, Inc. under
   52  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
   53  * CHATS research program.
   54  *
   55  * Redistribution and use in source and binary forms, with or without
   56  * modification, are permitted provided that the following conditions
   57  * are met:
   58  * 1. Redistributions of source code must retain the above copyright
   59  *    notice, this list of conditions and the following disclaimer.
   60  * 2. Redistributions in binary form must reproduce the above copyright
   61  *    notice, this list of conditions and the following disclaimer in the
   62  *    documentation and/or other materials provided with the distribution.
   63  *
   64  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   65  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   66  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   67  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   68  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   69  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   70  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   71  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   72  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   73  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   74  * SUCH DAMAGE.
   75  */
   76 
   77 #include <sys/cdefs.h>
   78 __FBSDID("$FreeBSD: releng/8.1/sys/i386/i386/pmap.c 208903 2010-06-08 04:41:31Z alc $");
   79 
   80 /*
   81  *      Manages physical address maps.
   82  *
   83  *      In addition to hardware address maps, this
   84  *      module is called upon to provide software-use-only
   85  *      maps which may or may not be stored in the same
   86  *      form as hardware maps.  These pseudo-maps are
   87  *      used to store intermediate results from copy
   88  *      operations to and from address spaces.
   89  *
   90  *      Since the information managed by this module is
   91  *      also stored by the logical address mapping module,
   92  *      this module may throw away valid virtual-to-physical
   93  *      mappings at almost any time.  However, invalidations
   94  *      of virtual-to-physical mappings must be done as
   95  *      requested.
   96  *
   97  *      In order to cope with hardware architectures which
   98  *      make virtual-to-physical map invalidates expensive,
    99  *      this module may delay invalidation or protection-reduction
  100  *      operations until such time as they are actually
  101  *      necessary.  This module is given full information as
  102  *      to which processors are currently using which maps,
  103  *      and to when physical maps must be made correct.
  104  */
  105 
  106 #include "opt_cpu.h"
  107 #include "opt_pmap.h"
  108 #include "opt_msgbuf.h"
  109 #include "opt_smp.h"
  110 #include "opt_xbox.h"
  111 
  112 #include <sys/param.h>
  113 #include <sys/systm.h>
  114 #include <sys/kernel.h>
  115 #include <sys/ktr.h>
  116 #include <sys/lock.h>
  117 #include <sys/malloc.h>
  118 #include <sys/mman.h>
  119 #include <sys/msgbuf.h>
  120 #include <sys/mutex.h>
  121 #include <sys/proc.h>
  122 #include <sys/sf_buf.h>
  123 #include <sys/sx.h>
  124 #include <sys/vmmeter.h>
  125 #include <sys/sched.h>
  126 #include <sys/sysctl.h>
  127 #ifdef SMP
  128 #include <sys/smp.h>
  129 #endif
  130 
  131 #include <vm/vm.h>
  132 #include <vm/vm_param.h>
  133 #include <vm/vm_kern.h>
  134 #include <vm/vm_page.h>
  135 #include <vm/vm_map.h>
  136 #include <vm/vm_object.h>
  137 #include <vm/vm_extern.h>
  138 #include <vm/vm_pageout.h>
  139 #include <vm/vm_pager.h>
  140 #include <vm/vm_reserv.h>
  141 #include <vm/uma.h>
  142 
  143 #include <machine/cpu.h>
  144 #include <machine/cputypes.h>
  145 #include <machine/md_var.h>
  146 #include <machine/pcb.h>
  147 #include <machine/specialreg.h>
  148 #ifdef SMP
  149 #include <machine/smp.h>
  150 #endif
  151 
  152 #ifdef XBOX
  153 #include <machine/xbox.h>
  154 #endif
  155 
  156 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
  157 #define CPU_ENABLE_SSE
  158 #endif
  159 
  160 #ifndef PMAP_SHPGPERPROC
  161 #define PMAP_SHPGPERPROC 200
  162 #endif
  163 
  164 #if !defined(DIAGNOSTIC)
  165 #define PMAP_INLINE     __gnu89_inline
  166 #else
  167 #define PMAP_INLINE
  168 #endif
  169 
  170 #define PV_STATS
  171 #ifdef PV_STATS
  172 #define PV_STAT(x)      do { x ; } while (0)
  173 #else
  174 #define PV_STAT(x)      do { } while (0)
  175 #endif
  176 
  177 #define pa_index(pa)    ((pa) >> PDRSHIFT)
  178 #define pa_to_pvh(pa)   (&pv_table[pa_index(pa)])
  179 
  180 /*
  181  * Get PDEs and PTEs for user/kernel address space
  182  */
  183 #define pmap_pde(m, v)  (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
  184 #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
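
/*
 * Illustrative sketch, not part of the original file: how the macros
 * above decompose a virtual address on a non-PAE i386 configuration
 * (PDRSHIFT == 22, PAGE_SHIFT == 12 and NPTEPG == 1024 are assumed
 * here; the "example_" name is hypothetical).  For va == 0xc0403000
 * the page-directory index is 0x301 and the page-table index is 0x003.
 */
static __inline void
example_va_decompose(vm_offset_t va, u_int *pdi, u_int *pti)
{

        *pdi = va >> PDRSHIFT;                          /* index into pm_pdir[] */
        *pti = (va >> PAGE_SHIFT) & (NPTEPG - 1);       /* index within the PT page */
}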
  185 
  186 #define pmap_pde_v(pte)         ((*(int *)pte & PG_V) != 0)
  187 #define pmap_pte_w(pte)         ((*(int *)pte & PG_W) != 0)
  188 #define pmap_pte_m(pte)         ((*(int *)pte & PG_M) != 0)
  189 #define pmap_pte_u(pte)         ((*(int *)pte & PG_A) != 0)
  190 #define pmap_pte_v(pte)         ((*(int *)pte & PG_V) != 0)
  191 
  192 #define pmap_pte_set_w(pte, v)  ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \
  193     atomic_clear_int((u_int *)(pte), PG_W))
  194 #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
  195 
  196 struct pmap kernel_pmap_store;
  197 LIST_HEAD(pmaplist, pmap);
  198 static struct pmaplist allpmaps;
  199 static struct mtx allpmaps_lock;
  200 
  201 vm_offset_t virtual_avail;      /* VA of first avail page (after kernel bss) */
  202 vm_offset_t virtual_end;        /* VA of last avail page (end of kernel AS) */
  203 int pgeflag = 0;                /* PG_G or-in */
  204 int pseflag = 0;                /* PG_PS or-in */
  205 
  206 static int nkpt;
  207 vm_offset_t kernel_vm_end;
  208 extern u_int32_t KERNend;
  209 extern u_int32_t KPTphys;
  210 
  211 #ifdef PAE
  212 pt_entry_t pg_nx;
  213 static uma_zone_t pdptzone;
  214 #endif
  215 
  216 static int pat_works;                   /* Is page attribute table sane? */
  217 
  218 SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
  219 
  220 static int pg_ps_enabled;
  221 SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
  222     "Are large page mappings enabled?");
  223 
  224 /*
  225  * Data for the pv entry allocation mechanism
  226  */
  227 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
  228 static struct md_page *pv_table;
  229 static int shpgperproc = PMAP_SHPGPERPROC;
  230 
  231 struct pv_chunk *pv_chunkbase;          /* KVA block for pv_chunks */
  232 int pv_maxchunks;                       /* How many chunks we have KVA for */
  233 vm_offset_t pv_vafree;                  /* freelist stored in the PTE */
  234 
  235 /*
  236  * All those kernel PT submaps that BSD is so fond of
  237  */
  238 struct sysmaps {
  239         struct  mtx lock;
  240         pt_entry_t *CMAP1;
  241         pt_entry_t *CMAP2;
  242         caddr_t CADDR1;
  243         caddr_t CADDR2;
  244 };
  245 static struct sysmaps sysmaps_pcpu[MAXCPU];
  246 pt_entry_t *CMAP1 = 0, *KPTmap;
  247 static pt_entry_t *CMAP3;
  248 static pd_entry_t *KPTD;
  249 caddr_t CADDR1 = 0, ptvmmap = 0;
  250 static caddr_t CADDR3;
  251 struct msgbuf *msgbufp = 0;
  252 
  253 /*
  254  * Crashdump maps.
  255  */
  256 static caddr_t crashdumpmap;
  257 
  258 static pt_entry_t *PMAP1 = 0, *PMAP2;
  259 static pt_entry_t *PADDR1 = 0, *PADDR2;
  260 #ifdef SMP
  261 static int PMAP1cpu;
  262 static int PMAP1changedcpu;
  263 SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, 
  264            &PMAP1changedcpu, 0,
  265            "Number of times pmap_pte_quick changed CPU with same PMAP1");
  266 #endif
  267 static int PMAP1changed;
  268 SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, 
  269            &PMAP1changed, 0,
  270            "Number of times pmap_pte_quick changed PMAP1");
  271 static int PMAP1unchanged;
  272 SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, 
  273            &PMAP1unchanged, 0,
  274            "Number of times pmap_pte_quick didn't change PMAP1");
  275 static struct mtx PMAP2mutex;
  276 
  277 static void     free_pv_entry(pmap_t pmap, pv_entry_t pv);
  278 static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
  279 static void     pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
  280 static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
  281 static void     pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
  282 static void     pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
  283 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
  284                     vm_offset_t va);
  285 static int      pmap_pvh_wired_mappings(struct md_page *pvh, int count);
  286 
  287 static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
  288 static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
  289     vm_prot_t prot);
  290 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
  291     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
  292 static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
  293 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
  294 static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
  295 static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
  296 static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
  297 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
  298 static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
  299 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
  300 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
  301     vm_prot_t prot);
  302 static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
  303 static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
  304     vm_page_t *free);
  305 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
  306     vm_page_t *free);
  307 static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
  308 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
  309     vm_page_t *free);
  310 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
  311                                         vm_offset_t va);
  312 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
  313 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
  314     vm_page_t m);
  315 static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
  316     pd_entry_t newpde);
  317 static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
  318 
  319 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
  320 
  321 static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
  322 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
  323 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
  324 static void pmap_pte_release(pt_entry_t *pte);
  325 static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
  326 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
  327 #ifdef PAE
  328 static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
  329 #endif
  330 
  331 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
  332 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
  333 
  334 /*
  335  * If you get an error here, then you set KVA_PAGES wrong! See the
  336  * description of KVA_PAGES in sys/i386/include/pmap.h. It must be
   337  * a multiple of 4 for a normal kernel, or a multiple of 8 for a PAE kernel.
  338  */
  339 CTASSERT(KERNBASE % (1 << 24) == 0);
  340 
  341 /*
  342  * Move the kernel virtual free pointer to the next
  343  * 4MB.  This is used to help improve performance
  344  * by using a large (4MB) page for much of the kernel
  345  * (.text, .data, .bss)
  346  */
  347 static vm_offset_t
  348 pmap_kmem_choose(vm_offset_t addr)
  349 {
  350         vm_offset_t newaddr = addr;
  351 
  352 #ifndef DISABLE_PSE
  353         if (cpu_feature & CPUID_PSE)
  354                 newaddr = (addr + PDRMASK) & ~PDRMASK;
  355 #endif
  356         return newaddr;
  357 }
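
/*
 * Worked example (illustrative only, assuming the non-PAE PDRMASK of
 * 0x3fffff): when CPUID_PSE is present, pmap_kmem_choose(0xc0123456)
 * returns (0xc0123456 + 0x3fffff) & ~0x3fffff == 0xc0400000, i.e. the
 * address rounded up to the next 4MB superpage boundary.
 */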
  358 
  359 /*
  360  *      Bootstrap the system enough to run with virtual memory.
  361  *
  362  *      On the i386 this is called after mapping has already been enabled
  363  *      and just syncs the pmap module with what has already been done.
  364  *      [We can't call it easily with mapping off since the kernel is not
  365  *      mapped with PA == VA, hence we would have to relocate every address
  366  *      from the linked base (virtual) address "KERNBASE" to the actual
  367  *      (physical) address starting relative to 0]
  368  */
  369 void
  370 pmap_bootstrap(vm_paddr_t firstaddr)
  371 {
  372         vm_offset_t va;
  373         pt_entry_t *pte, *unused;
  374         struct sysmaps *sysmaps;
  375         int i;
  376 
  377         /*
  378          * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
  379          * large. It should instead be correctly calculated in locore.s and
  380          * not based on 'first' (which is a physical address, not a virtual
  381          * address, for the start of unused physical memory). The kernel
  382          * page tables are NOT double mapped and thus should not be included
  383          * in this calculation.
  384          */
  385         virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
  386         virtual_avail = pmap_kmem_choose(virtual_avail);
  387 
  388         virtual_end = VM_MAX_KERNEL_ADDRESS;
  389 
  390         /*
  391          * Initialize the kernel pmap (which is statically allocated).
  392          */
  393         PMAP_LOCK_INIT(kernel_pmap);
  394         kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
  395 #ifdef PAE
  396         kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
  397 #endif
  398         kernel_pmap->pm_root = NULL;
  399         kernel_pmap->pm_active = -1;    /* don't allow deactivation */
  400         TAILQ_INIT(&kernel_pmap->pm_pvchunk);
  401         LIST_INIT(&allpmaps);
  402 
  403         /*
  404          * Request a spin mutex so that changes to allpmaps cannot be
  405          * preempted by smp_rendezvous_cpus().  Otherwise,
  406          * pmap_update_pde_kernel() could access allpmaps while it is
  407          * being changed.
  408          */
  409         mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
  410         mtx_lock_spin(&allpmaps_lock);
  411         LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
  412         mtx_unlock_spin(&allpmaps_lock);
  413         nkpt = NKPT;
  414 
  415         /*
  416          * Reserve some special page table entries/VA space for temporary
  417          * mapping of pages.
  418          */
  419 #define SYSMAP(c, p, v, n)      \
  420         v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
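
/*
 * Illustrative expansion, not part of the original file: an invocation
 * such as SYSMAP(caddr_t, CMAP1, CADDR1, 1) becomes, roughly,
 *
 *      CADDR1 = (caddr_t)va; va += (1 * PAGE_SIZE); CMAP1 = pte; pte += 1;
 *
 * so each use hands out a chunk of KVA together with a pointer to the
 * PTE(s) that will later map it.
 */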
  421 
  422         va = virtual_avail;
  423         pte = vtopte(va);
  424 
  425         /*
  426          * CMAP1/CMAP2 are used for zeroing and copying pages.
  427          * CMAP3 is used for the idle process page zeroing.
  428          */
  429         for (i = 0; i < MAXCPU; i++) {
  430                 sysmaps = &sysmaps_pcpu[i];
  431                 mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
  432                 SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
  433                 SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
  434         }
  435         SYSMAP(caddr_t, CMAP1, CADDR1, 1)
  436         SYSMAP(caddr_t, CMAP3, CADDR3, 1)
  437 
  438         /*
  439          * Crashdump maps.
  440          */
  441         SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
  442 
  443         /*
  444          * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
  445          */
  446         SYSMAP(caddr_t, unused, ptvmmap, 1)
  447 
  448         /*
  449          * msgbufp is used to map the system message buffer.
  450          */
  451         SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))
  452 
  453         /*
  454          * KPTmap is used by pmap_kextract().
  455          */
  456         SYSMAP(pt_entry_t *, KPTD, KPTmap, KVA_PAGES)
  457 
  458         for (i = 0; i < NKPT; i++)
  459                 KPTD[i] = (KPTphys + (i << PAGE_SHIFT)) | pgeflag | PG_RW | PG_V;
  460 
  461         /*
  462          * Adjust the start of the KPTD and KPTmap so that the implementation
  463          * of pmap_kextract() and pmap_growkernel() can be made simpler.
  464          */
  465         KPTD -= KPTDI;
  466         KPTmap -= i386_btop(KPTDI << PDRSHIFT);
  467 
  468         /*
  469          * ptemap is used for pmap_pte_quick
  470          */
  471         SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
  472         SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1)
  473 
  474         mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
  475 
  476         virtual_avail = va;
  477 
  478         /*
  479          * Leave in place an identity mapping (virt == phys) for the low 1 MB
  480          * physical memory region that is used by the ACPI wakeup code.  This
  481          * mapping must not have PG_G set. 
  482          */
  483 #ifdef XBOX
  484         /* FIXME: This is gross, but needed for the XBOX. Since we are in such
   485  * an early stage, we cannot yet neatly map video memory ... :-(
  486          * Better fixes are very welcome! */
  487         if (!arch_i386_is_xbox)
  488 #endif
  489         for (i = 1; i < NKPT; i++)
  490                 PTD[i] = 0;
  491 
  492         /* Initialize the PAT MSR if present. */
  493         pmap_init_pat();
  494 
  495         /* Turn on PG_G on kernel page(s) */
  496         pmap_set_pg();
  497 }
  498 
  499 /*
  500  * Setup the PAT MSR.
  501  */
  502 void
  503 pmap_init_pat(void)
  504 {
  505         uint64_t pat_msr;
  506 
  507         /* Bail if this CPU doesn't implement PAT. */
  508         if (!(cpu_feature & CPUID_PAT))
  509                 return;
  510 
  511         if (cpu_vendor_id != CPU_VENDOR_INTEL ||
  512             (CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) {
  513                 /*
  514                  * Leave the indices 0-3 at the default of WB, WT, UC, and UC-.
  515                  * Program 4 and 5 as WP and WC.
  516                  * Leave 6 and 7 as UC and UC-.
  517                  */
  518                 pat_msr = rdmsr(MSR_PAT);
  519                 pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5));
  520                 pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
  521                     PAT_VALUE(5, PAT_WRITE_COMBINING);
  522                 pat_works = 1;
  523         } else {
  524                 /*
  525                  * Due to some Intel errata, we can only safely use the lower 4
  526                  * PAT entries.  Thus, just replace PAT Index 2 with WC instead
  527                  * of UC-.
  528                  *
  529                  *   Intel Pentium III Processor Specification Update
  530                  * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
  531                  * or Mode C Paging)
  532                  *
  533                  *   Intel Pentium IV  Processor Specification Update
  534                  * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
  535                  */
  536                 pat_msr = rdmsr(MSR_PAT);
  537                 pat_msr &= ~PAT_MASK(2);
  538                 pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
  539                 pat_works = 0;
  540         }
  541         wrmsr(MSR_PAT, pat_msr);
  542 }
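
/*
 * Illustrative sketch, not part of the original file: the IA32 PAT MSR
 * packs eight page-attribute entries, one per byte, so the PAT_MASK()
 * and PAT_VALUE() macros used above can be thought of as
 *
 *      example_pat_mask(i)     -> 0xffULL << (8 * (i))
 *      example_pat_value(i, m) -> (uint64_t)(m) << (8 * (i))
 *
 * (the "example_" names are hypothetical).  Replacing PAT index 2 with
 * WC therefore clears byte 2 of the MSR and ORs in the WC encoding.
 */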
  543 
  544 /*
  545  * Set PG_G on kernel pages.  Only the BSP calls this when SMP is turned on.
  546  */
  547 void
  548 pmap_set_pg(void)
  549 {
  550         pt_entry_t *pte;
  551         vm_offset_t va, endva;
  552 
  553         if (pgeflag == 0)
  554                 return;
  555 
  556         endva = KERNBASE + KERNend;
  557 
  558         if (pseflag) {
  559                 va = KERNBASE + KERNLOAD;
  560                 while (va  < endva) {
  561                         pdir_pde(PTD, va) |= pgeflag;
  562                         invltlb();      /* Play it safe, invltlb() every time */
  563                         va += NBPDR;
  564                 }
  565         } else {
  566                 va = (vm_offset_t)btext;
  567                 while (va < endva) {
  568                         pte = vtopte(va);
  569                         if (*pte)
  570                                 *pte |= pgeflag;
  571                         invltlb();      /* Play it safe, invltlb() every time */
  572                         va += PAGE_SIZE;
  573                 }
  574         }
  575 }
  576 
  577 /*
  578  * Initialize a vm_page's machine-dependent fields.
  579  */
  580 void
  581 pmap_page_init(vm_page_t m)
  582 {
  583 
  584         TAILQ_INIT(&m->md.pv_list);
  585         m->md.pat_mode = PAT_WRITE_BACK;
  586 }
  587 
  588 #ifdef PAE
  589 static void *
  590 pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
  591 {
  592 
  593         /* Inform UMA that this allocator uses kernel_map/object. */
  594         *flags = UMA_SLAB_KERNEL;
  595         return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 0x0ULL,
  596             0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT));
  597 }
  598 #endif
  599 
  600 /*
   601  * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
  602  * Requirements:
  603  *  - Must deal with pages in order to ensure that none of the PG_* bits
  604  *    are ever set, PG_V in particular.
  605  *  - Assumes we can write to ptes without pte_store() atomic ops, even
  606  *    on PAE systems.  This should be ok.
  607  *  - Assumes nothing will ever test these addresses for 0 to indicate
  608  *    no mapping instead of correctly checking PG_V.
  609  *  - Assumes a vm_offset_t will fit in a pte (true for i386).
  610  * Because PG_V is never set, there can be no mappings to invalidate.
  611  */
  612 static vm_offset_t
  613 pmap_ptelist_alloc(vm_offset_t *head)
  614 {
  615         pt_entry_t *pte;
  616         vm_offset_t va;
  617 
  618         va = *head;
  619         if (va == 0)
  620                 return (va);    /* Out of memory */
  621         pte = vtopte(va);
  622         *head = *pte;
  623         if (*head & PG_V)
  624                 panic("pmap_ptelist_alloc: va with PG_V set!");
  625         *pte = 0;
  626         return (va);
  627 }
  628 
  629 static void
  630 pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
  631 {
  632         pt_entry_t *pte;
  633 
  634         if (va & PG_V)
  635                 panic("pmap_ptelist_free: freeing va with PG_V set!");
  636         pte = vtopte(va);
  637         *pte = *head;           /* virtual! PG_V is 0 though */
  638         *head = va;
  639 }
  640 
  641 static void
  642 pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
  643 {
  644         int i;
  645         vm_offset_t va;
  646 
  647         *head = 0;
  648         for (i = npages - 1; i >= 0; i--) {
  649                 va = (vm_offset_t)base + i * PAGE_SIZE;
  650                 pmap_ptelist_free(head, va);
  651         }
  652 }
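
/*
 * Illustrative usage, not part of the original file ("example_" is a
 * hypothetical name): the three helpers above implement a KVA freelist
 * threaded through otherwise unused PTEs.  The pv-chunk allocator later
 * in this file uses them in essentially this pattern.
 */
static void
example_ptelist_usage(void)
{
        vm_offset_t va;

        /* One-time setup; pmap_init() below does this for pv chunks. */
        pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);

        /* Grab one page of KVA; 0 means the freelist is exhausted. */
        va = pmap_ptelist_alloc(&pv_vafree);
        if (va != 0) {
                /* ... map a physical page at va and use it ... */
                pmap_ptelist_free(&pv_vafree, va);
        }
}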
  653 
  654 
  655 /*
  656  *      Initialize the pmap module.
  657  *      Called by vm_init, to initialize any structures that the pmap
  658  *      system needs to map virtual memory.
  659  */
  660 void
  661 pmap_init(void)
  662 {
  663         vm_page_t mpte;
  664         vm_size_t s;
  665         int i, pv_npg;
  666 
  667         /*
  668          * Initialize the vm page array entries for the kernel pmap's
  669          * page table pages.
  670          */ 
  671         for (i = 0; i < NKPT; i++) {
  672                 mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT));
  673                 KASSERT(mpte >= vm_page_array &&
  674                     mpte < &vm_page_array[vm_page_array_size],
  675                     ("pmap_init: page table page is out of range"));
  676                 mpte->pindex = i + KPTDI;
  677                 mpte->phys_addr = KPTphys + (i << PAGE_SHIFT);
  678         }
  679 
  680         /*
  681          * Initialize the address space (zone) for the pv entries.  Set a
  682          * high water mark so that the system can recover from excessive
  683          * numbers of pv entries.
  684          */
  685         TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
  686         pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
  687         TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
  688         pv_entry_max = roundup(pv_entry_max, _NPCPV);
  689         pv_entry_high_water = 9 * (pv_entry_max / 10);
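        /*
         * Worked example (illustrative numbers only): with the default
         * shpgperproc of 200, maxproc == 1000 and roughly 250000 managed
         * pages (about 1GB of RAM), pv_entry_max starts at
         * 200 * 1000 + 250000 == 450000, is rounded up to a multiple of
         * _NPCPV (the number of pv entries per pv chunk), and
         * pv_entry_high_water becomes 90% of that.
         */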
  690 
  691         /*
  692          * If the kernel is running in a virtual machine on an AMD Family 10h
  693          * processor, then it must assume that MCA is enabled by the virtual
  694          * machine monitor.
  695          */
  696         if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
  697             CPUID_TO_FAMILY(cpu_id) == 0x10)
  698                 workaround_erratum383 = 1;
  699 
  700         /*
  701          * Are large page mappings supported and enabled?
  702          */
  703         TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
  704         if (pseflag == 0)
  705                 pg_ps_enabled = 0;
  706         else if (pg_ps_enabled) {
  707                 KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
  708                     ("pmap_init: can't assign to pagesizes[1]"));
  709                 pagesizes[1] = NBPDR;
  710         }
  711 
  712         /*
  713          * Calculate the size of the pv head table for superpages.
  714          */
  715         for (i = 0; phys_avail[i + 1]; i += 2);
  716         pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;
  717 
  718         /*
  719          * Allocate memory for the pv head table for superpages.
  720          */
  721         s = (vm_size_t)(pv_npg * sizeof(struct md_page));
  722         s = round_page(s);
  723         pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
  724         for (i = 0; i < pv_npg; i++)
  725                 TAILQ_INIT(&pv_table[i].pv_list);
  726 
  727         pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
  728         pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
  729             PAGE_SIZE * pv_maxchunks);
  730         if (pv_chunkbase == NULL)
  731                 panic("pmap_init: not enough kvm for pv chunks");
  732         pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
  733 #ifdef PAE
  734         pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
  735             NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
  736             UMA_ZONE_VM | UMA_ZONE_NOFREE);
  737         uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
  738 #endif
  739 }
  740 
  741 
  742 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
  743         "Max number of PV entries");
  744 SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
  745         "Page share factor per proc");
  746 
  747 SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
  748     "2/4MB page mapping counters");
  749 
  750 static u_long pmap_pde_demotions;
  751 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD,
  752     &pmap_pde_demotions, 0, "2/4MB page demotions");
  753 
  754 static u_long pmap_pde_mappings;
  755 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
  756     &pmap_pde_mappings, 0, "2/4MB page mappings");
  757 
  758 static u_long pmap_pde_p_failures;
  759 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD,
  760     &pmap_pde_p_failures, 0, "2/4MB page promotion failures");
  761 
  762 static u_long pmap_pde_promotions;
  763 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD,
  764     &pmap_pde_promotions, 0, "2/4MB page promotions");
  765 
  766 /***************************************************
  767  * Low level helper routines.....
  768  ***************************************************/
  769 
  770 /*
  771  * Determine the appropriate bits to set in a PTE or PDE for a specified
  772  * caching mode.
  773  */
  774 int
  775 pmap_cache_bits(int mode, boolean_t is_pde)
  776 {
  777         int pat_flag, pat_index, cache_bits;
  778 
  779         /* The PAT bit is different for PTE's and PDE's. */
  780         pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
  781 
  782         /* If we don't support PAT, map extended modes to older ones. */
  783         if (!(cpu_feature & CPUID_PAT)) {
  784                 switch (mode) {
  785                 case PAT_UNCACHEABLE:
  786                 case PAT_WRITE_THROUGH:
  787                 case PAT_WRITE_BACK:
  788                         break;
  789                 case PAT_UNCACHED:
  790                 case PAT_WRITE_COMBINING:
  791                 case PAT_WRITE_PROTECTED:
  792                         mode = PAT_UNCACHEABLE;
  793                         break;
  794                 }
  795         }
  796         
  797         /* Map the caching mode to a PAT index. */
  798         if (pat_works) {
  799                 switch (mode) {
  800                 case PAT_UNCACHEABLE:
  801                         pat_index = 3;
  802                         break;
  803                 case PAT_WRITE_THROUGH:
  804                         pat_index = 1;
  805                         break;
  806                 case PAT_WRITE_BACK:
  807                         pat_index = 0;
  808                         break;
  809                 case PAT_UNCACHED:
  810                         pat_index = 2;
  811                         break;
  812                 case PAT_WRITE_COMBINING:
  813                         pat_index = 5;
  814                         break;
  815                 case PAT_WRITE_PROTECTED:
  816                         pat_index = 4;
  817                         break;
  818                 default:
  819                         panic("Unknown caching mode %d\n", mode);
  820                 }
  821         } else {
  822                 switch (mode) {
  823                 case PAT_UNCACHED:
  824                 case PAT_UNCACHEABLE:
  825                 case PAT_WRITE_PROTECTED:
  826                         pat_index = 3;
  827                         break;
  828                 case PAT_WRITE_THROUGH:
  829                         pat_index = 1;
  830                         break;
  831                 case PAT_WRITE_BACK:
  832                         pat_index = 0;
  833                         break;
  834                 case PAT_WRITE_COMBINING:
  835                         pat_index = 2;
  836                         break;
  837                 default:
  838                         panic("Unknown caching mode %d\n", mode);
  839                 }
  840         }
  841 
  842         /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
  843         cache_bits = 0;
  844         if (pat_index & 0x4)
  845                 cache_bits |= pat_flag;
  846         if (pat_index & 0x2)
  847                 cache_bits |= PG_NC_PCD;
  848         if (pat_index & 0x1)
  849                 cache_bits |= PG_NC_PWT;
  850         return (cache_bits);
  851 }
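
/*
 * Worked example (illustrative only): with a working PAT,
 * PAT_WRITE_COMBINING maps to pat_index 5 (binary 101), so the value
 * returned for a PTE is PG_PTE_PAT | PG_NC_PWT.  Callers such as
 * pmap_kenter_attr() below simply OR this value into the new PTE/PDE.
 */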
  852 
  853 /*
  854  * The caller is responsible for maintaining TLB consistency.
  855  */
  856 static void
  857 pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde)
  858 {
  859         pd_entry_t *pde;
  860         pmap_t pmap;
  861         boolean_t PTD_updated;
  862 
  863         PTD_updated = FALSE;
  864         mtx_lock_spin(&allpmaps_lock);
  865         LIST_FOREACH(pmap, &allpmaps, pm_list) {
  866                 if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
  867                     PG_FRAME))
  868                         PTD_updated = TRUE;
  869                 pde = pmap_pde(pmap, va);
  870                 pde_store(pde, newpde);
  871         }
  872         mtx_unlock_spin(&allpmaps_lock);
  873         KASSERT(PTD_updated,
  874             ("pmap_kenter_pde: current page table is not in allpmaps"));
  875 }
  876 
  877 /*
  878  * After changing the page size for the specified virtual address in the page
  879  * table, flush the corresponding entries from the processor's TLB.  Only the
  880  * calling processor's TLB is affected.
  881  *
  882  * The calling thread must be pinned to a processor.
  883  */
  884 static void
  885 pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
  886 {
  887         u_long cr4;
  888 
  889         if ((newpde & PG_PS) == 0)
  890                 /* Demotion: flush a specific 2MB page mapping. */
  891                 invlpg(va);
  892         else if ((newpde & PG_G) == 0)
  893                 /*
  894                  * Promotion: flush every 4KB page mapping from the TLB
  895                  * because there are too many to flush individually.
  896                  */
  897                 invltlb();
  898         else {
  899                 /*
  900                  * Promotion: flush every 4KB page mapping from the TLB,
  901                  * including any global (PG_G) mappings.
  902                  */
  903                 cr4 = rcr4();
  904                 load_cr4(cr4 & ~CR4_PGE);
  905                 /*
  906                  * Although preemption at this point could be detrimental to
  907                  * performance, it would not lead to an error.  PG_G is simply
  908                  * ignored if CR4.PGE is clear.  Moreover, in case this block
  909                  * is re-entered, the load_cr4() either above or below will
  910                  * modify CR4.PGE flushing the TLB.
  911                  */
  912                 load_cr4(cr4 | CR4_PGE);
  913         }
  914 }
  915 #ifdef SMP
  916 /*
  917  * For SMP, these functions have to use the IPI mechanism for coherence.
  918  *
  919  * N.B.: Before calling any of the following TLB invalidation functions,
  920  * the calling processor must ensure that all stores updating a non-
  921  * kernel page table are globally performed.  Otherwise, another
  922  * processor could cache an old, pre-update entry without being
  923  * invalidated.  This can happen one of two ways: (1) The pmap becomes
  924  * active on another processor after its pm_active field is checked by
  925  * one of the following functions but before a store updating the page
  926  * table is globally performed. (2) The pmap becomes active on another
  927  * processor before its pm_active field is checked but due to
   928  * speculative loads one of the following functions still reads the
  929  * pmap as inactive on the other processor.
  930  * 
  931  * The kernel page table is exempt because its pm_active field is
  932  * immutable.  The kernel page table is always active on every
  933  * processor.
  934  */
  935 void
  936 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  937 {
  938         u_int cpumask;
  939         u_int other_cpus;
  940 
  941         sched_pin();
  942         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  943                 invlpg(va);
  944                 smp_invlpg(va);
  945         } else {
  946                 cpumask = PCPU_GET(cpumask);
  947                 other_cpus = PCPU_GET(other_cpus);
  948                 if (pmap->pm_active & cpumask)
  949                         invlpg(va);
  950                 if (pmap->pm_active & other_cpus)
  951                         smp_masked_invlpg(pmap->pm_active & other_cpus, va);
  952         }
  953         sched_unpin();
  954 }
  955 
  956 void
  957 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  958 {
  959         u_int cpumask;
  960         u_int other_cpus;
  961         vm_offset_t addr;
  962 
  963         sched_pin();
  964         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  965                 for (addr = sva; addr < eva; addr += PAGE_SIZE)
  966                         invlpg(addr);
  967                 smp_invlpg_range(sva, eva);
  968         } else {
  969                 cpumask = PCPU_GET(cpumask);
  970                 other_cpus = PCPU_GET(other_cpus);
  971                 if (pmap->pm_active & cpumask)
  972                         for (addr = sva; addr < eva; addr += PAGE_SIZE)
  973                                 invlpg(addr);
  974                 if (pmap->pm_active & other_cpus)
  975                         smp_masked_invlpg_range(pmap->pm_active & other_cpus,
  976                             sva, eva);
  977         }
  978         sched_unpin();
  979 }
  980 
  981 void
  982 pmap_invalidate_all(pmap_t pmap)
  983 {
  984         u_int cpumask;
  985         u_int other_cpus;
  986 
  987         sched_pin();
  988         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  989                 invltlb();
  990                 smp_invltlb();
  991         } else {
  992                 cpumask = PCPU_GET(cpumask);
  993                 other_cpus = PCPU_GET(other_cpus);
  994                 if (pmap->pm_active & cpumask)
  995                         invltlb();
  996                 if (pmap->pm_active & other_cpus)
  997                         smp_masked_invltlb(pmap->pm_active & other_cpus);
  998         }
  999         sched_unpin();
 1000 }
 1001 
 1002 void
 1003 pmap_invalidate_cache(void)
 1004 {
 1005 
 1006         sched_pin();
 1007         wbinvd();
 1008         smp_cache_flush();
 1009         sched_unpin();
 1010 }
 1011 
 1012 struct pde_action {
 1013         cpumask_t store;        /* processor that updates the PDE */
 1014         cpumask_t invalidate;   /* processors that invalidate their TLB */
 1015         vm_offset_t va;
 1016         pd_entry_t *pde;
 1017         pd_entry_t newpde;
 1018 };
 1019 
 1020 static void
 1021 pmap_update_pde_kernel(void *arg)
 1022 {
 1023         struct pde_action *act = arg;
 1024         pd_entry_t *pde;
 1025         pmap_t pmap;
 1026 
 1027         if (act->store == PCPU_GET(cpumask))
 1028                 /*
 1029                  * Elsewhere, this operation requires allpmaps_lock for
 1030                  * synchronization.  Here, it does not because it is being
 1031                  * performed in the context of an all_cpus rendezvous.
 1032                  */
 1033                 LIST_FOREACH(pmap, &allpmaps, pm_list) {
 1034                         pde = pmap_pde(pmap, act->va);
 1035                         pde_store(pde, act->newpde);
 1036                 }
 1037 }
 1038 
 1039 static void
 1040 pmap_update_pde_user(void *arg)
 1041 {
 1042         struct pde_action *act = arg;
 1043 
 1044         if (act->store == PCPU_GET(cpumask))
 1045                 pde_store(act->pde, act->newpde);
 1046 }
 1047 
 1048 static void
 1049 pmap_update_pde_teardown(void *arg)
 1050 {
 1051         struct pde_action *act = arg;
 1052 
 1053         if ((act->invalidate & PCPU_GET(cpumask)) != 0)
 1054                 pmap_update_pde_invalidate(act->va, act->newpde);
 1055 }
 1056 
 1057 /*
 1058  * Change the page size for the specified virtual address in a way that
 1059  * prevents any possibility of the TLB ever having two entries that map the
 1060  * same virtual address using different page sizes.  This is the recommended
 1061  * workaround for Erratum 383 on AMD Family 10h processors.  It prevents a
 1062  * machine check exception for a TLB state that is improperly diagnosed as a
 1063  * hardware error.
 1064  */
 1065 static void
 1066 pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 1067 {
 1068         struct pde_action act;
 1069         cpumask_t active, cpumask;
 1070 
 1071         sched_pin();
 1072         cpumask = PCPU_GET(cpumask);
 1073         if (pmap == kernel_pmap)
 1074                 active = all_cpus;
 1075         else
 1076                 active = pmap->pm_active;
 1077         if ((active & PCPU_GET(other_cpus)) != 0) {
 1078                 act.store = cpumask;
 1079                 act.invalidate = active;
 1080                 act.va = va;
 1081                 act.pde = pde;
 1082                 act.newpde = newpde;
 1083                 smp_rendezvous_cpus(cpumask | active,
 1084                     smp_no_rendevous_barrier, pmap == kernel_pmap ?
 1085                     pmap_update_pde_kernel : pmap_update_pde_user,
 1086                     pmap_update_pde_teardown, &act);
 1087         } else {
 1088                 if (pmap == kernel_pmap)
 1089                         pmap_kenter_pde(va, newpde);
 1090                 else
 1091                         pde_store(pde, newpde);
 1092                 if ((active & cpumask) != 0)
 1093                         pmap_update_pde_invalidate(va, newpde);
 1094         }
 1095         sched_unpin();
 1096 }
 1097 #else /* !SMP */
 1098 /*
 1099  * Normal, non-SMP, 486+ invalidation functions.
 1100  * We inline these within pmap.c for speed.
 1101  */
 1102 PMAP_INLINE void
 1103 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 1104 {
 1105 
 1106         if (pmap == kernel_pmap || pmap->pm_active)
 1107                 invlpg(va);
 1108 }
 1109 
 1110 PMAP_INLINE void
 1111 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 1112 {
 1113         vm_offset_t addr;
 1114 
 1115         if (pmap == kernel_pmap || pmap->pm_active)
 1116                 for (addr = sva; addr < eva; addr += PAGE_SIZE)
 1117                         invlpg(addr);
 1118 }
 1119 
 1120 PMAP_INLINE void
 1121 pmap_invalidate_all(pmap_t pmap)
 1122 {
 1123 
 1124         if (pmap == kernel_pmap || pmap->pm_active)
 1125                 invltlb();
 1126 }
 1127 
 1128 PMAP_INLINE void
 1129 pmap_invalidate_cache(void)
 1130 {
 1131 
 1132         wbinvd();
 1133 }
 1134 
 1135 static void
 1136 pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 1137 {
 1138 
 1139         if (pmap == kernel_pmap)
 1140                 pmap_kenter_pde(va, newpde);
 1141         else
 1142                 pde_store(pde, newpde);
 1143         if (pmap == kernel_pmap || pmap->pm_active)
 1144                 pmap_update_pde_invalidate(va, newpde);
 1145 }
 1146 #endif /* !SMP */
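
/*
 * Illustrative usage, not part of the original file ("example_" is a
 * hypothetical name): a caller that must change a PDE without ever
 * letting the TLB hold two differently sized mappings for the same
 * address (the Erratum 383 concern described above) goes through
 * pmap_update_pde() instead of storing to the PDE directly:
 */
static void
example_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde)
{
        pd_entry_t *pde;

        pde = pmap_pde(pmap, va);
        pmap_update_pde(pmap, va, pde, newpde);
}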
 1147 
 1148 void
 1149 pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
 1150 {
 1151 
 1152         KASSERT((sva & PAGE_MASK) == 0,
 1153             ("pmap_invalidate_cache_range: sva not page-aligned"));
 1154         KASSERT((eva & PAGE_MASK) == 0,
 1155             ("pmap_invalidate_cache_range: eva not page-aligned"));
 1156 
 1157         if (cpu_feature & CPUID_SS)
 1158                 ; /* If "Self Snoop" is supported, do nothing. */
 1159         else if ((cpu_feature & CPUID_CLFSH) != 0 &&
 1160                  eva - sva < 2 * 1024 * 1024) {
 1161 
 1162                 /*
 1163                  * Otherwise, do per-cache line flush.  Use the mfence
  1164                  * instruction to ensure that previous stores are
 1165                  * included in the write-back.  The processor
 1166                  * propagates flush to other processors in the cache
 1167                  * coherence domain.
 1168                  */
 1169                 mfence();
 1170                 for (; sva < eva; sva += cpu_clflush_line_size)
 1171                         clflush(sva);
 1172                 mfence();
 1173         } else {
 1174 
 1175                 /*
 1176                  * No targeted cache flush methods are supported by CPU,
 1177                  * or the supplied range is bigger than 2MB.
 1178                  * Globally invalidate cache.
 1179                  */
 1180                 pmap_invalidate_cache();
 1181         }
 1182 }
 1183 
 1184 /*
 1185  * Are we current address space or kernel?  N.B. We return FALSE when
 1186  * a pmap's page table is in use because a kernel thread is borrowing
 1187  * it.  The borrowed page table can change spontaneously, making any
 1188  * dependence on its continued use subject to a race condition.
 1189  */
 1190 static __inline int
 1191 pmap_is_current(pmap_t pmap)
 1192 {
 1193 
 1194         return (pmap == kernel_pmap ||
 1195                 (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
 1196             (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
 1197 }
 1198 
 1199 /*
 1200  * If the given pmap is not the current or kernel pmap, the returned pte must
 1201  * be released by passing it to pmap_pte_release().
 1202  */
 1203 pt_entry_t *
 1204 pmap_pte(pmap_t pmap, vm_offset_t va)
 1205 {
 1206         pd_entry_t newpf;
 1207         pd_entry_t *pde;
 1208 
 1209         pde = pmap_pde(pmap, va);
 1210         if (*pde & PG_PS)
 1211                 return (pde);
 1212         if (*pde != 0) {
 1213                 /* are we current address space or kernel? */
 1214                 if (pmap_is_current(pmap))
 1215                         return (vtopte(va));
 1216                 mtx_lock(&PMAP2mutex);
 1217                 newpf = *pde & PG_FRAME;
 1218                 if ((*PMAP2 & PG_FRAME) != newpf) {
 1219                         *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M;
 1220                         pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
 1221                 }
 1222                 return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
 1223         }
 1224         return (0);
 1225 }
 1226 
 1227 /*
 1228  * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
 1229  * being NULL.
 1230  */
 1231 static __inline void
 1232 pmap_pte_release(pt_entry_t *pte)
 1233 {
 1234 
 1235         if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2)
 1236                 mtx_unlock(&PMAP2mutex);
 1237 }
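
/*
 * Illustrative usage, not part of the original file ("example_" is a
 * hypothetical name; pmap_extract() below is the canonical in-tree
 * user): a pte obtained from pmap_pte() for a non-current pmap borrows
 * the shared PMAP2/PADDR2 window, so it must always be handed back.
 * Superpage (PG_PS) mappings are ignored here for brevity.
 */
static vm_paddr_t
example_pte_lookup(pmap_t pmap, vm_offset_t va)
{
        pt_entry_t *pte;
        vm_paddr_t pa;

        PMAP_LOCK(pmap);
        pte = pmap_pte(pmap, va);
        pa = (pte != NULL) ? (*pte & PG_FRAME) : 0;
        pmap_pte_release(pte);
        PMAP_UNLOCK(pmap);
        return (pa);
}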
 1238 
 1239 static __inline void
 1240 invlcaddr(void *caddr)
 1241 {
 1242 
 1243         invlpg((u_int)caddr);
 1244 }
 1245 
 1246 /*
 1247  * Super fast pmap_pte routine best used when scanning
 1248  * the pv lists.  This eliminates many coarse-grained
 1249  * invltlb calls.  Note that many of the pv list
 1250  * scans are across different pmaps.  It is very wasteful
 1251  * to do an entire invltlb for checking a single mapping.
 1252  *
 1253  * If the given pmap is not the current pmap, vm_page_queue_mtx
 1254  * must be held and curthread pinned to a CPU.
 1255  */
 1256 static pt_entry_t *
 1257 pmap_pte_quick(pmap_t pmap, vm_offset_t va)
 1258 {
 1259         pd_entry_t newpf;
 1260         pd_entry_t *pde;
 1261 
 1262         pde = pmap_pde(pmap, va);
 1263         if (*pde & PG_PS)
 1264                 return (pde);
 1265         if (*pde != 0) {
 1266                 /* are we current address space or kernel? */
 1267                 if (pmap_is_current(pmap))
 1268                         return (vtopte(va));
 1269                 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1270                 KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
 1271                 newpf = *pde & PG_FRAME;
 1272                 if ((*PMAP1 & PG_FRAME) != newpf) {
 1273                         *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M;
 1274 #ifdef SMP
 1275                         PMAP1cpu = PCPU_GET(cpuid);
 1276 #endif
 1277                         invlcaddr(PADDR1);
 1278                         PMAP1changed++;
 1279                 } else
 1280 #ifdef SMP
 1281                 if (PMAP1cpu != PCPU_GET(cpuid)) {
 1282                         PMAP1cpu = PCPU_GET(cpuid);
 1283                         invlcaddr(PADDR1);
 1284                         PMAP1changedcpu++;
 1285                 } else
 1286 #endif
 1287                         PMAP1unchanged++;
 1288                 return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
 1289         }
 1290         return (0);
 1291 }
 1292 
 1293 /*
 1294  *      Routine:        pmap_extract
 1295  *      Function:
 1296  *              Extract the physical page address associated
 1297  *              with the given map/virtual_address pair.
 1298  */
 1299 vm_paddr_t 
 1300 pmap_extract(pmap_t pmap, vm_offset_t va)
 1301 {
 1302         vm_paddr_t rtval;
 1303         pt_entry_t *pte;
 1304         pd_entry_t pde;
 1305 
 1306         rtval = 0;
 1307         PMAP_LOCK(pmap);
 1308         pde = pmap->pm_pdir[va >> PDRSHIFT];
 1309         if (pde != 0) {
 1310                 if ((pde & PG_PS) != 0)
 1311                         rtval = (pde & PG_PS_FRAME) | (va & PDRMASK);
 1312                 else {
 1313                         pte = pmap_pte(pmap, va);
 1314                         rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
 1315                         pmap_pte_release(pte);
 1316                 }
 1317         }
 1318         PMAP_UNLOCK(pmap);
 1319         return (rtval);
 1320 }
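
/*
 * Worked example (illustrative numbers, non-PAE 4MB pages assumed): if
 * the PDE covering va == 0xc07c5123 is a PG_PS mapping of physical
 * 0x00800000, pmap_extract() returns
 * (pde & PG_PS_FRAME) | (va & PDRMASK) == 0x00800000 | 0x003c5123 ==
 * 0x00bc5123.
 */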
 1321 
 1322 /*
 1323  *      Routine:        pmap_extract_and_hold
 1324  *      Function:
 1325  *              Atomically extract and hold the physical page
 1326  *              with the given pmap and virtual address pair
 1327  *              if that mapping permits the given protection.
 1328  */
 1329 vm_page_t
 1330 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 1331 {
 1332         pd_entry_t pde;
 1333         pt_entry_t pte;
 1334         vm_page_t m;
 1335 
 1336         m = NULL;
 1337         vm_page_lock_queues();
 1338         PMAP_LOCK(pmap);
 1339         pde = *pmap_pde(pmap, va);
 1340         if (pde != 0) {
 1341                 if (pde & PG_PS) {
 1342                         if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
 1343                                 m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
 1344                                     (va & PDRMASK));
 1345                                 vm_page_hold(m);
 1346                         }
 1347                 } else {
 1348                         sched_pin();
 1349                         pte = *pmap_pte_quick(pmap, va);
 1350                         if (pte != 0 &&
 1351                             ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
 1352                                 m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
 1353                                 vm_page_hold(m);
 1354                         }
 1355                         sched_unpin();
 1356                 }
 1357         }
 1358         vm_page_unlock_queues();
 1359         PMAP_UNLOCK(pmap);
 1360         return (m);
 1361 }
 1362 
 1363 /***************************************************
 1364  * Low level mapping routines.....
 1365  ***************************************************/
 1366 
 1367 /*
 1368  * Add a wired page to the kva.
 1369  * Note: not SMP coherent.
 1370  */
 1371 PMAP_INLINE void 
 1372 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 1373 {
 1374         pt_entry_t *pte;
 1375 
 1376         pte = vtopte(va);
 1377         pte_store(pte, pa | PG_RW | PG_V | pgeflag);
 1378 }
 1379 
 1380 static __inline void
 1381 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
 1382 {
 1383         pt_entry_t *pte;
 1384 
 1385         pte = vtopte(va);
 1386         pte_store(pte, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
 1387 }
 1388 
 1389 /*
 1390  * Remove a page from the kernel pagetables.
 1391  * Note: not SMP coherent.
 1392  */
 1393 PMAP_INLINE void
 1394 pmap_kremove(vm_offset_t va)
 1395 {
 1396         pt_entry_t *pte;
 1397 
 1398         pte = vtopte(va);
 1399         pte_clear(pte);
 1400 }
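
/*
 * Illustrative usage, not part of the original file ("example_" is a
 * hypothetical name): because pmap_kenter() and pmap_kremove() skip TLB
 * invalidation, callers perform the shootdown themselves, as pmap_map()
 * and pmap_qremove() below do, e.g.:
 */
static void
example_kenter_one(vm_offset_t va, vm_paddr_t pa)
{

        pmap_kenter(va, pa);
        pmap_invalidate_page(kernel_pmap, va);  /* make the mapping visible */
}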
 1401 
 1402 /*
 1403  *      Used to map a range of physical addresses into kernel
 1404  *      virtual address space.
 1405  *
 1406  *      The value passed in '*virt' is a suggested virtual address for
 1407  *      the mapping. Architectures which can support a direct-mapped
 1408  *      physical to virtual region can return the appropriate address
 1409  *      within that region, leaving '*virt' unchanged. Other
 1410  *      architectures should map the pages starting at '*virt' and
 1411  *      update '*virt' with the first usable address after the mapped
 1412  *      region.
 1413  */
 1414 vm_offset_t
 1415 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
 1416 {
 1417         vm_offset_t va, sva;
 1418 
 1419         va = sva = *virt;
 1420         while (start < end) {
 1421                 pmap_kenter(va, start);
 1422                 va += PAGE_SIZE;
 1423                 start += PAGE_SIZE;
 1424         }
 1425         pmap_invalidate_range(kernel_pmap, sva, va);
 1426         *virt = va;
 1427         return (sva);
 1428 }
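
/*
 * Illustrative sketch, not part of the original file: a typical call to
 * pmap_map().  The variable names are hypothetical.
 */
#if 0	/* example only */
	vm_offset_t kva, virt;

	virt = suggested_kva;		/* hypothetical suggested KVA */
	kva = pmap_map(&virt, phys_start, phys_end,
	    VM_PROT_READ | VM_PROT_WRITE);
	/*
	 * kva maps phys_start; since i386 has no direct map, virt has been
	 * advanced to the first usable address past the mapped range.
	 */
#endif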
 1429 
 1430 
 1431 /*
 1432  * Add a list of wired pages to the kva.  This
 1433  * routine is only used for temporary kernel
 1434  * mappings that do not need to have page
 1435  * modification or references recorded.  Note
 1436  * that old mappings are simply written over.
 1437  * The pages *must* be wired.
 1438  * Note: SMP coherent.  Uses a ranged shootdown IPI.
 1439  */
 1440 void
 1441 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 1442 {
 1443         pt_entry_t *endpte, oldpte, *pte;
 1444 
 1445         oldpte = 0;
 1446         pte = vtopte(sva);
 1447         endpte = pte + count;
 1448         while (pte < endpte) {
 1449                 oldpte |= *pte;
 1450                 pte_store(pte, VM_PAGE_TO_PHYS(*ma) | pgeflag |
 1451                     pmap_cache_bits((*ma)->md.pat_mode, 0) | PG_RW | PG_V);
 1452                 pte++;
 1453                 ma++;
 1454         }
 1455         if ((oldpte & PG_V) != 0)
 1456                 pmap_invalidate_range(kernel_pmap, sva, sva + count *
 1457                     PAGE_SIZE);
 1458 }
 1459 
 1460 /*
 1461  * This routine tears out page mappings from the
 1462  * kernel -- it is meant only for temporary mappings.
 1463  * Note: SMP coherent.  Uses a ranged shootdown IPI.
 1464  */
 1465 void
 1466 pmap_qremove(vm_offset_t sva, int count)
 1467 {
 1468         vm_offset_t va;
 1469 
 1470         va = sva;
 1471         while (count-- > 0) {
 1472                 pmap_kremove(va);
 1473                 va += PAGE_SIZE;
 1474         }
 1475         pmap_invalidate_range(kernel_pmap, sva, va);
 1476 }
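
/*
 * Illustrative sketch, not part of the original file: the usual
 * pmap_qenter()/pmap_qremove() pairing for a temporary kernel mapping of
 * "count" wired pages at a KVA range owned by the caller.  The names
 * "kva", "pages", and "count" are hypothetical.
 */
#if 0	/* example only */
	pmap_qenter(kva, pages, count);		/* pages[] must be wired */
	/* ... access the pages through kva ... */
	pmap_qremove(kva, count);
#endif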
 1477 
 1478 /***************************************************
 1479  * Page table page management routines.....
 1480  ***************************************************/
 1481 static __inline void
 1482 pmap_free_zero_pages(vm_page_t free)
 1483 {
 1484         vm_page_t m;
 1485 
 1486         while (free != NULL) {
 1487                 m = free;
 1488                 free = m->right;
 1489                 /* Preserve the page's PG_ZERO setting. */
 1490                 vm_page_free_toq(m);
 1491         }
 1492 }
 1493 
 1494 /*
 1495  * Schedule the specified unused page table page to be freed.  Specifically,
 1496  * add the page to the specified list of pages that will be released to the
 1497  * physical memory manager after the TLB has been updated.
 1498  */
 1499 static __inline void
 1500 pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
 1501 {
 1502 
 1503         if (set_PG_ZERO)
 1504                 m->flags |= PG_ZERO;
 1505         else
 1506                 m->flags &= ~PG_ZERO;
 1507         m->right = *free;
 1508         *free = m;
 1509 }
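
/*
 * Added note, not in the original source: callers in this file use the
 * delayed free list roughly as follows (see pmap_collect() and
 * pmap_remove_all() below):
 *
 *	free = NULL;
 *	pmap_unuse_pt(pmap, va, &free);
 *	pmap_invalidate_page(pmap, va);
 *	pmap_free_zero_pages(free);
 *
 * so that a page table page is returned to the physical memory allocator
 * only after the corresponding TLB invalidation has been performed.
 */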
 1510 
 1511 /*
 1512  * Inserts the specified page table page into the specified pmap's collection
 1513  * of idle page table pages.  Each of a pmap's page table pages is responsible
 1514  * for mapping a distinct range of virtual addresses.  The pmap's collection is
 1515  * ordered by this virtual address range.
 1516  */
 1517 static void
 1518 pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
 1519 {
 1520         vm_page_t root;
 1521 
 1522         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1523         root = pmap->pm_root;
 1524         if (root == NULL) {
 1525                 mpte->left = NULL;
 1526                 mpte->right = NULL;
 1527         } else {
 1528                 root = vm_page_splay(mpte->pindex, root);
 1529                 if (mpte->pindex < root->pindex) {
 1530                         mpte->left = root->left;
 1531                         mpte->right = root;
 1532                         root->left = NULL;
 1533                 } else if (mpte->pindex == root->pindex)
 1534                         panic("pmap_insert_pt_page: pindex already inserted");
 1535                 else {
 1536                         mpte->right = root->right;
 1537                         mpte->left = root;
 1538                         root->right = NULL;
 1539                 }
 1540         }
 1541         pmap->pm_root = mpte;
 1542 }
 1543 
 1544 /*
 1545  * Looks for a page table page mapping the specified virtual address in the
 1546  * specified pmap's collection of idle page table pages.  Returns NULL if there
 1547  * is no page table page corresponding to the specified virtual address.
 1548  */
 1549 static vm_page_t
 1550 pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
 1551 {
 1552         vm_page_t mpte;
 1553         vm_pindex_t pindex = va >> PDRSHIFT;
 1554 
 1555         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1556         if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) {
 1557                 mpte = vm_page_splay(pindex, mpte);
 1558                 if ((pmap->pm_root = mpte)->pindex != pindex)
 1559                         mpte = NULL;
 1560         }
 1561         return (mpte);
 1562 }
 1563 
 1564 /*
 1565  * Removes the specified page table page from the specified pmap's collection
 1566  * of idle page table pages.  The specified page table page must be a member of
 1567  * the pmap's collection.
 1568  */
 1569 static void
 1570 pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
 1571 {
 1572         vm_page_t root;
 1573 
 1574         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1575         if (mpte != pmap->pm_root)
 1576                 vm_page_splay(mpte->pindex, pmap->pm_root);
 1577         if (mpte->left == NULL)
 1578                 root = mpte->right;
 1579         else {
 1580                 root = vm_page_splay(mpte->pindex, mpte->left);
 1581                 root->right = mpte->right;
 1582         }
 1583         pmap->pm_root = root;
 1584 }
 1585 
 1586 /*
 1587  * This routine unwires page table pages, and if the wire count
 1588  * drops to zero, the page is scheduled to be freed.
 1589  */
 1590 static __inline int
 1591 pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
 1592 {
 1593 
 1594         --m->wire_count;
 1595         if (m->wire_count == 0)
 1596                 return (_pmap_unwire_pte_hold(pmap, m, free));
 1597         else
 1598                 return (0);
 1599 }
 1600 
 1601 static int 
 1602 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
 1603 {
 1604         vm_offset_t pteva;
 1605 
 1606         /*
 1607          * unmap the page table page
 1608          */
 1609         pmap->pm_pdir[m->pindex] = 0;
 1610         --pmap->pm_stats.resident_count;
 1611 
 1612         /*
 1613          * This is a release store so that the ordinary store unmapping
 1614          * the page table page is globally performed before TLB shoot-
 1615          * down is begun.
 1616          */
 1617         atomic_subtract_rel_int(&cnt.v_wire_count, 1);
 1618 
 1619         /*
 1620          * Invalidate the recursive mapping of the page table
 1621          * page so the removal takes effect immediately.
 1622          */
 1623         pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
 1624         pmap_invalidate_page(pmap, pteva);
 1625 
 1626         /* 
 1627          * Put page on a list so that it is released after
 1628          * *ALL* TLB shootdown is done
 1629          */
 1630         pmap_add_delayed_free_list(m, free, TRUE);
 1631 
 1632         return (1);
 1633 }
 1634 
 1635 /*
 1636  * After removing a page table entry, this routine is used to
 1637  * conditionally free the page, and manage the hold/wire counts.
 1638  */
 1639 static int
 1640 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
 1641 {
 1642         pd_entry_t ptepde;
 1643         vm_page_t mpte;
 1644 
 1645         if (va >= VM_MAXUSER_ADDRESS)
 1646                 return (0);
 1647         ptepde = *pmap_pde(pmap, va);
 1648         mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
 1649         return (pmap_unwire_pte_hold(pmap, mpte, free));
 1650 }
 1651 
 1652 void
 1653 pmap_pinit0(pmap_t pmap)
 1654 {
 1655 
 1656         PMAP_LOCK_INIT(pmap);
 1657         pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
 1658 #ifdef PAE
 1659         pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
 1660 #endif
 1661         pmap->pm_root = NULL;
 1662         pmap->pm_active = 0;
 1663         PCPU_SET(curpmap, pmap);
 1664         TAILQ_INIT(&pmap->pm_pvchunk);
 1665         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1666         mtx_lock_spin(&allpmaps_lock);
 1667         LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 1668         mtx_unlock_spin(&allpmaps_lock);
 1669 }
 1670 
 1671 /*
 1672  * Initialize a preallocated and zeroed pmap structure,
 1673  * such as one in a vmspace structure.
 1674  */
 1675 int
 1676 pmap_pinit(pmap_t pmap)
 1677 {
 1678         vm_page_t m, ptdpg[NPGPTD];
 1679         vm_paddr_t pa;
 1680         static int color;
 1681         int i;
 1682 
 1683         PMAP_LOCK_INIT(pmap);
 1684 
 1685         /*
 1686          * No need to allocate page table space yet but we do need a valid
 1687          * page directory table.
 1688          */
 1689         if (pmap->pm_pdir == NULL) {
 1690                 pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
 1691                     NBPTD);
 1692 
 1693                 if (pmap->pm_pdir == NULL) {
 1694                         PMAP_LOCK_DESTROY(pmap);
 1695                         return (0);
 1696                 }
 1697 #ifdef PAE
 1698                 pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
 1699                 KASSERT(((vm_offset_t)pmap->pm_pdpt &
 1700                     ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
 1701                     ("pmap_pinit: pdpt misaligned"));
 1702                 KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
 1703                     ("pmap_pinit: pdpt above 4g"));
 1704 #endif
 1705                 pmap->pm_root = NULL;
 1706         }
 1707         KASSERT(pmap->pm_root == NULL,
 1708             ("pmap_pinit: pmap has reserved page table page(s)"));
 1709 
 1710         /*
 1711          * allocate the page directory page(s)
 1712          */
 1713         for (i = 0; i < NPGPTD;) {
 1714                 m = vm_page_alloc(NULL, color++,
 1715                     VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 1716                     VM_ALLOC_ZERO);
 1717                 if (m == NULL)
 1718                         VM_WAIT;
 1719                 else {
 1720                         ptdpg[i++] = m;
 1721                 }
 1722         }
 1723 
 1724         pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
 1725 
 1726         for (i = 0; i < NPGPTD; i++) {
 1727                 if ((ptdpg[i]->flags & PG_ZERO) == 0)
 1728                         bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
 1729         }
 1730 
 1731         mtx_lock_spin(&allpmaps_lock);
 1732         LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 1733         mtx_unlock_spin(&allpmaps_lock);
 1734         /* Wire in kernel global address entries. */
 1735         bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
 1736 
 1737         /* install self-referential address mapping entry(s) */
 1738         for (i = 0; i < NPGPTD; i++) {
 1739                 pa = VM_PAGE_TO_PHYS(ptdpg[i]);
 1740                 pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
 1741 #ifdef PAE
 1742                 pmap->pm_pdpt[i] = pa | PG_V;
 1743 #endif
 1744         }
 1745 
 1746         pmap->pm_active = 0;
 1747         TAILQ_INIT(&pmap->pm_pvchunk);
 1748         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1749 
 1750         return (1);
 1751 }
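
/*
 * Added note, not in the original source: pmap_pinit() returns 1 on
 * success and returns 0 only when kernel virtual address space for the
 * page directory cannot be allocated; the page directory pages themselves
 * are obtained with VM_WAIT retries and so never cause a 0 return.  A
 * matching pmap_release() (below) tears the structure down again.
 */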
 1752 
 1753 /*
 1754  * This routine is called when the needed page table page is not
 1755  * already mapped; it allocates and installs a new one.
 1756  */
 1757 static vm_page_t
 1758 _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
 1759 {
 1760         vm_paddr_t ptepa;
 1761         vm_page_t m;
 1762 
 1763         KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 1764             (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 1765             ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 1766 
 1767         /*
 1768          * Allocate a page table page.
 1769          */
 1770         if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 1771             VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 1772                 if (flags & M_WAITOK) {
 1773                         PMAP_UNLOCK(pmap);
 1774                         vm_page_unlock_queues();
 1775                         VM_WAIT;
 1776                         vm_page_lock_queues();
 1777                         PMAP_LOCK(pmap);
 1778                 }
 1779 
 1780                 /*
 1781                  * Indicate the need to retry.  While waiting, the page table
 1782                  * page may have been allocated.
 1783                  */
 1784                 return (NULL);
 1785         }
 1786         if ((m->flags & PG_ZERO) == 0)
 1787                 pmap_zero_page(m);
 1788 
 1789         /*
 1790          * Map the pagetable page into the process address space, if
 1791          * it isn't already there.
 1792          */
 1793 
 1794         pmap->pm_stats.resident_count++;
 1795 
 1796         ptepa = VM_PAGE_TO_PHYS(m);
 1797         pmap->pm_pdir[ptepindex] =
 1798                 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
 1799 
 1800         return (m);
 1801 }
 1802 
 1803 static vm_page_t
 1804 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
 1805 {
 1806         unsigned ptepindex;
 1807         pd_entry_t ptepa;
 1808         vm_page_t m;
 1809 
 1810         KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 1811             (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 1812             ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 1813 
 1814         /*
 1815          * Calculate pagetable page index
 1816          */
 1817         ptepindex = va >> PDRSHIFT;
 1818 retry:
 1819         /*
 1820          * Get the page directory entry
 1821          */
 1822         ptepa = pmap->pm_pdir[ptepindex];
 1823 
 1824         /*
 1825          * This supports switching from a 4MB page to a
 1826          * normal 4K page.
 1827          */
 1828         if (ptepa & PG_PS) {
 1829                 (void)pmap_demote_pde(pmap, &pmap->pm_pdir[ptepindex], va);
 1830                 ptepa = pmap->pm_pdir[ptepindex];
 1831         }
 1832 
 1833         /*
 1834          * If the page table page is mapped, we just increment the
 1835          * hold count, and activate it.
 1836          */
 1837         if (ptepa) {
 1838                 m = PHYS_TO_VM_PAGE(ptepa & PG_FRAME);
 1839                 m->wire_count++;
 1840         } else {
 1841                 /*
 1842                  * Here if the pte page isn't mapped, or if it has
 1843                  * been deallocated. 
 1844                  */
 1845                 m = _pmap_allocpte(pmap, ptepindex, flags);
 1846                 if (m == NULL && (flags & M_WAITOK))
 1847                         goto retry;
 1848         }
 1849         return (m);
 1850 }
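
/*
 * Added note, not in the original source: when _pmap_allocpte() returns
 * NULL with M_WAITOK, it has dropped and reacquired the pmap and page
 * queues locks around VM_WAIT, so the page directory entry may have
 * changed; that is why pmap_allocpte() jumps back to "retry" instead of
 * failing.  With M_NOWAIT, NULL is simply returned to the caller.
 */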
 1851 
 1852 
 1853 /***************************************************
 1854  * Pmap allocation/deallocation routines.
 1855  ***************************************************/
 1856 
 1857 #ifdef SMP
 1858 /*
 1859  * Deal with a SMP shootdown of other users of the pmap that we are
 1860  * trying to dispose of.  This can be a bit hairy.
 1861  */
 1862 static cpumask_t *lazymask;
 1863 static u_int lazyptd;
 1864 static volatile u_int lazywait;
 1865 
 1866 void pmap_lazyfix_action(void);
 1867 
 1868 void
 1869 pmap_lazyfix_action(void)
 1870 {
 1871         cpumask_t mymask = PCPU_GET(cpumask);
 1872 
 1873 #ifdef COUNT_IPIS
 1874         (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
 1875 #endif
 1876         if (rcr3() == lazyptd)
 1877                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1878         atomic_clear_int(lazymask, mymask);
 1879         atomic_store_rel_int(&lazywait, 1);
 1880 }
 1881 
 1882 static void
 1883 pmap_lazyfix_self(cpumask_t mymask)
 1884 {
 1885 
 1886         if (rcr3() == lazyptd)
 1887                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1888         atomic_clear_int(lazymask, mymask);
 1889 }
 1890 
 1891 
 1892 static void
 1893 pmap_lazyfix(pmap_t pmap)
 1894 {
 1895         cpumask_t mymask, mask;
 1896         u_int spins;
 1897 
 1898         while ((mask = pmap->pm_active) != 0) {
 1899                 spins = 50000000;
 1900                 mask = mask & -mask;    /* Find least significant set bit */
 1901                 mtx_lock_spin(&smp_ipi_mtx);
 1902 #ifdef PAE
 1903                 lazyptd = vtophys(pmap->pm_pdpt);
 1904 #else
 1905                 lazyptd = vtophys(pmap->pm_pdir);
 1906 #endif
 1907                 mymask = PCPU_GET(cpumask);
 1908                 if (mask == mymask) {
 1909                         lazymask = &pmap->pm_active;
 1910                         pmap_lazyfix_self(mymask);
 1911                 } else {
 1912                         atomic_store_rel_int((u_int *)&lazymask,
 1913                             (u_int)&pmap->pm_active);
 1914                         atomic_store_rel_int(&lazywait, 0);
 1915                         ipi_selected(mask, IPI_LAZYPMAP);
 1916                         while (lazywait == 0) {
 1917                                 ia32_pause();
 1918                                 if (--spins == 0)
 1919                                         break;
 1920                         }
 1921                 }
 1922                 mtx_unlock_spin(&smp_ipi_mtx);
 1923                 if (spins == 0)
 1924                         printf("pmap_lazyfix: spun for 50000000\n");
 1925         }
 1926 }
 1927 
 1928 #else   /* SMP */
 1929 
 1930 /*
 1931  * Cleaning up on uniprocessor is easy.  For various reasons, we're
 1932  * unlikely to have to even execute this code, including the fact
 1933  * that the cleanup is deferred until the parent does a wait(2), which
 1934  * means that another userland process has run.
 1935  */
 1936 static void
 1937 pmap_lazyfix(pmap_t pmap)
 1938 {
 1939         u_int cr3;
 1940 
 1941         cr3 = vtophys(pmap->pm_pdir);
 1942         if (cr3 == rcr3()) {
 1943                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1944                 pmap->pm_active &= ~(PCPU_GET(cpumask));
 1945         }
 1946 }
 1947 #endif  /* SMP */
 1948 
 1949 /*
 1950  * Release any resources held by the given physical map.
 1951  * Called when a pmap initialized by pmap_pinit is being released.
 1952  * Should only be called if the map contains no valid mappings.
 1953  */
 1954 void
 1955 pmap_release(pmap_t pmap)
 1956 {
 1957         vm_page_t m, ptdpg[NPGPTD];
 1958         int i;
 1959 
 1960         KASSERT(pmap->pm_stats.resident_count == 0,
 1961             ("pmap_release: pmap resident count %ld != 0",
 1962             pmap->pm_stats.resident_count));
 1963         KASSERT(pmap->pm_root == NULL,
 1964             ("pmap_release: pmap has reserved page table page(s)"));
 1965 
 1966         pmap_lazyfix(pmap);
 1967         mtx_lock_spin(&allpmaps_lock);
 1968         LIST_REMOVE(pmap, pm_list);
 1969         mtx_unlock_spin(&allpmaps_lock);
 1970 
 1971         for (i = 0; i < NPGPTD; i++)
 1972                 ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] &
 1973                     PG_FRAME);
 1974 
 1975         bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
 1976             sizeof(*pmap->pm_pdir));
 1977 
 1978         pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
 1979 
 1980         for (i = 0; i < NPGPTD; i++) {
 1981                 m = ptdpg[i];
 1982 #ifdef PAE
 1983                 KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
 1984                     ("pmap_release: got wrong ptd page"));
 1985 #endif
 1986                 m->wire_count--;
 1987                 atomic_subtract_int(&cnt.v_wire_count, 1);
 1988                 vm_page_free_zero(m);
 1989         }
 1990         PMAP_LOCK_DESTROY(pmap);
 1991 }
 1992 
 1993 static int
 1994 kvm_size(SYSCTL_HANDLER_ARGS)
 1995 {
 1996         unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 1997 
 1998         return sysctl_handle_long(oidp, &ksize, 0, req);
 1999 }
 2000 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
 2001     0, 0, kvm_size, "IU", "Size of KVM");
 2002 
 2003 static int
 2004 kvm_free(SYSCTL_HANDLER_ARGS)
 2005 {
 2006         unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 2007 
 2008         return sysctl_handle_long(oidp, &kfree, 0, req);
 2009 }
 2010 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
 2011     0, 0, kvm_free, "IU", "Amount of KVM free");
 2012 
 2013 /*
 2014  * grow the number of kernel page table entries, if needed
 2015  */
 2016 void
 2017 pmap_growkernel(vm_offset_t addr)
 2018 {
 2019         vm_paddr_t ptppaddr;
 2020         vm_page_t nkpg;
 2021         pd_entry_t newpdir;
 2022 
 2023         mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 2024         if (kernel_vm_end == 0) {
 2025                 kernel_vm_end = KERNBASE;
 2026                 nkpt = 0;
 2027                 while (pdir_pde(PTD, kernel_vm_end)) {
 2028                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 2029                         nkpt++;
 2030                         if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 2031                                 kernel_vm_end = kernel_map->max_offset;
 2032                                 break;
 2033                         }
 2034                 }
 2035         }
 2036         addr = roundup2(addr, PAGE_SIZE * NPTEPG);
 2037         if (addr - 1 >= kernel_map->max_offset)
 2038                 addr = kernel_map->max_offset;
 2039         while (kernel_vm_end < addr) {
 2040                 if (pdir_pde(PTD, kernel_vm_end)) {
 2041                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 2042                         if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 2043                                 kernel_vm_end = kernel_map->max_offset;
 2044                                 break;
 2045                         }
 2046                         continue;
 2047                 }
 2048 
 2049                 nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT,
 2050                     VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 2051                     VM_ALLOC_ZERO);
 2052                 if (nkpg == NULL)
 2053                         panic("pmap_growkernel: no memory to grow kernel");
 2054 
 2055                 nkpt++;
 2056 
 2057                 if ((nkpg->flags & PG_ZERO) == 0)
 2058                         pmap_zero_page(nkpg);
 2059                 ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 2060                 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 2061                 pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir;
 2062 
 2063                 pmap_kenter_pde(kernel_vm_end, newpdir);
 2064                 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 2065                 if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 2066                         kernel_vm_end = kernel_map->max_offset;
 2067                         break;
 2068                 }
 2069         }
 2070 }
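
/*
 * Added note, not in the original source: each iteration above installs
 * one page table page, extending the kernel map by PAGE_SIZE * NPTEPG
 * bytes of KVA (4MB without PAE, 2MB with PAE, where NPTEPG is 512).
 */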
 2071 
 2072 
 2073 /***************************************************
 2074  * page management routines.
 2075  ***************************************************/
 2076 
 2077 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 2078 CTASSERT(_NPCM == 11);
 2079 
 2080 static __inline struct pv_chunk *
 2081 pv_to_chunk(pv_entry_t pv)
 2082 {
 2083 
 2084         return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK);
 2085 }
 2086 
 2087 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
 2088 
 2089 #define PC_FREE0_9      0xfffffffful    /* Free values for index 0 through 9 */
 2090 #define PC_FREE10       0x0000fffful    /* Free values for index 10 */
 2091 
 2092 static uint32_t pc_freemask[11] = {
 2093         PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 2094         PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 2095         PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 2096         PC_FREE0_9, PC_FREE10
 2097 };
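
/*
 * Added note, not in the original source: a pv chunk occupies exactly one
 * page (see the CTASSERT above), and its free bitmap spans _NPCM (11)
 * 32-bit words: ten fully used words plus the low 16 bits of the last,
 * for 10 * 32 + 16 = 336 pv entries per chunk.  PC_FREE0_9 and PC_FREE10
 * are the corresponding "all free" masks.
 */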
 2098 
 2099 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
 2100         "Current number of pv entries");
 2101 
 2102 #ifdef PV_STATS
 2103 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
 2104 
 2105 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
 2106         "Current number of pv entry chunks");
 2107 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
 2108         "Current number of pv entry chunks allocated");
 2109 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
 2110         "Current number of pv entry chunks frees");
 2111 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
 2112         "Number of times tried to get a chunk page but failed.");
 2113 
 2114 static long pv_entry_frees, pv_entry_allocs;
 2115 static int pv_entry_spare;
 2116 
 2117 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
 2118         "Current number of pv entry frees");
 2119 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
 2120         "Current number of pv entry allocs");
 2121 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
 2122         "Current number of spare pv entries");
 2123 
 2124 static int pmap_collect_inactive, pmap_collect_active;
 2125 
 2126 SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
 2127         "Current number times pmap_collect called on inactive queue");
 2128 SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
 2129         "Current number times pmap_collect called on active queue");
 2130 #endif
 2131 
 2132 /*
 2133  * We are in a serious low memory condition.  Resort to
 2134  * drastic measures to free some pages so we can allocate
 2135  * another pv entry chunk.  This is normally called to
 2136  * unmap inactive pages, and if necessary, active pages.
 2137  */
 2138 static void
 2139 pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
 2140 {
 2141         struct md_page *pvh;
 2142         pd_entry_t *pde;
 2143         pmap_t pmap;
 2144         pt_entry_t *pte, tpte;
 2145         pv_entry_t next_pv, pv;
 2146         vm_offset_t va;
 2147         vm_page_t m, free;
 2148 
 2149         sched_pin();
 2150         TAILQ_FOREACH(m, &vpq->pl, pageq) {
 2151                 if (m->hold_count || m->busy)
 2152                         continue;
 2153                 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
 2154                         va = pv->pv_va;
 2155                         pmap = PV_PMAP(pv);
 2156                         /* Avoid deadlock and lock recursion. */
 2157                         if (pmap > locked_pmap)
 2158                                 PMAP_LOCK(pmap);
 2159                         else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
 2160                                 continue;
 2161                         pmap->pm_stats.resident_count--;
 2162                         pde = pmap_pde(pmap, va);
 2163                         KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
 2164                             " a 4mpage in page %p's pv list", m));
 2165                         pte = pmap_pte_quick(pmap, va);
 2166                         tpte = pte_load_clear(pte);
 2167                         KASSERT((tpte & PG_W) == 0,
 2168                             ("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
 2169                         if (tpte & PG_A)
 2170                                 vm_page_flag_set(m, PG_REFERENCED);
 2171                         if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 2172                                 vm_page_dirty(m);
 2173                         free = NULL;
 2174                         pmap_unuse_pt(pmap, va, &free);
 2175                         pmap_invalidate_page(pmap, va);
 2176                         pmap_free_zero_pages(free);
 2177                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2178                         if (TAILQ_EMPTY(&m->md.pv_list)) {
 2179                                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 2180                                 if (TAILQ_EMPTY(&pvh->pv_list))
 2181                                         vm_page_flag_clear(m, PG_WRITEABLE);
 2182                         }
 2183                         free_pv_entry(pmap, pv);
 2184                         if (pmap != locked_pmap)
 2185                                 PMAP_UNLOCK(pmap);
 2186                 }
 2187         }
 2188         sched_unpin();
 2189 }
 2190 
 2191 
 2192 /*
 2193  * free the pv_entry back to the free list
 2194  */
 2195 static void
 2196 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 2197 {
 2198         vm_page_t m;
 2199         struct pv_chunk *pc;
 2200         int idx, field, bit;
 2201 
 2202         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2203         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2204         PV_STAT(pv_entry_frees++);
 2205         PV_STAT(pv_entry_spare++);
 2206         pv_entry_count--;
 2207         pc = pv_to_chunk(pv);
 2208         idx = pv - &pc->pc_pventry[0];
 2209         field = idx / 32;
 2210         bit = idx % 32;
 2211         pc->pc_map[field] |= 1ul << bit;
 2212         /* move to head of list */
 2213         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 2214         for (idx = 0; idx < _NPCM; idx++)
 2215                 if (pc->pc_map[idx] != pc_freemask[idx]) {
 2216                         TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 2217                         return;
 2218                 }
 2219         PV_STAT(pv_entry_spare -= _NPCPV);
 2220         PV_STAT(pc_chunk_count--);
 2221         PV_STAT(pc_chunk_frees++);
 2222         /* entire chunk is free, return it */
 2223         m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
 2224         pmap_qremove((vm_offset_t)pc, 1);
 2225         vm_page_unwire(m, 0);
 2226         vm_page_free(m);
 2227         pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
 2228 }
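
/*
 * Added note, not in the original source, as a worked example of the
 * index arithmetic above: freeing the 71st entry of a chunk (idx == 70)
 * gives field = 70 / 32 = 2 and bit = 70 % 32 = 6, so bit 6 of pc_map[2]
 * is set to mark that slot free again.
 */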
 2229 
 2230 /*
 2231  * get a new pv_entry, allocating a block from the system
 2232  * when needed.
 2233  */
 2234 static pv_entry_t
 2235 get_pv_entry(pmap_t pmap, int try)
 2236 {
 2237         static const struct timeval printinterval = { 60, 0 };
 2238         static struct timeval lastprint;
 2239         static vm_pindex_t colour;
 2240         struct vpgqueues *pq;
 2241         int bit, field;
 2242         pv_entry_t pv;
 2243         struct pv_chunk *pc;
 2244         vm_page_t m;
 2245 
 2246         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2247         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2248         PV_STAT(pv_entry_allocs++);
 2249         pv_entry_count++;
 2250         if (pv_entry_count > pv_entry_high_water)
 2251                 if (ratecheck(&lastprint, &printinterval))
 2252                         printf("Approaching the limit on PV entries, consider "
 2253                             "increasing either the vm.pmap.shpgperproc or the "
 2254                             "vm.pmap.pv_entry_max tunable.\n");
 2255         pq = NULL;
 2256 retry:
 2257         pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 2258         if (pc != NULL) {
 2259                 for (field = 0; field < _NPCM; field++) {
 2260                         if (pc->pc_map[field]) {
 2261                                 bit = bsfl(pc->pc_map[field]);
 2262                                 break;
 2263                         }
 2264                 }
 2265                 if (field < _NPCM) {
 2266                         pv = &pc->pc_pventry[field * 32 + bit];
 2267                         pc->pc_map[field] &= ~(1ul << bit);
 2268                         /* If this was the last item, move it to tail */
 2269                         for (field = 0; field < _NPCM; field++)
 2270                                 if (pc->pc_map[field] != 0) {
 2271                                         PV_STAT(pv_entry_spare--);
 2272                                         return (pv);    /* not full, return */
 2273                                 }
 2274                         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 2275                         TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 2276                         PV_STAT(pv_entry_spare--);
 2277                         return (pv);
 2278                 }
 2279         }
 2280         /*
 2281          * Access to the ptelist "pv_vafree" is synchronized by the page
 2282          * queues lock.  If "pv_vafree" is currently non-empty, it will
 2283          * remain non-empty until pmap_ptelist_alloc() completes.
 2284          */
 2285         if (pv_vafree == 0 || (m = vm_page_alloc(NULL, colour, (pq ==
 2286             &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) |
 2287             VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 2288                 if (try) {
 2289                         pv_entry_count--;
 2290                         PV_STAT(pc_chunk_tryfail++);
 2291                         return (NULL);
 2292                 }
 2293                 /*
 2294                  * Reclaim pv entries: At first, destroy mappings to
 2295                  * inactive pages.  After that, if a pv chunk entry
 2296                  * is still needed, destroy mappings to active pages.
 2297                  */
 2298                 if (pq == NULL) {
 2299                         PV_STAT(pmap_collect_inactive++);
 2300                         pq = &vm_page_queues[PQ_INACTIVE];
 2301                 } else if (pq == &vm_page_queues[PQ_INACTIVE]) {
 2302                         PV_STAT(pmap_collect_active++);
 2303                         pq = &vm_page_queues[PQ_ACTIVE];
 2304                 } else
 2305                         panic("get_pv_entry: increase vm.pmap.shpgperproc");
 2306                 pmap_collect(pmap, pq);
 2307                 goto retry;
 2308         }
 2309         PV_STAT(pc_chunk_count++);
 2310         PV_STAT(pc_chunk_allocs++);
 2311         colour++;
 2312         pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
 2313         pmap_qenter((vm_offset_t)pc, &m, 1);
 2314         pc->pc_pmap = pmap;
 2315         pc->pc_map[0] = pc_freemask[0] & ~1ul;  /* preallocated bit 0 */
 2316         for (field = 1; field < _NPCM; field++)
 2317                 pc->pc_map[field] = pc_freemask[field];
 2318         pv = &pc->pc_pventry[0];
 2319         TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 2320         PV_STAT(pv_entry_spare += _NPCPV - 1);
 2321         return (pv);
 2322 }
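
/*
 * Added note, not in the original source: the reclamation path above
 * escalates in two steps, calling pmap_collect() first on the inactive
 * queue and then on the active queue; if a chunk page still cannot be
 * allocated, the system panics with the same tuning advice
 * (vm.pmap.shpgperproc) given by the rate-limited warning above.
 */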
 2323 
 2324 static __inline pv_entry_t
 2325 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 2326 {
 2327         pv_entry_t pv;
 2328 
 2329         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2330         TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 2331                 if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
 2332                         TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
 2333                         break;
 2334                 }
 2335         }
 2336         return (pv);
 2337 }
 2338 
 2339 static void
 2340 pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 2341 {
 2342         struct md_page *pvh;
 2343         pv_entry_t pv;
 2344         vm_offset_t va_last;
 2345         vm_page_t m;
 2346 
 2347         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2348         KASSERT((pa & PDRMASK) == 0,
 2349             ("pmap_pv_demote_pde: pa is not 4mpage aligned"));
 2350 
 2351         /*
 2352          * Transfer the 4mpage's pv entry for this mapping to the first
 2353          * page's pv list.
 2354          */
 2355         pvh = pa_to_pvh(pa);
 2356         va = trunc_4mpage(va);
 2357         pv = pmap_pvh_remove(pvh, pmap, va);
 2358         KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found"));
 2359         m = PHYS_TO_VM_PAGE(pa);
 2360         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 2361         /* Instantiate the remaining NPTEPG - 1 pv entries. */
 2362         va_last = va + NBPDR - PAGE_SIZE;
 2363         do {
 2364                 m++;
 2365                 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 2366                     ("pmap_pv_demote_pde: page %p is not managed", m));
 2367                 va += PAGE_SIZE;
 2368                 pmap_insert_entry(pmap, va, m);
 2369         } while (va < va_last);
 2370 }
 2371 
 2372 static void
 2373 pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 2374 {
 2375         struct md_page *pvh;
 2376         pv_entry_t pv;
 2377         vm_offset_t va_last;
 2378         vm_page_t m;
 2379 
 2380         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2381         KASSERT((pa & PDRMASK) == 0,
 2382             ("pmap_pv_promote_pde: pa is not 4mpage aligned"));
 2383 
 2384         /*
 2385          * Transfer the first page's pv entry for this mapping to the
 2386          * 4mpage's pv list.  Aside from avoiding the cost of a call
 2387          * to get_pv_entry(), a transfer avoids the possibility that
 2388          * get_pv_entry() calls pmap_collect() and that pmap_collect()
 2389          * removes one of the mappings that is being promoted.
 2390          */
 2391         m = PHYS_TO_VM_PAGE(pa);
 2392         va = trunc_4mpage(va);
 2393         pv = pmap_pvh_remove(&m->md, pmap, va);
 2394         KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found"));
 2395         pvh = pa_to_pvh(pa);
 2396         TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
 2397         /* Free the remaining NPTEPG - 1 pv entries. */
 2398         va_last = va + NBPDR - PAGE_SIZE;
 2399         do {
 2400                 m++;
 2401                 va += PAGE_SIZE;
 2402                 pmap_pvh_free(&m->md, pmap, va);
 2403         } while (va < va_last);
 2404 }
 2405 
 2406 static void
 2407 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 2408 {
 2409         pv_entry_t pv;
 2410 
 2411         pv = pmap_pvh_remove(pvh, pmap, va);
 2412         KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
 2413         free_pv_entry(pmap, pv);
 2414 }
 2415 
 2416 static void
 2417 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 2418 {
 2419         struct md_page *pvh;
 2420 
 2421         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2422         pmap_pvh_free(&m->md, pmap, va);
 2423         if (TAILQ_EMPTY(&m->md.pv_list)) {
 2424                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 2425                 if (TAILQ_EMPTY(&pvh->pv_list))
 2426                         vm_page_flag_clear(m, PG_WRITEABLE);
 2427         }
 2428 }
 2429 
 2430 /*
 2431  * Create a pv entry for page at pa for
 2432  * (pmap, va).
 2433  */
 2434 static void
 2435 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 2436 {
 2437         pv_entry_t pv;
 2438 
 2439         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2440         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2441         pv = get_pv_entry(pmap, FALSE);
 2442         pv->pv_va = va;
 2443         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 2444 }
 2445 
 2446 /*
 2447  * Conditionally create a pv entry.
 2448  */
 2449 static boolean_t
 2450 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 2451 {
 2452         pv_entry_t pv;
 2453 
 2454         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2455         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2456         if (pv_entry_count < pv_entry_high_water && 
 2457             (pv = get_pv_entry(pmap, TRUE)) != NULL) {
 2458                 pv->pv_va = va;
 2459                 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 2460                 return (TRUE);
 2461         } else
 2462                 return (FALSE);
 2463 }
 2464 
 2465 /*
 2466  * Create the pv entries for each of the pages within a superpage.
 2467  */
 2468 static boolean_t
 2469 pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 2470 {
 2471         struct md_page *pvh;
 2472         pv_entry_t pv;
 2473 
 2474         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2475         if (pv_entry_count < pv_entry_high_water && 
 2476             (pv = get_pv_entry(pmap, TRUE)) != NULL) {
 2477                 pv->pv_va = va;
 2478                 pvh = pa_to_pvh(pa);
 2479                 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
 2480                 return (TRUE);
 2481         } else
 2482                 return (FALSE);
 2483 }
 2484 
 2485 /*
 2486  * Fills a page table page with mappings to consecutive physical pages.
 2487  */
 2488 static void
 2489 pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte)
 2490 {
 2491         pt_entry_t *pte;
 2492 
 2493         for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
 2494                 *pte = newpte;  
 2495                 newpte += PAGE_SIZE;
 2496         }
 2497 }
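
/*
 * Added note, not in the original source: pmap_demote_pde() below uses
 * this to populate all NPTEPG entries of a page table page (1024 4KB
 * mappings without PAE, 512 with PAE), covering the same physical range
 * as the superpage being demoted.
 */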
 2498 
 2499 /*
 2500  * Tries to demote a 2- or 4MB page mapping.  If demotion fails, the
 2501  * 2- or 4MB page mapping is invalidated.
 2502  */
 2503 static boolean_t
 2504 pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 2505 {
 2506         pd_entry_t newpde, oldpde;
 2507         pt_entry_t *firstpte, newpte;
 2508         vm_paddr_t mptepa;
 2509         vm_page_t free, mpte;
 2510 
 2511         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2512         oldpde = *pde;
 2513         KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
 2514             ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
 2515         mpte = pmap_lookup_pt_page(pmap, va);
 2516         if (mpte != NULL)
 2517                 pmap_remove_pt_page(pmap, mpte);
 2518         else {
 2519                 KASSERT((oldpde & PG_W) == 0,
 2520                     ("pmap_demote_pde: page table page for a wired mapping"
 2521                     " is missing"));
 2522 
 2523                 /*
 2524                  * Invalidate the 2- or 4MB page mapping and return
 2525                  * "failure" if the mapping was never accessed or the
 2526                  * allocation of the new page table page fails.
 2527                  */
 2528                 if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
 2529                     va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL |
 2530                     VM_ALLOC_WIRED)) == NULL) {
 2531                         free = NULL;
 2532                         pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
 2533                         pmap_invalidate_page(pmap, trunc_4mpage(va));
 2534                         pmap_free_zero_pages(free);
 2535                         CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
 2536                             " in pmap %p", va, pmap);
 2537                         return (FALSE);
 2538                 }
 2539                 if (va < VM_MAXUSER_ADDRESS)
 2540                         pmap->pm_stats.resident_count++;
 2541         }
 2542         mptepa = VM_PAGE_TO_PHYS(mpte);
 2543 
 2544         /*
 2545          * If the page mapping is in the kernel's address space, then the
 2546          * KPTmap can provide access to the page table page.  Otherwise,
 2547          * temporarily map the page table page (mpte) into the kernel's
 2548          * address space at either PADDR1 or PADDR2. 
 2549          */
 2550         if (va >= KERNBASE)
 2551                 firstpte = &KPTmap[i386_btop(trunc_4mpage(va))];
 2552         else if (curthread->td_pinned > 0 && mtx_owned(&vm_page_queue_mtx)) {
 2553                 if ((*PMAP1 & PG_FRAME) != mptepa) {
 2554                         *PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M;
 2555 #ifdef SMP
 2556                         PMAP1cpu = PCPU_GET(cpuid);
 2557 #endif
 2558                         invlcaddr(PADDR1);
 2559                         PMAP1changed++;
 2560                 } else
 2561 #ifdef SMP
 2562                 if (PMAP1cpu != PCPU_GET(cpuid)) {
 2563                         PMAP1cpu = PCPU_GET(cpuid);
 2564                         invlcaddr(PADDR1);
 2565                         PMAP1changedcpu++;
 2566                 } else
 2567 #endif
 2568                         PMAP1unchanged++;
 2569                 firstpte = PADDR1;
 2570         } else {
 2571                 mtx_lock(&PMAP2mutex);
 2572                 if ((*PMAP2 & PG_FRAME) != mptepa) {
 2573                         *PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M;
 2574                         pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
 2575                 }
 2576                 firstpte = PADDR2;
 2577         }
 2578         newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
 2579         KASSERT((oldpde & PG_A) != 0,
 2580             ("pmap_demote_pde: oldpde is missing PG_A"));
 2581         KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
 2582             ("pmap_demote_pde: oldpde is missing PG_M"));
 2583         newpte = oldpde & ~PG_PS;
 2584         if ((newpte & PG_PDE_PAT) != 0)
 2585                 newpte ^= PG_PDE_PAT | PG_PTE_PAT;
 2586 
 2587         /*
 2588          * If the page table page is new, initialize it.
 2589          */
 2590         if (mpte->wire_count == 1) {
 2591                 mpte->wire_count = NPTEPG;
 2592                 pmap_fill_ptp(firstpte, newpte);
 2593         }
 2594         KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
 2595             ("pmap_demote_pde: firstpte and newpte map different physical"
 2596             " addresses"));
 2597 
 2598         /*
 2599          * If the mapping has changed attributes, update the page table
 2600          * entries.
 2601          */ 
 2602         if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
 2603                 pmap_fill_ptp(firstpte, newpte);
 2604         
 2605         /*
 2606          * Demote the mapping.  This pmap is locked.  The old PDE has
 2607          * PG_A set.  If the old PDE has PG_RW set, it also has PG_M
 2608          * set.  Thus, there is no danger of a race with another
 2609          * processor changing the setting of PG_A and/or PG_M between
 2610          * the read above and the store below. 
 2611          */
 2612         if (workaround_erratum383)
 2613                 pmap_update_pde(pmap, va, pde, newpde);
 2614         else if (pmap == kernel_pmap)
 2615                 pmap_kenter_pde(va, newpde);
 2616         else
 2617                 pde_store(pde, newpde); 
 2618         if (firstpte == PADDR2)
 2619                 mtx_unlock(&PMAP2mutex);
 2620 
 2621         /*
 2622          * Invalidate the recursive mapping of the page table page.
 2623          */
 2624         pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
 2625 
 2626         /*
 2627          * Demote the pv entry.  This depends on the earlier demotion
 2628          * of the mapping.  Specifically, the (re)creation of a per-
 2629          * page pv entry might trigger the execution of pmap_collect(),
 2630          * which might reclaim a newly (re)created per-page pv entry
 2631          * and destroy the associated mapping.  In order to destroy
 2632          * the mapping, the PDE must have already changed from mapping
 2633          * the 2mpage to referencing the page table page.
 2634          */
 2635         if ((oldpde & PG_MANAGED) != 0)
 2636                 pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME);
 2637 
 2638         pmap_pde_demotions++;
 2639         CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x"
 2640             " in pmap %p", va, pmap);
 2641         return (TRUE);
 2642 }
 2643 
 2644 /*
 2645  * pmap_remove_pde: do the things to unmap a superpage in a process
 2646  */
 2647 static void
 2648 pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
 2649     vm_page_t *free)
 2650 {
 2651         struct md_page *pvh;
 2652         pd_entry_t oldpde;
 2653         vm_offset_t eva, va;
 2654         vm_page_t m, mpte;
 2655 
 2656         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2657         KASSERT((sva & PDRMASK) == 0,
 2658             ("pmap_remove_pde: sva is not 4mpage aligned"));
 2659         oldpde = pte_load_clear(pdq);
 2660         if (oldpde & PG_W)
 2661                 pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
 2662 
 2663         /*
 2664          * Machines that don't support invlpg also don't support
 2665          * PG_G.
 2666          */
 2667         if (oldpde & PG_G)
 2668                 pmap_invalidate_page(kernel_pmap, sva);
 2669         pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 2670         if (oldpde & PG_MANAGED) {
 2671                 pvh = pa_to_pvh(oldpde & PG_PS_FRAME);
 2672                 pmap_pvh_free(pvh, pmap, sva);
 2673                 eva = sva + NBPDR;
 2674                 for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
 2675                     va < eva; va += PAGE_SIZE, m++) {
 2676                         if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
 2677                                 vm_page_dirty(m);
 2678                         if (oldpde & PG_A)
 2679                                 vm_page_flag_set(m, PG_REFERENCED);
 2680                         if (TAILQ_EMPTY(&m->md.pv_list) &&
 2681                             TAILQ_EMPTY(&pvh->pv_list))
 2682                                 vm_page_flag_clear(m, PG_WRITEABLE);
 2683                 }
 2684         }
 2685         if (pmap == kernel_pmap) {
 2686                 if (!pmap_demote_pde(pmap, pdq, sva))
 2687                         panic("pmap_remove_pde: failed demotion");
 2688         } else {
 2689                 mpte = pmap_lookup_pt_page(pmap, sva);
 2690                 if (mpte != NULL) {
 2691                         pmap_remove_pt_page(pmap, mpte);
 2692                         pmap->pm_stats.resident_count--;
 2693                         KASSERT(mpte->wire_count == NPTEPG,
 2694                             ("pmap_remove_pde: pte page wire count error"));
 2695                         mpte->wire_count = 0;
 2696                         pmap_add_delayed_free_list(mpte, free, FALSE);
 2697                         atomic_subtract_int(&cnt.v_wire_count, 1);
 2698                 }
 2699         }
 2700 }
 2701 
 2702 /*
 2703  * pmap_remove_pte: do the things to unmap a page in a process
 2704  */
 2705 static int
 2706 pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
 2707 {
 2708         pt_entry_t oldpte;
 2709         vm_page_t m;
 2710 
 2711         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2712         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2713         oldpte = pte_load_clear(ptq);
 2714         if (oldpte & PG_W)
 2715                 pmap->pm_stats.wired_count -= 1;
 2716         /*
 2717          * Machines that don't support invlpg also don't support
 2718          * PG_G.
 2719          */
 2720         if (oldpte & PG_G)
 2721                 pmap_invalidate_page(kernel_pmap, va);
 2722         pmap->pm_stats.resident_count -= 1;
 2723         if (oldpte & PG_MANAGED) {
 2724                 m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME);
 2725                 if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 2726                         vm_page_dirty(m);
 2727                 if (oldpte & PG_A)
 2728                         vm_page_flag_set(m, PG_REFERENCED);
 2729                 pmap_remove_entry(pmap, m, va);
 2730         }
 2731         return (pmap_unuse_pt(pmap, va, free));
 2732 }
 2733 
 2734 /*
 2735  * Remove a single page from a process address space
 2736  */
 2737 static void
 2738 pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
 2739 {
 2740         pt_entry_t *pte;
 2741 
 2742         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2743         KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
 2744         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2745         if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0)
 2746                 return;
 2747         pmap_remove_pte(pmap, pte, va, free);
 2748         pmap_invalidate_page(pmap, va);
 2749 }
 2750 
 2751 /*
 2752  *      Remove the given range of addresses from the specified map.
 2753  *
 2754  *      It is assumed that the start and end are properly
 2755  *      rounded to the page size.
 2756  */
 2757 void
 2758 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 2759 {
 2760         vm_offset_t pdnxt;
 2761         pd_entry_t ptpaddr;
 2762         pt_entry_t *pte;
 2763         vm_page_t free = NULL;
 2764         int anyvalid;
 2765 
 2766         /*
 2767          * Perform an unsynchronized read.  This is, however, safe.
 2768          */
 2769         if (pmap->pm_stats.resident_count == 0)
 2770                 return;
 2771 
 2772         anyvalid = 0;
 2773 
 2774         vm_page_lock_queues();
 2775         sched_pin();
 2776         PMAP_LOCK(pmap);
 2777 
 2778         /*
 2779          * Special handling for removing a single page: it is a
 2780          * very common operation, and it lets us short-circuit
 2781          * some code.
 2782          */
 2783         if ((sva + PAGE_SIZE == eva) && 
 2784             ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
 2785                 pmap_remove_page(pmap, sva, &free);
 2786                 goto out;
 2787         }
 2788 
 2789         for (; sva < eva; sva = pdnxt) {
 2790                 unsigned pdirindex;
 2791 
 2792                 /*
 2793                  * Calculate index for next page table.
 2794                  */
 2795                 pdnxt = (sva + NBPDR) & ~PDRMASK;
 2796                 if (pdnxt < sva)
 2797                         pdnxt = eva;
 2798                 if (pmap->pm_stats.resident_count == 0)
 2799                         break;
 2800 
 2801                 pdirindex = sva >> PDRSHIFT;
 2802                 ptpaddr = pmap->pm_pdir[pdirindex];
 2803 
 2804                 /*
 2805                  * Weed out invalid mappings. Note: we assume that the page
 2806                  * directory table is always allocated, and in kernel virtual.
 2807                  */
 2808                 if (ptpaddr == 0)
 2809                         continue;
 2810 
 2811                 /*
 2812                  * Check for large page.
 2813                  */
 2814                 if ((ptpaddr & PG_PS) != 0) {
 2815                         /*
 2816                          * Are we removing the entire large page?  If not,
 2817                          * demote the mapping and fall through.
 2818                          */
 2819                         if (sva + NBPDR == pdnxt && eva >= pdnxt) {
 2820                                 /*
 2821                                  * The TLB entry for a PG_G mapping is
 2822                                  * invalidated by pmap_remove_pde().
 2823                                  */
 2824                                 if ((ptpaddr & PG_G) == 0)
 2825                                         anyvalid = 1;
 2826                                 pmap_remove_pde(pmap,
 2827                                     &pmap->pm_pdir[pdirindex], sva, &free);
 2828                                 continue;
 2829                         } else if (!pmap_demote_pde(pmap,
 2830                             &pmap->pm_pdir[pdirindex], sva)) {
 2831                                 /* The large page mapping was destroyed. */
 2832                                 continue;
 2833                         }
 2834                 }
 2835 
 2836                 /*
 2837                  * Limit our scan to either the end of the va represented
 2838                  * by the current page table page, or to the end of the
 2839                  * range being removed.
 2840                  */
 2841                 if (pdnxt > eva)
 2842                         pdnxt = eva;
 2843 
 2844                 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
 2845                     sva += PAGE_SIZE) {
 2846                         if (*pte == 0)
 2847                                 continue;
 2848 
 2849                         /*
 2850                          * The TLB entry for a PG_G mapping is invalidated
 2851                          * by pmap_remove_pte().
 2852                          */
 2853                         if ((*pte & PG_G) == 0)
 2854                                 anyvalid = 1;
 2855                         if (pmap_remove_pte(pmap, pte, sva, &free))
 2856                                 break;
 2857                 }
 2858         }
 2859 out:
 2860         sched_unpin();
 2861         if (anyvalid)
 2862                 pmap_invalidate_all(pmap);
 2863         vm_page_unlock_queues();
 2864         PMAP_UNLOCK(pmap);
 2865         pmap_free_zero_pages(free);
 2866 }
 2867 
 2868 /*
 2869  *      Routine:        pmap_remove_all
 2870  *      Function:
 2871  *              Removes this physical page from
 2872  *              all physical maps in which it resides.
 2873  *              Reflects back modify bits to the pager.
 2874  *
 2875  *      Notes:
 2876  *              Original versions of this routine were very
 2877  *              inefficient because they iteratively called
 2878  *              pmap_remove (slow...)
 2879  */
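      /*
       * A note on the approach used below: any 2- or 4MB mappings that
       * include the page are first demoted, after which every remaining 4KB
       * mapping on the page's pv list is removed individually, with modified
       * bits propagated back to the vm_page and PG_WRITEABLE cleared last.
       */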
 2880 
 2881 void
 2882 pmap_remove_all(vm_page_t m)
 2883 {
 2884         struct md_page *pvh;
 2885         pv_entry_t pv;
 2886         pmap_t pmap;
 2887         pt_entry_t *pte, tpte;
 2888         pd_entry_t *pde;
 2889         vm_offset_t va;
 2890         vm_page_t free;
 2891 
 2892         KASSERT((m->flags & PG_FICTITIOUS) == 0,
 2893             ("pmap_remove_all: page %p is fictitious", m));
 2894         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2895         sched_pin();
 2896         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 2897         while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
 2898                 va = pv->pv_va;
 2899                 pmap = PV_PMAP(pv);
 2900                 PMAP_LOCK(pmap);
 2901                 pde = pmap_pde(pmap, va);
 2902                 (void)pmap_demote_pde(pmap, pde, va);
 2903                 PMAP_UNLOCK(pmap);
 2904         }
 2905         while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 2906                 pmap = PV_PMAP(pv);
 2907                 PMAP_LOCK(pmap);
 2908                 pmap->pm_stats.resident_count--;
 2909                 pde = pmap_pde(pmap, pv->pv_va);
 2910                 KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found"
 2911                     " a 4mpage in page %p's pv list", m));
 2912                 pte = pmap_pte_quick(pmap, pv->pv_va);
 2913                 tpte = pte_load_clear(pte);
 2914                 if (tpte & PG_W)
 2915                         pmap->pm_stats.wired_count--;
 2916                 if (tpte & PG_A)
 2917                         vm_page_flag_set(m, PG_REFERENCED);
 2918 
 2919                 /*
 2920                  * Update the vm_page_t clean and reference bits.
 2921                  */
 2922                 if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 2923                         vm_page_dirty(m);
 2924                 free = NULL;
 2925                 pmap_unuse_pt(pmap, pv->pv_va, &free);
 2926                 pmap_invalidate_page(pmap, pv->pv_va);
 2927                 pmap_free_zero_pages(free);
 2928                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2929                 free_pv_entry(pmap, pv);
 2930                 PMAP_UNLOCK(pmap);
 2931         }
 2932         vm_page_flag_clear(m, PG_WRITEABLE);
 2933         sched_unpin();
 2934 }
 2935 
 2936 /*
 2937  * pmap_protect_pde: apply the requested protection to a 4mpage mapping
 2938  */
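      /*
       * The PDE is rewritten with a compare-and-set loop so that accessed
       * and modified bits set concurrently by the hardware are not lost.
       * Global (PG_G) mappings are invalidated immediately; for the rest the
       * return value tells the caller that an invalidation is still needed.
       */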
 2939 static boolean_t
 2940 pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot)
 2941 {
 2942         pd_entry_t newpde, oldpde;
 2943         vm_offset_t eva, va;
 2944         vm_page_t m;
 2945         boolean_t anychanged;
 2946 
 2947         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2948         KASSERT((sva & PDRMASK) == 0,
 2949             ("pmap_protect_pde: sva is not 4mpage aligned"));
 2950         anychanged = FALSE;
 2951 retry:
 2952         oldpde = newpde = *pde;
 2953         if (oldpde & PG_MANAGED) {
 2954                 eva = sva + NBPDR;
 2955                 for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
 2956                     va < eva; va += PAGE_SIZE, m++) {
 2957                         /*
 2958                          * In contrast to the analogous operation on a 4KB page
 2959                          * mapping, the mapping's PG_A flag is not cleared and
 2960                          * the page's PG_REFERENCED flag is not set.  The
 2961                          * reason is that pmap_demote_pde() expects that a 2/4MB
 2962                          * page mapping with a stored page table page has PG_A
 2963                          * set.
 2964                          */
 2965                         if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
 2966                                 vm_page_dirty(m);
 2967                 }
 2968         }
 2969         if ((prot & VM_PROT_WRITE) == 0)
 2970                 newpde &= ~(PG_RW | PG_M);
 2971 #ifdef PAE
 2972         if ((prot & VM_PROT_EXECUTE) == 0)
 2973                 newpde |= pg_nx;
 2974 #endif
 2975         if (newpde != oldpde) {
 2976                 if (!pde_cmpset(pde, oldpde, newpde))
 2977                         goto retry;
 2978                 if (oldpde & PG_G)
 2979                         pmap_invalidate_page(pmap, sva);
 2980                 else
 2981                         anychanged = TRUE;
 2982         }
 2983         return (anychanged);
 2984 }
 2985 
 2986 /*
 2987  *      Set the physical protection on the
 2988  *      specified range of this map as requested.
 2989  */
 2990 void
 2991 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 2992 {
 2993         vm_offset_t pdnxt;
 2994         pd_entry_t ptpaddr;
 2995         pt_entry_t *pte;
 2996         int anychanged;
 2997 
 2998         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 2999                 pmap_remove(pmap, sva, eva);
 3000                 return;
 3001         }
 3002 
 3003 #ifdef PAE
 3004         if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
 3005             (VM_PROT_WRITE|VM_PROT_EXECUTE))
 3006                 return;
 3007 #else
 3008         if (prot & VM_PROT_WRITE)
 3009                 return;
 3010 #endif
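              /*
               * From here on, removing write access clears PG_RW and PG_M
               * from each affected entry and, under PAE, removing execute
               * access sets the pg_nx bit; entries that would not change
               * are left untouched.
               */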
 3011 
 3012         anychanged = 0;
 3013 
 3014         vm_page_lock_queues();
 3015         sched_pin();
 3016         PMAP_LOCK(pmap);
 3017         for (; sva < eva; sva = pdnxt) {
 3018                 pt_entry_t obits, pbits;
 3019                 unsigned pdirindex;
 3020 
 3021                 pdnxt = (sva + NBPDR) & ~PDRMASK;
 3022                 if (pdnxt < sva)
 3023                         pdnxt = eva;
 3024 
 3025                 pdirindex = sva >> PDRSHIFT;
 3026                 ptpaddr = pmap->pm_pdir[pdirindex];
 3027 
 3028                 /*
 3029                  * Weed out invalid mappings. Note: we assume that the page
 3030                  * directory table is always allocated, and in kernel virtual.
 3031                  */
 3032                 if (ptpaddr == 0)
 3033                         continue;
 3034 
 3035                 /*
 3036                  * Check for large page.
 3037                  */
 3038                 if ((ptpaddr & PG_PS) != 0) {
 3039                         /*
 3040                          * Are we protecting the entire large page?  If not,
 3041                          * demote the mapping and fall through.
 3042                          */
 3043                         if (sva + NBPDR == pdnxt && eva >= pdnxt) {
 3044                                 /*
 3045                                  * The TLB entry for a PG_G mapping is
 3046                                  * invalidated by pmap_protect_pde().
 3047                                  */
 3048                                 if (pmap_protect_pde(pmap,
 3049                                     &pmap->pm_pdir[pdirindex], sva, prot))
 3050                                         anychanged = 1;
 3051                                 continue;
 3052                         } else if (!pmap_demote_pde(pmap,
 3053                             &pmap->pm_pdir[pdirindex], sva)) {
 3054                                 /* The large page mapping was destroyed. */
 3055                                 continue;
 3056                         }
 3057                 }
 3058 
 3059                 if (pdnxt > eva)
 3060                         pdnxt = eva;
 3061 
 3062                 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
 3063                     sva += PAGE_SIZE) {
 3064                         vm_page_t m;
 3065 
 3066 retry:
 3067                         /*
 3068                          * Regardless of whether a pte is 32 or 64 bits in
 3069                          * size, PG_RW, PG_A, and PG_M are among the least
 3070                          * significant 32 bits.
 3071                          */
 3072                         obits = pbits = *pte;
 3073                         if ((pbits & PG_V) == 0)
 3074                                 continue;
 3075                         if (pbits & PG_MANAGED) {
 3076                                 m = NULL;
 3077                                 if (pbits & PG_A) {
 3078                                         m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
 3079                                         vm_page_flag_set(m, PG_REFERENCED);
 3080                                         pbits &= ~PG_A;
 3081                                 }
 3082                                 if ((pbits & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 3083                                         if (m == NULL)
 3084                                                 m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
 3085                                         vm_page_dirty(m);
 3086                                 }
 3087                         }
 3088 
 3089                         if ((prot & VM_PROT_WRITE) == 0)
 3090                                 pbits &= ~(PG_RW | PG_M);
 3091 #ifdef PAE
 3092                         if ((prot & VM_PROT_EXECUTE) == 0)
 3093                                 pbits |= pg_nx;
 3094 #endif
 3095 
 3096                         if (pbits != obits) {
 3097 #ifdef PAE
 3098                                 if (!atomic_cmpset_64(pte, obits, pbits))
 3099                                         goto retry;
 3100 #else
 3101                                 if (!atomic_cmpset_int((u_int *)pte, obits,
 3102                                     pbits))
 3103                                         goto retry;
 3104 #endif
 3105                                 if (obits & PG_G)
 3106                                         pmap_invalidate_page(pmap, sva);
 3107                                 else
 3108                                         anychanged = 1;
 3109                         }
 3110                 }
 3111         }
 3112         sched_unpin();
 3113         if (anychanged)
 3114                 pmap_invalidate_all(pmap);
 3115         vm_page_unlock_queues();
 3116         PMAP_UNLOCK(pmap);
 3117 }
 3118 
 3119 /*
 3120  * Tries to promote the 512 or 1024 contiguous 4KB page mappings that are
 3121  * within a single page table page (PTP) to a single 2- or 4MB page mapping.
 3122  * For promotion to occur, two conditions must be met: (1) the 4KB page
 3123  * mappings must map aligned, contiguous physical memory and (2) the 4KB page
 3124  * mappings must have identical characteristics.
 3125  *
 3126  * Managed (PG_MANAGED) mappings within the kernel address space are not
 3127  * promoted.  The reason is that kernel PDEs are replicated in each pmap but
 3128  * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel
 3129  * pmap.
 3130  */
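      /*
       * A worked sketch under the non-PAE constants (NPTEPG == 1024,
       * PAGE_SIZE == 4KB, NBPDR == 4MB): promoting the page table page that
       * maps 0x08400000-0x087fffff requires PTE 0 to map a 4MB-aligned
       * physical address pa, PTE 1 to map pa + 4KB, ..., and PTE 1023 to map
       * pa + 4MB - 4KB, with every PTE carrying the same protection, cache,
       * and PG_MANAGED attributes.
       */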
 3131 static void
 3132 pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 3133 {
 3134         pd_entry_t newpde;
 3135         pt_entry_t *firstpte, oldpte, pa, *pte;
 3136         vm_offset_t oldpteva;
 3137         vm_page_t mpte;
 3138 
 3139         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 3140 
 3141         /*
 3142          * Examine the first PTE in the specified PTP.  Abort if this PTE is
 3143          * either invalid, unused, or does not map the first 4KB physical page
 3144          * within a 2- or 4MB page.
 3145          */
 3146         firstpte = pmap_pte_quick(pmap, trunc_4mpage(va));
 3147 setpde:
 3148         newpde = *firstpte;
 3149         if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
 3150                 pmap_pde_p_failures++;
 3151                 CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 3152                     " in pmap %p", va, pmap);
 3153                 return;
 3154         }
 3155         if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) {
 3156                 pmap_pde_p_failures++;
 3157                 CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 3158                     " in pmap %p", va, pmap);
 3159                 return;
 3160         }
 3161         if ((newpde & (PG_M | PG_RW)) == PG_RW) {
 3162                 /*
 3163                  * When PG_M is already clear, PG_RW can be cleared without
 3164                  * a TLB invalidation.
 3165                  */
 3166                 if (!atomic_cmpset_int((u_int *)firstpte, newpde, newpde &
 3167                     ~PG_RW))  
 3168                         goto setpde;
 3169                 newpde &= ~PG_RW;
 3170         }
 3171 
 3172         /* 
 3173          * Examine each of the other PTEs in the specified PTP.  Abort if this
 3174          * PTE maps an unexpected 4KB physical page or does not have identical
 3175          * characteristics to the first PTE.
 3176          */
 3177         pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
 3178         for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
 3179 setpte:
 3180                 oldpte = *pte;
 3181                 if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
 3182                         pmap_pde_p_failures++;
 3183                         CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 3184                             " in pmap %p", va, pmap);
 3185                         return;
 3186                 }
 3187                 if ((oldpte & (PG_M | PG_RW)) == PG_RW) {
 3188                         /*
 3189                          * When PG_M is already clear, PG_RW can be cleared
 3190                          * without a TLB invalidation.
 3191                          */
 3192                         if (!atomic_cmpset_int((u_int *)pte, oldpte,
 3193                             oldpte & ~PG_RW))
 3194                                 goto setpte;
 3195                         oldpte &= ~PG_RW;
 3196                         oldpteva = (oldpte & PG_FRAME & PDRMASK) |
 3197                             (va & ~PDRMASK);
 3198                         CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x"
 3199                             " in pmap %p", oldpteva, pmap);
 3200                 }
 3201                 if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) {
 3202                         pmap_pde_p_failures++;
 3203                         CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 3204                             " in pmap %p", va, pmap);
 3205                         return;
 3206                 }
 3207                 pa -= PAGE_SIZE;
 3208         }
 3209 
 3210         /*
 3211          * Save the page table page in its current state until the PDE
 3212          * mapping the superpage is demoted by pmap_demote_pde() or
 3213          * destroyed by pmap_remove_pde(). 
 3214          */
 3215         mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
 3216         KASSERT(mpte >= vm_page_array &&
 3217             mpte < &vm_page_array[vm_page_array_size],
 3218             ("pmap_promote_pde: page table page is out of range"));
 3219         KASSERT(mpte->pindex == va >> PDRSHIFT,
 3220             ("pmap_promote_pde: page table page's pindex is wrong"));
 3221         pmap_insert_pt_page(pmap, mpte);
 3222 
 3223         /*
 3224          * Promote the pv entries.
 3225          */
 3226         if ((newpde & PG_MANAGED) != 0)
 3227                 pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME);
 3228 
 3229         /*
 3230          * Propagate the PAT index to its proper position.
 3231          */
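              /*
               * In a 4KB PTE the PAT bit is bit 7, but in a 2/4MB PDE bit 7
               * is PG_PS and the PAT bit lives at bit 12; the exclusive-or
               * below moves a set PG_PTE_PAT into the PG_PDE_PAT position.
               */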
 3232         if ((newpde & PG_PTE_PAT) != 0)
 3233                 newpde ^= PG_PDE_PAT | PG_PTE_PAT;
 3234 
 3235         /*
 3236          * Map the superpage.
 3237          */
 3238         if (workaround_erratum383)
 3239                 pmap_update_pde(pmap, va, pde, PG_PS | newpde);
 3240         else if (pmap == kernel_pmap)
 3241                 pmap_kenter_pde(va, PG_PS | newpde);
 3242         else
 3243                 pde_store(pde, PG_PS | newpde);
 3244 
 3245         pmap_pde_promotions++;
 3246         CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x"
 3247             " in pmap %p", va, pmap);
 3248 }
 3249 
 3250 /*
 3251  *      Insert the given physical page (p) at
 3252  *      the specified virtual address (v) in the
 3253  *      target physical map with the protection requested.
 3254  *
 3255  *      If specified, the page will be wired down, meaning
 3256  *      that the related pte can not be reclaimed.
 3257  *
 3258  *      NB:  This is the only routine which MAY NOT lazy-evaluate
 3259  *      or lose information.  That is, this routine must actually
 3260  *      insert this page into the given map NOW.
 3261  */
 3262 void
 3263 pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
 3264     vm_prot_t prot, boolean_t wired)
 3265 {
 3266         vm_paddr_t pa;
 3267         pd_entry_t *pde;
 3268         pt_entry_t *pte;
 3269         vm_paddr_t opa;
 3270         pt_entry_t origpte, newpte;
 3271         vm_page_t mpte, om;
 3272         boolean_t invlva;
 3273 
 3274         va = trunc_page(va);
 3275         KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
 3276         KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
 3277             ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va));
 3278 
 3279         mpte = NULL;
 3280 
 3281         vm_page_lock_queues();
 3282         PMAP_LOCK(pmap);
 3283         sched_pin();
 3284 
 3285         /*
 3286          * In the case that a page table page is not
 3287          * resident, we are creating it here.
 3288          */
 3289         if (va < VM_MAXUSER_ADDRESS) {
 3290                 mpte = pmap_allocpte(pmap, va, M_WAITOK);
 3291         }
 3292 
 3293         pde = pmap_pde(pmap, va);
 3294         if ((*pde & PG_PS) != 0)
 3295                 panic("pmap_enter: attempted pmap_enter on 4MB page");
 3296         pte = pmap_pte_quick(pmap, va);
 3297 
 3298         /*
 3299          * The page directory entry must be valid at this point; panic otherwise.
 3300          */
 3301         if (pte == NULL) {
 3302                 panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x",
 3303                         (uintmax_t)pmap->pm_pdir[PTDPTDI], va);
 3304         }
 3305 
 3306         pa = VM_PAGE_TO_PHYS(m);
 3307         om = NULL;
 3308         origpte = *pte;
 3309         opa = origpte & PG_FRAME;
 3310 
 3311         /*
 3312          * Mapping has not changed, must be protection or wiring change.
 3313          */
 3314         if (origpte && (opa == pa)) {
 3315                 /*
 3316                  * Wiring change, just update stats. We don't worry about
 3317                  * wiring PT pages as they remain resident as long as there
 3318                  * are valid mappings in them. Hence, if a user page is wired,
 3319                  * the PT page will be also.
 3320                  */
 3321                 if (wired && ((origpte & PG_W) == 0))
 3322                         pmap->pm_stats.wired_count++;
 3323                 else if (!wired && (origpte & PG_W))
 3324                         pmap->pm_stats.wired_count--;
 3325 
 3326                 /*
 3327                  * Remove extra pte reference
 3328                  */
 3329                 if (mpte)
 3330                         mpte->wire_count--;
 3331 
 3332                 /*
 3333                  * We might be turning off write access to the page,
 3334                  * so we go ahead and sense modify status.
 3335                  */
 3336                 if (origpte & PG_MANAGED) {
 3337                         om = m;
 3338                         pa |= PG_MANAGED;
 3339                 }
 3340                 goto validate;
 3341         } 
 3342         /*
 3343          * Mapping has changed, invalidate old range and fall through to
 3344          * handle validating new mapping.
 3345          */
 3346         if (opa) {
 3347                 if (origpte & PG_W)
 3348                         pmap->pm_stats.wired_count--;
 3349                 if (origpte & PG_MANAGED) {
 3350                         om = PHYS_TO_VM_PAGE(opa);
 3351                         pmap_remove_entry(pmap, om, va);
 3352                 }
 3353                 if (mpte != NULL) {
 3354                         mpte->wire_count--;
 3355                         KASSERT(mpte->wire_count > 0,
 3356                             ("pmap_enter: missing reference to page table page,"
 3357                              " va: 0x%x", va));
 3358                 }
 3359         } else
 3360                 pmap->pm_stats.resident_count++;
 3361 
 3362         /*
 3363          * Enter on the PV list if part of our managed memory.
 3364          */
 3365         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 3366                 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
 3367                     ("pmap_enter: managed mapping within the clean submap"));
 3368                 pmap_insert_entry(pmap, va, m);
 3369                 pa |= PG_MANAGED;
 3370         }
 3371 
 3372         /*
 3373          * Increment counters
 3374          */
 3375         if (wired)
 3376                 pmap->pm_stats.wired_count++;
 3377 
 3378 validate:
 3379         /*
 3380          * Now validate mapping with desired protection/wiring.
 3381          */
 3382         newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V);
 3383         if ((prot & VM_PROT_WRITE) != 0) {
 3384                 newpte |= PG_RW;
 3385                 vm_page_flag_set(m, PG_WRITEABLE);
 3386         }
 3387 #ifdef PAE
 3388         if ((prot & VM_PROT_EXECUTE) == 0)
 3389                 newpte |= pg_nx;
 3390 #endif
 3391         if (wired)
 3392                 newpte |= PG_W;
 3393         if (va < VM_MAXUSER_ADDRESS)
 3394                 newpte |= PG_U;
 3395         if (pmap == kernel_pmap)
 3396                 newpte |= pgeflag;
 3397 
 3398         /*
 3399          * if the mapping or permission bits are different, we need
 3400          * to update the pte.
 3401          */
 3402         if ((origpte & ~(PG_M|PG_A)) != newpte) {
 3403                 newpte |= PG_A;
 3404                 if ((access & VM_PROT_WRITE) != 0)
 3405                         newpte |= PG_M;
 3406                 if (origpte & PG_V) {
 3407                         invlva = FALSE;
 3408                         origpte = pte_load_store(pte, newpte);
 3409                         if (origpte & PG_A) {
 3410                                 if (origpte & PG_MANAGED)
 3411                                         vm_page_flag_set(om, PG_REFERENCED);
 3412                                 if (opa != VM_PAGE_TO_PHYS(m))
 3413                                         invlva = TRUE;
 3414 #ifdef PAE
 3415                                 if ((origpte & PG_NX) == 0 &&
 3416                                     (newpte & PG_NX) != 0)
 3417                                         invlva = TRUE;
 3418 #endif
 3419                         }
 3420                         if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 3421                                 if ((origpte & PG_MANAGED) != 0)
 3422                                         vm_page_dirty(om);
 3423                                 if ((prot & VM_PROT_WRITE) == 0)
 3424                                         invlva = TRUE;
 3425                         }
 3426                         if (invlva)
 3427                                 pmap_invalidate_page(pmap, va);
 3428                 } else
 3429                         pte_store(pte, newpte);
 3430         }
 3431 
 3432         /*
 3433          * If both the page table page and the reservation are fully
 3434          * populated, then attempt promotion.
 3435          */
 3436         if ((mpte == NULL || mpte->wire_count == NPTEPG) &&
 3437             pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0)
 3438                 pmap_promote_pde(pmap, pde, va);
 3439 
 3440         sched_unpin();
 3441         vm_page_unlock_queues();
 3442         PMAP_UNLOCK(pmap);
 3443 }
 3444 
 3445 /*
 3446  * Tries to create a 2- or 4MB page mapping.  Returns TRUE if successful and
 3447  * FALSE otherwise.  Fails if (1) a page table page cannot be allocated without
 3448  * blocking, (2) a mapping already exists at the specified virtual address, or
 3449  * (3) a pv entry cannot be allocated without reclaiming another pv entry. 
 3450  */
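      /*
       * This helper is used by pmap_enter_object() below when an aligned,
       * fully populated 2/4MB run of a reservation can be mapped with a
       * single page directory entry instead of individual 4KB mappings.
       */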
 3451 static boolean_t
 3452 pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 3453 {
 3454         pd_entry_t *pde, newpde;
 3455 
 3456         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 3457         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 3458         pde = pmap_pde(pmap, va);
 3459         if (*pde != 0) {
 3460                 CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 3461                     " in pmap %p", va, pmap);
 3462                 return (FALSE);
 3463         }
 3464         newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) |
 3465             PG_PS | PG_V;
 3466         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 3467                 newpde |= PG_MANAGED;
 3468 
 3469                 /*
 3470                  * Abort this mapping if its PV entry could not be created.
 3471                  */
 3472                 if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) {
 3473                         CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 3474                             " in pmap %p", va, pmap);
 3475                         return (FALSE);
 3476                 }
 3477         }
 3478 #ifdef PAE
 3479         if ((prot & VM_PROT_EXECUTE) == 0)
 3480                 newpde |= pg_nx;
 3481 #endif
 3482         if (va < VM_MAXUSER_ADDRESS)
 3483                 newpde |= PG_U;
 3484 
 3485         /*
 3486          * Increment counters.
 3487          */
 3488         pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
 3489 
 3490         /*
 3491          * Map the superpage.
 3492          */
 3493         pde_store(pde, newpde);
 3494 
 3495         pmap_pde_mappings++;
 3496         CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx"
 3497             " in pmap %p", va, pmap);
 3498         return (TRUE);
 3499 }
 3500 
 3501 /*
 3502  * Maps a sequence of resident pages belonging to the same object.
 3503  * The sequence begins with the given page m_start.  This page is
 3504  * mapped at the given virtual address start.  Each subsequent page is
 3505  * mapped at a virtual address that is offset from start by the same
 3506  * amount as the page is offset from m_start within the object.  The
 3507  * last page in the sequence is the page with the largest offset from
 3508  * m_start that can be mapped at a virtual address less than the given
 3509  * virtual address end.  Not every virtual page between start and end
 3510  * is mapped; only those for which a resident page exists with the
 3511  * corresponding offset from m_start are mapped.
 3512  */
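      /*
       * For each candidate virtual address the loop below first tries a
       * 2/4MB mapping: the address must be superpage aligned, at least NBPDR
       * bytes must remain before "end", the physical address must be equally
       * aligned, PS support must be enabled, and the page's reservation must
       * be fully populated.  Otherwise a single 4KB mapping is created via
       * pmap_enter_quick_locked().
       */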
 3513 void
 3514 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
 3515     vm_page_t m_start, vm_prot_t prot)
 3516 {
 3517         vm_offset_t va;
 3518         vm_page_t m, mpte;
 3519         vm_pindex_t diff, psize;
 3520 
 3521         VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
 3522         psize = atop(end - start);
 3523         mpte = NULL;
 3524         m = m_start;
 3525         PMAP_LOCK(pmap);
 3526         while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
 3527                 va = start + ptoa(diff);
 3528                 if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
 3529                     (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 &&
 3530                     pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 &&
 3531                     pmap_enter_pde(pmap, va, m, prot))
 3532                         m = &m[NBPDR / PAGE_SIZE - 1];
 3533                 else
 3534                         mpte = pmap_enter_quick_locked(pmap, va, m, prot,
 3535                             mpte);
 3536                 m = TAILQ_NEXT(m, listq);
 3537         }
 3538         PMAP_UNLOCK(pmap);
 3539 }
 3540 
 3541 /*
 3542  * This code makes some *MAJOR* assumptions:
 3543  * 1. The current pmap and the given pmap exist.
 3544  * 2. The mapping is not wired.
 3545  * 3. Only read access is required.
 3546  * 4. No page table pages.
 3547  * In exchange, it is *MUCH* faster than pmap_enter...
 3548  */
 3549 
 3550 void
 3551 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 3552 {
 3553 
 3554         PMAP_LOCK(pmap);
 3555         (void) pmap_enter_quick_locked(pmap, va, m, prot, NULL);
 3556         PMAP_UNLOCK(pmap);
 3557 }
 3558 
 3559 static vm_page_t
 3560 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
 3561     vm_prot_t prot, vm_page_t mpte)
 3562 {
 3563         pt_entry_t *pte;
 3564         vm_paddr_t pa;
 3565         vm_page_t free;
 3566 
 3567         KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 3568             (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
 3569             ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 3570         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 3571         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 3572 
 3573         /*
 3574          * In the case that a page table page is not
 3575          * resident, we are creating it here.
 3576          */
 3577         if (va < VM_MAXUSER_ADDRESS) {
 3578                 unsigned ptepindex;
 3579                 pd_entry_t ptepa;
 3580 
 3581                 /*
 3582                  * Calculate pagetable page index
 3583                  */
 3584                 ptepindex = va >> PDRSHIFT;
 3585                 if (mpte && (mpte->pindex == ptepindex)) {
 3586                         mpte->wire_count++;
 3587                 } else {
 3588                         /*
 3589                          * Get the page directory entry
 3590                          */
 3591                         ptepa = pmap->pm_pdir[ptepindex];
 3592 
 3593                         /*
 3594                          * If the page table page is mapped, we just increment
 3595                          * the hold count, and activate it.
 3596                          */
 3597                         if (ptepa) {
 3598                                 if (ptepa & PG_PS)
 3599                                         return (NULL);
 3600                                 mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME);
 3601                                 mpte->wire_count++;
 3602                         } else {
 3603                                 mpte = _pmap_allocpte(pmap, ptepindex,
 3604                                     M_NOWAIT);
 3605                                 if (mpte == NULL)
 3606                                         return (mpte);
 3607                         }
 3608                 }
 3609         } else {
 3610                 mpte = NULL;
 3611         }
 3612 
 3613         /*
 3614          * This call to vtopte makes the assumption that we are
 3615          * entering the page into the current pmap.  In order to support
 3616          * quick entry into any pmap, one would likely use pmap_pte_quick.
 3617          * But that isn't as quick as vtopte.
 3618          */
 3619         pte = vtopte(va);
 3620         if (*pte) {
 3621                 if (mpte != NULL) {
 3622                         mpte->wire_count--;
 3623                         mpte = NULL;
 3624                 }
 3625                 return (mpte);
 3626         }
 3627 
 3628         /*
 3629          * Enter on the PV list if part of our managed memory.
 3630          */
 3631         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
 3632             !pmap_try_insert_pv_entry(pmap, va, m)) {
 3633                 if (mpte != NULL) {
 3634                         free = NULL;
 3635                         if (pmap_unwire_pte_hold(pmap, mpte, &free)) {
 3636                                 pmap_invalidate_page(pmap, va);
 3637                                 pmap_free_zero_pages(free);
 3638                         }
 3639                         
 3640                         mpte = NULL;
 3641                 }
 3642                 return (mpte);
 3643         }
 3644 
 3645         /*
 3646          * Increment counters
 3647          */
 3648         pmap->pm_stats.resident_count++;
 3649 
 3650         pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
 3651 #ifdef PAE
 3652         if ((prot & VM_PROT_EXECUTE) == 0)
 3653                 pa |= pg_nx;
 3654 #endif
 3655 
 3656         /*
 3657          * Now validate mapping with RO protection
 3658          */
 3659         if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 3660                 pte_store(pte, pa | PG_V | PG_U);
 3661         else
 3662                 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
 3663         return (mpte);
 3664 }
 3665 
 3666 /*
 3667  * Make a temporary mapping for a physical address.  This is only intended
 3668  * to be used for panic dumps.
 3669  */
 3670 void *
 3671 pmap_kenter_temporary(vm_paddr_t pa, int i)
 3672 {
 3673         vm_offset_t va;
 3674 
 3675         va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 3676         pmap_kenter(va, pa);
 3677         invlpg(va);
 3678         return ((void *)crashdumpmap);
 3679 }
 3680 
 3681 /*
 3682  * This code maps large physical mmap regions into the
 3683  * processor address space.  Note that some shortcuts
 3684  * are taken, but the code works.
 3685  */
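      /*
       * Only 2/4MB mappings are created here, and only for device or SG
       * objects whose backing pages are resident, physically contiguous,
       * aligned to a 2/4MB boundary, and share a single PAT mode; if any of
       * these conditions fails, the function returns without mapping
       * anything and the mappings are instead established on demand, one
       * page at a time.
       */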
 3686 void
 3687 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
 3688     vm_pindex_t pindex, vm_size_t size)
 3689 {
 3690         pd_entry_t *pde;
 3691         vm_paddr_t pa, ptepa;
 3692         vm_page_t p;
 3693         int pat_mode;
 3694 
 3695         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 3696         KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 3697             ("pmap_object_init_pt: non-device object"));
 3698         if (pseflag && 
 3699             (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
 3700                 if (!vm_object_populate(object, pindex, pindex + atop(size)))
 3701                         return;
 3702                 p = vm_page_lookup(object, pindex);
 3703                 KASSERT(p->valid == VM_PAGE_BITS_ALL,
 3704                     ("pmap_object_init_pt: invalid page %p", p));
 3705                 pat_mode = p->md.pat_mode;
 3706 
 3707                 /*
 3708                  * Abort the mapping if the first page is not physically
 3709                  * aligned to a 2/4MB page boundary.
 3710                  */
 3711                 ptepa = VM_PAGE_TO_PHYS(p);
 3712                 if (ptepa & (NBPDR - 1))
 3713                         return;
 3714 
 3715                 /*
 3716                  * Skip the first page.  Abort the mapping if the rest of
 3717                  * the pages are not physically contiguous or have differing
 3718                  * memory attributes.
 3719                  */
 3720                 p = TAILQ_NEXT(p, listq);
 3721                 for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
 3722                     pa += PAGE_SIZE) {
 3723                         KASSERT(p->valid == VM_PAGE_BITS_ALL,
 3724                             ("pmap_object_init_pt: invalid page %p", p));
 3725                         if (pa != VM_PAGE_TO_PHYS(p) ||
 3726                             pat_mode != p->md.pat_mode)
 3727                                 return;
 3728                         p = TAILQ_NEXT(p, listq);
 3729                 }
 3730 
 3731                 /*
 3732                  * Map using 2/4MB pages.  Since "ptepa" is 2/4M aligned and
 3733                  * "size" is a multiple of 2/4M, adding the PAT setting to
 3734                  * "pa" will not affect the termination of this loop.
 3735                  */
 3736                 PMAP_LOCK(pmap);
 3737                 for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
 3738                     size; pa += NBPDR) {
 3739                         pde = pmap_pde(pmap, addr);
 3740                         if (*pde == 0) {
 3741                                 pde_store(pde, pa | PG_PS | PG_M | PG_A |
 3742                                     PG_U | PG_RW | PG_V);
 3743                                 pmap->pm_stats.resident_count += NBPDR /
 3744                                     PAGE_SIZE;
 3745                                 pmap_pde_mappings++;
 3746                         }
 3747                         /* Else continue on if the PDE is already valid. */
 3748                         addr += NBPDR;
 3749                 }
 3750                 PMAP_UNLOCK(pmap);
 3751         }
 3752 }
 3753 
 3754 /*
 3755  *      Routine:        pmap_change_wiring
 3756  *      Function:       Change the wiring attribute for a map/virtual-address
 3757  *                      pair.
 3758  *      In/out conditions:
 3759  *                      The mapping must already exist in the pmap.
 3760  */
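      /*
       * Note that PG_W in a 4MB page directory entry applies to the whole
       * superpage, so changing the wiring of a single 4KB address within it
       * first requires demoting the mapping.  The demotion is done with the
       * page queues lock held, and the retry logic below acquires that lock
       * without inverting the usual order with the pmap lock.
       */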
 3761 void
 3762 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
 3763 {
 3764         pd_entry_t *pde;
 3765         pt_entry_t *pte;
 3766         boolean_t are_queues_locked;
 3767 
 3768         are_queues_locked = FALSE;
 3769 retry:
 3770         PMAP_LOCK(pmap);
 3771         pde = pmap_pde(pmap, va);
 3772         if ((*pde & PG_PS) != 0) {
 3773                 if (!wired != ((*pde & PG_W) == 0)) {
 3774                         if (!are_queues_locked) {
 3775                                 are_queues_locked = TRUE;
 3776                                 if (!mtx_trylock(&vm_page_queue_mtx)) {
 3777                                         PMAP_UNLOCK(pmap);
 3778                                         vm_page_lock_queues();
 3779                                         goto retry;
 3780                                 }
 3781                         }
 3782                         if (!pmap_demote_pde(pmap, pde, va))
 3783                                 panic("pmap_change_wiring: demotion failed");
 3784                 } else
 3785                         goto out;
 3786         }
 3787         pte = pmap_pte(pmap, va);
 3788 
 3789         if (wired && !pmap_pte_w(pte))
 3790                 pmap->pm_stats.wired_count++;
 3791         else if (!wired && pmap_pte_w(pte))
 3792                 pmap->pm_stats.wired_count--;
 3793 
 3794         /*
 3795          * Wiring is not a hardware characteristic so there is no need to
 3796          * invalidate TLB.
 3797          */
 3798         pmap_pte_set_w(pte, wired);
 3799         pmap_pte_release(pte);
 3800 out:
 3801         if (are_queues_locked)
 3802                 vm_page_unlock_queues();
 3803         PMAP_UNLOCK(pmap);
 3804 }
 3805 
 3806 
 3807 
 3808 /*
 3809  *      Copy the range specified by src_addr/len
 3810  *      from the source map to the range dst_addr/len
 3811  *      in the destination map.
 3812  *
 3813  *      This routine is only advisory and need not do anything.
 3814  */
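      /*
       * This implementation only copies mappings when the destination range
       * equals the source range (the fork case) and the source pmap is the
       * one currently active.  Wired bits are always cleared in the copies;
       * for 4KB entries the modified and accessed bits are cleared as well.
       * A 2/4MB mapping is copied wholesale by duplicating the page
       * directory entry when the mapping is unmanaged or a pv entry for the
       * superpage can be allocated.
       */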
 3815 
 3816 void
 3817 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 3818     vm_offset_t src_addr)
 3819 {
 3820         vm_page_t   free;
 3821         vm_offset_t addr;
 3822         vm_offset_t end_addr = src_addr + len;
 3823         vm_offset_t pdnxt;
 3824 
 3825         if (dst_addr != src_addr)
 3826                 return;
 3827 
 3828         if (!pmap_is_current(src_pmap))
 3829                 return;
 3830 
 3831         vm_page_lock_queues();
 3832         if (dst_pmap < src_pmap) {
 3833                 PMAP_LOCK(dst_pmap);
 3834                 PMAP_LOCK(src_pmap);
 3835         } else {
 3836                 PMAP_LOCK(src_pmap);
 3837                 PMAP_LOCK(dst_pmap);
 3838         }
 3839         sched_pin();
 3840         for (addr = src_addr; addr < end_addr; addr = pdnxt) {
 3841                 pt_entry_t *src_pte, *dst_pte;
 3842                 vm_page_t dstmpte, srcmpte;
 3843                 pd_entry_t srcptepaddr;
 3844                 unsigned ptepindex;
 3845 
 3846                 KASSERT(addr < UPT_MIN_ADDRESS,
 3847                     ("pmap_copy: invalid to pmap_copy page tables"));
 3848 
 3849                 pdnxt = (addr + NBPDR) & ~PDRMASK;
 3850                 if (pdnxt < addr)
 3851                         pdnxt = end_addr;
 3852                 ptepindex = addr >> PDRSHIFT;
 3853 
 3854                 srcptepaddr = src_pmap->pm_pdir[ptepindex];
 3855                 if (srcptepaddr == 0)
 3856                         continue;
 3857                         
 3858                 if (srcptepaddr & PG_PS) {
 3859                         if (dst_pmap->pm_pdir[ptepindex] == 0 &&
 3860                             ((srcptepaddr & PG_MANAGED) == 0 ||
 3861                             pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
 3862                             PG_PS_FRAME))) {
 3863                                 dst_pmap->pm_pdir[ptepindex] = srcptepaddr &
 3864                                     ~PG_W;
 3865                                 dst_pmap->pm_stats.resident_count +=
 3866                                     NBPDR / PAGE_SIZE;
 3867                         }
 3868                         continue;
 3869                 }
 3870 
 3871                 srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
 3872                 KASSERT(srcmpte->wire_count > 0,
 3873                     ("pmap_copy: source page table page is unused"));
 3874 
 3875                 if (pdnxt > end_addr)
 3876                         pdnxt = end_addr;
 3877 
 3878                 src_pte = vtopte(addr);
 3879                 while (addr < pdnxt) {
 3880                         pt_entry_t ptetemp;
 3881                         ptetemp = *src_pte;
 3882                         /*
 3883                          * We only copy mappings of managed pages.
 3884                          */
 3885                         if ((ptetemp & PG_MANAGED) != 0) {
 3886                                 dstmpte = pmap_allocpte(dst_pmap, addr,
 3887                                     M_NOWAIT);
 3888                                 if (dstmpte == NULL)
 3889                                         goto out;
 3890                                 dst_pte = pmap_pte_quick(dst_pmap, addr);
 3891                                 if (*dst_pte == 0 &&
 3892                                     pmap_try_insert_pv_entry(dst_pmap, addr,
 3893                                     PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) {
 3894                                         /*
 3895                                          * Clear the wired, modified, and
 3896                                          * accessed (referenced) bits
 3897                                          * during the copy.
 3898                                          */
 3899                                         *dst_pte = ptetemp & ~(PG_W | PG_M |
 3900                                             PG_A);
 3901                                         dst_pmap->pm_stats.resident_count++;
 3902                                 } else {
 3903                                         free = NULL;
 3904                                         if (pmap_unwire_pte_hold(dst_pmap,
 3905                                             dstmpte, &free)) {
 3906                                                 pmap_invalidate_page(dst_pmap,
 3907                                                     addr);
 3908                                                 pmap_free_zero_pages(free);
 3909                                         }
 3910                                         goto out;
 3911                                 }
 3912                                 if (dstmpte->wire_count >= srcmpte->wire_count)
 3913                                         break;
 3914                         }
 3915                         addr += PAGE_SIZE;
 3916                         src_pte++;
 3917                 }
 3918         }
 3919 out:
 3920         sched_unpin();
 3921         vm_page_unlock_queues();
 3922         PMAP_UNLOCK(src_pmap);
 3923         PMAP_UNLOCK(dst_pmap);
 3924 }       
 3925 
 3926 static __inline void
 3927 pagezero(void *page)
 3928 {
 3929 #if defined(I686_CPU)
 3930         if (cpu_class == CPUCLASS_686) {
 3931 #if defined(CPU_ENABLE_SSE)
 3932                 if (cpu_feature & CPUID_SSE2)
 3933                         sse2_pagezero(page);
 3934                 else
 3935 #endif
 3936                         i686_pagezero(page);
 3937         } else
 3938 #endif
 3939                 bzero(page, PAGE_SIZE);
 3940 }
 3941 
 3942 /*
 3943  *      pmap_zero_page zeros the specified hardware page by mapping 
 3944  *      the page into KVM and using bzero to clear its contents.
 3945  */
 3946 void
 3947 pmap_zero_page(vm_page_t m)
 3948 {
 3949         struct sysmaps *sysmaps;
 3950 
 3951         sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 3952         mtx_lock(&sysmaps->lock);
 3953         if (*sysmaps->CMAP2)
 3954                 panic("pmap_zero_page: CMAP2 busy");
 3955         sched_pin();
 3956         *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M |
 3957             pmap_cache_bits(m->md.pat_mode, 0);
 3958         invlcaddr(sysmaps->CADDR2);
 3959         pagezero(sysmaps->CADDR2);
 3960         *sysmaps->CMAP2 = 0;
 3961         sched_unpin();
 3962         mtx_unlock(&sysmaps->lock);
 3963 }
 3964 
 3965 /*
 3966  *      pmap_zero_page_area zeros the specified hardware page by mapping 
 3967  *      the page into KVM and using bzero to clear its contents.
 3968  *
 3969  *      off and size may not cover an area beyond a single hardware page.
 3970  */
 3971 void
 3972 pmap_zero_page_area(vm_page_t m, int off, int size)
 3973 {
 3974         struct sysmaps *sysmaps;
 3975 
 3976         sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 3977         mtx_lock(&sysmaps->lock);
 3978         if (*sysmaps->CMAP2)
 3979                 panic("pmap_zero_page_area: CMAP2 busy");
 3980         sched_pin();
 3981         *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M |
 3982             pmap_cache_bits(m->md.pat_mode, 0);
 3983         invlcaddr(sysmaps->CADDR2);
 3984         if (off == 0 && size == PAGE_SIZE) 
 3985                 pagezero(sysmaps->CADDR2);
 3986         else
 3987                 bzero((char *)sysmaps->CADDR2 + off, size);
 3988         *sysmaps->CMAP2 = 0;
 3989         sched_unpin();
 3990         mtx_unlock(&sysmaps->lock);
 3991 }
 3992 
 3993 /*
 3994  *      pmap_zero_page_idle zeros the specified hardware page by mapping 
 3995  *      the page into KVM and using bzero to clear its contents.  This
 3996  *      is intended to be called from the vm_pagezero process only and
 3997  *      outside of Giant.
 3998  */
 3999 void
 4000 pmap_zero_page_idle(vm_page_t m)
 4001 {
 4002 
 4003         if (*CMAP3)
 4004                 panic("pmap_zero_page_idle: CMAP3 busy");
 4005         sched_pin();
 4006         *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M |
 4007             pmap_cache_bits(m->md.pat_mode, 0);
 4008         invlcaddr(CADDR3);
 4009         pagezero(CADDR3);
 4010         *CMAP3 = 0;
 4011         sched_unpin();
 4012 }
 4013 
 4014 /*
 4015  *      pmap_copy_page copies the specified (machine independent)
 4016  *      page by mapping the page into virtual memory and using
 4017  *      bcopy to copy the page, one machine dependent page at a
 4018  *      time.
 4019  */
 4020 void
 4021 pmap_copy_page(vm_page_t src, vm_page_t dst)
 4022 {
 4023         struct sysmaps *sysmaps;
 4024 
 4025         sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 4026         mtx_lock(&sysmaps->lock);
 4027         if (*sysmaps->CMAP1)
 4028                 panic("pmap_copy_page: CMAP1 busy");
 4029         if (*sysmaps->CMAP2)
 4030                 panic("pmap_copy_page: CMAP2 busy");
 4031         sched_pin();
 4032         invlpg((u_int)sysmaps->CADDR1);
 4033         invlpg((u_int)sysmaps->CADDR2);
 4034         *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A |
 4035             pmap_cache_bits(src->md.pat_mode, 0);
 4036         *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M |
 4037             pmap_cache_bits(dst->md.pat_mode, 0);
 4038         bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
 4039         *sysmaps->CMAP1 = 0;
 4040         *sysmaps->CMAP2 = 0;
 4041         sched_unpin();
 4042         mtx_unlock(&sysmaps->lock);
 4043 }
 4044 
 4045 /*
 4046  * Returns true if the pmap's pv is one of the first
 4047  * 16 pvs linked to from this page.  This count may
 4048  * be changed upwards or downwards in the future; it
 4049  * is only necessary that true be returned for a small
 4050  * subset of pmaps for proper page aging.
 4051  */
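      /*
       * Both the page's own pv list and the pv list of the 2/4MB page that
       * contains it are consulted below, with the 16-entry limit applied to
       * the two lists combined.
       */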
 4052 boolean_t
 4053 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 4054 {
 4055         struct md_page *pvh;
 4056         pv_entry_t pv;
 4057         int loops = 0;
 4058 
 4059         if (m->flags & PG_FICTITIOUS)
 4060                 return (FALSE);
 4061 
 4062         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4063         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 4064                 if (PV_PMAP(pv) == pmap) {
 4065                         return (TRUE);
 4066                 }
 4067                 loops++;
 4068                 if (loops >= 16)
 4069                         break;
 4070         }
 4071         if (loops < 16) {
 4072                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4073                 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 4074                         if (PV_PMAP(pv) == pmap)
 4075                                 return (TRUE);
 4076                         loops++;
 4077                         if (loops >= 16)
 4078                                 break;
 4079                 }
 4080         }
 4081         return (FALSE);
 4082 }
 4083 
 4084 /*
 4085  *      pmap_page_wired_mappings:
 4086  *
 4087  *      Return the number of managed mappings to the given physical page
 4088  *      that are wired.
 4089  */
 4090 int
 4091 pmap_page_wired_mappings(vm_page_t m)
 4092 {
 4093         int count;
 4094 
 4095         count = 0;
 4096         if ((m->flags & PG_FICTITIOUS) != 0)
 4097                 return (count);
 4098         count = pmap_pvh_wired_mappings(&m->md, count);
 4099         return (pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count));
 4100 }
 4101 
 4102 /*
 4103  *      pmap_pvh_wired_mappings:
 4104  *
 4105  *      Return the updated number "count" of managed mappings that are wired.
 4106  */
 4107 static int
 4108 pmap_pvh_wired_mappings(struct md_page *pvh, int count)
 4109 {
 4110         pmap_t pmap;
 4111         pt_entry_t *pte;
 4112         pv_entry_t pv;
 4113 
 4114         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4115         sched_pin();
 4116         TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 4117                 pmap = PV_PMAP(pv);
 4118                 PMAP_LOCK(pmap);
 4119                 pte = pmap_pte_quick(pmap, pv->pv_va);
 4120                 if ((*pte & PG_W) != 0)
 4121                         count++;
 4122                 PMAP_UNLOCK(pmap);
 4123         }
 4124         sched_unpin();
 4125         return (count);
 4126 }
 4127 
 4128 /*
 4129  * Returns TRUE if the given page is mapped individually or as part of
 4130  * a 4mpage.  Otherwise, returns FALSE.
 4131  */
 4132 boolean_t
 4133 pmap_page_is_mapped(vm_page_t m)
 4134 {
 4135         struct md_page *pvh;
 4136 
 4137         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
 4138                 return (FALSE);
 4139         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4140         if (TAILQ_EMPTY(&m->md.pv_list)) {
 4141                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4142                 return (!TAILQ_EMPTY(&pvh->pv_list));
 4143         } else
 4144                 return (TRUE);
 4145 }
 4146 
 4147 /*
 4148  * Remove all pages from the specified address space;
 4149  * this aids process exit speeds.  Also, this code
 4150  * is special cased for the current process only, but
 4151  * can have the more generic (and slightly slower)
 4152  * mode enabled.  This is much faster than pmap_remove
 4153  * in the case of running down an entire address space.
 4154  */
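      /*
       * Rather than walking the address space, the loop below iterates over
       * the pmap's pv entry chunks, removing every managed mapping it finds,
       * whether a 4KB page or a 4MB superpage, while skipping wired
       * mappings; chunks whose entries are all freed are released back to
       * the VM system.
       */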
 4155 void
 4156 pmap_remove_pages(pmap_t pmap)
 4157 {
 4158         pt_entry_t *pte, tpte;
 4159         vm_page_t free = NULL;
 4160         vm_page_t m, mpte, mt;
 4161         pv_entry_t pv;
 4162         struct md_page *pvh;
 4163         struct pv_chunk *pc, *npc;
 4164         int field, idx;
 4165         int32_t bit;
 4166         uint32_t inuse, bitmask;
 4167         int allfree;
 4168 
 4169         if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
 4170                 printf("warning: pmap_remove_pages called with non-current pmap\n");
 4171                 return;
 4172         }
 4173         vm_page_lock_queues();
 4174         PMAP_LOCK(pmap);
 4175         sched_pin();
 4176         TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
 4177                 allfree = 1;
 4178                 for (field = 0; field < _NPCM; field++) {
 4179                         inuse = (~(pc->pc_map[field])) & pc_freemask[field];
 4180                         while (inuse != 0) {
 4181                                 bit = bsfl(inuse);
 4182                                 bitmask = 1UL << bit;
 4183                                 idx = field * 32 + bit;
 4184                                 pv = &pc->pc_pventry[idx];
 4185                                 inuse &= ~bitmask;
 4186 
 4187                                 pte = pmap_pde(pmap, pv->pv_va);
 4188                                 tpte = *pte;
 4189                                 if ((tpte & PG_PS) == 0) {
 4190                                         pte = vtopte(pv->pv_va);
 4191                                         tpte = *pte & ~PG_PTE_PAT;
 4192                                 }
 4193 
 4194                                 if (tpte == 0) {
 4195                                         printf(
 4196                                             "TPTE at %p  IS ZERO @ VA %08x\n",
 4197                                             pte, pv->pv_va);
 4198                                         panic("bad pte");
 4199                                 }
 4200 
 4201 /*
 4202  * We cannot remove wired pages from a process's mapping at this time.
 4203  */
 4204                                 if (tpte & PG_W) {
 4205                                         allfree = 0;
 4206                                         continue;
 4207                                 }
 4208 
 4209                                 m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
 4210                                 KASSERT(m->phys_addr == (tpte & PG_FRAME),
 4211                                     ("vm_page_t %p phys_addr mismatch %016jx %016jx",
 4212                                     m, (uintmax_t)m->phys_addr,
 4213                                     (uintmax_t)tpte));
 4214 
 4215                                 KASSERT(m < &vm_page_array[vm_page_array_size],
 4216                                         ("pmap_remove_pages: bad tpte %#jx",
 4217                                         (uintmax_t)tpte));
 4218 
 4219                                 pte_clear(pte);
 4220 
 4221                                 /*
 4222                                  * Update the vm_page_t clean/reference bits.
 4223                                  */
 4224                                 if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 4225                                         if ((tpte & PG_PS) != 0) {
 4226                                                 for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
 4227                                                         vm_page_dirty(mt);
 4228                                         } else
 4229                                                 vm_page_dirty(m);
 4230                                 }
 4231 
 4232                                 /* Mark free */
 4233                                 PV_STAT(pv_entry_frees++);
 4234                                 PV_STAT(pv_entry_spare++);
 4235                                 pv_entry_count--;
 4236                                 pc->pc_map[field] |= bitmask;
 4237                                 if ((tpte & PG_PS) != 0) {
 4238                                         pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 4239                                         pvh = pa_to_pvh(tpte & PG_PS_FRAME);
 4240                                         TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
 4241                                         if (TAILQ_EMPTY(&pvh->pv_list)) {
 4242                                                 for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
 4243                                                         if (TAILQ_EMPTY(&mt->md.pv_list))
 4244                                                                 vm_page_flag_clear(mt, PG_WRITEABLE);
 4245                                         }
 4246                                         mpte = pmap_lookup_pt_page(pmap, pv->pv_va);
 4247                                         if (mpte != NULL) {
 4248                                                 pmap_remove_pt_page(pmap, mpte);
 4249                                                 pmap->pm_stats.resident_count--;
 4250                                                 KASSERT(mpte->wire_count == NPTEPG,
 4251                                                     ("pmap_remove_pages: pte page wire count error"));
 4252                                                 mpte->wire_count = 0;
 4253                                                 pmap_add_delayed_free_list(mpte, &free, FALSE);
 4254                                                 atomic_subtract_int(&cnt.v_wire_count, 1);
 4255                                         }
 4256                                 } else {
 4257                                         pmap->pm_stats.resident_count--;
 4258                                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 4259                                         if (TAILQ_EMPTY(&m->md.pv_list)) {
 4260                                                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4261                                                 if (TAILQ_EMPTY(&pvh->pv_list))
 4262                                                         vm_page_flag_clear(m, PG_WRITEABLE);
 4263                                         }
 4264                                         pmap_unuse_pt(pmap, pv->pv_va, &free);
 4265                                 }
 4266                         }
 4267                 }
 4268                 if (allfree) {
 4269                         PV_STAT(pv_entry_spare -= _NPCPV);
 4270                         PV_STAT(pc_chunk_count--);
 4271                         PV_STAT(pc_chunk_frees++);
 4272                         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 4273                         m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
 4274                         pmap_qremove((vm_offset_t)pc, 1);
 4275                         vm_page_unwire(m, 0);
 4276                         vm_page_free(m);
 4277                         pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
 4278                 }
 4279         }
 4280         sched_unpin();
 4281         pmap_invalidate_all(pmap);
 4282         vm_page_unlock_queues();
 4283         PMAP_UNLOCK(pmap);
 4284         pmap_free_zero_pages(free);
 4285 }
 4286 
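As a brief, hedged illustration of the calling pattern for pmap_remove_pages() (the wrapper below is hypothetical and not part of pmap.c; it only uses names that already appear in this file), a caller tears down the current process's user mappings in one pass instead of invoking pmap_remove() per map entry:

/* Hypothetical sketch: remove every unwired user mapping of the current process. */
static void
example_remove_all_user_pages(void)
{

        /* pmap_remove_pages() acquires the page queues and pmap locks itself. */
        pmap_remove_pages(vmspace_pmap(curthread->td_proc->p_vmspace));
}
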
 4287 /*
 4288  *      pmap_is_modified:
 4289  *
 4290  *      Return whether or not the specified physical page was modified
 4291  *      in any physical maps.
 4292  */
 4293 boolean_t
 4294 pmap_is_modified(vm_page_t m)
 4295 {
 4296 
 4297         if (m->flags & PG_FICTITIOUS)
 4298                 return (FALSE);
 4299         if (pmap_is_modified_pvh(&m->md))
 4300                 return (TRUE);
 4301         return (pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
 4302 }
 4303 
 4304 /*
 4305  * Returns TRUE if any of the given mappings were used to modify
 4306  * physical memory.  Otherwise, returns FALSE.  Both page and 2mpage
 4307  * mappings are supported.
 4308  */
 4309 static boolean_t
 4310 pmap_is_modified_pvh(struct md_page *pvh)
 4311 {
 4312         pv_entry_t pv;
 4313         pt_entry_t *pte;
 4314         pmap_t pmap;
 4315         boolean_t rv;
 4316 
 4317         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4318         rv = FALSE;
 4319         sched_pin();
 4320         TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 4321                 pmap = PV_PMAP(pv);
 4322                 PMAP_LOCK(pmap);
 4323                 pte = pmap_pte_quick(pmap, pv->pv_va);
 4324                 rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW);
 4325                 PMAP_UNLOCK(pmap);
 4326                 if (rv)
 4327                         break;
 4328         }
 4329         sched_unpin();
 4330         return (rv);
 4331 }
 4332 
 4333 /*
 4334  *      pmap_is_prefaultable:
 4335  *
 4336  *      Return whether or not the specified virtual address is eligible
 4337  *      for prefault.
 4338  */
 4339 boolean_t
 4340 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 4341 {
 4342         pd_entry_t *pde;
 4343         pt_entry_t *pte;
 4344         boolean_t rv;
 4345 
 4346         rv = FALSE;
 4347         PMAP_LOCK(pmap);
 4348         pde = pmap_pde(pmap, addr);
 4349         if (*pde != 0 && (*pde & PG_PS) == 0) {
 4350                 pte = vtopte(addr);
 4351                 rv = *pte == 0;
 4352         }
 4353         PMAP_UNLOCK(pmap);
 4354         return (rv);
 4355 }
 4356 
 4357 /*
 4358  * Clear the write and modified bits in each of the given page's mappings.
 4359  */
 4360 void
 4361 pmap_remove_write(vm_page_t m)
 4362 {
 4363         struct md_page *pvh;
 4364         pv_entry_t next_pv, pv;
 4365         pmap_t pmap;
 4366         pd_entry_t *pde;
 4367         pt_entry_t oldpte, *pte;
 4368         vm_offset_t va;
 4369 
 4370         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4371         if ((m->flags & PG_FICTITIOUS) != 0 ||
 4372             (m->flags & PG_WRITEABLE) == 0)
 4373                 return;
 4374         sched_pin();
 4375         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4376         TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
 4377                 va = pv->pv_va;
 4378                 pmap = PV_PMAP(pv);
 4379                 PMAP_LOCK(pmap);
 4380                 pde = pmap_pde(pmap, va);
 4381                 if ((*pde & PG_RW) != 0)
 4382                         (void)pmap_demote_pde(pmap, pde, va);
 4383                 PMAP_UNLOCK(pmap);
 4384         }
 4385         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 4386                 pmap = PV_PMAP(pv);
 4387                 PMAP_LOCK(pmap);
 4388                 pde = pmap_pde(pmap, pv->pv_va);
 4389                 KASSERT((*pde & PG_PS) == 0, ("pmap_remove_write: found"
 4390                     " a 4mpage in page %p's pv list", m));
 4391                 pte = pmap_pte_quick(pmap, pv->pv_va);
 4392 retry:
 4393                 oldpte = *pte;
 4394                 if ((oldpte & PG_RW) != 0) {
 4395                         /*
 4396                          * Regardless of whether a pte is 32 or 64 bits
 4397                          * in size, PG_RW and PG_M are among the least
 4398                          * significant 32 bits.
 4399                          */
 4400                         if (!atomic_cmpset_int((u_int *)pte, oldpte,
 4401                             oldpte & ~(PG_RW | PG_M)))
 4402                                 goto retry;
 4403                         if ((oldpte & PG_M) != 0)
 4404                                 vm_page_dirty(m);
 4405                         pmap_invalidate_page(pmap, pv->pv_va);
 4406                 }
 4407                 PMAP_UNLOCK(pmap);
 4408         }
 4409         vm_page_flag_clear(m, PG_WRITEABLE);
 4410         sched_unpin();
 4411 }
 4412 
 4413 /*
 4414  *      pmap_ts_referenced:
 4415  *
 4416  *      Return a count of reference bits for a page, clearing those bits.
 4417  *      It is not necessary for every reference bit to be cleared, but it
 4418  *      is necessary that 0 only be returned when there are truly no
 4419  *      reference bits set.
 4420  *
 4421  *      XXX: The exact number of bits to check and clear is a matter that
 4422  *      should be tested and standardized at some point in the future for
 4423  *      optimal aging of shared pages.
 4424  */
 4425 int
 4426 pmap_ts_referenced(vm_page_t m)
 4427 {
 4428         struct md_page *pvh;
 4429         pv_entry_t pv, pvf, pvn;
 4430         pmap_t pmap;
 4431         pd_entry_t oldpde, *pde;
 4432         pt_entry_t *pte;
 4433         vm_offset_t va;
 4434         int rtval = 0;
 4435 
 4436         if (m->flags & PG_FICTITIOUS)
 4437                 return (rtval);
 4438         sched_pin();
 4439         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4440         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4441         TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) {
 4442                 va = pv->pv_va;
 4443                 pmap = PV_PMAP(pv);
 4444                 PMAP_LOCK(pmap);
 4445                 pde = pmap_pde(pmap, va);
 4446                 oldpde = *pde;
 4447                 if ((oldpde & PG_A) != 0) {
 4448                         if (pmap_demote_pde(pmap, pde, va)) {
 4449                                 if ((oldpde & PG_W) == 0) {
 4450                                         /*
 4451                                          * Remove the mapping to a single page
 4452                                          * so that a subsequent access may
 4453                                          * repromote.  Since the underlying
 4454                                          * page table page is fully populated,
 4455                                          * this removal never frees a page
 4456                                          * table page.
 4457                                          */
 4458                                         va += VM_PAGE_TO_PHYS(m) - (oldpde &
 4459                                             PG_PS_FRAME);
 4460                                         pmap_remove_page(pmap, va, NULL);
 4461                                         rtval++;
 4462                                         if (rtval > 4) {
 4463                                                 PMAP_UNLOCK(pmap);
 4464                                                 goto out;
 4465                                         }
 4466                                 }
 4467                         }
 4468                 }
 4469                 PMAP_UNLOCK(pmap);
 4470         }
 4471         if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 4472                 pvf = pv;
 4473                 do {
 4474                         pvn = TAILQ_NEXT(pv, pv_list);
 4475                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 4476                         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 4477                         pmap = PV_PMAP(pv);
 4478                         PMAP_LOCK(pmap);
 4479                         pde = pmap_pde(pmap, pv->pv_va);
 4480                         KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:"
 4481                             " found a 4mpage in page %p's pv list", m));
 4482                         pte = pmap_pte_quick(pmap, pv->pv_va);
 4483                         if ((*pte & PG_A) != 0) {
 4484                                 atomic_clear_int((u_int *)pte, PG_A);
 4485                                 pmap_invalidate_page(pmap, pv->pv_va);
 4486                                 rtval++;
 4487                                 if (rtval > 4)
 4488                                         pvn = NULL;
 4489                         }
 4490                         PMAP_UNLOCK(pmap);
 4491                 } while ((pv = pvn) != NULL && pv != pvf);
 4492         }
 4493 out:
 4494         sched_unpin();
 4495         return (rtval);
 4496 }
 4497 
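As a hedged sketch of how the count returned by pmap_ts_referenced() might be consumed (the helper below is hypothetical; it relies only on functions and flags used elsewhere in this file):

/* Hypothetical sketch: fold the hardware reference bits into PG_REFERENCED. */
static void
example_age_page(vm_page_t m)
{

        vm_page_lock_queues();                  /* pmap_ts_referenced() asserts this lock */
        if (pmap_ts_referenced(m) != 0)         /* clears PG_A in the page's mappings */
                vm_page_flag_set(m, PG_REFERENCED);
        vm_page_unlock_queues();
}
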
 4498 /*
 4499  *      Clear the modify bits on the specified physical page.
 4500  */
 4501 void
 4502 pmap_clear_modify(vm_page_t m)
 4503 {
 4504         struct md_page *pvh;
 4505         pv_entry_t next_pv, pv;
 4506         pmap_t pmap;
 4507         pd_entry_t oldpde, *pde;
 4508         pt_entry_t oldpte, *pte;
 4509         vm_offset_t va;
 4510 
 4511         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4512         if ((m->flags & PG_FICTITIOUS) != 0)
 4513                 return;
 4514         sched_pin();
 4515         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4516         TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
 4517                 va = pv->pv_va;
 4518                 pmap = PV_PMAP(pv);
 4519                 PMAP_LOCK(pmap);
 4520                 pde = pmap_pde(pmap, va);
 4521                 oldpde = *pde;
 4522                 if ((oldpde & PG_RW) != 0) {
 4523                         if (pmap_demote_pde(pmap, pde, va)) {
 4524                                 if ((oldpde & PG_W) == 0) {
 4525                                         /*
 4526                                          * Write protect the mapping to a
 4527                                          * single page so that a subsequent
 4528                                          * write access may repromote.
 4529                                          */
 4530                                         va += VM_PAGE_TO_PHYS(m) - (oldpde &
 4531                                             PG_PS_FRAME);
 4532                                         pte = pmap_pte_quick(pmap, va);
 4533                                         oldpte = *pte;
 4534                                         if ((oldpte & PG_V) != 0) {
 4535                                                 /*
 4536                                                  * Regardless of whether a pte is 32 or 64 bits
 4537                                                  * in size, PG_RW and PG_M are among the least
 4538                                                  * significant 32 bits.
 4539                                                  */
 4540                                                 while (!atomic_cmpset_int((u_int *)pte,
 4541                                                     oldpte,
 4542                                                     oldpte & ~(PG_M | PG_RW)))
 4543                                                         oldpte = *pte;
 4544                                                 vm_page_dirty(m);
 4545                                                 pmap_invalidate_page(pmap, va);
 4546                                         }
 4547                                 }
 4548                         }
 4549                 }
 4550                 PMAP_UNLOCK(pmap);
 4551         }
 4552         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 4553                 pmap = PV_PMAP(pv);
 4554                 PMAP_LOCK(pmap);
 4555                 pde = pmap_pde(pmap, pv->pv_va);
 4556                 KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found"
 4557                     " a 4mpage in page %p's pv list", m));
 4558                 pte = pmap_pte_quick(pmap, pv->pv_va);
 4559                 if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 4560                         /*
 4561                          * Regardless of whether a pte is 32 or 64 bits
 4562                          * in size, PG_M is among the least significant
 4563                          * 32 bits. 
 4564                          */
 4565                         atomic_clear_int((u_int *)pte, PG_M);
 4566                         pmap_invalidate_page(pmap, pv->pv_va);
 4567                 }
 4568                 PMAP_UNLOCK(pmap);
 4569         }
 4570         sched_unpin();
 4571 }
 4572 
 4573 /*
 4574  *      pmap_clear_reference:
 4575  *
 4576  *      Clear the reference bit on the specified physical page.
 4577  */
 4578 void
 4579 pmap_clear_reference(vm_page_t m)
 4580 {
 4581         struct md_page *pvh;
 4582         pv_entry_t next_pv, pv;
 4583         pmap_t pmap;
 4584         pd_entry_t oldpde, *pde;
 4585         pt_entry_t *pte;
 4586         vm_offset_t va;
 4587 
 4588         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4589         if ((m->flags & PG_FICTITIOUS) != 0)
 4590                 return;
 4591         sched_pin();
 4592         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4593         TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
 4594                 va = pv->pv_va;
 4595                 pmap = PV_PMAP(pv);
 4596                 PMAP_LOCK(pmap);
 4597                 pde = pmap_pde(pmap, va);
 4598                 oldpde = *pde;
 4599                 if ((oldpde & PG_A) != 0) {
 4600                         if (pmap_demote_pde(pmap, pde, va)) {
 4601                                 /*
 4602                                  * Remove the mapping to a single page so
 4603                                  * that a subsequent access may repromote.
 4604                                  * Since the underlying page table page is
 4605                                  * fully populated, this removal never frees
 4606                                  * a page table page.
 4607                                  */
 4608                                 va += VM_PAGE_TO_PHYS(m) - (oldpde &
 4609                                     PG_PS_FRAME);
 4610                                 pmap_remove_page(pmap, va, NULL);
 4611                         }
 4612                 }
 4613                 PMAP_UNLOCK(pmap);
 4614         }
 4615         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 4616                 pmap = PV_PMAP(pv);
 4617                 PMAP_LOCK(pmap);
 4618                 pde = pmap_pde(pmap, pv->pv_va);
 4619                 KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found"
 4620                     " a 4mpage in page %p's pv list", m));
 4621                 pte = pmap_pte_quick(pmap, pv->pv_va);
 4622                 if ((*pte & PG_A) != 0) {
 4623                         /*
 4624                          * Regardless of whether a pte is 32 or 64 bits
 4625                          * in size, PG_A is among the least significant
 4626                          * 32 bits. 
 4627                          */
 4628                         atomic_clear_int((u_int *)pte, PG_A);
 4629                         pmap_invalidate_page(pmap, pv->pv_va);
 4630                 }
 4631                 PMAP_UNLOCK(pmap);
 4632         }
 4633         sched_unpin();
 4634 }
 4635 
 4636 /*
 4637  * Miscellaneous support routines follow
 4638  */
 4639 
 4640 /* Adjust the cache mode for a 4KB page mapped via a PTE. */
 4641 static __inline void
 4642 pmap_pte_attr(pt_entry_t *pte, int cache_bits)
 4643 {
 4644         u_int opte, npte;
 4645 
 4646         /*
 4647          * The cache mode bits are all in the low 32-bits of the
 4648          * PTE, so we can just spin on updating the low 32-bits.
 4649          */
 4650         do {
 4651                 opte = *(u_int *)pte;
 4652                 npte = opte & ~PG_PTE_CACHE;
 4653                 npte |= cache_bits;
 4654         } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte));
 4655 }
 4656 
 4657 /* Adjust the cache mode for a 2/4MB page mapped via a PDE. */
 4658 static __inline void
 4659 pmap_pde_attr(pd_entry_t *pde, int cache_bits)
 4660 {
 4661         u_int opde, npde;
 4662 
 4663         /*
 4664          * The cache mode bits are all in the low 32-bits of the
 4665          * PDE, so we can just spin on updating the low 32-bits.
 4666          */
 4667         do {
 4668                 opde = *(u_int *)pde;
 4669                 npde = opde & ~PG_PDE_CACHE;
 4670                 npde |= cache_bits;
 4671         } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde));
 4672 }
 4673 
 4674 /*
 4675  * Map a set of physical memory pages into the kernel virtual
 4676  * address space. Return a pointer to where it is mapped. This
 4677  * routine is intended to be used for mapping device memory,
 4678  * NOT real memory.
 4679  */
 4680 void *
 4681 pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
 4682 {
 4683         vm_offset_t va, offset;
 4684         vm_size_t tmpsize;
 4685 
 4686         offset = pa & PAGE_MASK;
 4687         size = roundup(offset + size, PAGE_SIZE);
 4688         pa = pa & PG_FRAME;
 4689 
 4690         if (pa < KERNLOAD && pa + size <= KERNLOAD)
 4691                 va = KERNBASE + pa;
 4692         else
 4693                 va = kmem_alloc_nofault(kernel_map, size);
 4694         if (!va)
 4695                 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 4696 
 4697         for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
 4698                 pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
 4699         pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
 4700         pmap_invalidate_cache_range(va, va + size);
 4701         return ((void *)(va + offset));
 4702 }
 4703 
 4704 void *
 4705 pmap_mapdev(vm_paddr_t pa, vm_size_t size)
 4706 {
 4707 
 4708         return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE));
 4709 }
 4710 
 4711 void *
 4712 pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 4713 {
 4714 
 4715         return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK));
 4716 }
 4717 
 4718 void
 4719 pmap_unmapdev(vm_offset_t va, vm_size_t size)
 4720 {
 4721         vm_offset_t base, offset, tmpva;
 4722 
 4723         if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
 4724                 return;
 4725         base = trunc_page(va);
 4726         offset = va & PAGE_MASK;
 4727         size = roundup(offset + size, PAGE_SIZE);
 4728         for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
 4729                 pmap_kremove(tmpva);
 4730         pmap_invalidate_range(kernel_pmap, va, tmpva);
 4731         kmem_free(kernel_map, base, size);
 4732 }
 4733 
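The following sketch shows the intended pairing of pmap_mapdev_attr() and pmap_unmapdev(); the helper, the physical address, and the size are invented for illustration only:

/* Hypothetical sketch: map a device register window uncached, read it, unmap it. */
static void
example_map_regs(void)
{
        volatile uint32_t *regs;
        vm_paddr_t pa = 0xfeb00000;     /* made-up device physical address */
        vm_size_t len = PAGE_SIZE;

        regs = pmap_mapdev_attr(pa, len, PAT_UNCACHEABLE);
        (void)regs[0];                  /* read the first 32-bit register */
        pmap_unmapdev((vm_offset_t)regs, len);
}
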
 4734 /*
 4735  * Sets the memory attribute for the specified page.
 4736  */
 4737 void
 4738 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 4739 {
 4740         struct sysmaps *sysmaps;
 4741         vm_offset_t sva, eva;
 4742 
 4743         m->md.pat_mode = ma;
 4744         if ((m->flags & PG_FICTITIOUS) != 0)
 4745                 return;
 4746 
 4747         /*
 4748          * If "m" is a normal page, flush it from the cache.
 4749          * See pmap_invalidate_cache_range().
 4750          *
 4751          * First, try to find an existing mapping of the page by an sf
 4752          * buffer.  sf_buf_invalidate_cache() modifies the mapping and
 4753          * flushes the cache.
 4754          */
 4755         if (sf_buf_invalidate_cache(m))
 4756                 return;
 4757 
 4758         /*
 4759          * If the page is not mapped by an sf buffer, but the CPU does
 4760          * not support self-snoop, map the page transiently and do the
 4761          * invalidation.  In the worst case, the whole cache is flushed
 4762          * by pmap_invalidate_cache_range().
 4763          */
 4764         if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) {
 4765                 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 4766                 mtx_lock(&sysmaps->lock);
 4767                 if (*sysmaps->CMAP2)
 4768                         panic("pmap_page_set_memattr: CMAP2 busy");
 4769                 sched_pin();
 4770                 *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) |
 4771                     PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0);
 4772                 invlcaddr(sysmaps->CADDR2);
 4773                 sva = (vm_offset_t)sysmaps->CADDR2;
 4774                 eva = sva + PAGE_SIZE;
 4775         } else
 4776                 sva = eva = 0; /* gcc */
 4777         pmap_invalidate_cache_range(sva, eva);
 4778         if (sva != 0) {
 4779                 *sysmaps->CMAP2 = 0;
 4780                 sched_unpin();
 4781                 mtx_unlock(&sysmaps->lock);
 4782         }
 4783 }
 4784 
 4785 /*
 4786  * Changes the specified virtual address range's memory type to that given by
 4787  * the parameter "mode".  The specified virtual address range must be
 4788  * completely contained within the kernel map.
 4789  *
 4790  * Returns zero if the change completed successfully, and either EINVAL or
 4791  * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
 4792  * of the virtual address range was not mapped, and ENOMEM is returned if
 4793  * there was insufficient memory available to complete the change.
 4794  */
 4795 int
 4796 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
 4797 {
 4798         vm_offset_t base, offset, tmpva;
 4799         pd_entry_t *pde;
 4800         pt_entry_t *pte;
 4801         int cache_bits_pte, cache_bits_pde;
 4802         boolean_t changed;
 4803 
 4804         base = trunc_page(va);
 4805         offset = va & PAGE_MASK;
 4806         size = roundup(offset + size, PAGE_SIZE);
 4807 
 4808         /*
 4809          * Only supported on kernel virtual addresses above the recursive map.
 4810          */
 4811         if (base < VM_MIN_KERNEL_ADDRESS)
 4812                 return (EINVAL);
 4813 
 4814         cache_bits_pde = pmap_cache_bits(mode, 1);
 4815         cache_bits_pte = pmap_cache_bits(mode, 0);
 4816         changed = FALSE;
 4817 
 4818         /*
 4819          * Pages that aren't mapped aren't supported.  Also break down
 4820          * 2/4MB pages into 4KB pages if required.
 4821          */
 4822         PMAP_LOCK(kernel_pmap);
 4823         for (tmpva = base; tmpva < base + size; ) {
 4824                 pde = pmap_pde(kernel_pmap, tmpva);
 4825                 if (*pde == 0) {
 4826                         PMAP_UNLOCK(kernel_pmap);
 4827                         return (EINVAL);
 4828                 }
 4829                 if (*pde & PG_PS) {
 4830                         /*
 4831                          * If the current 2/4MB page already has
 4832                          * the required memory type, then we need not
 4833                          * demote this page.  Just increment tmpva to
 4834                          * the next 2/4MB page frame.
 4835                          */
 4836                         if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
 4837                                 tmpva = trunc_4mpage(tmpva) + NBPDR;
 4838                                 continue;
 4839                         }
 4840 
 4841                         /*
 4842                          * If the current offset aligns with a 2/4MB
 4843                          * page frame and there is at least 2/4MB left
 4844                          * within the range, then we need not break
 4845                          * down this page into 4KB pages.
 4846                          */
 4847                         if ((tmpva & PDRMASK) == 0 &&
 4848                             tmpva + PDRMASK < base + size) {
 4849                                 tmpva += NBPDR;
 4850                                 continue;
 4851                         }
 4852                         if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) {
 4853                                 PMAP_UNLOCK(kernel_pmap);
 4854                                 return (ENOMEM);
 4855                         }
 4856                 }
 4857                 pte = vtopte(tmpva);
 4858                 if (*pte == 0) {
 4859                         PMAP_UNLOCK(kernel_pmap);
 4860                         return (EINVAL);
 4861                 }
 4862                 tmpva += PAGE_SIZE;
 4863         }
 4864         PMAP_UNLOCK(kernel_pmap);
 4865 
 4866         /*
 4867          * Ok, all the pages exist, so run through them updating their
 4868          * cache mode if required.
 4869          */
 4870         for (tmpva = base; tmpva < base + size; ) {
 4871                 pde = pmap_pde(kernel_pmap, tmpva);
 4872                 if (*pde & PG_PS) {
 4873                         if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
 4874                                 pmap_pde_attr(pde, cache_bits_pde);
 4875                                 changed = TRUE;
 4876                         }
 4877                         tmpva = trunc_4mpage(tmpva) + NBPDR;
 4878                 } else {
 4879                         pte = vtopte(tmpva);
 4880                         if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
 4881                                 pmap_pte_attr(pte, cache_bits_pte);
 4882                                 changed = TRUE;
 4883                         }
 4884                         tmpva += PAGE_SIZE;
 4885                 }
 4886         }
 4887 
 4888         /*
 4889          * Flush CPU caches to make sure any data isn't cached that
 4890          * shouldn't be, etc.
 4891          */
 4892         if (changed) {
 4893                 pmap_invalidate_range(kernel_pmap, base, tmpva);
 4894                 pmap_invalidate_cache_range(base, tmpva);
 4895         }
 4896         return (0);
 4897 }
 4898 
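A minimal sketch of a caller that relies on the error conventions documented above; the helper is hypothetical, and it assumes the write-combining PAT mode (PAT_WRITE_COMBINING) is configured on the running CPU:

/* Hypothetical sketch: remap an already-mapped kernel range as write-combining. */
static int
example_set_wc(vm_offset_t va, vm_size_t len)
{
        int error;

        error = pmap_change_attr(va, len, PAT_WRITE_COMBINING);
        if (error == EINVAL)
                printf("example_set_wc: part of the range is not mapped\n");
        else if (error == ENOMEM)
                printf("example_set_wc: could not demote a 2/4MB page\n");
        return (error);
}
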
 4899 /*
 4900  * perform the pmap work for mincore
 4901  */
 4902 int
 4903 pmap_mincore(pmap_t pmap, vm_offset_t addr)
 4904 {
 4905         pd_entry_t *pdep;
 4906         pt_entry_t *ptep, pte;
 4907         vm_paddr_t pa;
 4908         vm_page_t m;
 4909         int val = 0;
 4910         
 4911         PMAP_LOCK(pmap);
 4912         pdep = pmap_pde(pmap, addr);
 4913         if (*pdep != 0) {
 4914                 if (*pdep & PG_PS) {
 4915                         pte = *pdep;
 4916                         val = MINCORE_SUPER;
 4917                         /* Compute the physical address of the 4KB page. */
 4918                         pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) &
 4919                             PG_FRAME;
 4920                 } else {
 4921                         ptep = pmap_pte(pmap, addr);
 4922                         pte = *ptep;
 4923                         pmap_pte_release(ptep);
 4924                         pa = pte & PG_FRAME;
 4925                 }
 4926         } else {
 4927                 pte = 0;
 4928                 pa = 0;
 4929         }
 4930         PMAP_UNLOCK(pmap);
 4931 
 4932         if (pte != 0) {
 4933                 val |= MINCORE_INCORE;
 4934                 if ((pte & PG_MANAGED) == 0)
 4935                         return val;
 4936 
 4937                 m = PHYS_TO_VM_PAGE(pa);
 4938 
 4939                 /*
 4940                  * Modified by us
 4941                  */
 4942                 if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 4943                         val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
 4944                 else {
 4945                         /*
 4946                          * Modified by someone else
 4947                          */
 4948                         vm_page_lock_queues();
 4949                         if (m->dirty || pmap_is_modified(m))
 4950                                 val |= MINCORE_MODIFIED_OTHER;
 4951                         vm_page_unlock_queues();
 4952                 }
 4953                 /*
 4954                  * Referenced by us
 4955                  */
 4956                 if (pte & PG_A)
 4957                         val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
 4958                 else {
 4959                         /*
 4960                          * Referenced by someone else
 4961                          */
 4962                         vm_page_lock_queues();
 4963                         if ((m->flags & PG_REFERENCED) ||
 4964                             pmap_ts_referenced(m)) {
 4965                                 val |= MINCORE_REFERENCED_OTHER;
 4966                                 vm_page_flag_set(m, PG_REFERENCED);
 4967                         }
 4968                         vm_page_unlock_queues();
 4969                 }
 4970         } 
 4971         return val;
 4972 }
 4973 
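As a hedged illustration of interpreting the value returned by pmap_mincore() (the helper below is hypothetical; MINCORE_INCORE and MINCORE_SUPER are the flags set above):

/* Hypothetical sketch: report residency of one user address in the current process. */
static void
example_mincore_one(vm_offset_t uva)
{
        pmap_t pmap;
        int val;

        pmap = vmspace_pmap(curthread->td_proc->p_vmspace);
        val = pmap_mincore(pmap, uva);
        if (val & MINCORE_INCORE)
                printf("0x%08x is resident%s\n", (u_int)uva,
                    (val & MINCORE_SUPER) != 0 ? " via a 2/4MB superpage" : "");
}
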
 4974 void
 4975 pmap_activate(struct thread *td)
 4976 {
 4977         pmap_t  pmap, oldpmap;
 4978         u_int32_t  cr3;
 4979 
 4980         critical_enter();
 4981         pmap = vmspace_pmap(td->td_proc->p_vmspace);
 4982         oldpmap = PCPU_GET(curpmap);
 4983 #if defined(SMP)
 4984         atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
 4985         atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
 4986 #else
 4987         oldpmap->pm_active &= ~1;
 4988         pmap->pm_active |= 1;
 4989 #endif
 4990 #ifdef PAE
 4991         cr3 = vtophys(pmap->pm_pdpt);
 4992 #else
 4993         cr3 = vtophys(pmap->pm_pdir);
 4994 #endif
 4995         /*
 4996          * pmap_activate is for the current thread on the current cpu
 4997          */
 4998         td->td_pcb->pcb_cr3 = cr3;
 4999         load_cr3(cr3);
 5000         PCPU_SET(curpmap, pmap);
 5001         critical_exit();
 5002 }
 5003 
 5004 void
 5005 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
 5006 {
 5007 }
 5008 
 5009 /*
 5010  *      Increase the starting virtual address of the given mapping if a
 5011  *      different alignment might result in more superpage mappings.
 5012  */
 5013 void
 5014 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
 5015     vm_offset_t *addr, vm_size_t size)
 5016 {
 5017         vm_offset_t superpage_offset;
 5018 
 5019         if (size < NBPDR)
 5020                 return;
 5021         if (object != NULL && (object->flags & OBJ_COLORED) != 0)
 5022                 offset += ptoa(object->pg_color);
 5023         superpage_offset = offset & PDRMASK;
 5024         if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR ||
 5025             (*addr & PDRMASK) == superpage_offset)
 5026                 return;
 5027         if ((*addr & PDRMASK) < superpage_offset)
 5028                 *addr = (*addr & ~PDRMASK) + superpage_offset;
 5029         else
 5030                 *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
 5031 }
 5032 
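To make the adjustment above concrete with 4MB superpages (non-PAE, so NBPDR is 0x400000 and PDRMASK is 0x3fffff) and an uncolored object: for a 16MB mapping whose object offset is 0x00280000 and whose initial address hint is 0x30000000, superpage_offset is 0x280000, the size test passes, and since (*addr & PDRMASK) is 0, which is less than superpage_offset, the hint is advanced to 0x30280000. The virtual address and object offset then share the same 4MB alignment, which is what makes later promotion to a superpage mapping possible.
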
 5033 
 5034 #if defined(PMAP_DEBUG)
 5035 int pmap_pid_dump(int pid)
 5036 {
 5037         pmap_t pmap;
 5038         struct proc *p;
 5039         int npte = 0;
 5040         int index;
 5041 
 5042         sx_slock(&allproc_lock);
 5043         FOREACH_PROC_IN_SYSTEM(p) {
 5044                 if (p->p_pid != pid)
 5045                         continue;
 5046 
 5047                 if (p->p_vmspace) {
 5048                         int i,j;
 5049                         index = 0;
 5050                         pmap = vmspace_pmap(p->p_vmspace);
 5051                         for (i = 0; i < NPDEPTD; i++) {
 5052                                 pd_entry_t *pde;
 5053                                 pt_entry_t *pte;
 5054                                 vm_offset_t base = i << PDRSHIFT;
 5055                                 
 5056                                 pde = &pmap->pm_pdir[i];
 5057                                 if (pde && pmap_pde_v(pde)) {
 5058                                         for (j = 0; j < NPTEPG; j++) {
 5059                                                 vm_offset_t va = base + (j << PAGE_SHIFT);
 5060                                                 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
 5061                                                         if (index) {
 5062                                                                 index = 0;
 5063                                                                 printf("\n");
 5064                                                         }
 5065                                                         sx_sunlock(&allproc_lock);
 5066                                                         return npte;
 5067                                                 }
 5068                                                 pte = pmap_pte(pmap, va);
 5069                                                 if (pte && pmap_pte_v(pte)) {
 5070                                                         pt_entry_t pa;
 5071                                                         vm_page_t m;
 5072                                                         pa = *pte;
 5073                                                         m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
 5074                                                         printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
 5075                                                                 va, pa, m->hold_count, m->wire_count, m->flags);
 5076                                                         npte++;
 5077                                                         index++;
 5078                                                         if (index >= 2) {
 5079                                                                 index = 0;
 5080                                                                 printf("\n");
 5081                                                         } else {
 5082                                                                 printf(" ");
 5083                                                         }
 5084                                                 }
 5085                                         }
 5086                                 }
 5087                         }
 5088                 }
 5089         }
 5090         sx_sunlock(&allproc_lock);
 5091         return npte;
 5092 }
 5093 #endif
 5094 
 5095 #if defined(DEBUG)
 5096 
 5097 static void     pads(pmap_t pm);
 5098 void            pmap_pvdump(vm_offset_t pa);
 5099 
 5100 /* print address space of pmap*/
 5101 static void
 5102 pads(pmap_t pm)
 5103 {
 5104         int i, j;
 5105         vm_paddr_t va;
 5106         pt_entry_t *ptep;
 5107 
 5108         if (pm == kernel_pmap)
 5109                 return;
 5110         for (i = 0; i < NPDEPTD; i++)
 5111                 if (pm->pm_pdir[i])
 5112                         for (j = 0; j < NPTEPG; j++) {
 5113                                 va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
 5114                                 if (pm == kernel_pmap && va < KERNBASE)
 5115                                         continue;
 5116                                 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
 5117                                         continue;
 5118                                 ptep = pmap_pte(pm, va);
 5119                                 if (pmap_pte_v(ptep))
 5120                                         printf("%x:%x ", va, *ptep);
 5121                         };
 5122 
 5123 }
 5124 
 5125 void
 5126 pmap_pvdump(vm_paddr_t pa)
 5127 {
 5128         pv_entry_t pv;
 5129         pmap_t pmap;
 5130         vm_page_t m;
 5131 
 5132         printf("pa %x", pa);
 5133         m = PHYS_TO_VM_PAGE(pa);
 5134         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 5135                 pmap = PV_PMAP(pv);
 5136                 printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
 5137                 pads(pmap);
 5138         }
 5139         printf(" ");
 5140 }
 5141 #endif
