FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/pmap.c


    1 /*-
    2  * Copyright (c) 1991 Regents of the University of California.
    3  * All rights reserved.
    4  * Copyright (c) 1994 John S. Dyson
    5  * All rights reserved.
    6  * Copyright (c) 1994 David Greenman
    7  * All rights reserved.
    8  * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
    9  * All rights reserved.
   10  *
   11  * This code is derived from software contributed to Berkeley by
   12  * the Systems Programming Group of the University of Utah Computer
   13  * Science Department and William Jolitz of UUNET Technologies Inc.
   14  *
   15  * Redistribution and use in source and binary forms, with or without
   16  * modification, are permitted provided that the following conditions
   17  * are met:
   18  * 1. Redistributions of source code must retain the above copyright
   19  *    notice, this list of conditions and the following disclaimer.
   20  * 2. Redistributions in binary form must reproduce the above copyright
   21  *    notice, this list of conditions and the following disclaimer in the
   22  *    documentation and/or other materials provided with the distribution.
   23  * 3. All advertising materials mentioning features or use of this software
   24  *    must display the following acknowledgement:
   25  *      This product includes software developed by the University of
   26  *      California, Berkeley and its contributors.
   27  * 4. Neither the name of the University nor the names of its contributors
   28  *    may be used to endorse or promote products derived from this software
   29  *    without specific prior written permission.
   30  *
   31  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   32  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   33  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   34  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   35  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   36  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   37  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   38  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   39  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   40  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   41  * SUCH DAMAGE.
   42  *
   43  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
   44  */
   45 /*-
   46  * Copyright (c) 2003 Networks Associates Technology, Inc.
   47  * All rights reserved.
   48  *
   49  * This software was developed for the FreeBSD Project by Jake Burkholder,
   50  * Safeport Network Services, and Network Associates Laboratories, the
   51  * Security Research Division of Network Associates, Inc. under
   52  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
   53  * CHATS research program.
   54  *
   55  * Redistribution and use in source and binary forms, with or without
   56  * modification, are permitted provided that the following conditions
   57  * are met:
   58  * 1. Redistributions of source code must retain the above copyright
   59  *    notice, this list of conditions and the following disclaimer.
   60  * 2. Redistributions in binary form must reproduce the above copyright
   61  *    notice, this list of conditions and the following disclaimer in the
   62  *    documentation and/or other materials provided with the distribution.
   63  *
   64  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   65  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   66  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   67  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   68  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   69  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   70  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   71  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   72  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   73  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   74  * SUCH DAMAGE.
   75  */
   76 
   77 #include <sys/cdefs.h>
   78 __FBSDID("$FreeBSD: releng/8.2/sys/i386/i386/pmap.c 215896 2010-11-26 21:16:21Z jkim $");
   79 
   80 /*
   81  *      Manages physical address maps.
   82  *
   83  *      In addition to hardware address maps, this
   84  *      module is called upon to provide software-use-only
   85  *      maps which may or may not be stored in the same
   86  *      form as hardware maps.  These pseudo-maps are
   87  *      used to store intermediate results from copy
   88  *      operations to and from address spaces.
   89  *
   90  *      Since the information managed by this module is
   91  *      also stored by the logical address mapping module,
   92  *      this module may throw away valid virtual-to-physical
   93  *      mappings at almost any time.  However, invalidations
   94  *      of virtual-to-physical mappings must be done as
   95  *      requested.
   96  *
   97  *      In order to cope with hardware architectures which
   98  *      make virtual-to-physical map invalidates expensive,
   99  *      this module may delay invalidation or protection-reduction
  100  *      operations until such time as they are actually
  101  *      necessary.  This module is given full information as
  102  *      to which processors are currently using which maps,
  103  *      and to when physical maps must be made correct.
  104  */
  105 
  106 #include "opt_cpu.h"
  107 #include "opt_pmap.h"
  108 #include "opt_msgbuf.h"
  109 #include "opt_smp.h"
  110 #include "opt_xbox.h"
  111 
  112 #include <sys/param.h>
  113 #include <sys/systm.h>
  114 #include <sys/kernel.h>
  115 #include <sys/ktr.h>
  116 #include <sys/lock.h>
  117 #include <sys/malloc.h>
  118 #include <sys/mman.h>
  119 #include <sys/msgbuf.h>
  120 #include <sys/mutex.h>
  121 #include <sys/proc.h>
  122 #include <sys/sf_buf.h>
  123 #include <sys/sx.h>
  124 #include <sys/vmmeter.h>
  125 #include <sys/sched.h>
  126 #include <sys/sysctl.h>
  127 #ifdef SMP
  128 #include <sys/smp.h>
  129 #endif
  130 
  131 #include <vm/vm.h>
  132 #include <vm/vm_param.h>
  133 #include <vm/vm_kern.h>
  134 #include <vm/vm_page.h>
  135 #include <vm/vm_map.h>
  136 #include <vm/vm_object.h>
  137 #include <vm/vm_extern.h>
  138 #include <vm/vm_pageout.h>
  139 #include <vm/vm_pager.h>
  140 #include <vm/vm_reserv.h>
  141 #include <vm/uma.h>
  142 
  143 #include <machine/cpu.h>
  144 #include <machine/cputypes.h>
  145 #include <machine/md_var.h>
  146 #include <machine/pcb.h>
  147 #include <machine/specialreg.h>
  148 #ifdef SMP
  149 #include <machine/smp.h>
  150 #endif
  151 
  152 #ifdef XBOX
  153 #include <machine/xbox.h>
  154 #endif
  155 
  156 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
  157 #define CPU_ENABLE_SSE
  158 #endif
  159 
  160 #ifndef PMAP_SHPGPERPROC
  161 #define PMAP_SHPGPERPROC 200
  162 #endif
  163 
  164 #if !defined(DIAGNOSTIC)
  165 #define PMAP_INLINE     __gnu89_inline
  166 #else
  167 #define PMAP_INLINE
  168 #endif
  169 
  170 #define PV_STATS
  171 #ifdef PV_STATS
  172 #define PV_STAT(x)      do { x ; } while (0)
  173 #else
  174 #define PV_STAT(x)      do { } while (0)
  175 #endif
  176 
  177 #define pa_index(pa)    ((pa) >> PDRSHIFT)
  178 #define pa_to_pvh(pa)   (&pv_table[pa_index(pa)])
  179 
  180 /*
  181  * Get PDEs and PTEs for user/kernel address space
  182  */
  183 #define pmap_pde(m, v)  (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
  184 #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
  185 
  186 #define pmap_pde_v(pte)         ((*(int *)pte & PG_V) != 0)
  187 #define pmap_pte_w(pte)         ((*(int *)pte & PG_W) != 0)
  188 #define pmap_pte_m(pte)         ((*(int *)pte & PG_M) != 0)
  189 #define pmap_pte_u(pte)         ((*(int *)pte & PG_A) != 0)
  190 #define pmap_pte_v(pte)         ((*(int *)pte & PG_V) != 0)
  191 
  192 #define pmap_pte_set_w(pte, v)  ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \
  193     atomic_clear_int((u_int *)(pte), PG_W))
  194 #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
  195 
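/*
 * A minimal illustration (hypothetical helper, not part of the pmap API) of
 * how the macros above decompose a 32-bit virtual address.  The example
 * values in the comment assume a non-PAE kernel, where PDRSHIFT is 22 and
 * NPTEPG is 1024.
 */
static __inline void
pmap_va_split_example(vm_offset_t va, u_int *pdi, u_int *pti, u_int *off)
{

        *pdi = va >> PDRSHIFT;                    /* page directory index */
        *pti = (va >> PAGE_SHIFT) & (NPTEPG - 1); /* page table index */
        *off = va & PAGE_MASK;                    /* byte offset in the page */
        /* e.g., va 0xc0abcdef -> pdi 0x302, pti 0x2bc, off 0xdef */
}
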
  196 struct pmap kernel_pmap_store;
  197 LIST_HEAD(pmaplist, pmap);
  198 static struct pmaplist allpmaps;
  199 static struct mtx allpmaps_lock;
  200 
  201 vm_offset_t virtual_avail;      /* VA of first avail page (after kernel bss) */
  202 vm_offset_t virtual_end;        /* VA of last avail page (end of kernel AS) */
  203 int pgeflag = 0;                /* PG_G or-in */
  204 int pseflag = 0;                /* PG_PS or-in */
  205 
  206 static int nkpt;
  207 vm_offset_t kernel_vm_end;
  208 extern u_int32_t KERNend;
  209 extern u_int32_t KPTphys;
  210 
  211 #ifdef PAE
  212 pt_entry_t pg_nx;
  213 static uma_zone_t pdptzone;
  214 #endif
  215 
  216 SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
  217 
  218 static int pat_works = 1;
  219 SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1,
  220     "Is page attribute table fully functional?");
  221 
  222 static int pg_ps_enabled;
  223 SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
  224     "Are large page mappings enabled?");
  225 
  226 #define PAT_INDEX_SIZE  8
  227 static int pat_index[PAT_INDEX_SIZE];   /* cache mode to PAT index conversion */
  228 
  229 /*
  230  * Data for the pv entry allocation mechanism
  231  */
  232 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
  233 static struct md_page *pv_table;
  234 static int shpgperproc = PMAP_SHPGPERPROC;
  235 
  236 struct pv_chunk *pv_chunkbase;          /* KVA block for pv_chunks */
  237 int pv_maxchunks;                       /* How many chunks we have KVA for */
  238 vm_offset_t pv_vafree;                  /* freelist stored in the PTE */
  239 
  240 /*
  241  * All those kernel PT submaps that BSD is so fond of
  242  */
  243 struct sysmaps {
  244         struct  mtx lock;
  245         pt_entry_t *CMAP1;
  246         pt_entry_t *CMAP2;
  247         caddr_t CADDR1;
  248         caddr_t CADDR2;
  249 };
  250 static struct sysmaps sysmaps_pcpu[MAXCPU];
  251 pt_entry_t *CMAP1 = 0;
  252 static pt_entry_t *CMAP3;
  253 static pd_entry_t *KPTD;
  254 caddr_t CADDR1 = 0, ptvmmap = 0;
  255 static caddr_t CADDR3;
  256 struct msgbuf *msgbufp = 0;
  257 
  258 /*
  259  * Crashdump maps.
  260  */
  261 static caddr_t crashdumpmap;
  262 
  263 static pt_entry_t *PMAP1 = 0, *PMAP2;
  264 static pt_entry_t *PADDR1 = 0, *PADDR2;
  265 #ifdef SMP
  266 static int PMAP1cpu;
  267 static int PMAP1changedcpu;
  268 SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, 
  269            &PMAP1changedcpu, 0,
  270            "Number of times pmap_pte_quick changed CPU with same PMAP1");
  271 #endif
  272 static int PMAP1changed;
  273 SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, 
  274            &PMAP1changed, 0,
  275            "Number of times pmap_pte_quick changed PMAP1");
  276 static int PMAP1unchanged;
  277 SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, 
  278            &PMAP1unchanged, 0,
  279            "Number of times pmap_pte_quick didn't change PMAP1");
  280 static struct mtx PMAP2mutex;
  281 
  282 static void     free_pv_entry(pmap_t pmap, pv_entry_t pv);
  283 static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
  284 static void     pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
  285 static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
  286 static void     pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
  287 static void     pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
  288 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
  289                     vm_offset_t va);
  290 static int      pmap_pvh_wired_mappings(struct md_page *pvh, int count);
  291 
  292 static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
  293 static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
  294     vm_prot_t prot);
  295 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
  296     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
  297 static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
  298 static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
  299 static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
  300 static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
  301 static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
  302 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
  303 static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
  304 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
  305 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
  306     vm_prot_t prot);
  307 static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
  308 static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
  309     vm_page_t *free);
  310 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
  311     vm_page_t *free);
  312 static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
  313 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
  314     vm_page_t *free);
  315 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
  316                                         vm_offset_t va);
  317 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
  318 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
  319     vm_page_t m);
  320 static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
  321     pd_entry_t newpde);
  322 static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
  323 
  324 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
  325 
  326 static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
  327 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
  328 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
  329 static void pmap_pte_release(pt_entry_t *pte);
  330 static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
  331 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
  332 #ifdef PAE
  333 static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
  334 #endif
  335 
  336 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
  337 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
  338 
  339 /*
  340  * If you get an error here, then you set KVA_PAGES wrong! See the
  341  * description of KVA_PAGES in sys/i386/include/pmap.h. It must be
  342  * a multiple of 4 for a normal kernel, or a multiple of 8 for a PAE kernel.
  343  */
  344 CTASSERT(KERNBASE % (1 << 24) == 0);
  345 
  346 /*
  347  * Move the kernel virtual free pointer to the next
  348  * 4MB.  This is used to help improve performance
  349  * by using a large (4MB) page for much of the kernel
  350  * (.text, .data, .bss)
  351  */
  352 static vm_offset_t
  353 pmap_kmem_choose(vm_offset_t addr)
  354 {
  355         vm_offset_t newaddr = addr;
  356 
  357 #ifndef DISABLE_PSE
  358         if (cpu_feature & CPUID_PSE)
  359                 newaddr = (addr + PDRMASK) & ~PDRMASK;
  360 #endif
  361         return newaddr;
  362 }
  363 
  364 /*
  365  *      Bootstrap the system enough to run with virtual memory.
  366  *
  367  *      On the i386 this is called after mapping has already been enabled
  368  *      and just syncs the pmap module with what has already been done.
  369  *      [We can't call it easily with mapping off since the kernel is not
  370  *      mapped with PA == VA, hence we would have to relocate every address
  371  *      from the linked base (virtual) address "KERNBASE" to the actual
  372  *      (physical) address starting relative to 0]
  373  */
  374 void
  375 pmap_bootstrap(vm_paddr_t firstaddr)
  376 {
  377         vm_offset_t va;
  378         pt_entry_t *pte, *unused;
  379         struct sysmaps *sysmaps;
  380         int i;
  381 
  382         /*
  383          * Initialize the first available kernel virtual address.  However,
  384          * using "firstaddr" may waste a few pages of the kernel virtual
  385          * address space, because locore may not have mapped every physical
  386          * page that it allocated.  Preferably, locore would provide a first
  387          * unused virtual address in addition to "firstaddr".
  388          */
  389         virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
  390         virtual_avail = pmap_kmem_choose(virtual_avail);
  391 
  392         virtual_end = VM_MAX_KERNEL_ADDRESS;
  393 
  394         /*
  395          * Initialize the kernel pmap (which is statically allocated).
  396          */
  397         PMAP_LOCK_INIT(kernel_pmap);
  398         kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
  399 #ifdef PAE
  400         kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
  401 #endif
  402         kernel_pmap->pm_root = NULL;
  403         kernel_pmap->pm_active = -1;    /* don't allow deactivation */
  404         TAILQ_INIT(&kernel_pmap->pm_pvchunk);
  405         LIST_INIT(&allpmaps);
  406 
  407         /*
  408          * Request a spin mutex so that changes to allpmaps cannot be
  409          * preempted by smp_rendezvous_cpus().  Otherwise,
  410          * pmap_update_pde_kernel() could access allpmaps while it is
  411          * being changed.
  412          */
  413         mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
  414         mtx_lock_spin(&allpmaps_lock);
  415         LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
  416         mtx_unlock_spin(&allpmaps_lock);
  417         nkpt = NKPT;
  418 
  419         /*
  420          * Reserve some special page table entries/VA space for temporary
  421          * mapping of pages.
  422          */
  423 #define SYSMAP(c, p, v, n)      \
  424         v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
  425 
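        /*
         * For illustration only (a sketch, not additional functionality):
         * an invocation such as SYSMAP(caddr_t, CMAP1, CADDR1, 1) expands to
         *
         *      CADDR1 = (caddr_t)va; va += ((1)*PAGE_SIZE); CMAP1 = pte; pte += (1);
         *
         * so each use hands out "n" pages of KVA together with the kernel
         * PTEs mapping them, advancing the "va" and "pte" cursors that are
         * initialized just below.
         */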
  426         va = virtual_avail;
  427         pte = vtopte(va);
  428 
  429         /*
  430          * CMAP1/CMAP2 are used for zeroing and copying pages.
  431          * CMAP3 is used for the idle process page zeroing.
  432          */
  433         for (i = 0; i < MAXCPU; i++) {
  434                 sysmaps = &sysmaps_pcpu[i];
  435                 mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
  436                 SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
  437                 SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
  438         }
  439         SYSMAP(caddr_t, CMAP1, CADDR1, 1)
  440         SYSMAP(caddr_t, CMAP3, CADDR3, 1)
  441 
  442         /*
  443          * Crashdump maps.
  444          */
  445         SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
  446 
  447         /*
  448          * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
  449          */
  450         SYSMAP(caddr_t, unused, ptvmmap, 1)
  451 
  452         /*
  453          * msgbufp is used to map the system message buffer.
  454          */
  455         SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))
  456 
  457         /*
  458          * KPTmap is used by pmap_kextract().
  459          *
  460          * KPTmap is first initialized by locore.  However, that initial
  461          * KPTmap can only support NKPT page table pages.  Here, a larger
  462          * KPTmap is created that can support KVA_PAGES page table pages.
  463          */
  464         SYSMAP(pt_entry_t *, KPTD, KPTmap, KVA_PAGES)
  465 
  466         for (i = 0; i < NKPT; i++)
  467                 KPTD[i] = (KPTphys + (i << PAGE_SHIFT)) | pgeflag | PG_RW | PG_V;
  468 
  469         /*
  470          * Adjust the start of the KPTD and KPTmap so that the implementation
  471          * of pmap_kextract() and pmap_growkernel() can be made simpler.
  472          */
  473         KPTD -= KPTDI;
  474         KPTmap -= i386_btop(KPTDI << PDRSHIFT);
  475 
  476         /*
  477          * ptemap is used for pmap_pte_quick
  478          */
  479         SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
  480         SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1)
  481 
  482         mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
  483 
  484         virtual_avail = va;
  485 
  486         /*
  487          * Leave in place an identity mapping (virt == phys) for the low 1 MB
  488          * physical memory region that is used by the ACPI wakeup code.  This
  489          * mapping must not have PG_G set. 
  490          */
  491 #ifdef XBOX
  492         /* FIXME: This is gross, but needed for the XBOX. Since we are in such
  493  * an early stage, we cannot yet neatly map video memory ... :-(
  494          * Better fixes are very welcome! */
  495         if (!arch_i386_is_xbox)
  496 #endif
  497         for (i = 1; i < NKPT; i++)
  498                 PTD[i] = 0;
  499 
  500         /* Initialize the PAT MSR if present. */
  501         pmap_init_pat();
  502 
  503         /* Turn on PG_G on kernel page(s) */
  504         pmap_set_pg();
  505 }
  506 
  507 /*
  508  * Setup the PAT MSR.
  509  */
  510 void
  511 pmap_init_pat(void)
  512 {
  513         int pat_table[PAT_INDEX_SIZE];
  514         uint64_t pat_msr;
  515         u_long cr0, cr4;
  516         int i;
  517 
  518         /* Set default PAT index table. */
  519         for (i = 0; i < PAT_INDEX_SIZE; i++)
  520                 pat_table[i] = -1;
  521         pat_table[PAT_WRITE_BACK] = 0;
  522         pat_table[PAT_WRITE_THROUGH] = 1;
  523         pat_table[PAT_UNCACHEABLE] = 3;
  524         pat_table[PAT_WRITE_COMBINING] = 3;
  525         pat_table[PAT_WRITE_PROTECTED] = 3;
  526         pat_table[PAT_UNCACHED] = 3;
  527 
  528         /* Bail if this CPU doesn't implement PAT. */
  529         if ((cpu_feature & CPUID_PAT) == 0) {
  530                 for (i = 0; i < PAT_INDEX_SIZE; i++)
  531                         pat_index[i] = pat_table[i];
  532                 pat_works = 0;
  533                 return;
  534         }
  535 
  536         /*
  537          * Due to some Intel errata, we can only safely use the lower 4
  538          * PAT entries.
  539          *
  540          *   Intel Pentium III Processor Specification Update
  541          * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
  542          * or Mode C Paging)
  543          *
  544          *   Intel Pentium IV  Processor Specification Update
  545          * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
  546          */
  547         if (cpu_vendor_id == CPU_VENDOR_INTEL &&
  548             !(CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe))
  549                 pat_works = 0;
  550 
  551         /* Initialize default PAT entries. */
  552         pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) |
  553             PAT_VALUE(1, PAT_WRITE_THROUGH) |
  554             PAT_VALUE(2, PAT_UNCACHED) |
  555             PAT_VALUE(3, PAT_UNCACHEABLE) |
  556             PAT_VALUE(4, PAT_WRITE_BACK) |
  557             PAT_VALUE(5, PAT_WRITE_THROUGH) |
  558             PAT_VALUE(6, PAT_UNCACHED) |
  559             PAT_VALUE(7, PAT_UNCACHEABLE);
  560 
  561         if (pat_works) {
  562                 /*
  563                  * Leave the indices 0-3 at the default of WB, WT, UC-, and UC.
  564                  * Program 5 and 6 as WP and WC.
  565                  * Leave 4 and 7 as WB and UC.
  566                  */
  567                 pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6));
  568                 pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) |
  569                     PAT_VALUE(6, PAT_WRITE_COMBINING);
  570                 pat_table[PAT_UNCACHED] = 2;
  571                 pat_table[PAT_WRITE_PROTECTED] = 5;
  572                 pat_table[PAT_WRITE_COMBINING] = 6;
  573         } else {
  574                 /*
  575                  * Just replace PAT Index 2 with WC instead of UC-.
  576                  */
  577                 pat_msr &= ~PAT_MASK(2);
  578                 pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
  579                 pat_table[PAT_WRITE_COMBINING] = 2;
  580         }
  581 
  582         /* Disable PGE. */
  583         cr4 = rcr4();
  584         load_cr4(cr4 & ~CR4_PGE);
  585 
  586         /* Disable caches (CD = 1, NW = 0). */
  587         cr0 = rcr0();
  588         load_cr0((cr0 & ~CR0_NW) | CR0_CD);
  589 
  590         /* Flushes caches and TLBs. */
  591         wbinvd();
  592         invltlb();
  593 
  594         /* Update PAT and index table. */
  595         wrmsr(MSR_PAT, pat_msr);
  596         for (i = 0; i < PAT_INDEX_SIZE; i++)
  597                 pat_index[i] = pat_table[i];
  598 
  599         /* Flush caches and TLBs again. */
  600         wbinvd();
  601         invltlb();
  602 
  603         /* Restore caches and PGE. */
  604         load_cr0(cr0);
  605         load_cr4(cr4);
  606 }
  607 
  608 /*
  609  * Set PG_G on kernel pages.  Only the BSP calls this when SMP is turned on.
  610  */
  611 void
  612 pmap_set_pg(void)
  613 {
  614         pt_entry_t *pte;
  615         vm_offset_t va, endva;
  616 
  617         if (pgeflag == 0)
  618                 return;
  619 
  620         endva = KERNBASE + KERNend;
  621 
  622         if (pseflag) {
  623                 va = KERNBASE + KERNLOAD;
  624                 while (va  < endva) {
  625                         pdir_pde(PTD, va) |= pgeflag;
  626                         invltlb();      /* Play it safe, invltlb() every time */
  627                         va += NBPDR;
  628                 }
  629         } else {
  630                 va = (vm_offset_t)btext;
  631                 while (va < endva) {
  632                         pte = vtopte(va);
  633                         if (*pte)
  634                                 *pte |= pgeflag;
  635                         invltlb();      /* Play it safe, invltlb() every time */
  636                         va += PAGE_SIZE;
  637                 }
  638         }
  639 }
  640 
  641 /*
  642  * Initialize a vm_page's machine-dependent fields.
  643  */
  644 void
  645 pmap_page_init(vm_page_t m)
  646 {
  647 
  648         TAILQ_INIT(&m->md.pv_list);
  649         m->md.pat_mode = PAT_WRITE_BACK;
  650 }
  651 
  652 #ifdef PAE
  653 static void *
  654 pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
  655 {
  656 
  657         /* Inform UMA that this allocator uses kernel_map/object. */
  658         *flags = UMA_SLAB_KERNEL;
  659         return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 0x0ULL,
  660             0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT));
  661 }
  662 #endif
  663 
  664 /*
  665  * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
  666  * Requirements:
  667  *  - Must deal with pages in order to ensure that none of the PG_* bits
  668  *    are ever set, PG_V in particular.
  669  *  - Assumes we can write to ptes without pte_store() atomic ops, even
  670  *    on PAE systems.  This should be ok.
  671  *  - Assumes nothing will ever test these addresses for 0 to indicate
  672  *    no mapping instead of correctly checking PG_V.
  673  *  - Assumes a vm_offset_t will fit in a pte (true for i386).
  674  * Because PG_V is never set, there can be no mappings to invalidate.
  675  */
  676 static vm_offset_t
  677 pmap_ptelist_alloc(vm_offset_t *head)
  678 {
  679         pt_entry_t *pte;
  680         vm_offset_t va;
  681 
  682         va = *head;
  683         if (va == 0)
  684                 return (va);    /* Out of memory */
  685         pte = vtopte(va);
  686         *head = *pte;
  687         if (*head & PG_V)
  688                 panic("pmap_ptelist_alloc: va with PG_V set!");
  689         *pte = 0;
  690         return (va);
  691 }
  692 
  693 static void
  694 pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
  695 {
  696         pt_entry_t *pte;
  697 
  698         if (va & PG_V)
  699                 panic("pmap_ptelist_free: freeing va with PG_V set!");
  700         pte = vtopte(va);
  701         *pte = *head;           /* virtual! PG_V is 0 though */
  702         *head = va;
  703 }
  704 
  705 static void
  706 pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
  707 {
  708         int i;
  709         vm_offset_t va;
  710 
  711         *head = 0;
  712         for (i = npages - 1; i >= 0; i--) {
  713                 va = (vm_offset_t)base + i * PAGE_SIZE;
  714                 pmap_ptelist_free(head, va);
  715         }
  716 }
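
/*
 * A minimal usage sketch (hypothetical helper, not used elsewhere in this
 * file): take one page of KVA from the freelist threaded through the unused
 * PTEs and immediately give it back.  A return value of 0 from
 * pmap_ptelist_alloc() means the freelist is empty.
 */
static __inline void
pmap_ptelist_example(vm_offset_t *head)
{
        vm_offset_t va;

        va = pmap_ptelist_alloc(head);
        if (va != 0)
                pmap_ptelist_free(head, va);
}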
  717 
  718 
  719 /*
  720  *      Initialize the pmap module.
  721  *      Called by vm_init, to initialize any structures that the pmap
  722  *      system needs to map virtual memory.
  723  */
  724 void
  725 pmap_init(void)
  726 {
  727         vm_page_t mpte;
  728         vm_size_t s;
  729         int i, pv_npg;
  730 
  731         /*
  732          * Initialize the vm page array entries for the kernel pmap's
  733          * page table pages.
  734          */ 
  735         for (i = 0; i < NKPT; i++) {
  736                 mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT));
  737                 KASSERT(mpte >= vm_page_array &&
  738                     mpte < &vm_page_array[vm_page_array_size],
  739                     ("pmap_init: page table page is out of range"));
  740                 mpte->pindex = i + KPTDI;
  741                 mpte->phys_addr = KPTphys + (i << PAGE_SHIFT);
  742         }
  743 
  744         /*
  745          * Initialize the address space (zone) for the pv entries.  Set a
  746          * high water mark so that the system can recover from excessive
  747          * numbers of pv entries.
  748          */
  749         TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
  750         pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
  751         TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
  752         pv_entry_max = roundup(pv_entry_max, _NPCPV);
  753         pv_entry_high_water = 9 * (pv_entry_max / 10);
  754 
  755         /*
  756          * If the kernel is running in a virtual machine on an AMD Family 10h
  757          * processor, then it must assume that MCA is enabled by the virtual
  758          * machine monitor.
  759          */
  760         if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
  761             CPUID_TO_FAMILY(cpu_id) == 0x10)
  762                 workaround_erratum383 = 1;
  763 
  764         /*
  765          * Are large page mappings supported and enabled?
  766          */
  767         TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
  768         if (pseflag == 0)
  769                 pg_ps_enabled = 0;
  770         else if (pg_ps_enabled) {
  771                 KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
  772                     ("pmap_init: can't assign to pagesizes[1]"));
  773                 pagesizes[1] = NBPDR;
  774         }
  775 
  776         /*
  777          * Calculate the size of the pv head table for superpages.
  778          */
  779         for (i = 0; phys_avail[i + 1]; i += 2);
  780         pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;
  781 
  782         /*
  783          * Allocate memory for the pv head table for superpages.
  784          */
  785         s = (vm_size_t)(pv_npg * sizeof(struct md_page));
  786         s = round_page(s);
  787         pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
  788         for (i = 0; i < pv_npg; i++)
  789                 TAILQ_INIT(&pv_table[i].pv_list);
  790 
  791         pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
  792         pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
  793             PAGE_SIZE * pv_maxchunks);
  794         if (pv_chunkbase == NULL)
  795                 panic("pmap_init: not enough kvm for pv chunks");
  796         pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
  797 #ifdef PAE
  798         pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
  799             NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
  800             UMA_ZONE_VM | UMA_ZONE_NOFREE);
  801         uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
  802 #endif
  803 }
  804 
  805 
  806 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
  807         "Max number of PV entries");
  808 SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
  809         "Page share factor per proc");
  810 
  811 SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
  812     "2/4MB page mapping counters");
  813 
  814 static u_long pmap_pde_demotions;
  815 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD,
  816     &pmap_pde_demotions, 0, "2/4MB page demotions");
  817 
  818 static u_long pmap_pde_mappings;
  819 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
  820     &pmap_pde_mappings, 0, "2/4MB page mappings");
  821 
  822 static u_long pmap_pde_p_failures;
  823 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD,
  824     &pmap_pde_p_failures, 0, "2/4MB page promotion failures");
  825 
  826 static u_long pmap_pde_promotions;
  827 SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD,
  828     &pmap_pde_promotions, 0, "2/4MB page promotions");
  829 
  830 /***************************************************
  831  * Low level helper routines.....
  832  ***************************************************/
  833 
  834 /*
  835  * Determine the appropriate bits to set in a PTE or PDE for a specified
  836  * caching mode.
  837  */
  838 int
  839 pmap_cache_bits(int mode, boolean_t is_pde)
  840 {
  841         int cache_bits, pat_flag, pat_idx;
  842 
  843         if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0)
  844                 panic("Unknown caching mode %d\n", mode);
  845 
  846         /* The PAT bit is different for PTE's and PDE's. */
  847         pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
  848 
  849         /* Map the caching mode to a PAT index. */
  850         pat_idx = pat_index[mode];
  851 
  852         /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
  853         cache_bits = 0;
  854         if (pat_idx & 0x4)
  855                 cache_bits |= pat_flag;
  856         if (pat_idx & 0x2)
  857                 cache_bits |= PG_NC_PCD;
  858         if (pat_idx & 0x1)
  859                 cache_bits |= PG_NC_PWT;
  860         return (cache_bits);
  861 }
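
/*
 * A minimal sketch (hypothetical helper, not part of the pmap API): encode a
 * writable, valid, write-combining 4KB PTE, assuming the caller performs the
 * required TLB and cache invalidation as pmap_kenter_attr() does.
 */
static __inline pt_entry_t
pmap_wc_pte_example(vm_paddr_t pa)
{

        return (pa | PG_RW | PG_V | pmap_cache_bits(PAT_WRITE_COMBINING, 0));
}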
  862 
  863 /*
  864  * The caller is responsible for maintaining TLB consistency.
  865  */
  866 static void
  867 pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde)
  868 {
  869         pd_entry_t *pde;
  870         pmap_t pmap;
  871         boolean_t PTD_updated;
  872 
  873         PTD_updated = FALSE;
  874         mtx_lock_spin(&allpmaps_lock);
  875         LIST_FOREACH(pmap, &allpmaps, pm_list) {
  876                 if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
  877                     PG_FRAME))
  878                         PTD_updated = TRUE;
  879                 pde = pmap_pde(pmap, va);
  880                 pde_store(pde, newpde);
  881         }
  882         mtx_unlock_spin(&allpmaps_lock);
  883         KASSERT(PTD_updated,
  884             ("pmap_kenter_pde: current page table is not in allpmaps"));
  885 }
  886 
  887 /*
  888  * After changing the page size for the specified virtual address in the page
  889  * table, flush the corresponding entries from the processor's TLB.  Only the
  890  * calling processor's TLB is affected.
  891  *
  892  * The calling thread must be pinned to a processor.
  893  */
  894 static void
  895 pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
  896 {
  897         u_long cr4;
  898 
  899         if ((newpde & PG_PS) == 0)
  900                 /* Demotion: flush a specific 2MB page mapping. */
  901                 invlpg(va);
  902         else if ((newpde & PG_G) == 0)
  903                 /*
  904                  * Promotion: flush every 4KB page mapping from the TLB
  905                  * because there are too many to flush individually.
  906                  */
  907                 invltlb();
  908         else {
  909                 /*
  910                  * Promotion: flush every 4KB page mapping from the TLB,
  911                  * including any global (PG_G) mappings.
  912                  */
  913                 cr4 = rcr4();
  914                 load_cr4(cr4 & ~CR4_PGE);
  915                 /*
  916                  * Although preemption at this point could be detrimental to
  917                  * performance, it would not lead to an error.  PG_G is simply
  918                  * ignored if CR4.PGE is clear.  Moreover, in case this block
  919                  * is re-entered, the load_cr4() either above or below will
  920                  * modify CR4.PGE flushing the TLB.
  921                  */
  922                 load_cr4(cr4 | CR4_PGE);
  923         }
  924 }
  925 #ifdef SMP
  926 /*
  927  * For SMP, these functions have to use the IPI mechanism for coherence.
  928  *
  929  * N.B.: Before calling any of the following TLB invalidation functions,
  930  * the calling processor must ensure that all stores updating a non-
  931  * kernel page table are globally performed.  Otherwise, another
  932  * processor could cache an old, pre-update entry without being
  933  * invalidated.  This can happen one of two ways: (1) The pmap becomes
  934  * active on another processor after its pm_active field is checked by
  935  * one of the following functions but before a store updating the page
  936  * table is globally performed. (2) The pmap becomes active on another
  937  * processor before its pm_active field is checked but due to
  938  * speculative loads one of the following functions still reads the
  939  * pmap as inactive on the other processor.
  940  * 
  941  * The kernel page table is exempt because its pm_active field is
  942  * immutable.  The kernel page table is always active on every
  943  * processor.
  944  */
  945 void
  946 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  947 {
  948         cpumask_t cpumask, other_cpus;
  949 
  950         sched_pin();
  951         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  952                 invlpg(va);
  953                 smp_invlpg(va);
  954         } else {
  955                 cpumask = PCPU_GET(cpumask);
  956                 other_cpus = PCPU_GET(other_cpus);
  957                 if (pmap->pm_active & cpumask)
  958                         invlpg(va);
  959                 if (pmap->pm_active & other_cpus)
  960                         smp_masked_invlpg(pmap->pm_active & other_cpus, va);
  961         }
  962         sched_unpin();
  963 }
  964 
  965 void
  966 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  967 {
  968         cpumask_t cpumask, other_cpus;
  969         vm_offset_t addr;
  970 
  971         sched_pin();
  972         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  973                 for (addr = sva; addr < eva; addr += PAGE_SIZE)
  974                         invlpg(addr);
  975                 smp_invlpg_range(sva, eva);
  976         } else {
  977                 cpumask = PCPU_GET(cpumask);
  978                 other_cpus = PCPU_GET(other_cpus);
  979                 if (pmap->pm_active & cpumask)
  980                         for (addr = sva; addr < eva; addr += PAGE_SIZE)
  981                                 invlpg(addr);
  982                 if (pmap->pm_active & other_cpus)
  983                         smp_masked_invlpg_range(pmap->pm_active & other_cpus,
  984                             sva, eva);
  985         }
  986         sched_unpin();
  987 }
  988 
  989 void
  990 pmap_invalidate_all(pmap_t pmap)
  991 {
  992         cpumask_t cpumask, other_cpus;
  993 
  994         sched_pin();
  995         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  996                 invltlb();
  997                 smp_invltlb();
  998         } else {
  999                 cpumask = PCPU_GET(cpumask);
 1000                 other_cpus = PCPU_GET(other_cpus);
 1001                 if (pmap->pm_active & cpumask)
 1002                         invltlb();
 1003                 if (pmap->pm_active & other_cpus)
 1004                         smp_masked_invltlb(pmap->pm_active & other_cpus);
 1005         }
 1006         sched_unpin();
 1007 }
 1008 
 1009 void
 1010 pmap_invalidate_cache(void)
 1011 {
 1012 
 1013         sched_pin();
 1014         wbinvd();
 1015         smp_cache_flush();
 1016         sched_unpin();
 1017 }
 1018 
 1019 struct pde_action {
 1020         cpumask_t store;        /* processor that updates the PDE */
 1021         cpumask_t invalidate;   /* processors that invalidate their TLB */
 1022         vm_offset_t va;
 1023         pd_entry_t *pde;
 1024         pd_entry_t newpde;
 1025 };
 1026 
 1027 static void
 1028 pmap_update_pde_kernel(void *arg)
 1029 {
 1030         struct pde_action *act = arg;
 1031         pd_entry_t *pde;
 1032         pmap_t pmap;
 1033 
 1034         if (act->store == PCPU_GET(cpumask))
 1035                 /*
 1036                  * Elsewhere, this operation requires allpmaps_lock for
 1037                  * synchronization.  Here, it does not because it is being
 1038                  * performed in the context of an all_cpus rendezvous.
 1039                  */
 1040                 LIST_FOREACH(pmap, &allpmaps, pm_list) {
 1041                         pde = pmap_pde(pmap, act->va);
 1042                         pde_store(pde, act->newpde);
 1043                 }
 1044 }
 1045 
 1046 static void
 1047 pmap_update_pde_user(void *arg)
 1048 {
 1049         struct pde_action *act = arg;
 1050 
 1051         if (act->store == PCPU_GET(cpumask))
 1052                 pde_store(act->pde, act->newpde);
 1053 }
 1054 
 1055 static void
 1056 pmap_update_pde_teardown(void *arg)
 1057 {
 1058         struct pde_action *act = arg;
 1059 
 1060         if ((act->invalidate & PCPU_GET(cpumask)) != 0)
 1061                 pmap_update_pde_invalidate(act->va, act->newpde);
 1062 }
 1063 
 1064 /*
 1065  * Change the page size for the specified virtual address in a way that
 1066  * prevents any possibility of the TLB ever having two entries that map the
 1067  * same virtual address using different page sizes.  This is the recommended
 1068  * workaround for Erratum 383 on AMD Family 10h processors.  It prevents a
 1069  * machine check exception for a TLB state that is improperly diagnosed as a
 1070  * hardware error.
 1071  */
 1072 static void
 1073 pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 1074 {
 1075         struct pde_action act;
 1076         cpumask_t active, cpumask;
 1077 
 1078         sched_pin();
 1079         cpumask = PCPU_GET(cpumask);
 1080         if (pmap == kernel_pmap)
 1081                 active = all_cpus;
 1082         else
 1083                 active = pmap->pm_active;
 1084         if ((active & PCPU_GET(other_cpus)) != 0) {
 1085                 act.store = cpumask;
 1086                 act.invalidate = active;
 1087                 act.va = va;
 1088                 act.pde = pde;
 1089                 act.newpde = newpde;
 1090                 smp_rendezvous_cpus(cpumask | active,
 1091                     smp_no_rendevous_barrier, pmap == kernel_pmap ?
 1092                     pmap_update_pde_kernel : pmap_update_pde_user,
 1093                     pmap_update_pde_teardown, &act);
 1094         } else {
 1095                 if (pmap == kernel_pmap)
 1096                         pmap_kenter_pde(va, newpde);
 1097                 else
 1098                         pde_store(pde, newpde);
 1099                 if ((active & cpumask) != 0)
 1100                         pmap_update_pde_invalidate(va, newpde);
 1101         }
 1102         sched_unpin();
 1103 }
 1104 #else /* !SMP */
 1105 /*
 1106  * Normal, non-SMP, 486+ invalidation functions.
 1107  * We inline these within pmap.c for speed.
 1108  */
 1109 PMAP_INLINE void
 1110 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 1111 {
 1112 
 1113         if (pmap == kernel_pmap || pmap->pm_active)
 1114                 invlpg(va);
 1115 }
 1116 
 1117 PMAP_INLINE void
 1118 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 1119 {
 1120         vm_offset_t addr;
 1121 
 1122         if (pmap == kernel_pmap || pmap->pm_active)
 1123                 for (addr = sva; addr < eva; addr += PAGE_SIZE)
 1124                         invlpg(addr);
 1125 }
 1126 
 1127 PMAP_INLINE void
 1128 pmap_invalidate_all(pmap_t pmap)
 1129 {
 1130 
 1131         if (pmap == kernel_pmap || pmap->pm_active)
 1132                 invltlb();
 1133 }
 1134 
 1135 PMAP_INLINE void
 1136 pmap_invalidate_cache(void)
 1137 {
 1138 
 1139         wbinvd();
 1140 }
 1141 
 1142 static void
 1143 pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 1144 {
 1145 
 1146         if (pmap == kernel_pmap)
 1147                 pmap_kenter_pde(va, newpde);
 1148         else
 1149                 pde_store(pde, newpde);
 1150         if (pmap == kernel_pmap || pmap->pm_active)
 1151                 pmap_update_pde_invalidate(va, newpde);
 1152 }
 1153 #endif /* !SMP */
 1154 
 1155 void
 1156 pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
 1157 {
 1158 
 1159         KASSERT((sva & PAGE_MASK) == 0,
 1160             ("pmap_invalidate_cache_range: sva not page-aligned"));
 1161         KASSERT((eva & PAGE_MASK) == 0,
 1162             ("pmap_invalidate_cache_range: eva not page-aligned"));
 1163 
 1164         if (cpu_feature & CPUID_SS)
 1165                 ; /* If "Self Snoop" is supported, do nothing. */
 1166         else if ((cpu_feature & CPUID_CLFSH) != 0 &&
 1167                  eva - sva < 2 * 1024 * 1024) {
 1168 
 1169                 /*
 1170                  * Otherwise, do per-cache line flush.  Use the mfence
 1171  * instruction to ensure that previous stores are
 1172                  * included in the write-back.  The processor
 1173                  * propagates flush to other processors in the cache
 1174                  * coherence domain.
 1175                  */
 1176                 mfence();
 1177                 for (; sva < eva; sva += cpu_clflush_line_size)
 1178                         clflush(sva);
 1179                 mfence();
 1180         } else {
 1181 
 1182                 /*
 1183                  * No targeted cache flush methods are supported by CPU,
 1184                  * or the supplied range is bigger than 2MB.
 1185                  * Globally invalidate cache.
 1186                  */
 1187                 pmap_invalidate_cache();
 1188         }
 1189 }
 1190 
 1191 /*
 1192  * Are we current address space or kernel?  N.B. We return FALSE when
 1193  * a pmap's page table is in use because a kernel thread is borrowing
 1194  * it.  The borrowed page table can change spontaneously, making any
 1195  * dependence on its continued use subject to a race condition.
 1196  */
 1197 static __inline int
 1198 pmap_is_current(pmap_t pmap)
 1199 {
 1200 
 1201         return (pmap == kernel_pmap ||
 1202                 (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
 1203             (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
 1204 }
 1205 
 1206 /*
 1207  * If the given pmap is not the current or kernel pmap, the returned pte must
 1208  * be released by passing it to pmap_pte_release().
 1209  */
 1210 pt_entry_t *
 1211 pmap_pte(pmap_t pmap, vm_offset_t va)
 1212 {
 1213         pd_entry_t newpf;
 1214         pd_entry_t *pde;
 1215 
 1216         pde = pmap_pde(pmap, va);
 1217         if (*pde & PG_PS)
 1218                 return (pde);
 1219         if (*pde != 0) {
 1220                 /* are we current address space or kernel? */
 1221                 if (pmap_is_current(pmap))
 1222                         return (vtopte(va));
 1223                 mtx_lock(&PMAP2mutex);
 1224                 newpf = *pde & PG_FRAME;
 1225                 if ((*PMAP2 & PG_FRAME) != newpf) {
 1226                         *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M;
 1227                         pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
 1228                 }
 1229                 return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
 1230         }
 1231         return (0);
 1232 }
 1233 
 1234 /*
 1235  * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
 1236  * being NULL.
 1237  */
 1238 static __inline void
 1239 pmap_pte_release(pt_entry_t *pte)
 1240 {
 1241 
 1242         if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2)
 1243                 mtx_unlock(&PMAP2mutex);
 1244 }
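
/*
 * A minimal usage sketch (hypothetical helper, not part of the pmap API):
 * read a PTE via pmap_pte() and release it again, following the same pattern
 * as pmap_extract() below.  The pmap lock is held across the lookup.
 */
static __inline pt_entry_t
pmap_pte_peek_example(pmap_t pmap, vm_offset_t va)
{
        pt_entry_t *pte, val;

        val = 0;
        PMAP_LOCK(pmap);
        pte = pmap_pte(pmap, va);
        if (pte != NULL) {
                val = *pte;
                pmap_pte_release(pte);  /* drops PMAP2mutex if it was taken */
        }
        PMAP_UNLOCK(pmap);
        return (val);
}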
 1245 
 1246 static __inline void
 1247 invlcaddr(void *caddr)
 1248 {
 1249 
 1250         invlpg((u_int)caddr);
 1251 }
 1252 
 1253 /*
 1254  * Super fast pmap_pte routine best used when scanning
 1255  * the pv lists.  This eliminates many coarse-grained
 1256  * invltlb calls.  Note that many of the pv list
 1257  * scans are across different pmaps.  It is very wasteful
 1258  * to do an entire invltlb for checking a single mapping.
 1259  *
 1260  * If the given pmap is not the current pmap, vm_page_queue_mtx
 1261  * must be held and curthread pinned to a CPU.
 1262  */
 1263 static pt_entry_t *
 1264 pmap_pte_quick(pmap_t pmap, vm_offset_t va)
 1265 {
 1266         pd_entry_t newpf;
 1267         pd_entry_t *pde;
 1268 
 1269         pde = pmap_pde(pmap, va);
 1270         if (*pde & PG_PS)
 1271                 return (pde);
 1272         if (*pde != 0) {
 1273                 /* are we current address space or kernel? */
 1274                 if (pmap_is_current(pmap))
 1275                         return (vtopte(va));
 1276                 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1277                 KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
 1278                 newpf = *pde & PG_FRAME;
 1279                 if ((*PMAP1 & PG_FRAME) != newpf) {
 1280                         *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M;
 1281 #ifdef SMP
 1282                         PMAP1cpu = PCPU_GET(cpuid);
 1283 #endif
 1284                         invlcaddr(PADDR1);
 1285                         PMAP1changed++;
 1286                 } else
 1287 #ifdef SMP
 1288                 if (PMAP1cpu != PCPU_GET(cpuid)) {
 1289                         PMAP1cpu = PCPU_GET(cpuid);
 1290                         invlcaddr(PADDR1);
 1291                         PMAP1changedcpu++;
 1292                 } else
 1293 #endif
 1294                         PMAP1unchanged++;
 1295                 return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
 1296         }
 1297         return (0);
 1298 }
 1299 
 1300 /*
 1301  *      Routine:        pmap_extract
 1302  *      Function:
 1303  *              Extract the physical page address associated
 1304  *              with the given map/virtual_address pair.
 1305  */
 1306 vm_paddr_t 
 1307 pmap_extract(pmap_t pmap, vm_offset_t va)
 1308 {
 1309         vm_paddr_t rtval;
 1310         pt_entry_t *pte;
 1311         pd_entry_t pde;
 1312 
 1313         rtval = 0;
 1314         PMAP_LOCK(pmap);
 1315         pde = pmap->pm_pdir[va >> PDRSHIFT];
 1316         if (pde != 0) {
 1317                 if ((pde & PG_PS) != 0)
 1318                         rtval = (pde & PG_PS_FRAME) | (va & PDRMASK);
 1319                 else {
 1320                         pte = pmap_pte(pmap, va);
 1321                         rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
 1322                         pmap_pte_release(pte);
 1323                 }
 1324         }
 1325         PMAP_UNLOCK(pmap);
 1326         return (rtval);
 1327 }
 1328 
 1329 /*
 1330  *      Routine:        pmap_extract_and_hold
 1331  *      Function:
 1332  *              Atomically extract and hold the physical page
 1333  *              with the given pmap and virtual address pair
 1334  *              if that mapping permits the given protection.
 1335  */
 1336 vm_page_t
 1337 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 1338 {
 1339         pd_entry_t pde;
 1340         pt_entry_t pte;
 1341         vm_page_t m;
 1342 
 1343         m = NULL;
 1344         vm_page_lock_queues();
 1345         PMAP_LOCK(pmap);
 1346         pde = *pmap_pde(pmap, va);
 1347         if (pde != 0) {
 1348                 if (pde & PG_PS) {
 1349                         if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
 1350                                 m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
 1351                                     (va & PDRMASK));
 1352                                 vm_page_hold(m);
 1353                         }
 1354                 } else {
 1355                         sched_pin();
 1356                         pte = *pmap_pte_quick(pmap, va);
 1357                         if (pte != 0 &&
 1358                             ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
 1359                                 m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
 1360                                 vm_page_hold(m);
 1361                         }
 1362                         sched_unpin();
 1363                 }
 1364         }
 1365         vm_page_unlock_queues();
 1366         PMAP_UNLOCK(pmap);
 1367         return (m);
 1368 }
 1369 
 1370 /***************************************************
 1371  * Low level mapping routines.....
 1372  ***************************************************/
 1373 
 1374 /*
 1375  * Add a wired page to the kva.
 1376  * Note: not SMP coherent.
 1377  *
 1378  * This function may be used before pmap_bootstrap() is called.
 1379  */
 1380 PMAP_INLINE void 
 1381 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 1382 {
 1383         pt_entry_t *pte;
 1384 
 1385         pte = vtopte(va);
 1386         pte_store(pte, pa | PG_RW | PG_V | pgeflag);
 1387 }
 1388 
 1389 static __inline void
 1390 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
 1391 {
 1392         pt_entry_t *pte;
 1393 
 1394         pte = vtopte(va);
 1395         pte_store(pte, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
 1396 }
 1397 
 1398 /*
 1399  * Remove a page from the kernel pagetables.
 1400  * Note: not SMP coherent.
 1401  *
 1402  * This function may be used before pmap_bootstrap() is called.
 1403  */
 1404 PMAP_INLINE void
 1405 pmap_kremove(vm_offset_t va)
 1406 {
 1407         pt_entry_t *pte;
 1408 
 1409         pte = vtopte(va);
 1410         pte_clear(pte);
 1411 }
 1412 
 1413 /*
 1414  *      Used to map a range of physical addresses into kernel
 1415  *      virtual address space.
 1416  *
 1417  *      The value passed in '*virt' is a suggested virtual address for
 1418  *      the mapping. Architectures which can support a direct-mapped
 1419  *      physical to virtual region can return the appropriate address
 1420  *      within that region, leaving '*virt' unchanged. Other
 1421  *      architectures should map the pages starting at '*virt' and
 1422  *      update '*virt' with the first usable address after the mapped
 1423  *      region.
 1424  */
 1425 vm_offset_t
 1426 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
 1427 {
 1428         vm_offset_t va, sva;
 1429 
 1430         va = sva = *virt;
 1431         while (start < end) {
 1432                 pmap_kenter(va, start);
 1433                 va += PAGE_SIZE;
 1434                 start += PAGE_SIZE;
 1435         }
 1436         pmap_invalidate_range(kernel_pmap, sva, va);
 1437         *virt = va;
 1438         return (sva);
 1439 }
 1440 
 1441 
 1442 /*
 1443  * Add a list of wired pages to the kva.
 1444  * This routine is only used for temporary
 1445  * kernel mappings that do not need to have
 1446  * page modification or references recorded.
 1447  * Note that old mappings are simply written
 1448  * over.  The pages *must* be wired.
 1449  * Note: SMP coherent.  Uses a ranged shootdown IPI.
 1450  */
 1451 void
 1452 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 1453 {
 1454         pt_entry_t *endpte, oldpte, pa, *pte;
 1455         vm_page_t m;
 1456 
 1457         oldpte = 0;
 1458         pte = vtopte(sva);
 1459         endpte = pte + count;
 1460         while (pte < endpte) {
 1461                 m = *ma++;
 1462                 pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
 1463                 if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) {
 1464                         oldpte |= *pte;
 1465                         pte_store(pte, pa | pgeflag | PG_RW | PG_V);
 1466                 }
 1467                 pte++;
 1468         }
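              /*
               * A ranged TLB shootdown is needed only if at least one
               * previously valid mapping was overwritten above.
               */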
 1469         if (__predict_false((oldpte & PG_V) != 0))
 1470                 pmap_invalidate_range(kernel_pmap, sva, sva + count *
 1471                     PAGE_SIZE);
 1472 }
 1473 
 1474 /*
 1475  * This routine tears out page mappings from the
 1476  * kernel -- it is meant only for temporary mappings.
 1477  * Note: SMP coherent.  Uses a ranged shootdown IPI.
 1478  */
 1479 void
 1480 pmap_qremove(vm_offset_t sva, int count)
 1481 {
 1482         vm_offset_t va;
 1483 
 1484         va = sva;
 1485         while (count-- > 0) {
 1486                 pmap_kremove(va);
 1487                 va += PAGE_SIZE;
 1488         }
 1489         pmap_invalidate_range(kernel_pmap, sva, va);
 1490 }
 1491 
 1492 /***************************************************
 1493  * Page table page management routines.....
 1494  ***************************************************/
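      /*
       * Free the given list of page table pages, which are linked through
       * their right fields, back to the physical memory allocator.
       */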
 1495 static __inline void
 1496 pmap_free_zero_pages(vm_page_t free)
 1497 {
 1498         vm_page_t m;
 1499 
 1500         while (free != NULL) {
 1501                 m = free;
 1502                 free = m->right;
 1503                 /* Preserve the page's PG_ZERO setting. */
 1504                 vm_page_free_toq(m);
 1505         }
 1506 }
 1507 
 1508 /*
 1509  * Schedule the specified unused page table page to be freed.  Specifically,
 1510  * add the page to the specified list of pages that will be released to the
 1511  * physical memory manager after the TLB has been updated.
 1512  */
 1513 static __inline void
 1514 pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
 1515 {
 1516 
 1517         if (set_PG_ZERO)
 1518                 m->flags |= PG_ZERO;
 1519         else
 1520                 m->flags &= ~PG_ZERO;
 1521         m->right = *free;
 1522         *free = m;
 1523 }
 1524 
 1525 /*
 1526  * Inserts the specified page table page into the specified pmap's collection
 1527  * of idle page table pages.  Each of a pmap's page table pages is responsible
 1528  * for mapping a distinct range of virtual addresses.  The pmap's collection is
 1529  * ordered by this virtual address range.
 1530  */
 1531 static void
 1532 pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
 1533 {
 1534         vm_page_t root;
 1535 
 1536         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1537         root = pmap->pm_root;
 1538         if (root == NULL) {
 1539                 mpte->left = NULL;
 1540                 mpte->right = NULL;
 1541         } else {
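                      /*
                       * Splay the page with the closest pindex to the root
                       * and insert mpte above it as the new root.
                       */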
 1542                 root = vm_page_splay(mpte->pindex, root);
 1543                 if (mpte->pindex < root->pindex) {
 1544                         mpte->left = root->left;
 1545                         mpte->right = root;
 1546                         root->left = NULL;
 1547                 } else if (mpte->pindex == root->pindex)
 1548                         panic("pmap_insert_pt_page: pindex already inserted");
 1549                 else {
 1550                         mpte->right = root->right;
 1551                         mpte->left = root;
 1552                         root->right = NULL;
 1553                 }
 1554         }
 1555         pmap->pm_root = mpte;
 1556 }
 1557 
 1558 /*
 1559  * Looks for a page table page mapping the specified virtual address in the
 1560  * specified pmap's collection of idle page table pages.  Returns NULL if there
 1561  * is no page table page corresponding to the specified virtual address.
 1562  */
 1563 static vm_page_t
 1564 pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
 1565 {
 1566         vm_page_t mpte;
 1567         vm_pindex_t pindex = va >> PDRSHIFT;
 1568 
 1569         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1570         if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) {
 1571                 mpte = vm_page_splay(pindex, mpte);
 1572                 if ((pmap->pm_root = mpte)->pindex != pindex)
 1573                         mpte = NULL;
 1574         }
 1575         return (mpte);
 1576 }
 1577 
 1578 /*
 1579  * Removes the specified page table page from the specified pmap's collection
 1580  * of idle page table pages.  The specified page table page must be a member of
 1581  * the pmap's collection.
 1582  */
 1583 static void
 1584 pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
 1585 {
 1586         vm_page_t root;
 1587 
 1588         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
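              /*
               * Splay mpte to the root of the collection, then replace it
               * by the join of its left and right subtrees.
               */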
 1589         if (mpte != pmap->pm_root)
 1590                 vm_page_splay(mpte->pindex, pmap->pm_root);
 1591         if (mpte->left == NULL)
 1592                 root = mpte->right;
 1593         else {
 1594                 root = vm_page_splay(mpte->pindex, mpte->left);
 1595                 root->right = mpte->right;
 1596         }
 1597         pmap->pm_root = root;
 1598 }
 1599 
 1600 /*
 1601  * This routine decrements a page table page's wire count; when the
 1602  * count drops to zero, the page is unmapped and scheduled to be freed.
 1603  */
 1604 static __inline int
 1605 pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
 1606 {
 1607 
 1608         --m->wire_count;
 1609         if (m->wire_count == 0)
 1610                 return _pmap_unwire_pte_hold(pmap, m, free);
 1611         else
 1612                 return 0;
 1613 }
 1614 
 1615 static int 
 1616 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
 1617 {
 1618         vm_offset_t pteva;
 1619 
 1620         /*
 1621          * unmap the page table page
 1622          */
 1623         pmap->pm_pdir[m->pindex] = 0;
 1624         --pmap->pm_stats.resident_count;
 1625 
 1626         /*
 1627          * This is a release store so that the ordinary store unmapping
 1628          * the page table page is globally performed before TLB shoot-
 1629          * down is begun.
 1630          */
 1631         atomic_subtract_rel_int(&cnt.v_wire_count, 1);
 1632 
 1633         /*
 1634          * Invalidate the TLB entry for the recursive mapping of the
 1635          * page table page so that the removal takes effect immediately.
 1636          */
 1637         pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
 1638         pmap_invalidate_page(pmap, pteva);
 1639 
 1640         /*
 1641          * Put the page on a list so that it is released only after
 1642          * *all* TLB shootdowns have completed.
 1643          */
 1644         pmap_add_delayed_free_list(m, free, TRUE);
 1645 
 1646         return 1;
 1647 }
 1648 
 1649 /*
 1650  * After removing a page table entry, this routine is used to
 1651  * conditionally free the page table page and manage its wire count.
 1652  */
 1653 static int
 1654 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
 1655 {
 1656         pd_entry_t ptepde;
 1657         vm_page_t mpte;
 1658 
 1659         if (va >= VM_MAXUSER_ADDRESS)
 1660                 return 0;
 1661         ptepde = *pmap_pde(pmap, va);
 1662         mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
 1663         return pmap_unwire_pte_hold(pmap, mpte, free);
 1664 }
 1665 
 1666 /*
 1667  * Initialize the pmap for the swapper process.
 1668  */
 1669 void
 1670 pmap_pinit0(pmap_t pmap)
 1671 {
 1672 
 1673         PMAP_LOCK_INIT(pmap);
 1674         /*
 1675          * Since the page table directory is shared with the kernel pmap,
 1676          * which is already included in the list "allpmaps", this pmap does
 1677          * not need to be inserted into that list.
 1678          */
 1679         pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
 1680 #ifdef PAE
 1681         pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
 1682 #endif
 1683         pmap->pm_root = NULL;
 1684         pmap->pm_active = 0;
 1685         PCPU_SET(curpmap, pmap);
 1686         TAILQ_INIT(&pmap->pm_pvchunk);
 1687         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1688 }
 1689 
 1690 /*
 1691  * Initialize a preallocated and zeroed pmap structure,
 1692  * such as one in a vmspace structure.
 1693  */
 1694 int
 1695 pmap_pinit(pmap_t pmap)
 1696 {
 1697         vm_page_t m, ptdpg[NPGPTD];
 1698         vm_paddr_t pa;
 1699         static int color;
 1700         int i;
 1701 
 1702         PMAP_LOCK_INIT(pmap);
 1703 
 1704         /*
 1705          * No need to allocate page table space yet but we do need a valid
 1706          * page directory table.
 1707          */
 1708         if (pmap->pm_pdir == NULL) {
 1709                 pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
 1710                     NBPTD);
 1711 
 1712                 if (pmap->pm_pdir == NULL) {
 1713                         PMAP_LOCK_DESTROY(pmap);
 1714                         return (0);
 1715                 }
 1716 #ifdef PAE
 1717                 pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
 1718                 KASSERT(((vm_offset_t)pmap->pm_pdpt &
 1719                     ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
 1720                     ("pmap_pinit: pdpt misaligned"));
 1721                 KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
 1722                     ("pmap_pinit: pdpt above 4g"));
 1723 #endif
 1724                 pmap->pm_root = NULL;
 1725         }
 1726         KASSERT(pmap->pm_root == NULL,
 1727             ("pmap_pinit: pmap has reserved page table page(s)"));
 1728 
 1729         /*
 1730          * allocate the page directory page(s)
 1731          */
 1732         for (i = 0; i < NPGPTD;) {
 1733                 m = vm_page_alloc(NULL, color++,
 1734                     VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 1735                     VM_ALLOC_ZERO);
 1736                 if (m == NULL)
 1737                         VM_WAIT;
 1738                 else {
 1739                         ptdpg[i++] = m;
 1740                 }
 1741         }
 1742 
 1743         pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
 1744 
 1745         for (i = 0; i < NPGPTD; i++) {
 1746                 if ((ptdpg[i]->flags & PG_ZERO) == 0)
 1747                         bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
 1748         }
 1749 
 1750         mtx_lock_spin(&allpmaps_lock);
 1751         LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 1752         /* Copy the kernel page table directory entries. */
 1753         bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
 1754         mtx_unlock_spin(&allpmaps_lock);
 1755 
 1756         /* install self-referential address mapping entry(s) */
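              /*
               * The recursive slots at PTDPTDI make the page directory pages
               * themselves appear as page table pages, so that any PTE can be
               * reached through the PTmap/vtopte() window when this pmap is
               * active.
               */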
 1757         for (i = 0; i < NPGPTD; i++) {
 1758                 pa = VM_PAGE_TO_PHYS(ptdpg[i]);
 1759                 pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
 1760 #ifdef PAE
 1761                 pmap->pm_pdpt[i] = pa | PG_V;
 1762 #endif
 1763         }
 1764 
 1765         pmap->pm_active = 0;
 1766         TAILQ_INIT(&pmap->pm_pvchunk);
 1767         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1768 
 1769         return (1);
 1770 }
 1771 
 1772 /*
 1773  * This routine is called when the page table page for the given page
 1774  * directory index is not present; it allocates and maps a new one.
 1775  */
 1776 static vm_page_t
 1777 _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
 1778 {
 1779         vm_paddr_t ptepa;
 1780         vm_page_t m;
 1781 
 1782         KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 1783             (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 1784             ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 1785 
 1786         /*
 1787          * Allocate a page table page.
 1788          */
 1789         if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 1790             VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 1791                 if (flags & M_WAITOK) {
 1792                         PMAP_UNLOCK(pmap);
 1793                         vm_page_unlock_queues();
 1794                         VM_WAIT;
 1795                         vm_page_lock_queues();
 1796                         PMAP_LOCK(pmap);
 1797                 }
 1798 
 1799                 /*
 1800                  * Indicate the need to retry.  While waiting, the page table
 1801                  * page may have been allocated.
 1802                  */
 1803                 return (NULL);
 1804         }
 1805         if ((m->flags & PG_ZERO) == 0)
 1806                 pmap_zero_page(m);
 1807 
 1808         /*
 1809          * Map the pagetable page into the process address space, if
 1810          * it isn't already there.
 1811          */
 1812 
 1813         pmap->pm_stats.resident_count++;
 1814 
 1815         ptepa = VM_PAGE_TO_PHYS(m);
 1816         pmap->pm_pdir[ptepindex] =
 1817                 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
 1818 
 1819         return m;
 1820 }
 1821 
 1822 static vm_page_t
 1823 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
 1824 {
 1825         unsigned ptepindex;
 1826         pd_entry_t ptepa;
 1827         vm_page_t m;
 1828 
 1829         KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 1830             (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 1831             ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 1832 
 1833         /*
 1834          * Calculate pagetable page index
 1835          */
 1836         ptepindex = va >> PDRSHIFT;
 1837 retry:
 1838         /*
 1839          * Get the page directory entry
 1840          */
 1841         ptepa = pmap->pm_pdir[ptepindex];
 1842 
 1843         /*
 1844          * This supports switching from a 4MB page to a
 1845          * normal 4K page.
 1846          */
 1847         if (ptepa & PG_PS) {
 1848                 (void)pmap_demote_pde(pmap, &pmap->pm_pdir[ptepindex], va);
 1849                 ptepa = pmap->pm_pdir[ptepindex];
 1850         }
 1851 
 1852         /*
 1853          * If the page table page is already mapped, just increment
 1854          * its wire count.
 1855          */
 1856         if (ptepa) {
 1857                 m = PHYS_TO_VM_PAGE(ptepa & PG_FRAME);
 1858                 m->wire_count++;
 1859         } else {
 1860                 /*
 1861                  * The page table page is not mapped or has been
 1862                  * deallocated; allocate a new one.
 1863                  */
 1864                 m = _pmap_allocpte(pmap, ptepindex, flags);
 1865                 if (m == NULL && (flags & M_WAITOK))
 1866                         goto retry;
 1867         }
 1868         return (m);
 1869 }
 1870 
 1871 
 1872 /***************************************************
 1873  * Pmap allocation/deallocation routines.
 1874  ***************************************************/
 1875 
 1876 #ifdef SMP
 1877 /*
 1878  * Deal with an SMP shootdown of other users of the pmap that we are
 1879  * trying to dispose of.  This can be a bit hairy.
 1880  */
 1881 static cpumask_t *lazymask;
 1882 static u_int lazyptd;
 1883 static volatile u_int lazywait;
 1884 
 1885 void pmap_lazyfix_action(void);
 1886 
 1887 void
 1888 pmap_lazyfix_action(void)
 1889 {
 1890         cpumask_t mymask = PCPU_GET(cpumask);
 1891 
 1892 #ifdef COUNT_IPIS
 1893         (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
 1894 #endif
 1895         if (rcr3() == lazyptd)
 1896                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1897         atomic_clear_int(lazymask, mymask);
 1898         atomic_store_rel_int(&lazywait, 1);
 1899 }
 1900 
 1901 static void
 1902 pmap_lazyfix_self(cpumask_t mymask)
 1903 {
 1904 
 1905         if (rcr3() == lazyptd)
 1906                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1907         atomic_clear_int(lazymask, mymask);
 1908 }
 1909 
 1910 
 1911 static void
 1912 pmap_lazyfix(pmap_t pmap)
 1913 {
 1914         cpumask_t mymask, mask;
 1915         u_int spins;
 1916 
 1917         while ((mask = pmap->pm_active) != 0) {
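                      /*
                       * Clear one CPU from pm_active per iteration: reload
                       * cr3 locally if it is this CPU, otherwise send an IPI
                       * to the remote CPU and wait for it to acknowledge.
                       */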
 1918                 spins = 50000000;
 1919                 mask = mask & -mask;    /* Find least significant set bit */
 1920                 mtx_lock_spin(&smp_ipi_mtx);
 1921 #ifdef PAE
 1922                 lazyptd = vtophys(pmap->pm_pdpt);
 1923 #else
 1924                 lazyptd = vtophys(pmap->pm_pdir);
 1925 #endif
 1926                 mymask = PCPU_GET(cpumask);
 1927                 if (mask == mymask) {
 1928                         lazymask = &pmap->pm_active;
 1929                         pmap_lazyfix_self(mymask);
 1930                 } else {
 1931                         atomic_store_rel_int((u_int *)&lazymask,
 1932                             (u_int)&pmap->pm_active);
 1933                         atomic_store_rel_int(&lazywait, 0);
 1934                         ipi_selected(mask, IPI_LAZYPMAP);
 1935                         while (lazywait == 0) {
 1936                                 ia32_pause();
 1937                                 if (--spins == 0)
 1938                                         break;
 1939                         }
 1940                 }
 1941                 mtx_unlock_spin(&smp_ipi_mtx);
 1942                 if (spins == 0)
 1943                         printf("pmap_lazyfix: spun for 50000000\n");
 1944         }
 1945 }
 1946 
 1947 #else   /* SMP */
 1948 
 1949 /*
 1950  * Cleaning up on a uniprocessor is easy.  For various reasons, we're
 1951  * unlikely to have to even execute this code, including the fact
 1952  * that the cleanup is deferred until the parent does a wait(2), which
 1953  * means that another userland process has run.
 1954  */
 1955 static void
 1956 pmap_lazyfix(pmap_t pmap)
 1957 {
 1958         u_int cr3;
 1959 
 1960         cr3 = vtophys(pmap->pm_pdir);
 1961         if (cr3 == rcr3()) {
 1962                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1963                 pmap->pm_active &= ~(PCPU_GET(cpumask));
 1964         }
 1965 }
 1966 #endif  /* SMP */
 1967 
 1968 /*
 1969  * Release any resources held by the given physical map.
 1970  * Called when a pmap initialized by pmap_pinit is being released.
 1971  * Should only be called if the map contains no valid mappings.
 1972  */
 1973 void
 1974 pmap_release(pmap_t pmap)
 1975 {
 1976         vm_page_t m, ptdpg[NPGPTD];
 1977         int i;
 1978 
 1979         KASSERT(pmap->pm_stats.resident_count == 0,
 1980             ("pmap_release: pmap resident count %ld != 0",
 1981             pmap->pm_stats.resident_count));
 1982         KASSERT(pmap->pm_root == NULL,
 1983             ("pmap_release: pmap has reserved page table page(s)"));
 1984 
 1985         pmap_lazyfix(pmap);
 1986         mtx_lock_spin(&allpmaps_lock);
 1987         LIST_REMOVE(pmap, pm_list);
 1988         mtx_unlock_spin(&allpmaps_lock);
 1989 
 1990         for (i = 0; i < NPGPTD; i++)
 1991                 ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] &
 1992                     PG_FRAME);
 1993 
 1994         bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
 1995             sizeof(*pmap->pm_pdir));
 1996 
 1997         pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
 1998 
 1999         for (i = 0; i < NPGPTD; i++) {
 2000                 m = ptdpg[i];
 2001 #ifdef PAE
 2002                 KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
 2003                     ("pmap_release: got wrong ptd page"));
 2004 #endif
 2005                 m->wire_count--;
 2006                 atomic_subtract_int(&cnt.v_wire_count, 1);
 2007                 vm_page_free_zero(m);
 2008         }
 2009         PMAP_LOCK_DESTROY(pmap);
 2010 }
 2011 
 2012 static int
 2013 kvm_size(SYSCTL_HANDLER_ARGS)
 2014 {
 2015         unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 2016 
 2017         return sysctl_handle_long(oidp, &ksize, 0, req);
 2018 }
 2019 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
 2020     0, 0, kvm_size, "IU", "Size of KVM");
 2021 
 2022 static int
 2023 kvm_free(SYSCTL_HANDLER_ARGS)
 2024 {
 2025         unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 2026 
 2027         return sysctl_handle_long(oidp, &kfree, 0, req);
 2028 }
 2029 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
 2030     0, 0, kvm_free, "IU", "Amount of KVM free");
 2031 
 2032 /*
 2033  * grow the number of kernel page table entries, if needed
 2034  */
 2035 void
 2036 pmap_growkernel(vm_offset_t addr)
 2037 {
 2038         vm_paddr_t ptppaddr;
 2039         vm_page_t nkpg;
 2040         pd_entry_t newpdir;
 2041 
 2042         mtx_assert(&kernel_map->system_mtx, MA_OWNED);
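              /*
               * On the first call, scan the existing kernel PTD to find the
               * current end of the kernel page table and the number of page
               * table pages (nkpt) already in use.
               */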
 2043         if (kernel_vm_end == 0) {
 2044                 kernel_vm_end = KERNBASE;
 2045                 nkpt = 0;
 2046                 while (pdir_pde(PTD, kernel_vm_end)) {
 2047                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 2048                         nkpt++;
 2049                         if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 2050                                 kernel_vm_end = kernel_map->max_offset;
 2051                                 break;
 2052                         }
 2053                 }
 2054         }
 2055         addr = roundup2(addr, PAGE_SIZE * NPTEPG);
 2056         if (addr - 1 >= kernel_map->max_offset)
 2057                 addr = kernel_map->max_offset;
 2058         while (kernel_vm_end < addr) {
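                      /*
                       * Extend the kernel page table one 2/4MB PDE at a time,
                       * allocating and installing a page table page wherever
                       * a PDE is not yet valid.
                       */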
 2059                 if (pdir_pde(PTD, kernel_vm_end)) {
 2060                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 2061                         if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 2062                                 kernel_vm_end = kernel_map->max_offset;
 2063                                 break;
 2064                         }
 2065                         continue;
 2066                 }
 2067 
 2068                 nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT,
 2069                     VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 2070                     VM_ALLOC_ZERO);
 2071                 if (nkpg == NULL)
 2072                         panic("pmap_growkernel: no memory to grow kernel");
 2073 
 2074                 nkpt++;
 2075 
 2076                 if ((nkpg->flags & PG_ZERO) == 0)
 2077                         pmap_zero_page(nkpg);
 2078                 ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 2079                 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 2080                 pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir;
 2081 
 2082                 pmap_kenter_pde(kernel_vm_end, newpdir);
 2083                 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 2084                 if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 2085                         kernel_vm_end = kernel_map->max_offset;
 2086                         break;
 2087                 }
 2088         }
 2089 }
 2090 
 2091 
 2092 /***************************************************
 2093  * page management routines.
 2094  ***************************************************/
 2095 
 2096 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 2097 CTASSERT(_NPCM == 11);
 2098 
 2099 static __inline struct pv_chunk *
 2100 pv_to_chunk(pv_entry_t pv)
 2101 {
 2102 
 2103         return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK);
 2104 }
 2105 
 2106 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
 2107 
 2108 #define PC_FREE0_9      0xfffffffful    /* Free values for index 0 through 9 */
 2109 #define PC_FREE10       0x0000fffful    /* Free values for index 10 */
 2110 
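      /*
       * Each pv chunk carries an 11-word bitmap of free pv entries: ten
       * words with all 32 bits usable and an eleventh with only the low
       * 16 bits usable (10 * 32 + 16 entries per chunk).
       */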
 2111 static uint32_t pc_freemask[11] = {
 2112         PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 2113         PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 2114         PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 2115         PC_FREE0_9, PC_FREE10
 2116 };
 2117 
 2118 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
 2119         "Current number of pv entries");
 2120 
 2121 #ifdef PV_STATS
 2122 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
 2123 
 2124 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
 2125         "Current number of pv entry chunks");
 2126 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
 2127         "Current number of pv entry chunks allocated");
 2128 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
 2129         "Current number of pv entry chunk frees");
 2130 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
 2131         "Number of times tried to get a chunk page but failed.");
 2132 
 2133 static long pv_entry_frees, pv_entry_allocs;
 2134 static int pv_entry_spare;
 2135 
 2136 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
 2137         "Current number of pv entry frees");
 2138 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
 2139         "Current number of pv entry allocs");
 2140 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
 2141         "Current number of spare pv entries");
 2142 
 2143 static int pmap_collect_inactive, pmap_collect_active;
 2144 
 2145 SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
 2146         "Current number of times pmap_collect called on inactive queue");
 2147 SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
 2148         "Current number of times pmap_collect called on active queue");
 2149 #endif
 2150 
 2151 /*
 2152  * We are in a serious low memory condition.  Resort to
 2153  * drastic measures to free some pages so we can allocate
 2154  * another pv entry chunk.  This is normally called to
 2155  * unmap inactive pages, and if necessary, active pages.
 2156  */
 2157 static void
 2158 pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
 2159 {
 2160         struct md_page *pvh;
 2161         pd_entry_t *pde;
 2162         pmap_t pmap;
 2163         pt_entry_t *pte, tpte;
 2164         pv_entry_t next_pv, pv;
 2165         vm_offset_t va;
 2166         vm_page_t m, free;
 2167 
 2168         sched_pin();
 2169         TAILQ_FOREACH(m, &vpq->pl, pageq) {
 2170                 if (m->hold_count || m->busy)
 2171                         continue;
 2172                 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
 2173                         va = pv->pv_va;
 2174                         pmap = PV_PMAP(pv);
 2175                         /* Avoid deadlock and lock recursion. */
 2176                         if (pmap > locked_pmap)
 2177                                 PMAP_LOCK(pmap);
 2178                         else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
 2179                                 continue;
 2180                         pmap->pm_stats.resident_count--;
 2181                         pde = pmap_pde(pmap, va);
 2182                         KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
 2183                             " a 4mpage in page %p's pv list", m));
 2184                         pte = pmap_pte_quick(pmap, va);
 2185                         tpte = pte_load_clear(pte);
 2186                         KASSERT((tpte & PG_W) == 0,
 2187                             ("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
 2188                         if (tpte & PG_A)
 2189                                 vm_page_flag_set(m, PG_REFERENCED);
 2190                         if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 2191                                 vm_page_dirty(m);
 2192                         free = NULL;
 2193                         pmap_unuse_pt(pmap, va, &free);
 2194                         pmap_invalidate_page(pmap, va);
 2195                         pmap_free_zero_pages(free);
 2196                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2197                         if (TAILQ_EMPTY(&m->md.pv_list)) {
 2198                                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 2199                                 if (TAILQ_EMPTY(&pvh->pv_list))
 2200                                         vm_page_flag_clear(m, PG_WRITEABLE);
 2201                         }
 2202                         free_pv_entry(pmap, pv);
 2203                         if (pmap != locked_pmap)
 2204                                 PMAP_UNLOCK(pmap);
 2205                 }
 2206         }
 2207         sched_unpin();
 2208 }
 2209 
 2210 
 2211 /*
 2212  * free the pv_entry back to the free list
 2213  */
 2214 static void
 2215 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 2216 {
 2217         vm_page_t m;
 2218         struct pv_chunk *pc;
 2219         int idx, field, bit;
 2220 
 2221         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2222         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2223         PV_STAT(pv_entry_frees++);
 2224         PV_STAT(pv_entry_spare++);
 2225         pv_entry_count--;
 2226         pc = pv_to_chunk(pv);
 2227         idx = pv - &pc->pc_pventry[0];
 2228         field = idx / 32;
 2229         bit = idx % 32;
 2230         pc->pc_map[field] |= 1ul << bit;
 2231         /* move to head of list */
 2232         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 2233         for (idx = 0; idx < _NPCM; idx++)
 2234                 if (pc->pc_map[idx] != pc_freemask[idx]) {
 2235                         TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 2236                         return;
 2237                 }
 2238         PV_STAT(pv_entry_spare -= _NPCPV);
 2239         PV_STAT(pc_chunk_count--);
 2240         PV_STAT(pc_chunk_frees++);
 2241         /* entire chunk is free, return it */
 2242         m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
 2243         pmap_qremove((vm_offset_t)pc, 1);
 2244         vm_page_unwire(m, 0);
 2245         vm_page_free(m);
 2246         pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
 2247 }
 2248 
 2249 /*
 2250  * get a new pv_entry, allocating a block from the system
 2251  * when needed.
 2252  */
 2253 static pv_entry_t
 2254 get_pv_entry(pmap_t pmap, int try)
 2255 {
 2256         static const struct timeval printinterval = { 60, 0 };
 2257         static struct timeval lastprint;
 2258         static vm_pindex_t colour;
 2259         struct vpgqueues *pq;
 2260         int bit, field;
 2261         pv_entry_t pv;
 2262         struct pv_chunk *pc;
 2263         vm_page_t m;
 2264 
 2265         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2266         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2267         PV_STAT(pv_entry_allocs++);
 2268         pv_entry_count++;
 2269         if (pv_entry_count > pv_entry_high_water)
 2270                 if (ratecheck(&lastprint, &printinterval))
 2271                         printf("Approaching the limit on PV entries, consider "
 2272                             "increasing either the vm.pmap.shpgperproc or the "
 2273                             "vm.pmap.pv_entry_max tunable.\n");
 2274         pq = NULL;
 2275 retry:
 2276         pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 2277         if (pc != NULL) {
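                      /*
                       * Scan the first chunk on the pmap's list for a free
                       * slot; clear its bit and hand out the corresponding pv
                       * entry, moving the chunk to the tail if it became full.
                       */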
 2278                 for (field = 0; field < _NPCM; field++) {
 2279                         if (pc->pc_map[field]) {
 2280                                 bit = bsfl(pc->pc_map[field]);
 2281                                 break;
 2282                         }
 2283                 }
 2284                 if (field < _NPCM) {
 2285                         pv = &pc->pc_pventry[field * 32 + bit];
 2286                         pc->pc_map[field] &= ~(1ul << bit);
 2287                         /* If this was the last item, move it to tail */
 2288                         for (field = 0; field < _NPCM; field++)
 2289                                 if (pc->pc_map[field] != 0) {
 2290                                         PV_STAT(pv_entry_spare--);
 2291                                         return (pv);    /* not full, return */
 2292                                 }
 2293                         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 2294                         TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 2295                         PV_STAT(pv_entry_spare--);
 2296                         return (pv);
 2297                 }
 2298         }
 2299         /*
 2300          * Access to the ptelist "pv_vafree" is synchronized by the page
 2301          * queues lock.  If "pv_vafree" is currently non-empty, it will
 2302          * remain non-empty until pmap_ptelist_alloc() completes.
 2303          */
 2304         if (pv_vafree == 0 || (m = vm_page_alloc(NULL, colour, (pq ==
 2305             &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) |
 2306             VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 2307                 if (try) {
 2308                         pv_entry_count--;
 2309                         PV_STAT(pc_chunk_tryfail++);
 2310                         return (NULL);
 2311                 }
 2312                 /*
 2313                  * Reclaim pv entries: At first, destroy mappings to
 2314                  * inactive pages.  After that, if a pv chunk entry
 2315                  * is still needed, destroy mappings to active pages.
 2316                  */
 2317                 if (pq == NULL) {
 2318                         PV_STAT(pmap_collect_inactive++);
 2319                         pq = &vm_page_queues[PQ_INACTIVE];
 2320                 } else if (pq == &vm_page_queues[PQ_INACTIVE]) {
 2321                         PV_STAT(pmap_collect_active++);
 2322                         pq = &vm_page_queues[PQ_ACTIVE];
 2323                 } else
 2324                         panic("get_pv_entry: increase vm.pmap.shpgperproc");
 2325                 pmap_collect(pmap, pq);
 2326                 goto retry;
 2327         }
 2328         PV_STAT(pc_chunk_count++);
 2329         PV_STAT(pc_chunk_allocs++);
 2330         colour++;
 2331         pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
 2332         pmap_qenter((vm_offset_t)pc, &m, 1);
 2333         pc->pc_pmap = pmap;
 2334         pc->pc_map[0] = pc_freemask[0] & ~1ul;  /* preallocated bit 0 */
 2335         for (field = 1; field < _NPCM; field++)
 2336                 pc->pc_map[field] = pc_freemask[field];
 2337         pv = &pc->pc_pventry[0];
 2338         TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 2339         PV_STAT(pv_entry_spare += _NPCPV - 1);
 2340         return (pv);
 2341 }
 2342 
 2343 static __inline pv_entry_t
 2344 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 2345 {
 2346         pv_entry_t pv;
 2347 
 2348         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2349         TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 2350                 if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
 2351                         TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
 2352                         break;
 2353                 }
 2354         }
 2355         return (pv);
 2356 }
 2357 
 2358 static void
 2359 pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 2360 {
 2361         struct md_page *pvh;
 2362         pv_entry_t pv;
 2363         vm_offset_t va_last;
 2364         vm_page_t m;
 2365 
 2366         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2367         KASSERT((pa & PDRMASK) == 0,
 2368             ("pmap_pv_demote_pde: pa is not 4mpage aligned"));
 2369 
 2370         /*
 2371          * Transfer the 4mpage's pv entry for this mapping to the first
 2372          * page's pv list.
 2373          */
 2374         pvh = pa_to_pvh(pa);
 2375         va = trunc_4mpage(va);
 2376         pv = pmap_pvh_remove(pvh, pmap, va);
 2377         KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found"));
 2378         m = PHYS_TO_VM_PAGE(pa);
 2379         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 2380         /* Instantiate the remaining NPTEPG - 1 pv entries. */
 2381         va_last = va + NBPDR - PAGE_SIZE;
 2382         do {
 2383                 m++;
 2384                 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
 2385                     ("pmap_pv_demote_pde: page %p is not managed", m));
 2386                 va += PAGE_SIZE;
 2387                 pmap_insert_entry(pmap, va, m);
 2388         } while (va < va_last);
 2389 }
 2390 
 2391 static void
 2392 pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 2393 {
 2394         struct md_page *pvh;
 2395         pv_entry_t pv;
 2396         vm_offset_t va_last;
 2397         vm_page_t m;
 2398 
 2399         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2400         KASSERT((pa & PDRMASK) == 0,
 2401             ("pmap_pv_promote_pde: pa is not 4mpage aligned"));
 2402 
 2403         /*
 2404          * Transfer the first page's pv entry for this mapping to the
 2405          * 4mpage's pv list.  Aside from avoiding the cost of a call
 2406          * to get_pv_entry(), a transfer avoids the possibility that
 2407          * get_pv_entry() calls pmap_collect() and that pmap_collect()
 2408          * removes one of the mappings that is being promoted.
 2409          */
 2410         m = PHYS_TO_VM_PAGE(pa);
 2411         va = trunc_4mpage(va);
 2412         pv = pmap_pvh_remove(&m->md, pmap, va);
 2413         KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found"));
 2414         pvh = pa_to_pvh(pa);
 2415         TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
 2416         /* Free the remaining NPTEPG - 1 pv entries. */
 2417         va_last = va + NBPDR - PAGE_SIZE;
 2418         do {
 2419                 m++;
 2420                 va += PAGE_SIZE;
 2421                 pmap_pvh_free(&m->md, pmap, va);
 2422         } while (va < va_last);
 2423 }
 2424 
 2425 static void
 2426 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 2427 {
 2428         pv_entry_t pv;
 2429 
 2430         pv = pmap_pvh_remove(pvh, pmap, va);
 2431         KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
 2432         free_pv_entry(pmap, pv);
 2433 }
 2434 
 2435 static void
 2436 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 2437 {
 2438         struct md_page *pvh;
 2439 
 2440         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2441         pmap_pvh_free(&m->md, pmap, va);
 2442         if (TAILQ_EMPTY(&m->md.pv_list)) {
 2443                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 2444                 if (TAILQ_EMPTY(&pvh->pv_list))
 2445                         vm_page_flag_clear(m, PG_WRITEABLE);
 2446         }
 2447 }
 2448 
 2449 /*
 2450  * Create a pv entry for the given page, which is
 2451  * mapped at (pmap, va).
 2452  */
 2453 static void
 2454 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 2455 {
 2456         pv_entry_t pv;
 2457 
 2458         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2459         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2460         pv = get_pv_entry(pmap, FALSE);
 2461         pv->pv_va = va;
 2462         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 2463 }
 2464 
 2465 /*
 2466  * Conditionally create a pv entry.
 2467  */
 2468 static boolean_t
 2469 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 2470 {
 2471         pv_entry_t pv;
 2472 
 2473         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2474         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2475         if (pv_entry_count < pv_entry_high_water && 
 2476             (pv = get_pv_entry(pmap, TRUE)) != NULL) {
 2477                 pv->pv_va = va;
 2478                 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 2479                 return (TRUE);
 2480         } else
 2481                 return (FALSE);
 2482 }
 2483 
 2484 /*
 2485  * Create the pv entries for each of the pages within a superpage.
 2486  */
 2487 static boolean_t
 2488 pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 2489 {
 2490         struct md_page *pvh;
 2491         pv_entry_t pv;
 2492 
 2493         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2494         if (pv_entry_count < pv_entry_high_water && 
 2495             (pv = get_pv_entry(pmap, TRUE)) != NULL) {
 2496                 pv->pv_va = va;
 2497                 pvh = pa_to_pvh(pa);
 2498                 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
 2499                 return (TRUE);
 2500         } else
 2501                 return (FALSE);
 2502 }
 2503 
 2504 /*
 2505  * Fills a page table page with mappings to consecutive physical pages.
 2506  */
 2507 static void
 2508 pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte)
 2509 {
 2510         pt_entry_t *pte;
 2511 
 2512         for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
 2513                 *pte = newpte;  
 2514                 newpte += PAGE_SIZE;
 2515         }
 2516 }
 2517 
 2518 /*
 2519  * Tries to demote a 2- or 4MB page mapping.  If demotion fails, the
 2520  * 2- or 4MB page mapping is invalidated.
 2521  */
 2522 static boolean_t
 2523 pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 2524 {
 2525         pd_entry_t newpde, oldpde;
 2526         pt_entry_t *firstpte, newpte;
 2527         vm_paddr_t mptepa;
 2528         vm_page_t free, mpte;
 2529 
 2530         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2531         oldpde = *pde;
 2532         KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
 2533             ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
 2534         mpte = pmap_lookup_pt_page(pmap, va);
 2535         if (mpte != NULL)
 2536                 pmap_remove_pt_page(pmap, mpte);
 2537         else {
 2538                 KASSERT((oldpde & PG_W) == 0,
 2539                     ("pmap_demote_pde: page table page for a wired mapping"
 2540                     " is missing"));
 2541 
 2542                 /*
 2543                  * Invalidate the 2- or 4MB page mapping and return
 2544                  * "failure" if the mapping was never accessed or the
 2545                  * allocation of the new page table page fails.
 2546                  */
 2547                 if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
 2548                     va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL |
 2549                     VM_ALLOC_WIRED)) == NULL) {
 2550                         free = NULL;
 2551                         pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
 2552                         pmap_invalidate_page(pmap, trunc_4mpage(va));
 2553                         pmap_free_zero_pages(free);
 2554                         CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
 2555                             " in pmap %p", va, pmap);
 2556                         return (FALSE);
 2557                 }
 2558                 if (va < VM_MAXUSER_ADDRESS)
 2559                         pmap->pm_stats.resident_count++;
 2560         }
 2561         mptepa = VM_PAGE_TO_PHYS(mpte);
 2562 
 2563         /*
 2564          * If the page mapping is in the kernel's address space, then the
 2565          * KPTmap can provide access to the page table page.  Otherwise,
 2566          * temporarily map the page table page (mpte) into the kernel's
 2567          * address space at either PADDR1 or PADDR2. 
 2568          */
 2569         if (va >= KERNBASE)
 2570                 firstpte = &KPTmap[i386_btop(trunc_4mpage(va))];
 2571         else if (curthread->td_pinned > 0 && mtx_owned(&vm_page_queue_mtx)) {
 2572                 if ((*PMAP1 & PG_FRAME) != mptepa) {
 2573                         *PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M;
 2574 #ifdef SMP
 2575                         PMAP1cpu = PCPU_GET(cpuid);
 2576 #endif
 2577                         invlcaddr(PADDR1);
 2578                         PMAP1changed++;
 2579                 } else
 2580 #ifdef SMP
 2581                 if (PMAP1cpu != PCPU_GET(cpuid)) {
 2582                         PMAP1cpu = PCPU_GET(cpuid);
 2583                         invlcaddr(PADDR1);
 2584                         PMAP1changedcpu++;
 2585                 } else
 2586 #endif
 2587                         PMAP1unchanged++;
 2588                 firstpte = PADDR1;
 2589         } else {
 2590                 mtx_lock(&PMAP2mutex);
 2591                 if ((*PMAP2 & PG_FRAME) != mptepa) {
 2592                         *PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M;
 2593                         pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
 2594                 }
 2595                 firstpte = PADDR2;
 2596         }
 2597         newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
 2598         KASSERT((oldpde & PG_A) != 0,
 2599             ("pmap_demote_pde: oldpde is missing PG_A"));
 2600         KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
 2601             ("pmap_demote_pde: oldpde is missing PG_M"));
 2602         newpte = oldpde & ~PG_PS;
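              /*
               * The PAT index bit sits at a different position in a PDE
               * (PG_PDE_PAT) than in a PTE (PG_PTE_PAT); move it if set.
               */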
 2603         if ((newpte & PG_PDE_PAT) != 0)
 2604                 newpte ^= PG_PDE_PAT | PG_PTE_PAT;
 2605 
 2606         /*
 2607          * If the page table page is new, initialize it.
 2608          */
 2609         if (mpte->wire_count == 1) {
 2610                 mpte->wire_count = NPTEPG;
 2611                 pmap_fill_ptp(firstpte, newpte);
 2612         }
 2613         KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
 2614             ("pmap_demote_pde: firstpte and newpte map different physical"
 2615             " addresses"));
 2616 
 2617         /*
 2618          * If the mapping has changed attributes, update the page table
 2619          * entries.
 2620          */ 
 2621         if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
 2622                 pmap_fill_ptp(firstpte, newpte);
 2623         
 2624         /*
 2625          * Demote the mapping.  This pmap is locked.  The old PDE has
 2626          * PG_A set.  If the old PDE has PG_RW set, it also has PG_M
 2627          * set.  Thus, there is no danger of a race with another
 2628          * processor changing the setting of PG_A and/or PG_M between
 2629          * the read above and the store below. 
 2630          */
 2631         if (workaround_erratum383)
 2632                 pmap_update_pde(pmap, va, pde, newpde);
 2633         else if (pmap == kernel_pmap)
 2634                 pmap_kenter_pde(va, newpde);
 2635         else
 2636                 pde_store(pde, newpde); 
 2637         if (firstpte == PADDR2)
 2638                 mtx_unlock(&PMAP2mutex);
 2639 
 2640         /*
 2641          * Invalidate the recursive mapping of the page table page.
 2642          */
 2643         pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va));
 2644 
 2645         /*
 2646          * Demote the pv entry.  This depends on the earlier demotion
 2647          * of the mapping.  Specifically, the (re)creation of a per-
 2648          * page pv entry might trigger the execution of pmap_collect(),
 2649          * which might reclaim a newly (re)created per-page pv entry
 2650          * and destroy the associated mapping.  In order to destroy
 2651          * the mapping, the PDE must have already changed from mapping
 2652          * the 2mpage to referencing the page table page.
 2653          */
 2654         if ((oldpde & PG_MANAGED) != 0)
 2655                 pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME);
 2656 
 2657         pmap_pde_demotions++;
 2658         CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x"
 2659             " in pmap %p", va, pmap);
 2660         return (TRUE);
 2661 }
 2662 
 2663 /*
 2664  * pmap_remove_pde: unmap a superpage (2- or 4MB mapping) in a process
 2665  */
 2666 static void
 2667 pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
 2668     vm_page_t *free)
 2669 {
 2670         struct md_page *pvh;
 2671         pd_entry_t oldpde;
 2672         vm_offset_t eva, va;
 2673         vm_page_t m, mpte;
 2674 
 2675         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2676         KASSERT((sva & PDRMASK) == 0,
 2677             ("pmap_remove_pde: sva is not 4mpage aligned"));
 2678         oldpde = pte_load_clear(pdq);
 2679         if (oldpde & PG_W)
 2680                 pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE;
 2681 
 2682         /*
 2683          * Machines that don't support invlpg also don't support
 2684          * PG_G.
 2685          */
 2686         if (oldpde & PG_G)
 2687                 pmap_invalidate_page(kernel_pmap, sva);
 2688         pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 2689         if (oldpde & PG_MANAGED) {
 2690                 pvh = pa_to_pvh(oldpde & PG_PS_FRAME);
 2691                 pmap_pvh_free(pvh, pmap, sva);
 2692                 eva = sva + NBPDR;
 2693                 for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
 2694                     va < eva; va += PAGE_SIZE, m++) {
 2695                         if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
 2696                                 vm_page_dirty(m);
 2697                         if (oldpde & PG_A)
 2698                                 vm_page_flag_set(m, PG_REFERENCED);
 2699                         if (TAILQ_EMPTY(&m->md.pv_list) &&
 2700                             TAILQ_EMPTY(&pvh->pv_list))
 2701                                 vm_page_flag_clear(m, PG_WRITEABLE);
 2702                 }
 2703         }
 2704         if (pmap == kernel_pmap) {
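                      /*
                       * Kernel page table pages are never freed; the mapping
                       * is demoted rather than releasing the page table page.
                       */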
 2705                 if (!pmap_demote_pde(pmap, pdq, sva))
 2706                         panic("pmap_remove_pde: failed demotion");
 2707         } else {
 2708                 mpte = pmap_lookup_pt_page(pmap, sva);
 2709                 if (mpte != NULL) {
 2710                         pmap_remove_pt_page(pmap, mpte);
 2711                         pmap->pm_stats.resident_count--;
 2712                         KASSERT(mpte->wire_count == NPTEPG,
 2713                             ("pmap_remove_pde: pte page wire count error"));
 2714                         mpte->wire_count = 0;
 2715                         pmap_add_delayed_free_list(mpte, free, FALSE);
 2716                         atomic_subtract_int(&cnt.v_wire_count, 1);
 2717                 }
 2718         }
 2719 }
 2720 
 2721 /*
 2722  * pmap_remove_pte: unmap a single page in a process
 2723  */
 2724 static int
 2725 pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
 2726 {
 2727         pt_entry_t oldpte;
 2728         vm_page_t m;
 2729 
 2730         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2731         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2732         oldpte = pte_load_clear(ptq);
 2733         if (oldpte & PG_W)
 2734                 pmap->pm_stats.wired_count -= 1;
 2735         /*
 2736          * Machines that don't support invlpg also don't support
 2737          * PG_G.
 2738          */
 2739         if (oldpte & PG_G)
 2740                 pmap_invalidate_page(kernel_pmap, va);
 2741         pmap->pm_stats.resident_count -= 1;
 2742         if (oldpte & PG_MANAGED) {
 2743                 m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME);
 2744                 if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 2745                         vm_page_dirty(m);
 2746                 if (oldpte & PG_A)
 2747                         vm_page_flag_set(m, PG_REFERENCED);
 2748                 pmap_remove_entry(pmap, m, va);
 2749         }
 2750         return (pmap_unuse_pt(pmap, va, free));
 2751 }
 2752 
 2753 /*
 2754  * Remove a single page from a process address space
 2755  */
 2756 static void
 2757 pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
 2758 {
 2759         pt_entry_t *pte;
 2760 
 2761         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2762         KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
 2763         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2764         if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0)
 2765                 return;
 2766         pmap_remove_pte(pmap, pte, va, free);
 2767         pmap_invalidate_page(pmap, va);
 2768 }
 2769 
 2770 /*
 2771  *      Remove the given range of addresses from the specified map.
 2772  *
 2773  *      It is assumed that the start and end are properly
 2774  *      rounded to the page size.
 2775  */
 2776 void
 2777 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 2778 {
 2779         vm_offset_t pdnxt;
 2780         pd_entry_t ptpaddr;
 2781         pt_entry_t *pte;
 2782         vm_page_t free = NULL;
 2783         int anyvalid;
 2784 
 2785         /*
 2786          * Perform an unsynchronized read.  This is, however, safe.
 2787          */
 2788         if (pmap->pm_stats.resident_count == 0)
 2789                 return;
 2790 
 2791         anyvalid = 0;
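              /*
               * anyvalid records whether any non-global mapping was removed;
               * if so, the entire TLB is flushed once at the end instead of
               * invalidating each such page individually.
               */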
 2792 
 2793         vm_page_lock_queues();
 2794         sched_pin();
 2795         PMAP_LOCK(pmap);
 2796 
 2797         /*
 2798          * Special handling for removing a single page: a very
 2799          * common operation for which some code can be
 2800          * short-circuited.
 2801          */
 2802         if ((sva + PAGE_SIZE == eva) && 
 2803             ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
 2804                 pmap_remove_page(pmap, sva, &free);
 2805                 goto out;
 2806         }
 2807 
 2808         for (; sva < eva; sva = pdnxt) {
 2809                 unsigned pdirindex;
 2810 
 2811                 /*
 2812                  * Calculate index for next page table.
 2813                  */
 2814                 pdnxt = (sva + NBPDR) & ~PDRMASK;
 2815                 if (pdnxt < sva)
 2816                         pdnxt = eva;
 2817                 if (pmap->pm_stats.resident_count == 0)
 2818                         break;
 2819 
 2820                 pdirindex = sva >> PDRSHIFT;
 2821                 ptpaddr = pmap->pm_pdir[pdirindex];
 2822 
 2823                 /*
 2824                  * Weed out invalid mappings.  Note: we assume that the page
 2825                  * directory table is always allocated and mapped in kernel virtual.
 2826                  */
 2827                 if (ptpaddr == 0)
 2828                         continue;
 2829 
 2830                 /*
 2831                  * Check for large page.
 2832                  */
 2833                 if ((ptpaddr & PG_PS) != 0) {
 2834                         /*
 2835                          * Are we removing the entire large page?  If not,
 2836                          * demote the mapping and fall through.
 2837                          */
 2838                         if (sva + NBPDR == pdnxt && eva >= pdnxt) {
 2839                                 /*
 2840                                  * The TLB entry for a PG_G mapping is
 2841                                  * invalidated by pmap_remove_pde().
 2842                                  */
 2843                                 if ((ptpaddr & PG_G) == 0)
 2844                                         anyvalid = 1;
 2845                                 pmap_remove_pde(pmap,
 2846                                     &pmap->pm_pdir[pdirindex], sva, &free);
 2847                                 continue;
 2848                         } else if (!pmap_demote_pde(pmap,
 2849                             &pmap->pm_pdir[pdirindex], sva)) {
 2850                                 /* The large page mapping was destroyed. */
 2851                                 continue;
 2852                         }
 2853                 }
 2854 
 2855                 /*
 2856                  * Limit our scan to either the end of the va represented
 2857                  * by the current page table page, or to the end of the
 2858                  * range being removed.
 2859                  */
 2860                 if (pdnxt > eva)
 2861                         pdnxt = eva;
 2862 
 2863                 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
 2864                     sva += PAGE_SIZE) {
 2865                         if (*pte == 0)
 2866                                 continue;
 2867 
 2868                         /*
 2869                          * The TLB entry for a PG_G mapping is invalidated
 2870                          * by pmap_remove_pte().
 2871                          */
 2872                         if ((*pte & PG_G) == 0)
 2873                                 anyvalid = 1;
 2874                         if (pmap_remove_pte(pmap, pte, sva, &free))
 2875                                 break;
 2876                 }
 2877         }
 2878 out:
 2879         sched_unpin();
 2880         if (anyvalid)
 2881                 pmap_invalidate_all(pmap);
 2882         vm_page_unlock_queues();
 2883         PMAP_UNLOCK(pmap);
 2884         pmap_free_zero_pages(free);
 2885 }
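      /*
       * Example (illustrative): removing a single 4KB mapping, a very
       * common operation, takes the short-circuit path at the top of
       * pmap_remove():
       *
       *      pmap_remove(pmap, va, va + PAGE_SIZE);
       */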
 2886 
 2887 /*
 2888  *      Routine:        pmap_remove_all
 2889  *      Function:
 2890  *              Removes this physical page from
 2891  *              all physical maps in which it resides.
 2892  *              Reflects back modify bits to the pager.
 2893  *
 2894  *      Notes:
 2895  *              Original versions of this routine were very
 2896  *              inefficient because they iteratively called
 2897  *              pmap_remove (slow...)
 2898  */
 2899 
 2900 void
 2901 pmap_remove_all(vm_page_t m)
 2902 {
 2903         struct md_page *pvh;
 2904         pv_entry_t pv;
 2905         pmap_t pmap;
 2906         pt_entry_t *pte, tpte;
 2907         pd_entry_t *pde;
 2908         vm_offset_t va;
 2909         vm_page_t free;
 2910 
 2911         KASSERT((m->flags & PG_FICTITIOUS) == 0,
 2912             ("pmap_remove_all: page %p is fictitious", m));
 2913         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2914         sched_pin();
 2915         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
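              /*
               * First demote any 2/4MB page mapping that includes this
               * page, so that only 4KB mappings remain to be removed.
               */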
 2916         while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
 2917                 va = pv->pv_va;
 2918                 pmap = PV_PMAP(pv);
 2919                 PMAP_LOCK(pmap);
 2920                 pde = pmap_pde(pmap, va);
 2921                 (void)pmap_demote_pde(pmap, pde, va);
 2922                 PMAP_UNLOCK(pmap);
 2923         }
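              /*
               * Now remove each remaining 4KB mapping of the page,
               * reflecting the referenced and modified bits back to the
               * vm_page.
               */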
 2924         while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 2925                 pmap = PV_PMAP(pv);
 2926                 PMAP_LOCK(pmap);
 2927                 pmap->pm_stats.resident_count--;
 2928                 pde = pmap_pde(pmap, pv->pv_va);
 2929                 KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found"
 2930                     " a 4mpage in page %p's pv list", m));
 2931                 pte = pmap_pte_quick(pmap, pv->pv_va);
 2932                 tpte = pte_load_clear(pte);
 2933                 if (tpte & PG_W)
 2934                         pmap->pm_stats.wired_count--;
 2935                 if (tpte & PG_A)
 2936                         vm_page_flag_set(m, PG_REFERENCED);
 2937 
 2938                 /*
 2939                  * Update the vm_page_t clean and reference bits.
 2940                  */
 2941                 if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 2942                         vm_page_dirty(m);
 2943                 free = NULL;
 2944                 pmap_unuse_pt(pmap, pv->pv_va, &free);
 2945                 pmap_invalidate_page(pmap, pv->pv_va);
 2946                 pmap_free_zero_pages(free);
 2947                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2948                 free_pv_entry(pmap, pv);
 2949                 PMAP_UNLOCK(pmap);
 2950         }
 2951         vm_page_flag_clear(m, PG_WRITEABLE);
 2952         sched_unpin();
 2953 }
 2954 
 2955 /*
 2956  * pmap_protect_pde: do the things to protect a 4mpage in a process
 2957  */
 2958 static boolean_t
 2959 pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot)
 2960 {
 2961         pd_entry_t newpde, oldpde;
 2962         vm_offset_t eva, va;
 2963         vm_page_t m;
 2964         boolean_t anychanged;
 2965 
 2966         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2967         KASSERT((sva & PDRMASK) == 0,
 2968             ("pmap_protect_pde: sva is not 4mpage aligned"));
 2969         anychanged = FALSE;
 2970 retry:
 2971         oldpde = newpde = *pde;
 2972         if (oldpde & PG_MANAGED) {
 2973                 eva = sva + NBPDR;
 2974                 for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
 2975                     va < eva; va += PAGE_SIZE, m++) {
 2976                         /*
 2977                          * In contrast to the analogous operation on a 4KB page
 2978                          * mapping, the mapping's PG_A flag is not cleared and
 2979                          * the page's PG_REFERENCED flag is not set.  The
 2980                          * reason is that pmap_demote_pde() expects that a 2/4MB
 2981                          * page mapping with a stored page table page has PG_A
 2982                          * set.
 2983                          */
 2984                         if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
 2985                                 vm_page_dirty(m);
 2986                 }
 2987         }
 2988         if ((prot & VM_PROT_WRITE) == 0)
 2989                 newpde &= ~(PG_RW | PG_M);
 2990 #ifdef PAE
 2991         if ((prot & VM_PROT_EXECUTE) == 0)
 2992                 newpde |= pg_nx;
 2993 #endif
 2994         if (newpde != oldpde) {
 2995                 if (!pde_cmpset(pde, oldpde, newpde))
 2996                         goto retry;
 2997                 if (oldpde & PG_G)
 2998                         pmap_invalidate_page(pmap, sva);
 2999                 else
 3000                         anychanged = TRUE;
 3001         }
 3002         return (anychanged);
 3003 }
 3004 
 3005 /*
 3006  *      Set the physical protection on the
 3007  *      specified range of this map as requested.
 3008  */
 3009 void
 3010 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 3011 {
 3012         vm_offset_t pdnxt;
 3013         pd_entry_t ptpaddr;
 3014         pt_entry_t *pte;
 3015         int anychanged;
 3016 
 3017         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 3018                 pmap_remove(pmap, sva, eva);
 3019                 return;
 3020         }
 3021 
 3022 #ifdef PAE
 3023         if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
 3024             (VM_PROT_WRITE|VM_PROT_EXECUTE))
 3025                 return;
 3026 #else
 3027         if (prot & VM_PROT_WRITE)
 3028                 return;
 3029 #endif
 3030 
 3031         anychanged = 0;
 3032 
 3033         vm_page_lock_queues();
 3034         sched_pin();
 3035         PMAP_LOCK(pmap);
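              /*
               * Walk the range one page-directory entry at a time,
               * reducing the protection of 4MB page mappings in place and
               * of 4KB mappings individually.
               */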
 3036         for (; sva < eva; sva = pdnxt) {
 3037                 pt_entry_t obits, pbits;
 3038                 unsigned pdirindex;
 3039 
 3040                 pdnxt = (sva + NBPDR) & ~PDRMASK;
 3041                 if (pdnxt < sva)
 3042                         pdnxt = eva;
 3043 
 3044                 pdirindex = sva >> PDRSHIFT;
 3045                 ptpaddr = pmap->pm_pdir[pdirindex];
 3046 
 3047                 /*
 3048                  * Weed out invalid mappings. Note: we assume that the page
 3049                  * directory table is always allocated, and in kernel virtual.
 3050                  */
 3051                 if (ptpaddr == 0)
 3052                         continue;
 3053 
 3054                 /*
 3055                  * Check for large page.
 3056                  */
 3057                 if ((ptpaddr & PG_PS) != 0) {
 3058                         /*
 3059                          * Are we protecting the entire large page?  If not,
 3060                          * demote the mapping and fall through.
 3061                          */
 3062                         if (sva + NBPDR == pdnxt && eva >= pdnxt) {
 3063                                 /*
 3064                                  * The TLB entry for a PG_G mapping is
 3065                                  * invalidated by pmap_protect_pde().
 3066                                  */
 3067                                 if (pmap_protect_pde(pmap,
 3068                                     &pmap->pm_pdir[pdirindex], sva, prot))
 3069                                         anychanged = 1;
 3070                                 continue;
 3071                         } else if (!pmap_demote_pde(pmap,
 3072                             &pmap->pm_pdir[pdirindex], sva)) {
 3073                                 /* The large page mapping was destroyed. */
 3074                                 continue;
 3075                         }
 3076                 }
 3077 
 3078                 if (pdnxt > eva)
 3079                         pdnxt = eva;
 3080 
 3081                 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
 3082                     sva += PAGE_SIZE) {
 3083                         vm_page_t m;
 3084 
 3085 retry:
 3086                         /*
 3087                          * Regardless of whether a pte is 32 or 64 bits in
 3088                          * size, PG_RW, PG_A, and PG_M are among the least
 3089                          * significant 32 bits.
 3090                          */
 3091                         obits = pbits = *pte;
 3092                         if ((pbits & PG_V) == 0)
 3093                                 continue;
 3094                         if (pbits & PG_MANAGED) {
 3095                                 m = NULL;
 3096                                 if (pbits & PG_A) {
 3097                                         m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
 3098                                         vm_page_flag_set(m, PG_REFERENCED);
 3099                                         pbits &= ~PG_A;
 3100                                 }
 3101                                 if ((pbits & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 3102                                         if (m == NULL)
 3103                                                 m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
 3104                                         vm_page_dirty(m);
 3105                                 }
 3106                         }
 3107 
 3108                         if ((prot & VM_PROT_WRITE) == 0)
 3109                                 pbits &= ~(PG_RW | PG_M);
 3110 #ifdef PAE
 3111                         if ((prot & VM_PROT_EXECUTE) == 0)
 3112                                 pbits |= pg_nx;
 3113 #endif
 3114 
 3115                         if (pbits != obits) {
 3116 #ifdef PAE
 3117                                 if (!atomic_cmpset_64(pte, obits, pbits))
 3118                                         goto retry;
 3119 #else
 3120                                 if (!atomic_cmpset_int((u_int *)pte, obits,
 3121                                     pbits))
 3122                                         goto retry;
 3123 #endif
 3124                                 if (obits & PG_G)
 3125                                         pmap_invalidate_page(pmap, sva);
 3126                                 else
 3127                                         anychanged = 1;
 3128                         }
 3129                 }
 3130         }
 3131         sched_unpin();
 3132         if (anychanged)
 3133                 pmap_invalidate_all(pmap);
 3134         vm_page_unlock_queues();
 3135         PMAP_UNLOCK(pmap);
 3136 }
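      /*
       * Example (illustrative): removing write access from a range.  The
       * compare-and-set retry loop above ensures that PG_A and PG_M bits
       * set concurrently by the MMU are not lost:
       *
       *      pmap_protect(pmap, sva, eva, VM_PROT_READ);
       */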
 3137 
 3138 /*
 3139  * Tries to promote the 512 or 1024 contiguous 4KB page mappings that are
 3140  * within a single page table page (PTP) to a single 2- or 4MB page mapping.
 3141  * For promotion to occur, two conditions must be met: (1) the 4KB page
 3142  * mappings must map aligned, contiguous physical memory and (2) the 4KB page
 3143  * mappings must have identical characteristics.
 3144  *
 3145  * Managed (PG_MANAGED) mappings within the kernel address space are not
 3146  * promoted.  The reason is that kernel PDEs are replicated in each pmap but
 3147  * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel
 3148  * pmap.
 3149  */
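      /*
       * (A page table page holds NPTEPG 4KB mappings: 1024 in the non-PAE
       * pmap, where a PDE maps 4MB, and 512 under PAE, where a PDE maps
       * 2MB; hence "512 or 1024" above.)
       */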
 3150 static void
 3151 pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 3152 {
 3153         pd_entry_t newpde;
 3154         pt_entry_t *firstpte, oldpte, pa, *pte;
 3155         vm_offset_t oldpteva;
 3156         vm_page_t mpte;
 3157 
 3158         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 3159 
 3160         /*
 3161          * Examine the first PTE in the specified PTP.  Abort if this PTE is
 3162          * either invalid, unused, or does not map the first 4KB physical page
 3163          * within a 2- or 4MB page.
 3164          */
 3165         firstpte = pmap_pte_quick(pmap, trunc_4mpage(va));
 3166 setpde:
 3167         newpde = *firstpte;
 3168         if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
 3169                 pmap_pde_p_failures++;
 3170                 CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 3171                     " in pmap %p", va, pmap);
 3172                 return;
 3173         }
 3174         if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) {
 3175                 pmap_pde_p_failures++;
 3176                 CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 3177                     " in pmap %p", va, pmap);
 3178                 return;
 3179         }
 3180         if ((newpde & (PG_M | PG_RW)) == PG_RW) {
 3181                 /*
 3182                  * When PG_M is already clear, PG_RW can be cleared without
 3183                  * a TLB invalidation.
 3184                  */
 3185                 if (!atomic_cmpset_int((u_int *)firstpte, newpde, newpde &
 3186                     ~PG_RW))  
 3187                         goto setpde;
 3188                 newpde &= ~PG_RW;
 3189         }
 3190 
 3191         /* 
 3192          * Examine each of the other PTEs in the specified PTP.  Abort if this
 3193          * PTE maps an unexpected 4KB physical page or does not have identical
 3194          * characteristics to the first PTE.
 3195          */
 3196         pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE;
 3197         for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) {
 3198 setpte:
 3199                 oldpte = *pte;
 3200                 if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) {
 3201                         pmap_pde_p_failures++;
 3202                         CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 3203                             " in pmap %p", va, pmap);
 3204                         return;
 3205                 }
 3206                 if ((oldpte & (PG_M | PG_RW)) == PG_RW) {
 3207                         /*
 3208                          * When PG_M is already clear, PG_RW can be cleared
 3209                          * without a TLB invalidation.
 3210                          */
 3211                         if (!atomic_cmpset_int((u_int *)pte, oldpte,
 3212                             oldpte & ~PG_RW))
 3213                                 goto setpte;
 3214                         oldpte &= ~PG_RW;
 3215                         oldpteva = (oldpte & PG_FRAME & PDRMASK) |
 3216                             (va & ~PDRMASK);
 3217                         CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x"
 3218                             " in pmap %p", oldpteva, pmap);
 3219                 }
 3220                 if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) {
 3221                         pmap_pde_p_failures++;
 3222                         CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x"
 3223                             " in pmap %p", va, pmap);
 3224                         return;
 3225                 }
 3226                 pa -= PAGE_SIZE;
 3227         }
 3228 
 3229         /*
 3230          * Save the page table page in its current state until the PDE
 3231          * mapping the superpage is demoted by pmap_demote_pde() or
 3232          * destroyed by pmap_remove_pde(). 
 3233          */
 3234         mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME);
 3235         KASSERT(mpte >= vm_page_array &&
 3236             mpte < &vm_page_array[vm_page_array_size],
 3237             ("pmap_promote_pde: page table page is out of range"));
 3238         KASSERT(mpte->pindex == va >> PDRSHIFT,
 3239             ("pmap_promote_pde: page table page's pindex is wrong"));
 3240         pmap_insert_pt_page(pmap, mpte);
 3241 
 3242         /*
 3243          * Promote the pv entries.
 3244          */
 3245         if ((newpde & PG_MANAGED) != 0)
 3246                 pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME);
 3247 
 3248         /*
 3249          * Propagate the PAT index to its proper position.
 3250          */
 3251         if ((newpde & PG_PTE_PAT) != 0)
 3252                 newpde ^= PG_PDE_PAT | PG_PTE_PAT;
 3253 
 3254         /*
 3255          * Map the superpage.
 3256          */
 3257         if (workaround_erratum383)
 3258                 pmap_update_pde(pmap, va, pde, PG_PS | newpde);
 3259         else if (pmap == kernel_pmap)
 3260                 pmap_kenter_pde(va, PG_PS | newpde);
 3261         else
 3262                 pde_store(pde, PG_PS | newpde);
 3263 
 3264         pmap_pde_promotions++;
 3265         CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x"
 3266             " in pmap %p", va, pmap);
 3267 }
 3268 
 3269 /*
 3270  *      Insert the given physical page (p) at
 3271  *      the specified virtual address (v) in the
 3272  *      target physical map with the protection requested.
 3273  *
 3274  *      If specified, the page will be wired down, meaning
 3275  *      that the related pte can not be reclaimed.
 3276  *
 3277  *      NB:  This is the only routine which MAY NOT lazy-evaluate
 3278  *      or lose information.  That is, this routine must actually
 3279  *      insert this page into the given map NOW.
 3280  */
 3281 void
 3282 pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
 3283     vm_prot_t prot, boolean_t wired)
 3284 {
 3285         vm_paddr_t pa;
 3286         pd_entry_t *pde;
 3287         pt_entry_t *pte;
 3288         vm_paddr_t opa;
 3289         pt_entry_t origpte, newpte;
 3290         vm_page_t mpte, om;
 3291         boolean_t invlva;
 3292 
 3293         va = trunc_page(va);
 3294         KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
 3295         KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
 3296             ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va));
 3297 
 3298         mpte = NULL;
 3299 
 3300         vm_page_lock_queues();
 3301         PMAP_LOCK(pmap);
 3302         sched_pin();
 3303 
 3304         /*
 3305          * In the case that a page table page is not
 3306          * resident, we are creating it here.
 3307          */
 3308         if (va < VM_MAXUSER_ADDRESS) {
 3309                 mpte = pmap_allocpte(pmap, va, M_WAITOK);
 3310         }
 3311 
 3312         pde = pmap_pde(pmap, va);
 3313         if ((*pde & PG_PS) != 0)
 3314                 panic("pmap_enter: attempted pmap_enter on 4MB page");
 3315         pte = pmap_pte_quick(pmap, va);
 3316 
 3317         /*
 3318          * Page Directory table entry not valid, we need a new PT page
 3319          */
 3320         if (pte == NULL) {
 3321                 panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x",
 3322                         (uintmax_t)pmap->pm_pdir[PTDPTDI], va);
 3323         }
 3324 
 3325         pa = VM_PAGE_TO_PHYS(m);
 3326         om = NULL;
 3327         origpte = *pte;
 3328         opa = origpte & PG_FRAME;
 3329 
 3330         /*
 3331          * Mapping has not changed, must be protection or wiring change.
 3332          */
 3333         if (origpte && (opa == pa)) {
 3334                 /*
 3335                  * Wiring change, just update stats. We don't worry about
 3336                  * wiring PT pages as they remain resident as long as there
 3337                  * are valid mappings in them. Hence, if a user page is wired,
 3338                  * the PT page will be also.
 3339                  */
 3340                 if (wired && ((origpte & PG_W) == 0))
 3341                         pmap->pm_stats.wired_count++;
 3342                 else if (!wired && (origpte & PG_W))
 3343                         pmap->pm_stats.wired_count--;
 3344 
 3345                 /*
 3346                  * Remove extra pte reference
 3347                  */
 3348                 if (mpte)
 3349                         mpte->wire_count--;
 3350 
 3351                 /*
 3352                  * We might be turning off write access to the page,
 3353                  * so we go ahead and sense modify status.
 3354                  */
 3355                 if (origpte & PG_MANAGED) {
 3356                         om = m;
 3357                         pa |= PG_MANAGED;
 3358                 }
 3359                 goto validate;
 3360         } 
 3361         /*
 3362          * Mapping has changed, invalidate old range and fall through to
 3363          * handle validating new mapping.
 3364          */
 3365         if (opa) {
 3366                 if (origpte & PG_W)
 3367                         pmap->pm_stats.wired_count--;
 3368                 if (origpte & PG_MANAGED) {
 3369                         om = PHYS_TO_VM_PAGE(opa);
 3370                         pmap_remove_entry(pmap, om, va);
 3371                 }
 3372                 if (mpte != NULL) {
 3373                         mpte->wire_count--;
 3374                         KASSERT(mpte->wire_count > 0,
 3375                             ("pmap_enter: missing reference to page table page,"
 3376                              " va: 0x%x", va));
 3377                 }
 3378         } else
 3379                 pmap->pm_stats.resident_count++;
 3380 
 3381         /*
 3382          * Enter on the PV list if part of our managed memory.
 3383          */
 3384         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 3385                 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
 3386                     ("pmap_enter: managed mapping within the clean submap"));
 3387                 pmap_insert_entry(pmap, va, m);
 3388                 pa |= PG_MANAGED;
 3389         }
 3390 
 3391         /*
 3392          * Increment counters
 3393          */
 3394         if (wired)
 3395                 pmap->pm_stats.wired_count++;
 3396 
 3397 validate:
 3398         /*
 3399          * Now validate mapping with desired protection/wiring.
 3400          */
 3401         newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V);
 3402         if ((prot & VM_PROT_WRITE) != 0) {
 3403                 newpte |= PG_RW;
 3404                 vm_page_flag_set(m, PG_WRITEABLE);
 3405         }
 3406 #ifdef PAE
 3407         if ((prot & VM_PROT_EXECUTE) == 0)
 3408                 newpte |= pg_nx;
 3409 #endif
 3410         if (wired)
 3411                 newpte |= PG_W;
 3412         if (va < VM_MAXUSER_ADDRESS)
 3413                 newpte |= PG_U;
 3414         if (pmap == kernel_pmap)
 3415                 newpte |= pgeflag;
 3416 
 3417         /*
 3418          * if the mapping or permission bits are different, we need
 3419          * to update the pte.
 3420          */
 3421         if ((origpte & ~(PG_M|PG_A)) != newpte) {
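                      /*
                       * PG_A is always preset, and PG_M is preset when the
                       * faulting access was a write, so the MMU need not
                       * set these bits itself on the first access through
                       * the new mapping.
                       */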
 3422                 newpte |= PG_A;
 3423                 if ((access & VM_PROT_WRITE) != 0)
 3424                         newpte |= PG_M;
 3425                 if (origpte & PG_V) {
 3426                         invlva = FALSE;
 3427                         origpte = pte_load_store(pte, newpte);
 3428                         if (origpte & PG_A) {
 3429                                 if (origpte & PG_MANAGED)
 3430                                         vm_page_flag_set(om, PG_REFERENCED);
 3431                                 if (opa != VM_PAGE_TO_PHYS(m))
 3432                                         invlva = TRUE;
 3433 #ifdef PAE
 3434                                 if ((origpte & PG_NX) == 0 &&
 3435                                     (newpte & PG_NX) != 0)
 3436                                         invlva = TRUE;
 3437 #endif
 3438                         }
 3439                         if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 3440                                 if ((origpte & PG_MANAGED) != 0)
 3441                                         vm_page_dirty(om);
 3442                                 if ((prot & VM_PROT_WRITE) == 0)
 3443                                         invlva = TRUE;
 3444                         }
 3445                         if (invlva)
 3446                                 pmap_invalidate_page(pmap, va);
 3447                 } else
 3448                         pte_store(pte, newpte);
 3449         }
 3450 
 3451         /*
 3452          * If both the page table page and the reservation are fully
 3453          * populated, then attempt promotion.
 3454          */
 3455         if ((mpte == NULL || mpte->wire_count == NPTEPG) &&
 3456             pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0)
 3457                 pmap_promote_pde(pmap, pde, va);
 3458 
 3459         sched_unpin();
 3460         vm_page_unlock_queues();
 3461         PMAP_UNLOCK(pmap);
 3462 }
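      /*
       * Example (illustrative): a write fault on an unwired, writeable
       * mapping might be resolved with
       *
       *      pmap_enter(pmap, va, VM_PROT_WRITE, m,
       *          VM_PROT_READ | VM_PROT_WRITE, FALSE);
       *
       * Because "access" includes VM_PROT_WRITE, the PTE is created with
       * PG_M already set (see the "validate:" block above).
       */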
 3463 
 3464 /*
 3465  * Tries to create a 2- or 4MB page mapping.  Returns TRUE if successful and
 3466  * FALSE otherwise.  Fails if (1) a page table page cannot be allocated without
 3467  * blocking, (2) a mapping already exists at the specified virtual address, or
 3468  * (3) a pv entry cannot be allocated without reclaiming another pv entry. 
 3469  */
 3470 static boolean_t
 3471 pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 3472 {
 3473         pd_entry_t *pde, newpde;
 3474 
 3475         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 3476         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 3477         pde = pmap_pde(pmap, va);
 3478         if (*pde != 0) {
 3479                 CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 3480                     " in pmap %p", va, pmap);
 3481                 return (FALSE);
 3482         }
 3483         newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) |
 3484             PG_PS | PG_V;
 3485         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 3486                 newpde |= PG_MANAGED;
 3487 
 3488                 /*
 3489                  * Abort this mapping if its PV entry could not be created.
 3490                  */
 3491                 if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) {
 3492                         CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 3493                             " in pmap %p", va, pmap);
 3494                         return (FALSE);
 3495                 }
 3496         }
 3497 #ifdef PAE
 3498         if ((prot & VM_PROT_EXECUTE) == 0)
 3499                 newpde |= pg_nx;
 3500 #endif
 3501         if (va < VM_MAXUSER_ADDRESS)
 3502                 newpde |= PG_U;
 3503 
 3504         /*
 3505          * Increment counters.
 3506          */
 3507         pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
 3508 
 3509         /*
 3510          * Map the superpage.
 3511          */
 3512         pde_store(pde, newpde);
 3513 
 3514         pmap_pde_mappings++;
 3515         CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx"
 3516             " in pmap %p", va, pmap);
 3517         return (TRUE);
 3518 }
 3519 
 3520 /*
 3521  * Maps a sequence of resident pages belonging to the same object.
 3522  * The sequence begins with the given page m_start.  This page is
 3523  * mapped at the given virtual address start.  Each subsequent page is
 3524  * mapped at a virtual address that is offset from start by the same
 3525  * amount as the page is offset from m_start within the object.  The
 3526  * last page in the sequence is the page with the largest offset from
 3527  * m_start that can be mapped at a virtual address less than the given
 3528  * virtual address end.  Not every virtual page between start and end
 3529  * is mapped; only those for which a resident page exists with the
 3530  * corresponding offset from m_start are mapped.
 3531  */
 3532 void
 3533 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
 3534     vm_page_t m_start, vm_prot_t prot)
 3535 {
 3536         vm_offset_t va;
 3537         vm_page_t m, mpte;
 3538         vm_pindex_t diff, psize;
 3539 
 3540         VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
 3541         psize = atop(end - start);
 3542         mpte = NULL;
 3543         m = m_start;
 3544         PMAP_LOCK(pmap);
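              /*
               * Use a 2/4MB page mapping whenever superpages are enabled,
               * the virtual and physical addresses are superpage aligned,
               * the entire superpage fits below "end", and the page
               * belongs to a fully populated reservation; otherwise fall
               * back to a 4KB mapping.
               */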
 3545         while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
 3546                 va = start + ptoa(diff);
 3547                 if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
 3548                     (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 &&
 3549                     pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 &&
 3550                     pmap_enter_pde(pmap, va, m, prot))
 3551                         m = &m[NBPDR / PAGE_SIZE - 1];
 3552                 else
 3553                         mpte = pmap_enter_quick_locked(pmap, va, m, prot,
 3554                             mpte);
 3555                 m = TAILQ_NEXT(m, listq);
 3556         }
 3557         PMAP_UNLOCK(pmap);
 3558 }
 3559 
 3560 /*
 3561  * This code makes some *MAJOR* assumptions:
 3562  * 1. The pmap is the current pmap and it exists.
 3563  * 2. The mapping is not wired.
 3564  * 3. Read access only.
 3565  * 4. No page table pages are needed.
 3566  * Given these, it is *MUCH* faster than pmap_enter...
 3567  */
 3568 
 3569 void
 3570 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 3571 {
 3572 
 3573         PMAP_LOCK(pmap);
 3574         (void) pmap_enter_quick_locked(pmap, va, m, prot, NULL);
 3575         PMAP_UNLOCK(pmap);
 3576 }
 3577 
 3578 static vm_page_t
 3579 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
 3580     vm_prot_t prot, vm_page_t mpte)
 3581 {
 3582         pt_entry_t *pte;
 3583         vm_paddr_t pa;
 3584         vm_page_t free;
 3585 
 3586         KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 3587             (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
 3588             ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 3589         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 3590         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 3591 
 3592         /*
 3593          * In the case that a page table page is not
 3594          * resident, we are creating it here.
 3595          */
 3596         if (va < VM_MAXUSER_ADDRESS) {
 3597                 unsigned ptepindex;
 3598                 pd_entry_t ptepa;
 3599 
 3600                 /*
 3601                  * Calculate pagetable page index
 3602                  */
 3603                 ptepindex = va >> PDRSHIFT;
 3604                 if (mpte && (mpte->pindex == ptepindex)) {
 3605                         mpte->wire_count++;
 3606                 } else {
 3607                         /*
 3608                          * Get the page directory entry
 3609                          */
 3610                         ptepa = pmap->pm_pdir[ptepindex];
 3611 
 3612                         /*
 3613                          * If the page table page is mapped, we just increment
 3614                          * the hold count, and activate it.
 3615                          */
 3616                         if (ptepa) {
 3617                                 if (ptepa & PG_PS)
 3618                                         return (NULL);
 3619                                 mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME);
 3620                                 mpte->wire_count++;
 3621                         } else {
 3622                                 mpte = _pmap_allocpte(pmap, ptepindex,
 3623                                     M_NOWAIT);
 3624                                 if (mpte == NULL)
 3625                                         return (mpte);
 3626                         }
 3627                 }
 3628         } else {
 3629                 mpte = NULL;
 3630         }
 3631 
 3632         /*
 3633          * This call to vtopte makes the assumption that we are
 3634          * entering the page into the current pmap.  In order to support
 3635          * quick entry into any pmap, one would likely use pmap_pte_quick.
 3636          * But that isn't as quick as vtopte.
 3637          */
 3638         pte = vtopte(va);
 3639         if (*pte) {
 3640                 if (mpte != NULL) {
 3641                         mpte->wire_count--;
 3642                         mpte = NULL;
 3643                 }
 3644                 return (mpte);
 3645         }
 3646 
 3647         /*
 3648          * Enter on the PV list if part of our managed memory.
 3649          */
 3650         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
 3651             !pmap_try_insert_pv_entry(pmap, va, m)) {
 3652                 if (mpte != NULL) {
 3653                         free = NULL;
 3654                         if (pmap_unwire_pte_hold(pmap, mpte, &free)) {
 3655                                 pmap_invalidate_page(pmap, va);
 3656                                 pmap_free_zero_pages(free);
 3657                         }
 3658                         
 3659                         mpte = NULL;
 3660                 }
 3661                 return (mpte);
 3662         }
 3663 
 3664         /*
 3665          * Increment counters
 3666          */
 3667         pmap->pm_stats.resident_count++;
 3668 
 3669         pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
 3670 #ifdef PAE
 3671         if ((prot & VM_PROT_EXECUTE) == 0)
 3672                 pa |= pg_nx;
 3673 #endif
 3674 
 3675         /*
 3676          * Now validate mapping with RO protection
 3677          */
 3678         if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 3679                 pte_store(pte, pa | PG_V | PG_U);
 3680         else
 3681                 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
 3682         return (mpte);
 3683 }
 3684 
 3685 /*
 3686  * Make a temporary mapping for a physical address.  This is only intended
 3687  * to be used for panic dumps.
 3688  */
 3689 void *
 3690 pmap_kenter_temporary(vm_paddr_t pa, int i)
 3691 {
 3692         vm_offset_t va;
 3693 
 3694         va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 3695         pmap_kenter(va, pa);
 3696         invlpg(va);
 3697         return ((void *)crashdumpmap);
 3698 }
 3699 
 3700 /*
 3701  * This code maps large physical mmap regions into the
 3702  * processor address space.  Note that some shortcuts
 3703  * are taken, but the code works.
 3704  */
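      /*
       * Only device and SG objects are handled, and only when PSE is
       * available and both "addr" and "size" are 2/4MB aligned; in all
       * other cases this routine quietly does nothing and the pages are
       * mapped on demand later.
       */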
 3705 void
 3706 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
 3707     vm_pindex_t pindex, vm_size_t size)
 3708 {
 3709         pd_entry_t *pde;
 3710         vm_paddr_t pa, ptepa;
 3711         vm_page_t p;
 3712         int pat_mode;
 3713 
 3714         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 3715         KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 3716             ("pmap_object_init_pt: non-device object"));
 3717         if (pseflag && 
 3718             (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
 3719                 if (!vm_object_populate(object, pindex, pindex + atop(size)))
 3720                         return;
 3721                 p = vm_page_lookup(object, pindex);
 3722                 KASSERT(p->valid == VM_PAGE_BITS_ALL,
 3723                     ("pmap_object_init_pt: invalid page %p", p));
 3724                 pat_mode = p->md.pat_mode;
 3725 
 3726                 /*
 3727                  * Abort the mapping if the first page is not physically
 3728                  * aligned to a 2/4MB page boundary.
 3729                  */
 3730                 ptepa = VM_PAGE_TO_PHYS(p);
 3731                 if (ptepa & (NBPDR - 1))
 3732                         return;
 3733 
 3734                 /*
 3735                  * Skip the first page.  Abort the mapping if the rest of
 3736                  * the pages are not physically contiguous or have differing
 3737                  * memory attributes.
 3738                  */
 3739                 p = TAILQ_NEXT(p, listq);
 3740                 for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
 3741                     pa += PAGE_SIZE) {
 3742                         KASSERT(p->valid == VM_PAGE_BITS_ALL,
 3743                             ("pmap_object_init_pt: invalid page %p", p));
 3744                         if (pa != VM_PAGE_TO_PHYS(p) ||
 3745                             pat_mode != p->md.pat_mode)
 3746                                 return;
 3747                         p = TAILQ_NEXT(p, listq);
 3748                 }
 3749 
 3750                 /*
 3751                  * Map using 2/4MB pages.  Since "ptepa" is 2/4M aligned and
 3752                  * "size" is a multiple of 2/4M, adding the PAT setting to
 3753                  * "pa" will not affect the termination of this loop.
 3754                  */
 3755                 PMAP_LOCK(pmap);
 3756                 for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
 3757                     size; pa += NBPDR) {
 3758                         pde = pmap_pde(pmap, addr);
 3759                         if (*pde == 0) {
 3760                                 pde_store(pde, pa | PG_PS | PG_M | PG_A |
 3761                                     PG_U | PG_RW | PG_V);
 3762                                 pmap->pm_stats.resident_count += NBPDR /
 3763                                     PAGE_SIZE;
 3764                                 pmap_pde_mappings++;
 3765                         }
 3766                         /* Else continue on if the PDE is already valid. */
 3767                         addr += NBPDR;
 3768                 }
 3769                 PMAP_UNLOCK(pmap);
 3770         }
 3771 }
 3772 
 3773 /*
 3774  *      Routine:        pmap_change_wiring
 3775  *      Function:       Change the wiring attribute for a map/virtual-address
 3776  *                      pair.
 3777  *      In/out conditions:
 3778  *                      The mapping must already exist in the pmap.
 3779  */
 3780 void
 3781 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
 3782 {
 3783         pd_entry_t *pde;
 3784         pt_entry_t *pte;
 3785         boolean_t are_queues_locked;
 3786 
 3787         are_queues_locked = FALSE;
 3788 retry:
 3789         PMAP_LOCK(pmap);
 3790         pde = pmap_pde(pmap, va);
 3791         if ((*pde & PG_PS) != 0) {
 3792                 if (!wired != ((*pde & PG_W) == 0)) {
 3793                         if (!are_queues_locked) {
 3794                                 are_queues_locked = TRUE;
 3795                                 if (!mtx_trylock(&vm_page_queue_mtx)) {
 3796                                         PMAP_UNLOCK(pmap);
 3797                                         vm_page_lock_queues();
 3798                                         goto retry;
 3799                                 }
 3800                         }
 3801                         if (!pmap_demote_pde(pmap, pde, va))
 3802                                 panic("pmap_change_wiring: demotion failed");
 3803                 } else
 3804                         goto out;
 3805         }
 3806         pte = pmap_pte(pmap, va);
 3807 
 3808         if (wired && !pmap_pte_w(pte))
 3809                 pmap->pm_stats.wired_count++;
 3810         else if (!wired && pmap_pte_w(pte))
 3811                 pmap->pm_stats.wired_count--;
 3812 
 3813         /*
 3814          * Wiring is not a hardware characteristic so there is no need to
 3815          * invalidate TLB.
 3816          */
 3817         pmap_pte_set_w(pte, wired);
 3818         pmap_pte_release(pte);
 3819 out:
 3820         if (are_queues_locked)
 3821                 vm_page_unlock_queues();
 3822         PMAP_UNLOCK(pmap);
 3823 }
 3824 
 3825 
 3826 
 3827 /*
 3828  *      Copy the range specified by src_addr/len
 3829  *      from the source map to the range dst_addr/len
 3830  *      in the destination map.
 3831  *
 3832  *      This routine is only advisory and need not do anything.
 3833  */
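      /*
       * In practice it pre-copies managed, unwired mappings when an
       * address space is duplicated (e.g., at fork() time), saving the
       * new address space a series of soft faults.
       */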
 3834 
 3835 void
 3836 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 3837     vm_offset_t src_addr)
 3838 {
 3839         vm_page_t   free;
 3840         vm_offset_t addr;
 3841         vm_offset_t end_addr = src_addr + len;
 3842         vm_offset_t pdnxt;
 3843 
 3844         if (dst_addr != src_addr)
 3845                 return;
 3846 
 3847         if (!pmap_is_current(src_pmap))
 3848                 return;
 3849 
 3850         vm_page_lock_queues();
 3851         if (dst_pmap < src_pmap) {
 3852                 PMAP_LOCK(dst_pmap);
 3853                 PMAP_LOCK(src_pmap);
 3854         } else {
 3855                 PMAP_LOCK(src_pmap);
 3856                 PMAP_LOCK(dst_pmap);
 3857         }
 3858         sched_pin();
 3859         for (addr = src_addr; addr < end_addr; addr = pdnxt) {
 3860                 pt_entry_t *src_pte, *dst_pte;
 3861                 vm_page_t dstmpte, srcmpte;
 3862                 pd_entry_t srcptepaddr;
 3863                 unsigned ptepindex;
 3864 
 3865                 KASSERT(addr < UPT_MIN_ADDRESS,
 3866                     ("pmap_copy: invalid to pmap_copy page tables"));
 3867 
 3868                 pdnxt = (addr + NBPDR) & ~PDRMASK;
 3869                 if (pdnxt < addr)
 3870                         pdnxt = end_addr;
 3871                 ptepindex = addr >> PDRSHIFT;
 3872 
 3873                 srcptepaddr = src_pmap->pm_pdir[ptepindex];
 3874                 if (srcptepaddr == 0)
 3875                         continue;
 3876                         
 3877                 if (srcptepaddr & PG_PS) {
 3878                         if (dst_pmap->pm_pdir[ptepindex] == 0 &&
 3879                             ((srcptepaddr & PG_MANAGED) == 0 ||
 3880                             pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr &
 3881                             PG_PS_FRAME))) {
 3882                                 dst_pmap->pm_pdir[ptepindex] = srcptepaddr &
 3883                                     ~PG_W;
 3884                                 dst_pmap->pm_stats.resident_count +=
 3885                                     NBPDR / PAGE_SIZE;
 3886                         }
 3887                         continue;
 3888                 }
 3889 
 3890                 srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
 3891                 KASSERT(srcmpte->wire_count > 0,
 3892                     ("pmap_copy: source page table page is unused"));
 3893 
 3894                 if (pdnxt > end_addr)
 3895                         pdnxt = end_addr;
 3896 
 3897                 src_pte = vtopte(addr);
 3898                 while (addr < pdnxt) {
 3899                         pt_entry_t ptetemp;
 3900                         ptetemp = *src_pte;
 3901                         /*
 3902                          * we only virtual copy managed pages
 3903                          */
 3904                         if ((ptetemp & PG_MANAGED) != 0) {
 3905                                 dstmpte = pmap_allocpte(dst_pmap, addr,
 3906                                     M_NOWAIT);
 3907                                 if (dstmpte == NULL)
 3908                                         goto out;
 3909                                 dst_pte = pmap_pte_quick(dst_pmap, addr);
 3910                                 if (*dst_pte == 0 &&
 3911                                     pmap_try_insert_pv_entry(dst_pmap, addr,
 3912                                     PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) {
 3913                                         /*
 3914                                          * Clear the wired, modified, and
 3915                                          * accessed (referenced) bits
 3916                                          * during the copy.
 3917                                          */
 3918                                         *dst_pte = ptetemp & ~(PG_W | PG_M |
 3919                                             PG_A);
 3920                                         dst_pmap->pm_stats.resident_count++;
 3921                                 } else {
 3922                                         free = NULL;
 3923                                         if (pmap_unwire_pte_hold(dst_pmap,
 3924                                             dstmpte, &free)) {
 3925                                                 pmap_invalidate_page(dst_pmap,
 3926                                                     addr);
 3927                                                 pmap_free_zero_pages(free);
 3928                                         }
 3929                                         goto out;
 3930                                 }
 3931                                 if (dstmpte->wire_count >= srcmpte->wire_count)
 3932                                         break;
 3933                         }
 3934                         addr += PAGE_SIZE;
 3935                         src_pte++;
 3936                 }
 3937         }
 3938 out:
 3939         sched_unpin();
 3940         vm_page_unlock_queues();
 3941         PMAP_UNLOCK(src_pmap);
 3942         PMAP_UNLOCK(dst_pmap);
 3943 }       
 3944 
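      /*
       * pagezero() zeroes a single page using the best method available:
       * an SSE2-based routine when the CPU supports SSE2, the i686
       * zeroing routine on other 686-class CPUs, and plain bzero()
       * otherwise.
       */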
 3945 static __inline void
 3946 pagezero(void *page)
 3947 {
 3948 #if defined(I686_CPU)
 3949         if (cpu_class == CPUCLASS_686) {
 3950 #if defined(CPU_ENABLE_SSE)
 3951                 if (cpu_feature & CPUID_SSE2)
 3952                         sse2_pagezero(page);
 3953                 else
 3954 #endif
 3955                         i686_pagezero(page);
 3956         } else
 3957 #endif
 3958                 bzero(page, PAGE_SIZE);
 3959 }
 3960 
 3961 /*
 3962  *      pmap_zero_page zeros the specified hardware page by mapping 
 3963  *      the page into KVM and using bzero to clear its contents.
 3964  */
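      /*
       * A per-CPU kernel virtual window (CMAP2/CADDR2) is borrowed for
       * the duration; sched_pin() keeps the thread on this CPU so that
       * the window's TLB entry stays coherent.
       */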
 3965 void
 3966 pmap_zero_page(vm_page_t m)
 3967 {
 3968         struct sysmaps *sysmaps;
 3969 
 3970         sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 3971         mtx_lock(&sysmaps->lock);
 3972         if (*sysmaps->CMAP2)
 3973                 panic("pmap_zero_page: CMAP2 busy");
 3974         sched_pin();
 3975         *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M |
 3976             pmap_cache_bits(m->md.pat_mode, 0);
 3977         invlcaddr(sysmaps->CADDR2);
 3978         pagezero(sysmaps->CADDR2);
 3979         *sysmaps->CMAP2 = 0;
 3980         sched_unpin();
 3981         mtx_unlock(&sysmaps->lock);
 3982 }
 3983 
 3984 /*
 3985  *      pmap_zero_page_area zeros the specified hardware page by mapping 
 3986  *      the page into KVM and using bzero to clear its contents.
 3987  *
 3988  *      off and size may not cover an area beyond a single hardware page.
 3989  */
 3990 void
 3991 pmap_zero_page_area(vm_page_t m, int off, int size)
 3992 {
 3993         struct sysmaps *sysmaps;
 3994 
 3995         sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 3996         mtx_lock(&sysmaps->lock);
 3997         if (*sysmaps->CMAP2)
 3998                 panic("pmap_zero_page_area: CMAP2 busy");
 3999         sched_pin();
 4000         *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M |
 4001             pmap_cache_bits(m->md.pat_mode, 0);
 4002         invlcaddr(sysmaps->CADDR2);
 4003         if (off == 0 && size == PAGE_SIZE) 
 4004                 pagezero(sysmaps->CADDR2);
 4005         else
 4006                 bzero((char *)sysmaps->CADDR2 + off, size);
 4007         *sysmaps->CMAP2 = 0;
 4008         sched_unpin();
 4009         mtx_unlock(&sysmaps->lock);
 4010 }
 4011 
 4012 /*
 4013  *      pmap_zero_page_idle zeros the specified hardware page by mapping 
 4014  *      the page into KVM and using bzero to clear its contents.  This
 4015  *      is intended to be called from the vm_pagezero process only and
 4016  *      outside of Giant.
 4017  */
 4018 void
 4019 pmap_zero_page_idle(vm_page_t m)
 4020 {
 4021 
 4022         if (*CMAP3)
 4023                 panic("pmap_zero_page_idle: CMAP3 busy");
 4024         sched_pin();
 4025         *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M |
 4026             pmap_cache_bits(m->md.pat_mode, 0);
 4027         invlcaddr(CADDR3);
 4028         pagezero(CADDR3);
 4029         *CMAP3 = 0;
 4030         sched_unpin();
 4031 }
 4032 
 4033 /*
 4034  *      pmap_copy_page copies the specified (machine independent)
 4035  *      page by mapping the page into virtual memory and using
 4036  *      bcopy to copy the page, one machine dependent page at a
 4037  *      time.
 4038  */
 4039 void
 4040 pmap_copy_page(vm_page_t src, vm_page_t dst)
 4041 {
 4042         struct sysmaps *sysmaps;
 4043 
 4044         sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 4045         mtx_lock(&sysmaps->lock);
 4046         if (*sysmaps->CMAP1)
 4047                 panic("pmap_copy_page: CMAP1 busy");
 4048         if (*sysmaps->CMAP2)
 4049                 panic("pmap_copy_page: CMAP2 busy");
 4050         sched_pin();
 4051         invlpg((u_int)sysmaps->CADDR1);
 4052         invlpg((u_int)sysmaps->CADDR2);
 4053         *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A |
 4054             pmap_cache_bits(src->md.pat_mode, 0);
 4055         *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M |
 4056             pmap_cache_bits(dst->md.pat_mode, 0);
 4057         bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
 4058         *sysmaps->CMAP1 = 0;
 4059         *sysmaps->CMAP2 = 0;
 4060         sched_unpin();
 4061         mtx_unlock(&sysmaps->lock);
 4062 }
 4063 
 4064 /*
 4065  * Returns true if the pmap's pv is one of the first
 4066  * 16 pvs linked to from this page.  This count may
 4067  * be changed upwards or downwards in the future; it
 4068  * is only necessary that true be returned for a small
 4069  * subset of pmaps for proper page aging.
 4070  */
 4071 boolean_t
 4072 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 4073 {
 4074         struct md_page *pvh;
 4075         pv_entry_t pv;
 4076         int loops = 0;
 4077 
 4078         if (m->flags & PG_FICTITIOUS)
 4079                 return (FALSE);
 4080 
 4081         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4082         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 4083                 if (PV_PMAP(pv) == pmap) {
 4084                         return (TRUE);
 4085                 }
 4086                 loops++;
 4087                 if (loops >= 16)
 4088                         break;
 4089         }
 4090         if (loops < 16) {
 4091                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4092                 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 4093                         if (PV_PMAP(pv) == pmap)
 4094                                 return (TRUE);
 4095                         loops++;
 4096                         if (loops >= 16)
 4097                                 break;
 4098                 }
 4099         }
 4100         return (FALSE);
 4101 }
 4102 
 4103 /*
 4104  *      pmap_page_wired_mappings:
 4105  *
 4106  *      Return the number of managed mappings to the given physical page
 4107  *      that are wired.
 4108  */
 4109 int
 4110 pmap_page_wired_mappings(vm_page_t m)
 4111 {
 4112         int count;
 4113 
 4114         count = 0;
 4115         if ((m->flags & PG_FICTITIOUS) != 0)
 4116                 return (count);
 4117         count = pmap_pvh_wired_mappings(&m->md, count);
 4118         return (pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count));
 4119 }
 4120 
 4121 /*
 4122  *      pmap_pvh_wired_mappings:
 4123  *
 4124  *      Return the updated number "count" of managed mappings that are wired.
 4125  */
 4126 static int
 4127 pmap_pvh_wired_mappings(struct md_page *pvh, int count)
 4128 {
 4129         pmap_t pmap;
 4130         pt_entry_t *pte;
 4131         pv_entry_t pv;
 4132 
 4133         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4134         sched_pin();
 4135         TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 4136                 pmap = PV_PMAP(pv);
 4137                 PMAP_LOCK(pmap);
 4138                 pte = pmap_pte_quick(pmap, pv->pv_va);
 4139                 if ((*pte & PG_W) != 0)
 4140                         count++;
 4141                 PMAP_UNLOCK(pmap);
 4142         }
 4143         sched_unpin();
 4144         return (count);
 4145 }
 4146 
 4147 /*
 4148  * Returns TRUE if the given page is mapped individually or as part of
 4149  * a 4mpage.  Otherwise, returns FALSE.
 4150  */
 4151 boolean_t
 4152 pmap_page_is_mapped(vm_page_t m)
 4153 {
 4154         struct md_page *pvh;
 4155 
 4156         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0)
 4157                 return (FALSE);
 4158         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4159         if (TAILQ_EMPTY(&m->md.pv_list)) {
 4160                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4161                 return (!TAILQ_EMPTY(&pvh->pv_list));
 4162         } else
 4163                 return (TRUE);
 4164 }
 4165 
 4166 /*
 4167 /*
 4168  * Remove all pages from the specified address space;
 4169  * this aids process exit speed.  Also, this code is
 4170  * special-cased for the current process only, but can
 4171  * have the more generic (and slightly slower) mode
 4172  * enabled.  This is much faster than pmap_remove in
 4173  * the case of running down an entire address space.
 4174  */
 4174 void
 4175 pmap_remove_pages(pmap_t pmap)
 4176 {
 4177         pt_entry_t *pte, tpte;
 4178         vm_page_t free = NULL;
 4179         vm_page_t m, mpte, mt;
 4180         pv_entry_t pv;
 4181         struct md_page *pvh;
 4182         struct pv_chunk *pc, *npc;
 4183         int field, idx;
 4184         int32_t bit;
 4185         uint32_t inuse, bitmask;
 4186         int allfree;
 4187 
 4188         if (pmap != PCPU_GET(curpmap)) {
 4189                 printf("warning: pmap_remove_pages called with non-current pmap\n");
 4190                 return;
 4191         }
 4192         vm_page_lock_queues();
 4193         PMAP_LOCK(pmap);
 4194         sched_pin();
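              /*
               * Rather than scanning the page tables, walk the pmap's
               * pv-entry chunks: each allocated pv entry identifies one
               * managed mapping to tear down.  Wired mappings are skipped.
               */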
 4195         TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
 4196                 allfree = 1;
 4197                 for (field = 0; field < _NPCM; field++) {
 4198                         inuse = (~(pc->pc_map[field])) & pc_freemask[field];
 4199                         while (inuse != 0) {
 4200                                 bit = bsfl(inuse);
 4201                                 bitmask = 1UL << bit;
 4202                                 idx = field * 32 + bit;
 4203                                 pv = &pc->pc_pventry[idx];
 4204                                 inuse &= ~bitmask;
 4205 
 4206                                 pte = pmap_pde(pmap, pv->pv_va);
 4207                                 tpte = *pte;
 4208                                 if ((tpte & PG_PS) == 0) {
 4209                                         pte = vtopte(pv->pv_va);
 4210                                         tpte = *pte & ~PG_PTE_PAT;
 4211                                 }
 4212 
 4213                                 if (tpte == 0) {
 4214                                         printf(
 4215                                             "TPTE at %p  IS ZERO @ VA %08x\n",
 4216                                             pte, pv->pv_va);
 4217                                         panic("bad pte");
 4218                                 }
 4219 
 4220 /*
 4221  * We cannot remove wired pages from a process' mapping at this time
 4222  */
 4223                                 if (tpte & PG_W) {
 4224                                         allfree = 0;
 4225                                         continue;
 4226                                 }
 4227 
 4228                                 m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
 4229                                 KASSERT(m->phys_addr == (tpte & PG_FRAME),
 4230                                     ("vm_page_t %p phys_addr mismatch %016jx %016jx",
 4231                                     m, (uintmax_t)m->phys_addr,
 4232                                     (uintmax_t)tpte));
 4233 
 4234                                 KASSERT(m < &vm_page_array[vm_page_array_size],
 4235                                         ("pmap_remove_pages: bad tpte %#jx",
 4236                                         (uintmax_t)tpte));
 4237 
 4238                                 pte_clear(pte);
 4239 
 4240                                 /*
 4241                                  * Update the vm_page_t clean/reference bits.
 4242                                  */
 4243                                 if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 4244                                         if ((tpte & PG_PS) != 0) {
 4245                                                 for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
 4246                                                         vm_page_dirty(mt);
 4247                                         } else
 4248                                                 vm_page_dirty(m);
 4249                                 }
 4250 
 4251                                 /* Mark free */
 4252                                 PV_STAT(pv_entry_frees++);
 4253                                 PV_STAT(pv_entry_spare++);
 4254                                 pv_entry_count--;
 4255                                 pc->pc_map[field] |= bitmask;
 4256                                 if ((tpte & PG_PS) != 0) {
 4257                                         pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 4258                                         pvh = pa_to_pvh(tpte & PG_PS_FRAME);
 4259                                         TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
 4260                                         if (TAILQ_EMPTY(&pvh->pv_list)) {
 4261                                                 for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++)
 4262                                                         if (TAILQ_EMPTY(&mt->md.pv_list))
 4263                                                                 vm_page_flag_clear(mt, PG_WRITEABLE);
 4264                                         }
 4265                                         mpte = pmap_lookup_pt_page(pmap, pv->pv_va);
 4266                                         if (mpte != NULL) {
 4267                                                 pmap_remove_pt_page(pmap, mpte);
 4268                                                 pmap->pm_stats.resident_count--;
 4269                                                 KASSERT(mpte->wire_count == NPTEPG,
 4270                                                     ("pmap_remove_pages: pte page wire count error"));
 4271                                                 mpte->wire_count = 0;
 4272                                                 pmap_add_delayed_free_list(mpte, &free, FALSE);
 4273                                                 atomic_subtract_int(&cnt.v_wire_count, 1);
 4274                                         }
 4275                                 } else {
 4276                                         pmap->pm_stats.resident_count--;
 4277                                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 4278                                         if (TAILQ_EMPTY(&m->md.pv_list)) {
 4279                                                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4280                                                 if (TAILQ_EMPTY(&pvh->pv_list))
 4281                                                         vm_page_flag_clear(m, PG_WRITEABLE);
 4282                                         }
 4283                                         pmap_unuse_pt(pmap, pv->pv_va, &free);
 4284                                 }
 4285                         }
 4286                 }
 4287                 if (allfree) {
 4288                         PV_STAT(pv_entry_spare -= _NPCPV);
 4289                         PV_STAT(pc_chunk_count--);
 4290                         PV_STAT(pc_chunk_frees++);
 4291                         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 4292                         m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
 4293                         pmap_qremove((vm_offset_t)pc, 1);
 4294                         vm_page_unwire(m, 0);
 4295                         vm_page_free(m);
 4296                         pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
 4297                 }
 4298         }
 4299         sched_unpin();
 4300         pmap_invalidate_all(pmap);
 4301         vm_page_unlock_queues();
 4302         PMAP_UNLOCK(pmap);
 4303         pmap_free_zero_pages(free);
 4304 }
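/*
 * Editor's sketch (not part of pmap.c): a standalone illustration of the
 * pv-chunk bitmap walk used by pmap_remove_pages() above.  The names
 * chunk_map, freemask and NFIELDS are invented stand-ins for pc->pc_map[],
 * pc_freemask[] and _NPCM, and __builtin_ctz() plays the role of bsfl().
 * A clear bit in chunk_map marks an allocated pv-entry slot.
 */
#include <stdint.h>
#include <stdio.h>

#define NFIELDS 2

int
main(void)
{
        static const uint32_t freemask[NFIELDS] = { 0xffffffff, 0x000007ff };
        uint32_t chunk_map[NFIELDS] = { 0xfffffff5, 0x000007ff };
        uint32_t inuse, bitmask;
        int field, bit, idx;

        for (field = 0; field < NFIELDS; field++) {
                /* A clear map bit means the slot is in use. */
                inuse = ~chunk_map[field] & freemask[field];
                while (inuse != 0) {
                        bit = __builtin_ctz(inuse);     /* lowest set bit */
                        bitmask = 1u << bit;
                        idx = field * 32 + bit;
                        printf("slot %d is allocated\n", idx);  /* 1 and 3 */
                        inuse &= ~bitmask;              /* consume this bit */
                }
        }
        return (0);
}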
 4305 
 4306 /*
 4307  *      pmap_is_modified:
 4308  *
 4309  *      Return whether or not the specified physical page was modified
 4310  *      in any physical maps.
 4311  */
 4312 boolean_t
 4313 pmap_is_modified(vm_page_t m)
 4314 {
 4315 
 4316         if (m->flags & PG_FICTITIOUS)
 4317                 return (FALSE);
 4318         if (pmap_is_modified_pvh(&m->md))
 4319                 return (TRUE);
 4320         return (pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
 4321 }
 4322 
 4323 /*
 4324  * Returns TRUE if any of the given mappings were used to modify
 4325  * physical memory.  Otherwise, returns FALSE.  Both 4KB page and
 4326  * 2/4MB page mappings are supported.
 4327  */
 4328 static boolean_t
 4329 pmap_is_modified_pvh(struct md_page *pvh)
 4330 {
 4331         pv_entry_t pv;
 4332         pt_entry_t *pte;
 4333         pmap_t pmap;
 4334         boolean_t rv;
 4335 
 4336         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4337         rv = FALSE;
 4338         sched_pin();
 4339         TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
 4340                 pmap = PV_PMAP(pv);
 4341                 PMAP_LOCK(pmap);
 4342                 pte = pmap_pte_quick(pmap, pv->pv_va);
 4343                 rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW);
 4344                 PMAP_UNLOCK(pmap);
 4345                 if (rv)
 4346                         break;
 4347         }
 4348         sched_unpin();
 4349         return (rv);
 4350 }
 4351 
 4352 /*
 4353  *      pmap_is_prefaultable:
 4354  *
 4355  *      Return whether or not the specified virtual address is eligible
 4356  *      for prefault.
 4357  */
 4358 boolean_t
 4359 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 4360 {
 4361         pd_entry_t *pde;
 4362         pt_entry_t *pte;
 4363         boolean_t rv;
 4364 
 4365         rv = FALSE;
 4366         PMAP_LOCK(pmap);
 4367         pde = pmap_pde(pmap, addr);
 4368         if (*pde != 0 && (*pde & PG_PS) == 0) {
 4369                 pte = vtopte(addr);
 4370                 rv = *pte == 0;
 4371         }
 4372         PMAP_UNLOCK(pmap);
 4373         return (rv);
 4374 }
 4375 
 4376 /*
 4377  * Clear the write and modified bits in each of the given page's mappings.
 4378  */
 4379 void
 4380 pmap_remove_write(vm_page_t m)
 4381 {
 4382         struct md_page *pvh;
 4383         pv_entry_t next_pv, pv;
 4384         pmap_t pmap;
 4385         pd_entry_t *pde;
 4386         pt_entry_t oldpte, *pte;
 4387         vm_offset_t va;
 4388 
 4389         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4390         if ((m->flags & PG_FICTITIOUS) != 0 ||
 4391             (m->flags & PG_WRITEABLE) == 0)
 4392                 return;
 4393         sched_pin();
 4394         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4395         TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
 4396                 va = pv->pv_va;
 4397                 pmap = PV_PMAP(pv);
 4398                 PMAP_LOCK(pmap);
 4399                 pde = pmap_pde(pmap, va);
 4400                 if ((*pde & PG_RW) != 0)
 4401                         (void)pmap_demote_pde(pmap, pde, va);
 4402                 PMAP_UNLOCK(pmap);
 4403         }
 4404         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 4405                 pmap = PV_PMAP(pv);
 4406                 PMAP_LOCK(pmap);
 4407                 pde = pmap_pde(pmap, pv->pv_va);
 4408                 KASSERT((*pde & PG_PS) == 0, ("pmap_clear_write: found"
 4409                     " a 4mpage in page %p's pv list", m));
 4410                 pte = pmap_pte_quick(pmap, pv->pv_va);
 4411 retry:
 4412                 oldpte = *pte;
 4413                 if ((oldpte & PG_RW) != 0) {
 4414                         /*
 4415                          * Regardless of whether a pte is 32 or 64 bits
 4416                          * in size, PG_RW and PG_M are among the least
 4417                          * significant 32 bits.
 4418                          */
 4419                         if (!atomic_cmpset_int((u_int *)pte, oldpte,
 4420                             oldpte & ~(PG_RW | PG_M)))
 4421                                 goto retry;
 4422                         if ((oldpte & PG_M) != 0)
 4423                                 vm_page_dirty(m);
 4424                         pmap_invalidate_page(pmap, pv->pv_va);
 4425                 }
 4426                 PMAP_UNLOCK(pmap);
 4427         }
 4428         vm_page_flag_clear(m, PG_WRITEABLE);
 4429         sched_unpin();
 4430 }
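/*
 * Editor's sketch (not part of pmap.c): the compare-and-set retry idiom that
 * pmap_remove_write() uses above to atomically clear PG_RW and PG_M.  C11
 * atomics stand in for the kernel's atomic_cmpset_int(); EX_RW, EX_M and the
 * initial value are invented for the example.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define EX_RW   0x002u
#define EX_M    0x040u

int
main(void)
{
        _Atomic uint32_t pte = 0x00400067;      /* pretend PTE, RW and M set */
        uint32_t old;

        old = atomic_load(&pte);
        while ((old & EX_RW) != 0 &&
            !atomic_compare_exchange_weak(&pte, &old, old & ~(EX_RW | EX_M)))
                ;       /* "old" was refreshed by the failed exchange; retry */

        /* Prints "pte is now 0x400025". */
        printf("pte is now 0x%x\n", (unsigned)atomic_load(&pte));
        return (0);
}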
 4431 
 4432 /*
 4433  *      pmap_ts_referenced:
 4434  *
 4435  *      Return a count of reference bits for a page, clearing those bits.
 4436  *      It is not necessary for every reference bit to be cleared, but it
 4437  *      is necessary that 0 only be returned when there are truly no
 4438  *      reference bits set.
 4439  *
 4440  *      XXX: The exact number of bits to check and clear is a matter that
 4441  *      should be tested and standardized at some point in the future for
 4442  *      optimal aging of shared pages.
 4443  */
 4444 int
 4445 pmap_ts_referenced(vm_page_t m)
 4446 {
 4447         struct md_page *pvh;
 4448         pv_entry_t pv, pvf, pvn;
 4449         pmap_t pmap;
 4450         pd_entry_t oldpde, *pde;
 4451         pt_entry_t *pte;
 4452         vm_offset_t va;
 4453         int rtval = 0;
 4454 
 4455         if (m->flags & PG_FICTITIOUS)
 4456                 return (rtval);
 4457         sched_pin();
 4458         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4459         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4460         TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) {
 4461                 va = pv->pv_va;
 4462                 pmap = PV_PMAP(pv);
 4463                 PMAP_LOCK(pmap);
 4464                 pde = pmap_pde(pmap, va);
 4465                 oldpde = *pde;
 4466                 if ((oldpde & PG_A) != 0) {
 4467                         if (pmap_demote_pde(pmap, pde, va)) {
 4468                                 if ((oldpde & PG_W) == 0) {
 4469                                         /*
 4470                                          * Remove the mapping to a single page
 4471                                          * so that a subsequent access may
 4472                                          * repromote.  Since the underlying
 4473                                          * page table page is fully populated,
 4474                                          * this removal never frees a page
 4475                                          * table page.
 4476                                          */
 4477                                         va += VM_PAGE_TO_PHYS(m) - (oldpde &
 4478                                             PG_PS_FRAME);
 4479                                         pmap_remove_page(pmap, va, NULL);
 4480                                         rtval++;
 4481                                         if (rtval > 4) {
 4482                                                 PMAP_UNLOCK(pmap);
 4483                                                 goto out;
 4484                                         }
 4485                                 }
 4486                         }
 4487                 }
 4488                 PMAP_UNLOCK(pmap);
 4489         }
 4490         if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 4491                 pvf = pv;
 4492                 do {
 4493                         pvn = TAILQ_NEXT(pv, pv_list);
 4494                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 4495                         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 4496                         pmap = PV_PMAP(pv);
 4497                         PMAP_LOCK(pmap);
 4498                         pde = pmap_pde(pmap, pv->pv_va);
 4499                         KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:"
 4500                             " found a 4mpage in page %p's pv list", m));
 4501                         pte = pmap_pte_quick(pmap, pv->pv_va);
 4502                         if ((*pte & PG_A) != 0) {
 4503                                 atomic_clear_int((u_int *)pte, PG_A);
 4504                                 pmap_invalidate_page(pmap, pv->pv_va);
 4505                                 rtval++;
 4506                                 if (rtval > 4)
 4507                                         pvn = NULL;
 4508                         }
 4509                         PMAP_UNLOCK(pmap);
 4510                 } while ((pv = pvn) != NULL && pv != pvf);
 4511         }
 4512 out:
 4513         sched_unpin();
 4514         return (rtval);
 4515 }
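/*
 * Editor's sketch (not part of pmap.c): the list-rotation idiom from the
 * second loop of pmap_ts_referenced() above.  Each visited entry is moved to
 * the tail so that the next scan of the same list starts with the entries
 * examined least recently, while the saved "first" pointer guarantees every
 * entry is still visited exactly once.  Assumes a BSD-style <sys/queue.h>;
 * struct ent and its fields are invented for the example.
 */
#include <sys/queue.h>
#include <stdio.h>

struct ent {
        int id;
        TAILQ_ENTRY(ent) link;
};
TAILQ_HEAD(entlist, ent);

int
main(void)
{
        struct entlist head = TAILQ_HEAD_INITIALIZER(head);
        struct ent a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
        struct ent *e, *first, *next;

        TAILQ_INSERT_TAIL(&head, &a, link);
        TAILQ_INSERT_TAIL(&head, &b, link);
        TAILQ_INSERT_TAIL(&head, &c, link);

        if ((e = TAILQ_FIRST(&head)) != NULL) {
                first = e;
                do {
                        next = TAILQ_NEXT(e, link);
                        TAILQ_REMOVE(&head, e, link);
                        TAILQ_INSERT_TAIL(&head, e, link);
                        printf("visited %d\n", e->id);  /* 1, 2, 3 */
                } while ((e = next) != NULL && e != first);
        }
        return (0);
}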
 4516 
 4517 /*
 4518  *      Clear the modify bits on the specified physical page.
 4519  */
 4520 void
 4521 pmap_clear_modify(vm_page_t m)
 4522 {
 4523         struct md_page *pvh;
 4524         pv_entry_t next_pv, pv;
 4525         pmap_t pmap;
 4526         pd_entry_t oldpde, *pde;
 4527         pt_entry_t oldpte, *pte;
 4528         vm_offset_t va;
 4529 
 4530         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4531         if ((m->flags & PG_FICTITIOUS) != 0)
 4532                 return;
 4533         sched_pin();
 4534         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4535         TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
 4536                 va = pv->pv_va;
 4537                 pmap = PV_PMAP(pv);
 4538                 PMAP_LOCK(pmap);
 4539                 pde = pmap_pde(pmap, va);
 4540                 oldpde = *pde;
 4541                 if ((oldpde & PG_RW) != 0) {
 4542                         if (pmap_demote_pde(pmap, pde, va)) {
 4543                                 if ((oldpde & PG_W) == 0) {
 4544                                         /*
 4545                                          * Write protect the mapping to a
 4546                                          * single page so that a subsequent
 4547                                          * write access may repromote.
 4548                                          */
 4549                                         va += VM_PAGE_TO_PHYS(m) - (oldpde &
 4550                                             PG_PS_FRAME);
 4551                                         pte = pmap_pte_quick(pmap, va);
 4552                                         oldpte = *pte;
 4553                                         if ((oldpte & PG_V) != 0) {
 4554                                                 /*
 4555                                                  * Regardless of whether a pte is 32 or 64 bits
 4556                                                  * in size, PG_RW and PG_M are among the least
 4557                                                  * significant 32 bits.
 4558                                                  */
 4559                                                 while (!atomic_cmpset_int((u_int *)pte,
 4560                                                     oldpte,
 4561                                                     oldpte & ~(PG_M | PG_RW)))
 4562                                                         oldpte = *pte;
 4563                                                 vm_page_dirty(m);
 4564                                                 pmap_invalidate_page(pmap, va);
 4565                                         }
 4566                                 }
 4567                         }
 4568                 }
 4569                 PMAP_UNLOCK(pmap);
 4570         }
 4571         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 4572                 pmap = PV_PMAP(pv);
 4573                 PMAP_LOCK(pmap);
 4574                 pde = pmap_pde(pmap, pv->pv_va);
 4575                 KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found"
 4576                     " a 4mpage in page %p's pv list", m));
 4577                 pte = pmap_pte_quick(pmap, pv->pv_va);
 4578                 if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 4579                         /*
 4580                          * Regardless of whether a pte is 32 or 64 bits
 4581                          * in size, PG_M is among the least significant
 4582                          * 32 bits. 
 4583                          */
 4584                         atomic_clear_int((u_int *)pte, PG_M);
 4585                         pmap_invalidate_page(pmap, pv->pv_va);
 4586                 }
 4587                 PMAP_UNLOCK(pmap);
 4588         }
 4589         sched_unpin();
 4590 }
 4591 
 4592 /*
 4593  *      pmap_clear_reference:
 4594  *
 4595  *      Clear the reference bit on the specified physical page.
 4596  */
 4597 void
 4598 pmap_clear_reference(vm_page_t m)
 4599 {
 4600         struct md_page *pvh;
 4601         pv_entry_t next_pv, pv;
 4602         pmap_t pmap;
 4603         pd_entry_t oldpde, *pde;
 4604         pt_entry_t *pte;
 4605         vm_offset_t va;
 4606 
 4607         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 4608         if ((m->flags & PG_FICTITIOUS) != 0)
 4609                 return;
 4610         sched_pin();
 4611         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4612         TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
 4613                 va = pv->pv_va;
 4614                 pmap = PV_PMAP(pv);
 4615                 PMAP_LOCK(pmap);
 4616                 pde = pmap_pde(pmap, va);
 4617                 oldpde = *pde;
 4618                 if ((oldpde & PG_A) != 0) {
 4619                         if (pmap_demote_pde(pmap, pde, va)) {
 4620                                 /*
 4621                                  * Remove the mapping to a single page so
 4622                                  * that a subsequent access may repromote.
 4623                                  * Since the underlying page table page is
 4624                                  * fully populated, this removal never frees
 4625                                  * a page table page.
 4626                                  */
 4627                                 va += VM_PAGE_TO_PHYS(m) - (oldpde &
 4628                                     PG_PS_FRAME);
 4629                                 pmap_remove_page(pmap, va, NULL);
 4630                         }
 4631                 }
 4632                 PMAP_UNLOCK(pmap);
 4633         }
 4634         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 4635                 pmap = PV_PMAP(pv);
 4636                 PMAP_LOCK(pmap);
 4637                 pde = pmap_pde(pmap, pv->pv_va);
 4638                 KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found"
 4639                     " a 4mpage in page %p's pv list", m));
 4640                 pte = pmap_pte_quick(pmap, pv->pv_va);
 4641                 if ((*pte & PG_A) != 0) {
 4642                         /*
 4643                          * Regardless of whether a pte is 32 or 64 bits
 4644                          * in size, PG_A is among the least significant
 4645                          * 32 bits. 
 4646                          */
 4647                         atomic_clear_int((u_int *)pte, PG_A);
 4648                         pmap_invalidate_page(pmap, pv->pv_va);
 4649                 }
 4650                 PMAP_UNLOCK(pmap);
 4651         }
 4652         sched_unpin();
 4653 }
 4654 
 4655 /*
 4656  * Miscellaneous support routines follow
 4657  */
 4658 
 4659 /* Adjust the cache mode for a 4KB page mapped via a PTE. */
 4660 static __inline void
 4661 pmap_pte_attr(pt_entry_t *pte, int cache_bits)
 4662 {
 4663         u_int opte, npte;
 4664 
 4665         /*
 4666          * The cache mode bits are all in the low 32-bits of the
 4667          * PTE, so we can just spin on updating the low 32-bits.
 4668          */
 4669         do {
 4670                 opte = *(u_int *)pte;
 4671                 npte = opte & ~PG_PTE_CACHE;
 4672                 npte |= cache_bits;
 4673         } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte));
 4674 }
 4675 
 4676 /* Adjust the cache mode for a 2/4MB page mapped via a PDE. */
 4677 static __inline void
 4678 pmap_pde_attr(pd_entry_t *pde, int cache_bits)
 4679 {
 4680         u_int opde, npde;
 4681 
 4682         /*
 4683          * The cache mode bits are all in the low 32-bits of the
 4684          * PDE, so we can just spin on updating the low 32-bits.
 4685          */
 4686         do {
 4687                 opde = *(u_int *)pde;
 4688                 npde = opde & ~PG_PDE_CACHE;
 4689                 npde |= cache_bits;
 4690         } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde));
 4691 }
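/*
 * Editor's sketch (not part of pmap.c): the read-modify-write loop used by
 * pmap_pte_attr() and pmap_pde_attr() above.  A fresh snapshot is taken on
 * each pass, the new value is recomputed from it, and the compare-and-set is
 * attempted only when the value would actually change.  C11 atomics stand in
 * for atomic_cmpset_int(); EX_CACHE_MASK and the entry value are invented.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define EX_CACHE_MASK   0x18u   /* pretend cache-control bits */

static void
set_cache_bits(_Atomic uint32_t *entry, uint32_t cache_bits)
{
        uint32_t oe, ne;

        do {
                oe = atomic_load(entry);
                ne = (oe & ~EX_CACHE_MASK) | cache_bits;
        } while (ne != oe &&
            !atomic_compare_exchange_strong(entry, &oe, ne));
}

int
main(void)
{
        _Atomic uint32_t entry = 0x00400067;

        set_cache_bits(&entry, 0x08);
        /* Prints "entry is now 0x40006f". */
        printf("entry is now 0x%x\n", (unsigned)atomic_load(&entry));
        return (0);
}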
 4692 
 4693 /*
 4694  * Map a set of physical memory pages into the kernel virtual
 4695  * address space. Return a pointer to where it is mapped. This
 4696  * routine is intended to be used for mapping device memory,
 4697  * NOT real memory.
 4698  */
 4699 void *
 4700 pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
 4701 {
 4702         vm_offset_t va, offset;
 4703         vm_size_t tmpsize;
 4704 
 4705         offset = pa & PAGE_MASK;
 4706         size = roundup(offset + size, PAGE_SIZE);
 4707         pa = pa & PG_FRAME;
 4708 
 4709         if (pa < KERNLOAD && pa + size <= KERNLOAD)
 4710                 va = KERNBASE + pa;
 4711         else
 4712                 va = kmem_alloc_nofault(kernel_map, size);
 4713         if (!va)
 4714                 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 4715 
 4716         for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
 4717                 pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
 4718         pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
 4719         pmap_invalidate_cache_range(va, va + size);
 4720         return ((void *)(va + offset));
 4721 }
 4722 
 4723 void *
 4724 pmap_mapdev(vm_paddr_t pa, vm_size_t size)
 4725 {
 4726 
 4727         return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE));
 4728 }
 4729 
 4730 void *
 4731 pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 4732 {
 4733 
 4734         return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK));
 4735 }
 4736 
 4737 void
 4738 pmap_unmapdev(vm_offset_t va, vm_size_t size)
 4739 {
 4740         vm_offset_t base, offset, tmpva;
 4741 
 4742         if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
 4743                 return;
 4744         base = trunc_page(va);
 4745         offset = va & PAGE_MASK;
 4746         size = roundup(offset + size, PAGE_SIZE);
 4747         for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
 4748                 pmap_kremove(tmpva);
 4749         pmap_invalidate_range(kernel_pmap, va, tmpva);
 4750         kmem_free(kernel_map, base, size);
 4751 }
 4752 
 4753 /*
 4754  * Sets the memory attribute for the specified page.
 4755  */
 4756 void
 4757 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 4758 {
 4759         struct sysmaps *sysmaps;
 4760         vm_offset_t sva, eva;
 4761 
 4762         m->md.pat_mode = ma;
 4763         if ((m->flags & PG_FICTITIOUS) != 0)
 4764                 return;
 4765 
 4766         /*
 4767          * If "m" is a normal page, flush it from the cache.
 4768          * See pmap_invalidate_cache_range().
 4769          *
 4770          * First, try to find an existing mapping of the page by sf
 4771          * buffer. sf_buf_invalidate_cache() modifies mapping and
 4772          * flushes the cache.
 4773          */    
 4774         if (sf_buf_invalidate_cache(m))
 4775                 return;
 4776 
 4777         /*
 4778          * If page is not mapped by sf buffer, but CPU does not
 4779          * support self snoop, map the page transient and do
 4780          * invalidation. In the worst case, whole cache is flushed by
 4781          * pmap_invalidate_cache_range().
 4782          */
 4783         if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) {
 4784                 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 4785                 mtx_lock(&sysmaps->lock);
 4786                 if (*sysmaps->CMAP2)
 4787                         panic("pmap_page_set_memattr: CMAP2 busy");
 4788                 sched_pin();
 4789                 *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) |
 4790                     PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0);
 4791                 invlcaddr(sysmaps->CADDR2);
 4792                 sva = (vm_offset_t)sysmaps->CADDR2;
 4793                 eva = sva + PAGE_SIZE;
 4794         } else
 4795                 sva = eva = 0; /* gcc */
 4796         pmap_invalidate_cache_range(sva, eva);
 4797         if (sva != 0) {
 4798                 *sysmaps->CMAP2 = 0;
 4799                 sched_unpin();
 4800                 mtx_unlock(&sysmaps->lock);
 4801         }
 4802 }
 4803 
 4804 /*
 4805  * Changes the specified virtual address range's memory type to that given by
 4806  * the parameter "mode".  The specified virtual address range must be
 4807  * completely contained within the kernel map.
 4808  *
 4809  * Returns zero if the change completed successfully, and either EINVAL or
 4810  * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
 4811  * of the virtual address range was not mapped, and ENOMEM is returned if
 4812  * there was insufficient memory available to complete the change.
 4813  */
 4814 int
 4815 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
 4816 {
 4817         vm_offset_t base, offset, tmpva;
 4818         pd_entry_t *pde;
 4819         pt_entry_t *pte;
 4820         int cache_bits_pte, cache_bits_pde;
 4821         boolean_t changed;
 4822 
 4823         base = trunc_page(va);
 4824         offset = va & PAGE_MASK;
 4825         size = roundup(offset + size, PAGE_SIZE);
 4826 
 4827         /*
 4828          * Only supported on kernel virtual addresses above the recursive map.
 4829          */
 4830         if (base < VM_MIN_KERNEL_ADDRESS)
 4831                 return (EINVAL);
 4832 
 4833         cache_bits_pde = pmap_cache_bits(mode, 1);
 4834         cache_bits_pte = pmap_cache_bits(mode, 0);
 4835         changed = FALSE;
 4836 
 4837         /*
 4838          * Pages that aren't mapped aren't supported.  Also break down
 4839          * 2/4MB pages into 4KB pages if required.
 4840          */
 4841         PMAP_LOCK(kernel_pmap);
 4842         for (tmpva = base; tmpva < base + size; ) {
 4843                 pde = pmap_pde(kernel_pmap, tmpva);
 4844                 if (*pde == 0) {
 4845                         PMAP_UNLOCK(kernel_pmap);
 4846                         return (EINVAL);
 4847                 }
 4848                 if (*pde & PG_PS) {
 4849                         /*
 4850                          * If the current 2/4MB page already has
 4851                          * the required memory type, then we need not
 4852                          * demote this page.  Just increment tmpva to
 4853                          * the next 2/4MB page frame.
 4854                          */
 4855                         if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
 4856                                 tmpva = trunc_4mpage(tmpva) + NBPDR;
 4857                                 continue;
 4858                         }
 4859 
 4860                         /*
 4861                          * If the current offset aligns with a 2/4MB
 4862                          * page frame and there is at least 2/4MB left
 4863                          * within the range, then we need not break
 4864                          * down this page into 4KB pages.
 4865                          */
 4866                         if ((tmpva & PDRMASK) == 0 &&
 4867                             tmpva + PDRMASK < base + size) {
 4868                                 tmpva += NBPDR;
 4869                                 continue;
 4870                         }
 4871                         if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) {
 4872                                 PMAP_UNLOCK(kernel_pmap);
 4873                                 return (ENOMEM);
 4874                         }
 4875                 }
 4876                 pte = vtopte(tmpva);
 4877                 if (*pte == 0) {
 4878                         PMAP_UNLOCK(kernel_pmap);
 4879                         return (EINVAL);
 4880                 }
 4881                 tmpva += PAGE_SIZE;
 4882         }
 4883         PMAP_UNLOCK(kernel_pmap);
 4884 
 4885         /*
 4886          * Ok, all the pages exist, so run through them updating their
 4887          * cache mode if required.
 4888          */
 4889         for (tmpva = base; tmpva < base + size; ) {
 4890                 pde = pmap_pde(kernel_pmap, tmpva);
 4891                 if (*pde & PG_PS) {
 4892                         if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
 4893                                 pmap_pde_attr(pde, cache_bits_pde);
 4894                                 changed = TRUE;
 4895                         }
 4896                         tmpva = trunc_4mpage(tmpva) + NBPDR;
 4897                 } else {
 4898                         pte = vtopte(tmpva);
 4899                         if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
 4900                                 pmap_pte_attr(pte, cache_bits_pte);
 4901                                 changed = TRUE;
 4902                         }
 4903                         tmpva += PAGE_SIZE;
 4904                 }
 4905         }
 4906 
 4907         /*
 4908          * Flush CPU caches to make sure any data isn't cached that
 4909          * shouldn't be, etc.
 4910          */
 4911         if (changed) {
 4912                 pmap_invalidate_range(kernel_pmap, base, tmpva);
 4913                 pmap_invalidate_cache_range(base, tmpva);
 4914         }
 4915         return (0);
 4916 }
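/*
 * Editor's sketch (not part of pmap.c): the page-rounding arithmetic shared
 * by pmap_mapdev_attr(), pmap_unmapdev() and pmap_change_attr() above.  The
 * page size is hard-coded to the i386 value, the addresses are invented, and
 * trunc_page()/roundup() are written out explicitly.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SIZE    4096u
#define EX_PAGE_MASK    (EX_PAGE_SIZE - 1)

int
main(void)
{
        uint32_t va = 0xc0a01234, size = 0x2100;
        uint32_t base, offset, rounded;

        base = va & ~EX_PAGE_MASK;              /* trunc_page(va) */
        offset = va & EX_PAGE_MASK;             /* byte offset within page */
        rounded = (offset + size + EX_PAGE_MASK) & ~EX_PAGE_MASK; /* roundup */

        /* Prints "base 0xc0a01000, mapped size 0x3000" (three 4KB pages). */
        printf("base 0x%x, mapped size 0x%x\n", (unsigned)base,
            (unsigned)rounded);
        return (0);
}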
 4917 
 4918 /*
 4919  * perform the pmap work for mincore
 4920  */
 4921 int
 4922 pmap_mincore(pmap_t pmap, vm_offset_t addr)
 4923 {
 4924         pd_entry_t *pdep;
 4925         pt_entry_t *ptep, pte;
 4926         vm_paddr_t pa;
 4927         vm_page_t m;
 4928         int val = 0;
 4929         
 4930         PMAP_LOCK(pmap);
 4931         pdep = pmap_pde(pmap, addr);
 4932         if (*pdep != 0) {
 4933                 if (*pdep & PG_PS) {
 4934                         pte = *pdep;
 4935                         val = MINCORE_SUPER;
 4936                         /* Compute the physical address of the 4KB page. */
 4937                         pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) &
 4938                             PG_FRAME;
 4939                 } else {
 4940                         ptep = pmap_pte(pmap, addr);
 4941                         pte = *ptep;
 4942                         pmap_pte_release(ptep);
 4943                         pa = pte & PG_FRAME;
 4944                 }
 4945         } else {
 4946                 pte = 0;
 4947                 pa = 0;
 4948         }
 4949         PMAP_UNLOCK(pmap);
 4950 
 4951         if (pte != 0) {
 4952                 val |= MINCORE_INCORE;
 4953                 if ((pte & PG_MANAGED) == 0)
 4954                         return val;
 4955 
 4956                 m = PHYS_TO_VM_PAGE(pa);
 4957 
 4958                 /*
 4959                  * Modified by us
 4960                  */
 4961                 if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
 4962                         val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
 4963                 else {
 4964                         /*
 4965                          * Modified by someone else
 4966                          */
 4967                         vm_page_lock_queues();
 4968                         if (m->dirty || pmap_is_modified(m))
 4969                                 val |= MINCORE_MODIFIED_OTHER;
 4970                         vm_page_unlock_queues();
 4971                 }
 4972                 /*
 4973                  * Referenced by us
 4974                  */
 4975                 if (pte & PG_A)
 4976                         val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
 4977                 else {
 4978                         /*
 4979                          * Referenced by someone else
 4980                          */
 4981                         vm_page_lock_queues();
 4982                         if ((m->flags & PG_REFERENCED) ||
 4983                             pmap_ts_referenced(m)) {
 4984                                 val |= MINCORE_REFERENCED_OTHER;
 4985                                 vm_page_flag_set(m, PG_REFERENCED);
 4986                         }
 4987                         vm_page_unlock_queues();
 4988                 }
 4989         } 
 4990         return val;
 4991 }
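/*
 * Editor's sketch (not part of pmap.c): the userland view of the work done
 * by pmap_mincore() above.  mincore(2) fills one status byte per page from
 * the MINCORE_* bits computed in the kernel; MINCORE_MODIFIED and the other
 * extended bits are FreeBSD-specific, so this assumes a FreeBSD userland.
 * The mapping size and fill pattern are arbitrary.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        size_t i, npages = 4, len = npages * getpagesize();
        char vec[4], *p;

        p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
            -1, 0);
        if (p == MAP_FAILED)
                return (1);
        memset(p, 0xa5, len);                   /* dirty every page */
        if (mincore(p, len, vec) == -1)
                return (1);
        for (i = 0; i < npages; i++)
                printf("page %zu: incore=%d modified=%d\n", i,
                    (vec[i] & MINCORE_INCORE) != 0,
                    (vec[i] & MINCORE_MODIFIED) != 0);
        munmap(p, len);
        return (0);
}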
 4992 
 4993 void
 4994 pmap_activate(struct thread *td)
 4995 {
 4996         pmap_t  pmap, oldpmap;
 4997         u_int32_t  cr3;
 4998 
 4999         critical_enter();
 5000         pmap = vmspace_pmap(td->td_proc->p_vmspace);
 5001         oldpmap = PCPU_GET(curpmap);
 5002 #if defined(SMP)
 5003         atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
 5004         atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
 5005 #else
 5006         oldpmap->pm_active &= ~1;
 5007         pmap->pm_active |= 1;
 5008 #endif
 5009 #ifdef PAE
 5010         cr3 = vtophys(pmap->pm_pdpt);
 5011 #else
 5012         cr3 = vtophys(pmap->pm_pdir);
 5013 #endif
 5014         /*
 5015          * pmap_activate is for the current thread on the current cpu
 5016          */
 5017         td->td_pcb->pcb_cr3 = cr3;
 5018         load_cr3(cr3);
 5019         PCPU_SET(curpmap, pmap);
 5020         critical_exit();
 5021 }
 5022 
 5023 void
 5024 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
 5025 {
 5026 }
 5027 
 5028 /*
 5029  *      Increase the starting virtual address of the given mapping if a
 5030  *      different alignment might result in more superpage mappings.
 5031  */
 5032 void
 5033 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
 5034     vm_offset_t *addr, vm_size_t size)
 5035 {
 5036         vm_offset_t superpage_offset;
 5037 
 5038         if (size < NBPDR)
 5039                 return;
 5040         if (object != NULL && (object->flags & OBJ_COLORED) != 0)
 5041                 offset += ptoa(object->pg_color);
 5042         superpage_offset = offset & PDRMASK;
 5043         if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR ||
 5044             (*addr & PDRMASK) == superpage_offset)
 5045                 return;
 5046         if ((*addr & PDRMASK) < superpage_offset)
 5047                 *addr = (*addr & ~PDRMASK) + superpage_offset;
 5048         else
 5049                 *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
 5050 }
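/*
 * Editor's sketch (not part of pmap.c): the alignment arithmetic of
 * pmap_align_superpage() above, worked through with 4MB superpages
 * (NBPDR = 4MB, PDRMASK = NBPDR - 1).  The example addresses are invented.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_NBPDR        (4u * 1024 * 1024)
#define EX_PDRMASK      (EX_NBPDR - 1)

int
main(void)
{
        uint32_t addr = 0x20123000;     /* candidate mapping address */
        uint32_t offset = 0x00045000;   /* object offset of the mapping */
        uint32_t superpage_offset = offset & EX_PDRMASK;

        /*
         * Shift "addr" forward so that it and "offset" share the same
         * offset within a 4MB frame; a later promotion can then cover
         * the mapping with superpages.
         */
        if ((addr & EX_PDRMASK) < superpage_offset)
                addr = (addr & ~EX_PDRMASK) + superpage_offset;
        else
                addr = ((addr + EX_PDRMASK) & ~EX_PDRMASK) + superpage_offset;

        /* Prints "aligned address 0x20445000". */
        printf("aligned address 0x%x\n", (unsigned)addr);
        return (0);
}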
 5051 
 5052 
 5053 #if defined(PMAP_DEBUG)
 5054 pmap_pid_dump(int pid)
 5055 {
 5056         pmap_t pmap;
 5057         struct proc *p;
 5058         int npte = 0;
 5059         int index;
 5060 
 5061         sx_slock(&allproc_lock);
 5062         FOREACH_PROC_IN_SYSTEM(p) {
 5063                 if (p->p_pid != pid)
 5064                         continue;
 5065 
 5066                 if (p->p_vmspace) {
 5067                         int i,j;
 5068                         index = 0;
 5069                         pmap = vmspace_pmap(p->p_vmspace);
 5070                         for (i = 0; i < NPDEPTD; i++) {
 5071                                 pd_entry_t *pde;
 5072                                 pt_entry_t *pte;
 5073                                 vm_offset_t base = i << PDRSHIFT;
 5074                                 
 5075                                 pde = &pmap->pm_pdir[i];
 5076                                 if (pde && pmap_pde_v(pde)) {
 5077                                         for (j = 0; j < NPTEPG; j++) {
 5078                                                 vm_offset_t va = base + (j << PAGE_SHIFT);
 5079                                                 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
 5080                                                         if (index) {
 5081                                                                 index = 0;
 5082                                                                 printf("\n");
 5083                                                         }
 5084                                                         sx_sunlock(&allproc_lock);
 5085                                                         return npte;
 5086                                                 }
 5087                                                 pte = pmap_pte(pmap, va);
 5088                                                 if (pte && pmap_pte_v(pte)) {
 5089                                                         pt_entry_t pa;
 5090                                                         vm_page_t m;
 5091                                                         pa = *pte;
 5092                                                         m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
 5093                                                         printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
 5094                                                                 va, pa, m->hold_count, m->wire_count, m->flags);
 5095                                                         npte++;
 5096                                                         index++;
 5097                                                         if (index >= 2) {
 5098                                                                 index = 0;
 5099                                                                 printf("\n");
 5100                                                         } else {
 5101                                                                 printf(" ");
 5102                                                         }
 5103                                                 }
 5104                                         }
 5105                                 }
 5106                         }
 5107                 }
 5108         }
 5109         sx_sunlock(&allproc_lock);
 5110         return npte;
 5111 }
 5112 #endif
 5113 
 5114 #if defined(DEBUG)
 5115 
 5116 static void     pads(pmap_t pm);
 5117 void            pmap_pvdump(vm_offset_t pa);
 5118 
 5119 /* print address space of pmap */
 5120 static void
 5121 pads(pmap_t pm)
 5122 {
 5123         int i, j;
 5124         vm_paddr_t va;
 5125         pt_entry_t *ptep;
 5126 
 5127         if (pm == kernel_pmap)
 5128                 return;
 5129         for (i = 0; i < NPDEPTD; i++)
 5130                 if (pm->pm_pdir[i])
 5131                         for (j = 0; j < NPTEPG; j++) {
 5132                                 va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
 5133                                 if (pm == kernel_pmap && va < KERNBASE)
 5134                                         continue;
 5135                                 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
 5136                                         continue;
 5137                                 ptep = pmap_pte(pm, va);
 5138                                 if (pmap_pte_v(ptep))
 5139                                         printf("%x:%x ", va, *ptep);
 5140                         };
 5141 
 5142 }
 5143 
 5144 void
 5145 pmap_pvdump(vm_paddr_t pa)
 5146 {
 5147         pv_entry_t pv;
 5148         pmap_t pmap;
 5149         vm_page_t m;
 5150 
 5151         printf("pa %x", pa);
 5152         m = PHYS_TO_VM_PAGE(pa);
 5153         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 5154                 pmap = PV_PMAP(pv);
 5155                 printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
 5156                 pads(pmap);
 5157         }
 5158         printf(" ");
 5159 }
 5160 #endif

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.