The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/pmap.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1991 Regents of the University of California.
    3  * All rights reserved.
    4  * Copyright (c) 1994 John S. Dyson
    5  * All rights reserved.
    6  * Copyright (c) 1994 David Greenman
    7  * All rights reserved.
    8  * Copyright (c) 2003 Peter Wemm
    9  * All rights reserved.
   10  * Copyright (c) 2005 Alan L. Cox <alc@cs.rice.edu>
   11  * All rights reserved.
   12  *
   13  * This code is derived from software contributed to Berkeley by
   14  * the Systems Programming Group of the University of Utah Computer
   15  * Science Department and William Jolitz of UUNET Technologies Inc.
   16  *
   17  * Redistribution and use in source and binary forms, with or without
   18  * modification, are permitted provided that the following conditions
   19  * are met:
   20  * 1. Redistributions of source code must retain the above copyright
   21  *    notice, this list of conditions and the following disclaimer.
   22  * 2. Redistributions in binary form must reproduce the above copyright
   23  *    notice, this list of conditions and the following disclaimer in the
   24  *    documentation and/or other materials provided with the distribution.
   25  * 3. All advertising materials mentioning features or use of this software
   26  *    must display the following acknowledgement:
   27  *      This product includes software developed by the University of
   28  *      California, Berkeley and its contributors.
   29  * 4. Neither the name of the University nor the names of its contributors
   30  *    may be used to endorse or promote products derived from this software
   31  *    without specific prior written permission.
   32  *
   33  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   34  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   35  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   36  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   37  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   38  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   39  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   40  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   41  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   42  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   43  * SUCH DAMAGE.
   44  *
   45  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
   46  */
   47 /*-
   48  * Copyright (c) 2003 Networks Associates Technology, Inc.
   49  * All rights reserved.
   50  *
   51  * This software was developed for the FreeBSD Project by Jake Burkholder,
   52  * Safeport Network Services, and Network Associates Laboratories, the
   53  * Security Research Division of Network Associates, Inc. under
   54  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
   55  * CHATS research program.
   56  *
   57  * Redistribution and use in source and binary forms, with or without
   58  * modification, are permitted provided that the following conditions
   59  * are met:
   60  * 1. Redistributions of source code must retain the above copyright
   61  *    notice, this list of conditions and the following disclaimer.
   62  * 2. Redistributions in binary form must reproduce the above copyright
   63  *    notice, this list of conditions and the following disclaimer in the
   64  *    documentation and/or other materials provided with the distribution.
   65  *
   66  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   67  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   68  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   69  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   70  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   71  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   72  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   73  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   74  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   75  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   76  * SUCH DAMAGE.
   77  */
   78 
   79 #include <sys/cdefs.h>
   80 __FBSDID("$FreeBSD: releng/6.2/sys/amd64/amd64/pmap.c 162707 2006-09-27 18:10:16Z alc $");
   81 
   82 /*
   83  *      Manages physical address maps.
   84  *
   85  *      In addition to hardware address maps, this
   86  *      module is called upon to provide software-use-only
   87  *      maps which may or may not be stored in the same
   88  *      form as hardware maps.  These pseudo-maps are
   89  *      used to store intermediate results from copy
   90  *      operations to and from address spaces.
   91  *
   92  *      Since the information managed by this module is
   93  *      also stored by the logical address mapping module,
   94  *      this module may throw away valid virtual-to-physical
   95  *      mappings at almost any time.  However, invalidations
   96  *      of virtual-to-physical mappings must be done as
   97  *      requested.
   98  *
   99  *      In order to cope with hardware architectures which
  100  *      make virtual-to-physical map invalidates expensive,
  101  *      this module may delay invalidate or reduced protection
  102  *      operations until such time as they are actually
  103  *      necessary.  This module is given full information as
  104  *      to which processors are currently using which maps,
  105  *      and to when physical maps must be made correct.
  106  */
  107 
  108 #include "opt_msgbuf.h"
  109 #include "opt_pmap.h"
  110 
  111 #include <sys/param.h>
  112 #include <sys/systm.h>
  113 #include <sys/kernel.h>
  114 #include <sys/lock.h>
  115 #include <sys/malloc.h>
  116 #include <sys/mman.h>
  117 #include <sys/msgbuf.h>
  118 #include <sys/mutex.h>
  119 #include <sys/proc.h>
  120 #include <sys/sx.h>
  121 #include <sys/vmmeter.h>
  122 #include <sys/sched.h>
  123 #include <sys/sysctl.h>
  124 #ifdef SMP
  125 #include <sys/smp.h>
  126 #endif
  127 
  128 #include <vm/vm.h>
  129 #include <vm/vm_param.h>
  130 #include <vm/vm_kern.h>
  131 #include <vm/vm_page.h>
  132 #include <vm/vm_map.h>
  133 #include <vm/vm_object.h>
  134 #include <vm/vm_extern.h>
  135 #include <vm/vm_pageout.h>
  136 #include <vm/vm_pager.h>
  137 #include <vm/uma.h>
  138 
  139 #include <machine/cpu.h>
  140 #include <machine/cputypes.h>
  141 #include <machine/md_var.h>
  142 #include <machine/pcb.h>
  143 #include <machine/specialreg.h>
  144 #ifdef SMP
  145 #include <machine/smp.h>
  146 #endif
  147 
  148 #ifndef PMAP_SHPGPERPROC
  149 #define PMAP_SHPGPERPROC 200
  150 #endif
  151 
  152 #if defined(DIAGNOSTIC)
  153 #define PMAP_DIAGNOSTIC
  154 #endif
  155 
  156 #if !defined(PMAP_DIAGNOSTIC)
  157 #define PMAP_INLINE __inline
  158 #else
  159 #define PMAP_INLINE
  160 #endif
  161 
  162 struct pmap kernel_pmap_store;
  163 
  164 vm_paddr_t avail_start;         /* PA of first available physical page */
  165 vm_paddr_t avail_end;           /* PA of last available physical page */
  166 vm_offset_t virtual_avail;      /* VA of first avail page (after kernel bss) */
  167 vm_offset_t virtual_end;        /* VA of last avail page (end of kernel AS) */
  168 
  169 static int nkpt;
  170 static int ndmpdp;
  171 static vm_paddr_t dmaplimit;
  172 vm_offset_t kernel_vm_end;
  173 pt_entry_t pg_nx;
  174 
  175 static u_int64_t        KPTphys;        /* phys addr of kernel level 1 */
  176 static u_int64_t        KPDphys;        /* phys addr of kernel level 2 */
  177 u_int64_t               KPDPphys;       /* phys addr of kernel level 3 */
  178 u_int64_t               KPML4phys;      /* phys addr of kernel level 4 */
  179 
  180 static u_int64_t        DMPDphys;       /* phys addr of direct mapped level 2 */
  181 static u_int64_t        DMPDPphys;      /* phys addr of direct mapped level 3 */
  182 
  183 /*
  184  * Data for the pv entry allocation mechanism
  185  */
  186 static uma_zone_t pvzone;
  187 static struct vm_object pvzone_obj;
  188 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
  189 int pmap_pagedaemon_waken;
  190 
  191 /*
  192  * All those kernel PT submaps that BSD is so fond of
  193  */
  194 pt_entry_t *CMAP1 = 0;
  195 caddr_t CADDR1 = 0;
  196 struct msgbuf *msgbufp = 0;
  197 
  198 /*
  199  * Crashdump maps.
  200  */
  201 static caddr_t crashdumpmap;
  202 
  203 static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
  204 static pv_entry_t get_pv_entry(void);
  205 static void     pmap_clear_ptes(vm_page_t m, long bit);
  206 
  207 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq,
  208                 vm_offset_t sva, pd_entry_t ptepde);
  209 static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde);
  210 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
  211                 vm_offset_t va);
  212 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
  213 
  214 static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags);
  215 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
  216 
  217 static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags);
  218 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m);
  219 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t);
  220 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
  221 
  222 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
  223 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
  224 
  225 /*
  226  * Move the kernel virtual free pointer to the next
  227  * 2MB.  This is used to help improve performance
  228  * by using a large (2MB) page for much of the kernel
  229  * (.text, .data, .bss)
  230  */
  231 static vm_offset_t
  232 pmap_kmem_choose(vm_offset_t addr)
  233 {
  234         vm_offset_t newaddr = addr;
  235 
  236         newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
  237         return newaddr;
  238 }
  239 
  240 /********************/
  241 /* Inline functions */
  242 /********************/
  243 
  244 /* Return a non-clipped PD index for a given VA */
  245 static __inline vm_pindex_t
  246 pmap_pde_pindex(vm_offset_t va)
  247 {
  248         return va >> PDRSHIFT;
  249 }
  250 
  251 
  252 /* Return various clipped indexes for a given VA */
  253 static __inline vm_pindex_t
  254 pmap_pte_index(vm_offset_t va)
  255 {
  256 
  257         return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1));
  258 }
  259 
  260 static __inline vm_pindex_t
  261 pmap_pde_index(vm_offset_t va)
  262 {
  263 
  264         return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
  265 }
  266 
  267 static __inline vm_pindex_t
  268 pmap_pdpe_index(vm_offset_t va)
  269 {
  270 
  271         return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1));
  272 }
  273 
  274 static __inline vm_pindex_t
  275 pmap_pml4e_index(vm_offset_t va)
  276 {
  277 
  278         return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1));
  279 }
  280 
  281 /* Return a pointer to the PML4 slot that corresponds to a VA */
  282 static __inline pml4_entry_t *
  283 pmap_pml4e(pmap_t pmap, vm_offset_t va)
  284 {
  285 
  286         if (!pmap)
  287                 return NULL;
  288         return (&pmap->pm_pml4[pmap_pml4e_index(va)]);
  289 }
  290 
  291 /* Return a pointer to the PDP slot that corresponds to a VA */
  292 static __inline pdp_entry_t *
  293 pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va)
  294 {
  295         pdp_entry_t *pdpe;
  296 
  297         pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME);
  298         return (&pdpe[pmap_pdpe_index(va)]);
  299 }
  300 
  301 /* Return a pointer to the PDP slot that corresponds to a VA */
  302 static __inline pdp_entry_t *
  303 pmap_pdpe(pmap_t pmap, vm_offset_t va)
  304 {
  305         pml4_entry_t *pml4e;
  306 
  307         pml4e = pmap_pml4e(pmap, va);
  308         if (pml4e == NULL || (*pml4e & PG_V) == 0)
  309                 return NULL;
  310         return (pmap_pml4e_to_pdpe(pml4e, va));
  311 }
  312 
  313 /* Return a pointer to the PD slot that corresponds to a VA */
  314 static __inline pd_entry_t *
  315 pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va)
  316 {
  317         pd_entry_t *pde;
  318 
  319         pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME);
  320         return (&pde[pmap_pde_index(va)]);
  321 }
  322 
  323 /* Return a pointer to the PD slot that corresponds to a VA */
  324 static __inline pd_entry_t *
  325 pmap_pde(pmap_t pmap, vm_offset_t va)
  326 {
  327         pdp_entry_t *pdpe;
  328 
  329         pdpe = pmap_pdpe(pmap, va);
  330         if (pdpe == NULL || (*pdpe & PG_V) == 0)
  331                  return NULL;
  332         return (pmap_pdpe_to_pde(pdpe, va));
  333 }
  334 
  335 /* Return a pointer to the PT slot that corresponds to a VA */
  336 static __inline pt_entry_t *
  337 pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
  338 {
  339         pt_entry_t *pte;
  340 
  341         pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
  342         return (&pte[pmap_pte_index(va)]);
  343 }
  344 
  345 /* Return a pointer to the PT slot that corresponds to a VA */
  346 static __inline pt_entry_t *
  347 pmap_pte(pmap_t pmap, vm_offset_t va)
  348 {
  349         pd_entry_t *pde;
  350 
  351         pde = pmap_pde(pmap, va);
  352         if (pde == NULL || (*pde & PG_V) == 0)
  353                 return NULL;
  354         if ((*pde & PG_PS) != 0)        /* compat with i386 pmap_pte() */
  355                 return ((pt_entry_t *)pde);
  356         return (pmap_pde_to_pte(pde, va));
  357 }
  358 
  359 
  360 static __inline pt_entry_t *
  361 pmap_pte_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *ptepde)
  362 {
  363         pd_entry_t *pde;
  364 
  365         pde = pmap_pde(pmap, va);
  366         if (pde == NULL || (*pde & PG_V) == 0)
  367                 return NULL;
  368         *ptepde = *pde;
  369         if ((*pde & PG_PS) != 0)        /* compat with i386 pmap_pte() */
  370                 return ((pt_entry_t *)pde);
  371         return (pmap_pde_to_pte(pde, va));
  372 }
  373 
  374 
  375 PMAP_INLINE pt_entry_t *
  376 vtopte(vm_offset_t va)
  377 {
  378         u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
  379 
  380         return (PTmap + ((va >> PAGE_SHIFT) & mask));
  381 }
  382 
  383 static __inline pd_entry_t *
  384 vtopde(vm_offset_t va)
  385 {
  386         u_int64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
  387 
  388         return (PDmap + ((va >> PDRSHIFT) & mask));
  389 }
  390 
  391 static u_int64_t
  392 allocpages(int n)
  393 {
  394         u_int64_t ret;
  395 
  396         ret = avail_start;
  397         bzero((void *)ret, n * PAGE_SIZE);
  398         avail_start += n * PAGE_SIZE;
  399         return (ret);
  400 }
  401 
  402 static void
  403 create_pagetables(void)
  404 {
  405         int i;
  406 
  407         /* Allocate pages */
  408         KPTphys = allocpages(NKPT);
  409         KPML4phys = allocpages(1);
  410         KPDPphys = allocpages(NKPML4E);
  411         KPDphys = allocpages(NKPDPE);
  412 
  413         ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
  414         if (ndmpdp < 4)         /* Minimum 4GB of dirmap */
  415                 ndmpdp = 4;
  416         DMPDPphys = allocpages(NDMPML4E);
  417         DMPDphys = allocpages(ndmpdp);
  418         dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;
  419 
  420         /* Fill in the underlying page table pages */
  421         /* Read-only from zero to physfree */
  422         /* XXX not fully used, underneath 2M pages */
  423         for (i = 0; (i << PAGE_SHIFT) < avail_start; i++) {
  424                 ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT;
  425                 ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V | PG_G;
  426         }
  427 
  428         /* Now map the page tables at their location within PTmap */
  429         for (i = 0; i < NKPT; i++) {
  430                 ((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT);
  431                 ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V;
  432         }
  433 
  434         /* Map from zero to end of allocations under 2M pages */
  435         /* This replaces some of the KPTphys entries above */
  436         for (i = 0; (i << PDRSHIFT) < avail_start; i++) {
  437                 ((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT;
  438                 ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G;
  439         }
  440 
  441         /* And connect up the PD to the PDP */
  442         for (i = 0; i < NKPDPE; i++) {
  443                 ((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys + (i << PAGE_SHIFT);
  444                 ((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U;
  445         }
  446 
  447 
  448         /* Now set up the direct map space using 2MB pages */
  449         for (i = 0; i < NPDEPG * ndmpdp; i++) {
  450                 ((pd_entry_t *)DMPDphys)[i] = (vm_paddr_t)i << PDRSHIFT;
  451                 ((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G;
  452         }
  453 
  454         /* And the direct map space's PDP */
  455         for (i = 0; i < ndmpdp; i++) {
  456                 ((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (i << PAGE_SHIFT);
  457                 ((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U;
  458         }
  459 
  460         /* And recursively map PML4 to itself in order to get PTmap */
  461         ((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys;
  462         ((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U;
  463 
  464         /* Connect the Direct Map slot up to the PML4 */
  465         ((pdp_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys;
  466         ((pdp_entry_t *)KPML4phys)[DMPML4I] |= PG_RW | PG_V | PG_U;
  467 
  468         /* Connect the KVA slot up to the PML4 */
  469         ((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys;
  470         ((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U;
  471 }
  472 
  473 /*
  474  *      Bootstrap the system enough to run with virtual memory.
  475  *
  476  *      On amd64 this is called after mapping has already been enabled
  477  *      and just syncs the pmap module with what has already been done.
  478  *      [We can't call it easily with mapping off since the kernel is not
  479  *      mapped with PA == VA, hence we would have to relocate every address
  480  *      from the linked base (virtual) address "KERNBASE" to the actual
  481  *      (physical) address starting relative to 0]
  482  */
  483 void
  484 pmap_bootstrap(firstaddr)
  485         vm_paddr_t *firstaddr;
  486 {
  487         vm_offset_t va;
  488         pt_entry_t *pte, *unused;
  489 
  490         avail_start = *firstaddr;
  491 
  492         /*
  493          * Create an initial set of page tables to run the kernel in.
  494          */
  495         create_pagetables();
  496         *firstaddr = avail_start;
  497 
  498         virtual_avail = (vm_offset_t) KERNBASE + avail_start;
  499         virtual_avail = pmap_kmem_choose(virtual_avail);
  500 
  501         virtual_end = VM_MAX_KERNEL_ADDRESS;
  502 
  503 
  504         /* XXX do %cr0 as well */
  505         load_cr4(rcr4() | CR4_PGE | CR4_PSE);
  506         load_cr3(KPML4phys);
  507 
  508         /*
  509          * Initialize the kernel pmap (which is statically allocated).
  510          */
  511         PMAP_LOCK_INIT(kernel_pmap);
  512         kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys);
  513         kernel_pmap->pm_active = -1;    /* don't allow deactivation */
  514         TAILQ_INIT(&kernel_pmap->pm_pvlist);
  515         nkpt = NKPT;
  516 
  517         /*
  518          * Reserve some special page table entries/VA space for temporary
  519          * mapping of pages.
  520          */
  521 #define SYSMAP(c, p, v, n)      \
  522         v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
  523 
  524         va = virtual_avail;
  525         pte = vtopte(va);
  526 
  527         /*
  528          * CMAP1 is only used for the memory test.
  529          */
  530         SYSMAP(caddr_t, CMAP1, CADDR1, 1)
  531 
  532         /*
  533          * Crashdump maps.
  534          */
  535         SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
  536 
  537         /*
  538          * msgbufp is used to map the system message buffer.
  539          */
  540         SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))
  541 
  542         virtual_avail = va;
  543 
  544         *CMAP1 = 0;
  545 
  546         invltlb();
  547 }
  548 
  549 /*
  550  *      Initialize a vm_page's machine-dependent fields.
  551  */
  552 void
  553 pmap_page_init(vm_page_t m)
  554 {
  555 
  556         TAILQ_INIT(&m->md.pv_list);
  557         m->md.pv_list_count = 0;
  558 }
  559 
  560 /*
  561  *      Initialize the pmap module.
  562  *      Called by vm_init, to initialize any structures that the pmap
  563  *      system needs to map virtual memory.
  564  */
  565 void
  566 pmap_init(void)
  567 {
  568         int shpgperproc = PMAP_SHPGPERPROC;
  569 
  570         /*
  571          * Initialize the address space (zone) for the pv entries.  Set a
  572          * high water mark so that the system can recover from excessive
  573          * numbers of pv entries.
  574          */
  575         pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 
  576             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
  577         TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
  578         pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
  579         TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
  580         pv_entry_high_water = 9 * (pv_entry_max / 10);
  581         uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
  582 }
  583 
  584 void
  585 pmap_init2()
  586 {
  587 }
  588 
  589 
  590 /***************************************************
  591  * Low level helper routines.....
  592  ***************************************************/
  593 
  594 #if defined(PMAP_DIAGNOSTIC)
  595 
  596 /*
  597  * This code checks for non-writeable/modified pages.
  598  * This should be an invalid condition.
  599  */
  600 static int
  601 pmap_nw_modified(pt_entry_t ptea)
  602 {
  603         int pte;
  604 
  605         pte = (int) ptea;
  606 
  607         if ((pte & (PG_M|PG_RW)) == PG_M)
  608                 return 1;
  609         else
  610                 return 0;
  611 }
  612 #endif
  613 
  614 
  615 /*
  616  * this routine defines the region(s) of memory that should
  617  * not be tested for the modified bit.
  618  */
  619 static PMAP_INLINE int
  620 pmap_track_modified(vm_offset_t va)
  621 {
  622         if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) 
  623                 return 1;
  624         else
  625                 return 0;
  626 }
  627 
  628 #ifdef SMP
  629 /*
  630  * For SMP, these functions have to use the IPI mechanism for coherence.
  631  */
  632 void
  633 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  634 {
  635         u_int cpumask;
  636         u_int other_cpus;
  637 
  638         if (smp_started) {
  639                 if (!(read_rflags() & PSL_I))
  640                         panic("%s: interrupts disabled", __func__);
  641                 mtx_lock_spin(&smp_ipi_mtx);
  642         } else
  643                 critical_enter();
  644         /*
  645          * We need to disable interrupt preemption but MUST NOT have
  646          * interrupts disabled here.
  647          * XXX we may need to hold schedlock to get a coherent pm_active
  648          * XXX critical sections disable interrupts again
  649          */
  650         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  651                 invlpg(va);
  652                 smp_invlpg(va);
  653         } else {
  654                 cpumask = PCPU_GET(cpumask);
  655                 other_cpus = PCPU_GET(other_cpus);
  656                 if (pmap->pm_active & cpumask)
  657                         invlpg(va);
  658                 if (pmap->pm_active & other_cpus)
  659                         smp_masked_invlpg(pmap->pm_active & other_cpus, va);
  660         }
  661         if (smp_started)
  662                 mtx_unlock_spin(&smp_ipi_mtx);
  663         else
  664                 critical_exit();
  665 }
  666 
  667 void
  668 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  669 {
  670         u_int cpumask;
  671         u_int other_cpus;
  672         vm_offset_t addr;
  673 
  674         if (smp_started) {
  675                 if (!(read_rflags() & PSL_I))
  676                         panic("%s: interrupts disabled", __func__);
  677                 mtx_lock_spin(&smp_ipi_mtx);
  678         } else
  679                 critical_enter();
  680         /*
  681          * We need to disable interrupt preemption but MUST NOT have
  682          * interrupts disabled here.
  683          * XXX we may need to hold schedlock to get a coherent pm_active
  684          * XXX critical sections disable interrupts again
  685          */
  686         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  687                 for (addr = sva; addr < eva; addr += PAGE_SIZE)
  688                         invlpg(addr);
  689                 smp_invlpg_range(sva, eva);
  690         } else {
  691                 cpumask = PCPU_GET(cpumask);
  692                 other_cpus = PCPU_GET(other_cpus);
  693                 if (pmap->pm_active & cpumask)
  694                         for (addr = sva; addr < eva; addr += PAGE_SIZE)
  695                                 invlpg(addr);
  696                 if (pmap->pm_active & other_cpus)
  697                         smp_masked_invlpg_range(pmap->pm_active & other_cpus,
  698                             sva, eva);
  699         }
  700         if (smp_started)
  701                 mtx_unlock_spin(&smp_ipi_mtx);
  702         else
  703                 critical_exit();
  704 }
  705 
  706 void
  707 pmap_invalidate_all(pmap_t pmap)
  708 {
  709         u_int cpumask;
  710         u_int other_cpus;
  711 
  712         if (smp_started) {
  713                 if (!(read_rflags() & PSL_I))
  714                         panic("%s: interrupts disabled", __func__);
  715                 mtx_lock_spin(&smp_ipi_mtx);
  716         } else
  717                 critical_enter();
  718         /*
  719          * We need to disable interrupt preemption but MUST NOT have
  720          * interrupts disabled here.
  721          * XXX we may need to hold schedlock to get a coherent pm_active
  722          * XXX critical sections disable interrupts again
  723          */
  724         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  725                 invltlb();
  726                 smp_invltlb();
  727         } else {
  728                 cpumask = PCPU_GET(cpumask);
  729                 other_cpus = PCPU_GET(other_cpus);
  730                 if (pmap->pm_active & cpumask)
  731                         invltlb();
  732                 if (pmap->pm_active & other_cpus)
  733                         smp_masked_invltlb(pmap->pm_active & other_cpus);
  734         }
  735         if (smp_started)
  736                 mtx_unlock_spin(&smp_ipi_mtx);
  737         else
  738                 critical_exit();
  739 }
  740 #else /* !SMP */
  741 /*
  742  * Normal, non-SMP, invalidation functions.
  743  * We inline these within pmap.c for speed.
  744  */
  745 PMAP_INLINE void
  746 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  747 {
  748 
  749         if (pmap == kernel_pmap || pmap->pm_active)
  750                 invlpg(va);
  751 }
  752 
  753 PMAP_INLINE void
  754 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  755 {
  756         vm_offset_t addr;
  757 
  758         if (pmap == kernel_pmap || pmap->pm_active)
  759                 for (addr = sva; addr < eva; addr += PAGE_SIZE)
  760                         invlpg(addr);
  761 }
  762 
  763 PMAP_INLINE void
  764 pmap_invalidate_all(pmap_t pmap)
  765 {
  766 
  767         if (pmap == kernel_pmap || pmap->pm_active)
  768                 invltlb();
  769 }
  770 #endif /* !SMP */
  771 
  772 /*
  773  * Are we current address space or kernel?
  774  */
  775 static __inline int
  776 pmap_is_current(pmap_t pmap)
  777 {
  778         return (pmap == kernel_pmap ||
  779             (pmap->pm_pml4[PML4PML4I] & PG_FRAME) == (PML4pml4e[0] & PG_FRAME));
  780 }
  781 
  782 /*
  783  *      Routine:        pmap_extract
  784  *      Function:
  785  *              Extract the physical page address associated
  786  *              with the given map/virtual_address pair.
  787  */
  788 vm_paddr_t 
  789 pmap_extract(pmap_t pmap, vm_offset_t va)
  790 {
  791         vm_paddr_t rtval;
  792         pt_entry_t *pte;
  793         pd_entry_t pde, *pdep;
  794 
  795         rtval = 0;
  796         PMAP_LOCK(pmap);
  797         pdep = pmap_pde(pmap, va);
  798         if (pdep != NULL) {
  799                 pde = *pdep;
  800                 if (pde) {
  801                         if ((pde & PG_PS) != 0) {
  802                                 KASSERT((pde & PG_FRAME & PDRMASK) == 0,
  803                                     ("pmap_extract: bad pde"));
  804                                 rtval = (pde & PG_FRAME) | (va & PDRMASK);
  805                                 PMAP_UNLOCK(pmap);
  806                                 return rtval;
  807                         }
  808                         pte = pmap_pde_to_pte(pdep, va);
  809                         rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
  810                 }
  811         }
  812         PMAP_UNLOCK(pmap);
  813         return (rtval);
  814 }
  815 
  816 /*
  817  *      Routine:        pmap_extract_and_hold
  818  *      Function:
  819  *              Atomically extract and hold the physical page
  820  *              with the given pmap and virtual address pair
  821  *              if that mapping permits the given protection.
  822  */
  823 vm_page_t
  824 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
  825 {
  826         pd_entry_t pde, *pdep;
  827         pt_entry_t pte;
  828         vm_page_t m;
  829 
  830         m = NULL;
  831         vm_page_lock_queues();
  832         PMAP_LOCK(pmap);
  833         pdep = pmap_pde(pmap, va);
  834         if (pdep != NULL && (pde = *pdep)) {
  835                 if (pde & PG_PS) {
  836                         if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
  837                                 KASSERT((pde & PG_FRAME & PDRMASK) == 0,
  838                                     ("pmap_extract_and_hold: bad pde"));
  839                                 m = PHYS_TO_VM_PAGE((pde & PG_FRAME) |
  840                                     (va & PDRMASK));
  841                                 vm_page_hold(m);
  842                         }
  843                 } else {
  844                         pte = *pmap_pde_to_pte(pdep, va);
  845                         if ((pte & PG_V) &&
  846                             ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
  847                                 m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
  848                                 vm_page_hold(m);
  849                         }
  850                 }
  851         }
  852         vm_page_unlock_queues();
  853         PMAP_UNLOCK(pmap);
  854         return (m);
  855 }
  856 
  857 vm_paddr_t
  858 pmap_kextract(vm_offset_t va)
  859 {
  860         pd_entry_t *pde;
  861         vm_paddr_t pa;
  862 
  863         if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
  864                 pa = DMAP_TO_PHYS(va);
  865         } else {
  866                 pde = vtopde(va);
  867                 if (*pde & PG_PS) {
  868                         pa = (*pde & ~(NBPDR - 1)) | (va & (NBPDR - 1));
  869                 } else {
  870                         pa = *vtopte(va);
  871                         pa = (pa & PG_FRAME) | (va & PAGE_MASK);
  872                 }
  873         }
  874         return pa;
  875 }
  876 
  877 /***************************************************
  878  * Low level mapping routines.....
  879  ***************************************************/
  880 
  881 /*
  882  * Add a wired page to the kva.
  883  * Note: not SMP coherent.
  884  */
  885 PMAP_INLINE void 
  886 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
  887 {
  888         pt_entry_t *pte;
  889 
  890         pte = vtopte(va);
  891         pte_store(pte, pa | PG_RW | PG_V | PG_G);
  892 }
  893 
  894 /*
  895  * Remove a page from the kernel pagetables.
  896  * Note: not SMP coherent.
  897  */
  898 PMAP_INLINE void
  899 pmap_kremove(vm_offset_t va)
  900 {
  901         pt_entry_t *pte;
  902 
  903         pte = vtopte(va);
  904         pte_clear(pte);
  905 }
  906 
  907 /*
  908  *      Used to map a range of physical addresses into kernel
  909  *      virtual address space.
  910  *
  911  *      The value passed in '*virt' is a suggested virtual address for
  912  *      the mapping. Architectures which can support a direct-mapped
  913  *      physical to virtual region can return the appropriate address
  914  *      within that region, leaving '*virt' unchanged. Other
  915  *      architectures should map the pages starting at '*virt' and
  916  *      update '*virt' with the first usable address after the mapped
  917  *      region.
  918  */
  919 vm_offset_t
  920 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
  921 {
  922         return PHYS_TO_DMAP(start);
  923 }
  924 
  925 
  926 /*
  927  * Add a list of wired pages to the kva
  928  * this routine is only used for temporary
  929  * kernel mappings that do not need to have
  930  * page modification or references recorded.
  931  * Note that old mappings are simply written
  932  * over.  The page *must* be wired.
  933  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  934  */
  935 void
  936 pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
  937 {
  938         vm_offset_t va;
  939 
  940         va = sva;
  941         while (count-- > 0) {
  942                 pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
  943                 va += PAGE_SIZE;
  944                 m++;
  945         }
  946         pmap_invalidate_range(kernel_pmap, sva, va);
  947 }
  948 
  949 /*
  950  * This routine tears out page mappings from the
  951  * kernel -- it is meant only for temporary mappings.
  952  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  953  */
  954 void
  955 pmap_qremove(vm_offset_t sva, int count)
  956 {
  957         vm_offset_t va;
  958 
  959         va = sva;
  960         while (count-- > 0) {
  961                 pmap_kremove(va);
  962                 va += PAGE_SIZE;
  963         }
  964         pmap_invalidate_range(kernel_pmap, sva, va);
  965 }
  966 
  967 /***************************************************
  968  * Page table page management routines.....
  969  ***************************************************/
  970 
  971 /*
  972  * This routine unholds page table pages, and if the hold count
  973  * drops to zero, then it decrements the wire count.
  974  */
  975 static PMAP_INLINE int
  976 pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
  977 {
  978 
  979         --m->wire_count;
  980         if (m->wire_count == 0)
  981                 return _pmap_unwire_pte_hold(pmap, va, m);
  982         else
  983                 return 0;
  984 }
  985 
  986 static int 
  987 _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
  988 {
  989         vm_offset_t pteva;
  990 
  991         /*
  992          * unmap the page table page
  993          */
  994         if (m->pindex >= (NUPDE + NUPDPE)) {
  995                 /* PDP page */
  996                 pml4_entry_t *pml4;
  997                 pml4 = pmap_pml4e(pmap, va);
  998                 pteva = (vm_offset_t) PDPmap + amd64_ptob(m->pindex - (NUPDE + NUPDPE));
  999                 *pml4 = 0;
 1000         } else if (m->pindex >= NUPDE) {
 1001                 /* PD page */
 1002                 pdp_entry_t *pdp;
 1003                 pdp = pmap_pdpe(pmap, va);
 1004                 pteva = (vm_offset_t) PDmap + amd64_ptob(m->pindex - NUPDE);
 1005                 *pdp = 0;
 1006         } else {
 1007                 /* PTE page */
 1008                 pd_entry_t *pd;
 1009                 pd = pmap_pde(pmap, va);
 1010                 pteva = (vm_offset_t) PTmap + amd64_ptob(m->pindex);
 1011                 *pd = 0;
 1012         }
 1013         --pmap->pm_stats.resident_count;
 1014         if (m->pindex < NUPDE) {
 1015                 /* We just released a PT, unhold the matching PD */
 1016                 vm_page_t pdpg;
 1017 
 1018                 pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME);
 1019                 pmap_unwire_pte_hold(pmap, va, pdpg);
 1020         }
 1021         if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) {
 1022                 /* We just released a PD, unhold the matching PDP */
 1023                 vm_page_t pdppg;
 1024 
 1025                 pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME);
 1026                 pmap_unwire_pte_hold(pmap, va, pdppg);
 1027         }
 1028 
 1029         /*
 1030          * Do an invltlb to make the invalidated mapping
 1031          * take effect immediately.
 1032          */
 1033         pmap_invalidate_page(pmap, pteva);
 1034 
 1035         vm_page_free_zero(m);
 1036         atomic_subtract_int(&cnt.v_wire_count, 1);
 1037         return 1;
 1038 }
 1039 
 1040 /*
 1041  * After removing a page table entry, this routine is used to
 1042  * conditionally free the page, and manage the hold/wire counts.
 1043  */
 1044 static int
 1045 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde)
 1046 {
 1047         vm_page_t mpte;
 1048 
 1049         if (va >= VM_MAXUSER_ADDRESS)
 1050                 return 0;
 1051         KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
 1052         mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
 1053         return pmap_unwire_pte_hold(pmap, va, mpte);
 1054 }
 1055 
 1056 void
 1057 pmap_pinit0(pmap)
 1058         struct pmap *pmap;
 1059 {
 1060 
 1061         PMAP_LOCK_INIT(pmap);
 1062         pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + KPML4phys);
 1063         pmap->pm_active = 0;
 1064         TAILQ_INIT(&pmap->pm_pvlist);
 1065         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1066 }
 1067 
 1068 /*
 1069  * Initialize a preallocated and zeroed pmap structure,
 1070  * such as one in a vmspace structure.
 1071  */
 1072 void
 1073 pmap_pinit(pmap)
 1074         register struct pmap *pmap;
 1075 {
 1076         vm_page_t pml4pg;
 1077         static vm_pindex_t color;
 1078 
 1079         PMAP_LOCK_INIT(pmap);
 1080 
 1081         /*
 1082          * allocate the page directory page
 1083          */
 1084         while ((pml4pg = vm_page_alloc(NULL, color++, VM_ALLOC_NOOBJ |
 1085             VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
 1086                 VM_WAIT;
 1087 
 1088         pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg));
 1089 
 1090         if ((pml4pg->flags & PG_ZERO) == 0)
 1091                 pagezero(pmap->pm_pml4);
 1092 
 1093         /* Wire in kernel global address entries. */
 1094         pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U;
 1095         pmap->pm_pml4[DMPML4I] = DMPDPphys | PG_RW | PG_V | PG_U;
 1096 
 1097         /* install self-referential address mapping entry(s) */
 1098         pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M;
 1099 
 1100         pmap->pm_active = 0;
 1101         TAILQ_INIT(&pmap->pm_pvlist);
 1102         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1103 }
 1104 
 1105 /*
 1106  * this routine is called if the page table page is not
 1107  * mapped correctly.
 1108  *
 1109  * Note: If a page allocation fails at page table level two or three,
 1110  * one or two pages may be held during the wait, only to be released
 1111  * afterwards.  This conservative approach is easily argued to avoid
 1112  * race conditions.
 1113  */
 1114 static vm_page_t
 1115 _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags)
 1116 {
 1117         vm_page_t m, pdppg, pdpg;
 1118 
 1119         KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 1120             (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 1121             ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 1122 
 1123         /*
 1124          * Allocate a page table page.
 1125          */
 1126         if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 1127             VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 1128                 if (flags & M_WAITOK) {
 1129                         PMAP_UNLOCK(pmap);
 1130                         vm_page_unlock_queues();
 1131                         VM_WAIT;
 1132                         vm_page_lock_queues();
 1133                         PMAP_LOCK(pmap);
 1134                 }
 1135 
 1136                 /*
 1137                  * Indicate the need to retry.  While waiting, the page table
 1138                  * page may have been allocated.
 1139                  */
 1140                 return (NULL);
 1141         }
 1142         if ((m->flags & PG_ZERO) == 0)
 1143                 pmap_zero_page(m);
 1144 
 1145         /*
 1146          * Map the pagetable page into the process address space, if
 1147          * it isn't already there.
 1148          */
 1149 
 1150         pmap->pm_stats.resident_count++;
 1151 
 1152         if (ptepindex >= (NUPDE + NUPDPE)) {
 1153                 pml4_entry_t *pml4;
 1154                 vm_pindex_t pml4index;
 1155 
 1156                 /* Wire up a new PDPE page */
 1157                 pml4index = ptepindex - (NUPDE + NUPDPE);
 1158                 pml4 = &pmap->pm_pml4[pml4index];
 1159                 *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
 1160 
 1161         } else if (ptepindex >= NUPDE) {
 1162                 vm_pindex_t pml4index;
 1163                 vm_pindex_t pdpindex;
 1164                 pml4_entry_t *pml4;
 1165                 pdp_entry_t *pdp;
 1166 
 1167                 /* Wire up a new PDE page */
 1168                 pdpindex = ptepindex - NUPDE;
 1169                 pml4index = pdpindex >> NPML4EPGSHIFT;
 1170 
 1171                 pml4 = &pmap->pm_pml4[pml4index];
 1172                 if ((*pml4 & PG_V) == 0) {
 1173                         /* Have to allocate a new pdp, recurse */
 1174                         if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index,
 1175                             flags) == NULL) {
 1176                                 --m->wire_count;
 1177                                 vm_page_free(m);
 1178                                 return (NULL);
 1179                         }
 1180                 } else {
 1181                         /* Add reference to pdp page */
 1182                         pdppg = PHYS_TO_VM_PAGE(*pml4 & PG_FRAME);
 1183                         pdppg->wire_count++;
 1184                 }
 1185                 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
 1186 
 1187                 /* Now find the pdp page */
 1188                 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
 1189                 *pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
 1190 
 1191         } else {
 1192                 vm_pindex_t pml4index;
 1193                 vm_pindex_t pdpindex;
 1194                 pml4_entry_t *pml4;
 1195                 pdp_entry_t *pdp;
 1196                 pd_entry_t *pd;
 1197 
 1198                 /* Wire up a new PTE page */
 1199                 pdpindex = ptepindex >> NPDPEPGSHIFT;
 1200                 pml4index = pdpindex >> NPML4EPGSHIFT;
 1201 
 1202                 /* First, find the pdp and check that its valid. */
 1203                 pml4 = &pmap->pm_pml4[pml4index];
 1204                 if ((*pml4 & PG_V) == 0) {
 1205                         /* Have to allocate a new pd, recurse */
 1206                         if (_pmap_allocpte(pmap, NUPDE + pdpindex,
 1207                             flags) == NULL) {
 1208                                 --m->wire_count;
 1209                                 vm_page_free(m);
 1210                                 return (NULL);
 1211                         }
 1212                         pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
 1213                         pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
 1214                 } else {
 1215                         pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
 1216                         pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
 1217                         if ((*pdp & PG_V) == 0) {
 1218                                 /* Have to allocate a new pd, recurse */
 1219                                 if (_pmap_allocpte(pmap, NUPDE + pdpindex,
 1220                                     flags) == NULL) {
 1221                                         --m->wire_count;
 1222                                         vm_page_free(m);
 1223                                         return (NULL);
 1224                                 }
 1225                         } else {
 1226                                 /* Add reference to the pd page */
 1227                                 pdpg = PHYS_TO_VM_PAGE(*pdp & PG_FRAME);
 1228                                 pdpg->wire_count++;
 1229                         }
 1230                 }
 1231                 pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME);
 1232 
 1233                 /* Now we know where the page directory page is */
 1234                 pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)];
 1235                 *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
 1236         }
 1237 
 1238         return m;
 1239 }
 1240 
 1241 static vm_page_t
 1242 pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags)
 1243 {
 1244         vm_pindex_t pdpindex, ptepindex;
 1245         pdp_entry_t *pdpe;
 1246         vm_page_t pdpg;
 1247 
 1248         KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 1249             (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 1250             ("pmap_allocpde: flags is neither M_NOWAIT nor M_WAITOK"));
 1251 retry:
 1252         pdpe = pmap_pdpe(pmap, va);
 1253         if (pdpe != NULL && (*pdpe & PG_V) != 0) {
 1254                 /* Add a reference to the pd page. */
 1255                 pdpg = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME);
 1256                 pdpg->wire_count++;
 1257         } else {
 1258                 /* Allocate a pd page. */
 1259                 ptepindex = pmap_pde_pindex(va);
 1260                 pdpindex = ptepindex >> NPDPEPGSHIFT;
 1261                 pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, flags);
 1262                 if (pdpg == NULL && (flags & M_WAITOK))
 1263                         goto retry;
 1264         }
 1265         return (pdpg);
 1266 }
 1267 
 1268 static vm_page_t
 1269 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
 1270 {
 1271         vm_pindex_t ptepindex;
 1272         pd_entry_t *pd;
 1273         vm_page_t m;
 1274 
 1275         KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 1276             (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 1277             ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 1278 
 1279         /*
 1280          * Calculate pagetable page index
 1281          */
 1282         ptepindex = pmap_pde_pindex(va);
 1283 retry:
 1284         /*
 1285          * Get the page directory entry
 1286          */
 1287         pd = pmap_pde(pmap, va);
 1288 
 1289         /*
 1290          * This supports switching from a 2MB page to a
 1291          * normal 4K page.
 1292          */
 1293         if (pd != 0 && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
 1294                 *pd = 0;
 1295                 pd = 0;
 1296                 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 1297                 pmap_unuse_pt(pmap, va, *pmap_pdpe(pmap, va));
 1298                 pmap_invalidate_all(kernel_pmap);
 1299         }
 1300 
 1301         /*
 1302          * If the page table page is mapped, we just increment the
 1303          * hold count, and activate it.
 1304          */
 1305         if (pd != 0 && (*pd & PG_V) != 0) {
 1306                 m = PHYS_TO_VM_PAGE(*pd & PG_FRAME);
 1307                 m->wire_count++;
 1308         } else {
 1309                 /*
 1310                  * Here if the pte page isn't mapped, or if it has been
 1311                  * deallocated.
 1312                  */
 1313                 m = _pmap_allocpte(pmap, ptepindex, flags);
 1314                 if (m == NULL && (flags & M_WAITOK))
 1315                         goto retry;
 1316         }
 1317         return (m);
 1318 }
 1319 
 1320 
 1321 /***************************************************
 1322  * Pmap allocation/deallocation routines.
 1323  ***************************************************/
 1324 
 1325 /*
 1326  * Release any resources held by the given physical map.
 1327  * Called when a pmap initialized by pmap_pinit is being released.
 1328  * Should only be called if the map contains no valid mappings.
 1329  */
 1330 void
 1331 pmap_release(pmap_t pmap)
 1332 {
 1333         vm_page_t m;
 1334 
 1335         KASSERT(pmap->pm_stats.resident_count == 0,
 1336             ("pmap_release: pmap resident count %ld != 0",
 1337             pmap->pm_stats.resident_count));
 1338 
 1339         m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME);
 1340 
 1341         pmap->pm_pml4[KPML4I] = 0;      /* KVA */
 1342         pmap->pm_pml4[DMPML4I] = 0;     /* Direct Map */
 1343         pmap->pm_pml4[PML4PML4I] = 0;   /* Recursive Mapping */
 1344 
 1345         vm_page_lock_queues();
 1346         m->wire_count--;
 1347         atomic_subtract_int(&cnt.v_wire_count, 1);
 1348         vm_page_free_zero(m);
 1349         vm_page_unlock_queues();
 1350         PMAP_LOCK_DESTROY(pmap);
 1351 }
 1352 
 1353 static int
 1354 kvm_size(SYSCTL_HANDLER_ARGS)
 1355 {
 1356         unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 1357 
 1358         return sysctl_handle_long(oidp, &ksize, 0, req);
 1359 }
 1360 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
 1361     0, 0, kvm_size, "LU", "Size of KVM");
 1362 
 1363 static int
 1364 kvm_free(SYSCTL_HANDLER_ARGS)
 1365 {
 1366         unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 1367 
 1368         return sysctl_handle_long(oidp, &kfree, 0, req);
 1369 }
 1370 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
 1371     0, 0, kvm_free, "LU", "Amount of KVM free");
 1372 
 1373 /*
 1374  * grow the number of kernel page table entries, if needed
 1375  */
 1376 void
 1377 pmap_growkernel(vm_offset_t addr)
 1378 {
 1379         vm_paddr_t paddr;
 1380         vm_page_t nkpg;
 1381         pd_entry_t *pde, newpdir;
 1382         pdp_entry_t newpdp;
 1383 
 1384         mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 1385         if (kernel_vm_end == 0) {
 1386                 kernel_vm_end = KERNBASE;
 1387                 nkpt = 0;
 1388                 while ((*pmap_pde(kernel_pmap, kernel_vm_end) & PG_V) != 0) {
 1389                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1390                         nkpt++;
 1391                 }
 1392         }
 1393         addr = roundup2(addr, PAGE_SIZE * NPTEPG);
 1394         while (kernel_vm_end < addr) {
 1395                 pde = pmap_pde(kernel_pmap, kernel_vm_end);
 1396                 if (pde == NULL) {
 1397                         /* We need a new PDP entry */
 1398                         nkpg = vm_page_alloc(NULL, nkpt,
 1399                             VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
 1400                         if (!nkpg)
 1401                                 panic("pmap_growkernel: no memory to grow kernel");
 1402                         pmap_zero_page(nkpg);
 1403                         paddr = VM_PAGE_TO_PHYS(nkpg);
 1404                         newpdp = (pdp_entry_t)
 1405                                 (paddr | PG_V | PG_RW | PG_A | PG_M);
 1406                         *pmap_pdpe(kernel_pmap, kernel_vm_end) = newpdp;
 1407                         continue; /* try again */
 1408                 }
 1409                 if ((*pde & PG_V) != 0) {
 1410                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1411                         continue;
 1412                 }
 1413 
 1414                 /*
 1415                  * This index is bogus, but out of the way
 1416                  */
 1417                 nkpg = vm_page_alloc(NULL, nkpt,
 1418                     VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
 1419                 if (!nkpg)
 1420                         panic("pmap_growkernel: no memory to grow kernel");
 1421 
 1422                 nkpt++;
 1423 
 1424                 pmap_zero_page(nkpg);
 1425                 paddr = VM_PAGE_TO_PHYS(nkpg);
 1426                 newpdir = (pd_entry_t) (paddr | PG_V | PG_RW | PG_A | PG_M);
 1427                 *pmap_pde(kernel_pmap, kernel_vm_end) = newpdir;
 1428 
 1429                 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1430         }
 1431 }
 1432 
 1433 
 1434 /***************************************************
 1435  * page management routines.
 1436  ***************************************************/
 1437 
 1438 /*
 1439  * free the pv_entry back to the free list
 1440  */
 1441 static PMAP_INLINE void
 1442 free_pv_entry(pv_entry_t pv)
 1443 {
 1444         pv_entry_count--;
 1445         uma_zfree(pvzone, pv);
 1446 }
 1447 
 1448 /*
 1449  * get a new pv_entry, allocating a block from the system
 1450  * when needed.
 1451  * the memory allocation is performed bypassing the malloc code
 1452  * because of the possibility of allocations at interrupt time.
 1453  */
 1454 static pv_entry_t
 1455 get_pv_entry(void)
 1456 {
 1457         pv_entry_count++;
 1458         if ((pv_entry_count > pv_entry_high_water) &&
 1459                 (pmap_pagedaemon_waken == 0)) {
 1460                 pmap_pagedaemon_waken = 1;
 1461                 wakeup (&vm_pages_needed);
 1462         }
 1463         return uma_zalloc(pvzone, M_NOWAIT);
 1464 }
 1465 
 1466 
 1467 static void
 1468 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 1469 {
 1470         pv_entry_t pv;
 1471 
 1472         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1473         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1474         if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
 1475                 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 1476                         if (pmap == pv->pv_pmap && va == pv->pv_va) 
 1477                                 break;
 1478                 }
 1479         } else {
 1480                 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
 1481                         if (va == pv->pv_va) 
 1482                                 break;
 1483                 }
 1484         }
 1485         KASSERT(pv != NULL, ("pmap_remove_entry: pv not found"));
 1486         TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1487         m->md.pv_list_count--;
 1488         if (TAILQ_EMPTY(&m->md.pv_list))
 1489                 vm_page_flag_clear(m, PG_WRITEABLE);
 1490         TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 1491         free_pv_entry(pv);
 1492 }
 1493 
 1494 /*
 1495  * Create a pv entry for page at pa for
 1496  * (pmap, va).
 1497  */
 1498 static void
 1499 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 1500 {
 1501         pv_entry_t pv;
 1502 
 1503         pv = get_pv_entry();
 1504         if (pv == NULL)
 1505                 panic("no pv entries: increase vm.pmap.shpgperproc");
 1506         pv->pv_va = va;
 1507         pv->pv_pmap = pmap;
 1508 
 1509         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1510         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1511         TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
 1512         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 1513         m->md.pv_list_count++;
 1514 }
 1515 
 1516 /*
 1517  * pmap_remove_pte: do the things to unmap a page in a process
 1518  */
 1519 static int
 1520 pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, pd_entry_t ptepde)
 1521 {
 1522         pt_entry_t oldpte;
 1523         vm_page_t m;
 1524 
 1525         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1526         oldpte = pte_load_clear(ptq);
 1527         if (oldpte & PG_W)
 1528                 pmap->pm_stats.wired_count -= 1;
 1529         /*
 1530          * Machines that don't support invlpg, also don't support
 1531          * PG_G.
 1532          */
 1533         if (oldpte & PG_G)
 1534                 pmap_invalidate_page(kernel_pmap, va);
 1535         pmap->pm_stats.resident_count -= 1;
 1536         if (oldpte & PG_MANAGED) {
 1537                 m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME);
 1538                 if (oldpte & PG_M) {
 1539 #if defined(PMAP_DIAGNOSTIC)
 1540                         if (pmap_nw_modified((pt_entry_t) oldpte)) {
 1541                                 printf(
 1542         "pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n",
 1543                                     va, oldpte);
 1544                         }
 1545 #endif
 1546                         if (pmap_track_modified(va))
 1547                                 vm_page_dirty(m);
 1548                 }
 1549                 if (oldpte & PG_A)
 1550                         vm_page_flag_set(m, PG_REFERENCED);
 1551                 pmap_remove_entry(pmap, m, va);
 1552         }
 1553         return (pmap_unuse_pt(pmap, va, ptepde));
 1554 }
 1555 
 1556 /*
 1557  * Remove a single page from a process address space
 1558  */
 1559 static void
 1560 pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde)
 1561 {
 1562         pt_entry_t *pte;
 1563 
 1564         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1565         if ((*pde & PG_V) == 0)
 1566                 return;
 1567         pte = pmap_pde_to_pte(pde, va);
 1568         if ((*pte & PG_V) == 0)
 1569                 return;
 1570         pmap_remove_pte(pmap, pte, va, *pde);
 1571         pmap_invalidate_page(pmap, va);
 1572 }
 1573 
 1574 /*
 1575  *      Remove the given range of addresses from the specified map.
 1576  *
 1577  *      It is assumed that the start and end are properly
 1578  *      rounded to the page size.
 1579  */
 1580 void
 1581 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 1582 {
 1583         vm_offset_t va_next;
 1584         pml4_entry_t *pml4e;
 1585         pdp_entry_t *pdpe;
 1586         pd_entry_t ptpaddr, *pde;
 1587         pt_entry_t *pte;
 1588         int anyvalid;
 1589 
 1590         /*
 1591          * Perform an unsynchronized read.  This is, however, safe.
 1592          */
 1593         if (pmap->pm_stats.resident_count == 0)
 1594                 return;
 1595 
 1596         anyvalid = 0;
 1597 
 1598         vm_page_lock_queues();
 1599         PMAP_LOCK(pmap);
 1600 
 1601         /*
 1602          * special handling of removing one page.  a very
 1603          * common operation and easy to short circuit some
 1604          * code.
 1605          */
 1606         if (sva + PAGE_SIZE == eva) {
 1607                 pde = pmap_pde(pmap, sva);
 1608                 if (pde && (*pde & PG_PS) == 0) {
 1609                         pmap_remove_page(pmap, sva, pde);
 1610                         goto out;
 1611                 }
 1612         }
 1613 
 1614         for (; sva < eva; sva = va_next) {
 1615 
 1616                 if (pmap->pm_stats.resident_count == 0)
 1617                         break;
 1618 
 1619                 pml4e = pmap_pml4e(pmap, sva);
 1620                 if ((*pml4e & PG_V) == 0) {
 1621                         va_next = (sva + NBPML4) & ~PML4MASK;
 1622                         continue;
 1623                 }
 1624 
 1625                 pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
 1626                 if ((*pdpe & PG_V) == 0) {
 1627                         va_next = (sva + NBPDP) & ~PDPMASK;
 1628                         continue;
 1629                 }
 1630 
 1631                 /*
 1632                  * Calculate index for next page table.
 1633                  */
 1634                 va_next = (sva + NBPDR) & ~PDRMASK;
 1635 
 1636                 pde = pmap_pdpe_to_pde(pdpe, sva);
 1637                 ptpaddr = *pde;
 1638 
 1639                 /*
 1640                  * Weed out invalid mappings.
 1641                  */
 1642                 if (ptpaddr == 0)
 1643                         continue;
 1644 
 1645                 /*
 1646                  * Check for large page.
 1647                  */
 1648                 if ((ptpaddr & PG_PS) != 0) {
 1649                         *pde = 0;
 1650                         pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 1651                         pmap_unuse_pt(pmap, sva, *pdpe);
 1652                         anyvalid = 1;
 1653                         continue;
 1654                 }
 1655 
 1656                 /*
 1657                  * Limit our scan to either the end of the va represented
 1658                  * by the current page table page, or to the end of the
 1659                  * range being removed.
 1660                  */
 1661                 if (va_next > eva)
 1662                         va_next = eva;
 1663 
 1664                 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
 1665                     sva += PAGE_SIZE) {
 1666                         if (*pte == 0)
 1667                                 continue;
 1668                         anyvalid = 1;
 1669                         if (pmap_remove_pte(pmap, pte, sva, ptpaddr))
 1670                                 break;
 1671                 }
 1672         }
 1673 out:
 1674         vm_page_unlock_queues();
 1675         if (anyvalid)
 1676                 pmap_invalidate_all(pmap);
 1677         PMAP_UNLOCK(pmap);
 1678 }
 1679 
 1680 /*
 1681  *      Routine:        pmap_remove_all
 1682  *      Function:
 1683  *              Removes this physical page from
 1684  *              all physical maps in which it resides.
 1685  *              Reflects back modify bits to the pager.
 1686  *
 1687  *      Notes:
 1688  *              Original versions of this routine were very
 1689  *              inefficient because they iteratively called
 1690  *              pmap_remove (slow...)
 1691  */
 1692 
 1693 void
 1694 pmap_remove_all(vm_page_t m)
 1695 {
 1696         register pv_entry_t pv;
 1697         pt_entry_t *pte, tpte;
 1698         pd_entry_t ptepde;
 1699 
 1700 #if defined(PMAP_DIAGNOSTIC)
 1701         /*
 1702          * XXX This makes pmap_remove_all() illegal for non-managed pages!
 1703          */
 1704         if (m->flags & PG_FICTITIOUS) {
 1705                 panic("pmap_remove_all: illegal for unmanaged page, va: 0x%lx",
 1706                     VM_PAGE_TO_PHYS(m));
 1707         }
 1708 #endif
 1709         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1710         while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 1711                 PMAP_LOCK(pv->pv_pmap);
 1712                 pv->pv_pmap->pm_stats.resident_count--;
 1713                 pte = pmap_pte_pde(pv->pv_pmap, pv->pv_va, &ptepde);
 1714                 tpte = pte_load_clear(pte);
 1715                 if (tpte & PG_W)
 1716                         pv->pv_pmap->pm_stats.wired_count--;
 1717                 if (tpte & PG_A)
 1718                         vm_page_flag_set(m, PG_REFERENCED);
 1719 
 1720                 /*
 1721                  * Update the vm_page_t clean and reference bits.
 1722                  */
 1723                 if (tpte & PG_M) {
 1724 #if defined(PMAP_DIAGNOSTIC)
 1725                         if (pmap_nw_modified((pt_entry_t) tpte)) {
 1726                                 printf(
 1727         "pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n",
 1728                                     pv->pv_va, tpte);
 1729                         }
 1730 #endif
 1731                         if (pmap_track_modified(pv->pv_va))
 1732                                 vm_page_dirty(m);
 1733                 }
 1734                 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
 1735                 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 1736                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1737                 m->md.pv_list_count--;
 1738                 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, ptepde);
 1739                 PMAP_UNLOCK(pv->pv_pmap);
 1740                 free_pv_entry(pv);
 1741         }
 1742         vm_page_flag_clear(m, PG_WRITEABLE);
 1743 }
 1744 
 1745 /*
 1746  *      Set the physical protection on the
 1747  *      specified range of this map as requested.
 1748  */
 1749 void
 1750 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 1751 {
 1752         vm_offset_t va_next;
 1753         pml4_entry_t *pml4e;
 1754         pdp_entry_t *pdpe;
 1755         pd_entry_t ptpaddr, *pde;
 1756         pt_entry_t *pte;
 1757         int anychanged;
 1758 
 1759         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 1760                 pmap_remove(pmap, sva, eva);
 1761                 return;
 1762         }
 1763 
 1764         if (prot & VM_PROT_WRITE)
 1765                 return;
 1766 
 1767         anychanged = 0;
 1768 
 1769         vm_page_lock_queues();
 1770         PMAP_LOCK(pmap);
 1771         for (; sva < eva; sva = va_next) {
 1772 
 1773                 pml4e = pmap_pml4e(pmap, sva);
 1774                 if ((*pml4e & PG_V) == 0) {
 1775                         va_next = (sva + NBPML4) & ~PML4MASK;
 1776                         continue;
 1777                 }
 1778 
 1779                 pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
 1780                 if ((*pdpe & PG_V) == 0) {
 1781                         va_next = (sva + NBPDP) & ~PDPMASK;
 1782                         continue;
 1783                 }
 1784 
 1785                 va_next = (sva + NBPDR) & ~PDRMASK;
 1786 
 1787                 pde = pmap_pdpe_to_pde(pdpe, sva);
 1788                 ptpaddr = *pde;
 1789 
 1790                 /*
 1791                  * Weed out invalid mappings.
 1792                  */
 1793                 if (ptpaddr == 0)
 1794                         continue;
 1795 
 1796                 /*
 1797                  * Check for large page.
 1798                  */
 1799                 if ((ptpaddr & PG_PS) != 0) {
 1800                         *pde &= ~(PG_M|PG_RW);
 1801                         anychanged = 1;
 1802                         continue;
 1803                 }
 1804 
 1805                 if (va_next > eva)
 1806                         va_next = eva;
 1807 
 1808                 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
 1809                     sva += PAGE_SIZE) {
 1810                         pt_entry_t obits, pbits;
 1811                         vm_page_t m;
 1812 
 1813 retry:
 1814                         obits = pbits = *pte;
 1815                         if (pbits & PG_MANAGED) {
 1816                                 m = NULL;
 1817                                 if (pbits & PG_A) {
 1818                                         m = PHYS_TO_VM_PAGE(pbits & PG_FRAME);
 1819                                         vm_page_flag_set(m, PG_REFERENCED);
 1820                                         pbits &= ~PG_A;
 1821                                 }
 1822                                 if ((pbits & PG_M) != 0 &&
 1823                                     pmap_track_modified(sva)) {
 1824                                         if (m == NULL)
 1825                                                 m = PHYS_TO_VM_PAGE(pbits &
 1826                                                     PG_FRAME);
 1827                                         vm_page_dirty(m);
 1828                                 }
 1829                         }
 1830 
 1831                         pbits &= ~(PG_RW | PG_M);
 1832 
 1833                         if (pbits != obits) {
 1834                                 if (!atomic_cmpset_long(pte, obits, pbits))
 1835                                         goto retry;
 1836                                 if (obits & PG_G)
 1837                                         pmap_invalidate_page(pmap, sva);
 1838                                 else
 1839                                         anychanged = 1;
 1840                         }
 1841                 }
 1842         }
 1843         vm_page_unlock_queues();
 1844         if (anychanged)
 1845                 pmap_invalidate_all(pmap);
 1846         PMAP_UNLOCK(pmap);
 1847 }
 1848 
 1849 /*
 1850  *      Insert the given physical page (p) at
 1851  *      the specified virtual address (v) in the
 1852  *      target physical map with the protection requested.
 1853  *
 1854  *      If specified, the page will be wired down, meaning
 1855  *      that the related pte can not be reclaimed.
 1856  *
 1857  *      NB:  This is the only routine which MAY NOT lazy-evaluate
 1858  *      or lose information.  That is, this routine must actually
 1859  *      insert this page into the given map NOW.
 1860  */
 1861 void
 1862 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 1863            boolean_t wired)
 1864 {
 1865         vm_paddr_t pa;
 1866         register pt_entry_t *pte;
 1867         vm_paddr_t opa;
 1868         pt_entry_t origpte, newpte;
 1869         vm_page_t mpte, om;
 1870         boolean_t invlva;
 1871 
 1872         va = trunc_page(va);
 1873 #ifdef PMAP_DIAGNOSTIC
 1874         if (va > VM_MAX_KERNEL_ADDRESS)
 1875                 panic("pmap_enter: toobig");
 1876         if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
 1877                 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va);
 1878 #endif
 1879 
 1880         mpte = NULL;
 1881 
 1882         vm_page_lock_queues();
 1883         PMAP_LOCK(pmap);
 1884 
 1885         /*
 1886          * In the case that a page table page is not
 1887          * resident, we are creating it here.
 1888          */
 1889         if (va < VM_MAXUSER_ADDRESS) {
 1890                 mpte = pmap_allocpte(pmap, va, M_WAITOK);
 1891         }
 1892 #if 0 && defined(PMAP_DIAGNOSTIC)
 1893         else {
 1894                 pd_entry_t *pdeaddr = pmap_pde(pmap, va);
 1895                 origpte = *pdeaddr;
 1896                 if ((origpte & PG_V) == 0) { 
 1897                         panic("pmap_enter: invalid kernel page table page, pde=%p, va=%p\n",
 1898                                 origpte, va);
 1899                 }
 1900         }
 1901 #endif
 1902 
 1903         pte = pmap_pte(pmap, va);
 1904 
 1905         /*
 1906          * Page Directory table entry not valid, we need a new PT page
 1907          */
 1908         if (pte == NULL)
 1909                 panic("pmap_enter: invalid page directory va=%#lx\n", va);
 1910 
 1911         pa = VM_PAGE_TO_PHYS(m);
 1912         om = NULL;
 1913         origpte = *pte;
 1914         opa = origpte & PG_FRAME;
 1915 
 1916         if (origpte & PG_PS)
 1917                 panic("pmap_enter: attempted pmap_enter on 2MB page");
 1918 
 1919         /*
 1920          * Mapping has not changed, must be protection or wiring change.
 1921          */
 1922         if (origpte && (opa == pa)) {
 1923                 /*
 1924                  * Wiring change, just update stats. We don't worry about
 1925                  * wiring PT pages as they remain resident as long as there
 1926                  * are valid mappings in them. Hence, if a user page is wired,
 1927                  * the PT page will be also.
 1928                  */
 1929                 if (wired && ((origpte & PG_W) == 0))
 1930                         pmap->pm_stats.wired_count++;
 1931                 else if (!wired && (origpte & PG_W))
 1932                         pmap->pm_stats.wired_count--;
 1933 
 1934                 /*
 1935                  * Remove extra pte reference
 1936                  */
 1937                 if (mpte)
 1938                         mpte->wire_count--;
 1939 
 1940                 /*
 1941                  * We might be turning off write access to the page,
 1942                  * so we go ahead and sense modify status.
 1943                  */
 1944                 if (origpte & PG_MANAGED) {
 1945                         om = m;
 1946                         pa |= PG_MANAGED;
 1947                 }
 1948                 goto validate;
 1949         } 
 1950         /*
 1951          * Mapping has changed, invalidate old range and fall through to
 1952          * handle validating new mapping.
 1953          */
 1954         if (opa) {
 1955                 if (origpte & PG_W)
 1956                         pmap->pm_stats.wired_count--;
 1957                 if (origpte & PG_MANAGED) {
 1958                         om = PHYS_TO_VM_PAGE(opa);
 1959                         pmap_remove_entry(pmap, om, va);
 1960                 }
 1961                 if (mpte != NULL) {
 1962                         mpte->wire_count--;
 1963                         KASSERT(mpte->wire_count > 0,
 1964                             ("pmap_enter: missing reference to page table page,"
 1965                              " va: 0x%lx", va));
 1966                 }
 1967         } else
 1968                 pmap->pm_stats.resident_count++;
 1969 
 1970         /*
 1971          * Enter on the PV list if part of our managed memory.
 1972          */
 1973         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 1974                 pmap_insert_entry(pmap, va, m);
 1975                 pa |= PG_MANAGED;
 1976         }
 1977 
 1978         /*
 1979          * Increment counters
 1980          */
 1981         if (wired)
 1982                 pmap->pm_stats.wired_count++;
 1983 
 1984 validate:
 1985         /*
 1986          * Now validate mapping with desired protection/wiring.
 1987          */
 1988         newpte = (pt_entry_t)(pa | PG_V);
 1989         if ((prot & VM_PROT_WRITE) != 0)
 1990                 newpte |= PG_RW;
 1991         if ((prot & VM_PROT_EXECUTE) == 0)
 1992                 newpte |= pg_nx;
 1993         if (wired)
 1994                 newpte |= PG_W;
 1995         if (va < VM_MAXUSER_ADDRESS)
 1996                 newpte |= PG_U;
 1997         if (pmap == kernel_pmap)
 1998                 newpte |= PG_G;
 1999 
 2000         /*
 2001          * if the mapping or permission bits are different, we need
 2002          * to update the pte.
 2003          */
 2004         if ((origpte & ~(PG_M|PG_A)) != newpte) {
 2005                 if (origpte & PG_V) {
 2006                         invlva = FALSE;
 2007                         origpte = pte_load_store(pte, newpte | PG_A);
 2008                         if (origpte & PG_A) {
 2009                                 if (origpte & PG_MANAGED)
 2010                                         vm_page_flag_set(om, PG_REFERENCED);
 2011                                 if (opa != VM_PAGE_TO_PHYS(m) || ((origpte &
 2012                                     PG_NX) == 0 && (newpte & PG_NX)))
 2013                                         invlva = TRUE;
 2014                         }
 2015                         if (origpte & PG_M) {
 2016                                 KASSERT((origpte & PG_RW),
 2017                                     ("pmap_enter: modified page not writable:"
 2018                                      " va: 0x%lx, pte: 0x%lx", va, origpte));
 2019                                 if ((origpte & PG_MANAGED) &&
 2020                                     pmap_track_modified(va))
 2021                                         vm_page_dirty(om);
 2022                                 if ((newpte & PG_RW) == 0)
 2023                                         invlva = TRUE;
 2024                         }
 2025                         if (invlva)
 2026                                 pmap_invalidate_page(pmap, va);
 2027                 } else
 2028                         pte_store(pte, newpte | PG_A);
 2029         }
 2030         vm_page_unlock_queues();
 2031         PMAP_UNLOCK(pmap);
 2032 }
 2033 
 2034 /*
 2035  * this code makes some *MAJOR* assumptions:
 2036  * 1. Current pmap & pmap exists.
 2037  * 2. Not wired.
 2038  * 3. Read access.
 2039  * 4. No page table pages.
 2040  * but is *MUCH* faster than pmap_enter...
 2041  */
 2042 
 2043 vm_page_t
 2044 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 2045     vm_page_t mpte)
 2046 {
 2047         pt_entry_t *pte;
 2048         vm_paddr_t pa;
 2049 
 2050         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2051         VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
 2052         PMAP_LOCK(pmap);
 2053 
 2054         /*
 2055          * In the case that a page table page is not
 2056          * resident, we are creating it here.
 2057          */
 2058         if (va < VM_MAXUSER_ADDRESS) {
 2059                 vm_pindex_t ptepindex;
 2060                 pd_entry_t *ptepa;
 2061 
 2062                 /*
 2063                  * Calculate pagetable page index
 2064                  */
 2065                 ptepindex = pmap_pde_pindex(va);
 2066                 if (mpte && (mpte->pindex == ptepindex)) {
 2067                         mpte->wire_count++;
 2068                 } else {
 2069         retry:
 2070                         /*
 2071                          * Get the page directory entry
 2072                          */
 2073                         ptepa = pmap_pde(pmap, va);
 2074 
 2075                         /*
 2076                          * If the page table page is mapped, we just increment
 2077                          * the hold count, and activate it.
 2078                          */
 2079                         if (ptepa && (*ptepa & PG_V) != 0) {
 2080                                 if (*ptepa & PG_PS)
 2081                                         panic("pmap_enter_quick: unexpected mapping into 2MB page");
 2082                                 mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME);
 2083                                 mpte->wire_count++;
 2084                         } else {
 2085                                 mpte = _pmap_allocpte(pmap, ptepindex,
 2086                                     M_NOWAIT);
 2087                                 if (mpte == NULL) {
 2088                                         PMAP_UNLOCK(pmap);
 2089                                         vm_page_busy(m);
 2090                                         vm_page_unlock_queues();
 2091                                         VM_OBJECT_UNLOCK(m->object);
 2092                                         VM_WAIT;
 2093                                         VM_OBJECT_LOCK(m->object);
 2094                                         vm_page_lock_queues();
 2095                                         vm_page_wakeup(m);
 2096                                         PMAP_LOCK(pmap);
 2097                                         goto retry;
 2098                                 }
 2099                         }
 2100                 }
 2101         } else {
 2102                 mpte = NULL;
 2103         }
 2104 
 2105         /*
 2106          * This call to vtopte makes the assumption that we are
 2107          * entering the page into the current pmap.  In order to support
 2108          * quick entry into any pmap, one would likely use pmap_pte.
 2109          * But that isn't as quick as vtopte.
 2110          */
 2111         pte = vtopte(va);
 2112         if (*pte) {
 2113                 if (mpte != NULL) {
 2114                         pmap_unwire_pte_hold(pmap, va, mpte);
 2115                         mpte = NULL;
 2116                 }
 2117                 goto out;
 2118         }
 2119 
 2120         /*
 2121          * Enter on the PV list if part of our managed memory. Note that we
 2122          * raise IPL while manipulating pv_table since pmap_enter can be
 2123          * called at interrupt time.
 2124          */
 2125         if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
 2126                 pmap_insert_entry(pmap, va, m);
 2127 
 2128         /*
 2129          * Increment counters
 2130          */
 2131         pmap->pm_stats.resident_count++;
 2132 
 2133         pa = VM_PAGE_TO_PHYS(m);
 2134         if ((prot & VM_PROT_EXECUTE) == 0)
 2135                 pa |= pg_nx;
 2136 
 2137         /*
 2138          * Now validate mapping with RO protection
 2139          */
 2140         if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 2141                 pte_store(pte, pa | PG_V | PG_U);
 2142         else
 2143                 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
 2144 out:
 2145         PMAP_UNLOCK(pmap);
 2146         return mpte;
 2147 }
 2148 
 2149 /*
 2150  * Make a temporary mapping for a physical address.  This is only intended
 2151  * to be used for panic dumps.
 2152  */
 2153 void *
 2154 pmap_kenter_temporary(vm_paddr_t pa, int i)
 2155 {
 2156         vm_offset_t va;
 2157 
 2158         va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 2159         pmap_kenter(va, pa);
 2160         invlpg(va);
 2161         return ((void *)crashdumpmap);
 2162 }
 2163 
 2164 /*
 2165  * This code maps large physical mmap regions into the
 2166  * processor address space.  Note that some shortcuts
 2167  * are taken, but the code works.
 2168  */
 2169 void
 2170 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
 2171                     vm_object_t object, vm_pindex_t pindex,
 2172                     vm_size_t size)
 2173 {
 2174         vm_offset_t va;
 2175         vm_page_t p, pdpg;
 2176 
 2177         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 2178         KASSERT(object->type == OBJT_DEVICE,
 2179             ("pmap_object_init_pt: non-device object"));
 2180         if (((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
 2181                 vm_page_t m[1];
 2182                 pd_entry_t ptepa, *pde;
 2183 
 2184                 PMAP_LOCK(pmap);
 2185                 pde = pmap_pde(pmap, addr);
 2186                 if (pde != 0 && (*pde & PG_V) != 0)
 2187                         goto out;
 2188                 PMAP_UNLOCK(pmap);
 2189 retry:
 2190                 p = vm_page_lookup(object, pindex);
 2191                 if (p != NULL) {
 2192                         vm_page_lock_queues();
 2193                         if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
 2194                                 goto retry;
 2195                 } else {
 2196                         p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 2197                         if (p == NULL)
 2198                                 return;
 2199                         m[0] = p;
 2200 
 2201                         if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
 2202                                 vm_page_lock_queues();
 2203                                 vm_page_free(p);
 2204                                 vm_page_unlock_queues();
 2205                                 return;
 2206                         }
 2207 
 2208                         p = vm_page_lookup(object, pindex);
 2209                         vm_page_lock_queues();
 2210                         vm_page_wakeup(p);
 2211                 }
 2212                 vm_page_unlock_queues();
 2213 
 2214                 ptepa = VM_PAGE_TO_PHYS(p);
 2215                 if (ptepa & (NBPDR - 1))
 2216                         return;
 2217 
 2218                 p->valid = VM_PAGE_BITS_ALL;
 2219 
 2220                 PMAP_LOCK(pmap);
 2221                 for (va = addr; va < addr + size; va += NBPDR) {
 2222                         while ((pdpg =
 2223                             pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) {
 2224                                 PMAP_UNLOCK(pmap);
 2225                                 vm_page_lock_queues();
 2226                                 vm_page_busy(p);
 2227                                 vm_page_unlock_queues();
 2228                                 VM_OBJECT_UNLOCK(object);
 2229                                 VM_WAIT;
 2230                                 VM_OBJECT_LOCK(object);
 2231                                 vm_page_lock_queues();
 2232                                 vm_page_wakeup(p);
 2233                                 vm_page_unlock_queues();
 2234                                 PMAP_LOCK(pmap);
 2235                         }
 2236                         pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
 2237                         pde = &pde[pmap_pde_index(va)];
 2238                         if ((*pde & PG_V) == 0) {
 2239                                 pde_store(pde, ptepa | PG_PS | PG_M | PG_A |
 2240                                     PG_U | PG_RW | PG_V);
 2241                                 pmap->pm_stats.resident_count +=
 2242                                     NBPDR / PAGE_SIZE;
 2243                         } else {
 2244                                 pdpg->wire_count--;
 2245                                 KASSERT(pdpg->wire_count > 0,
 2246                                     ("pmap_object_init_pt: missing reference "
 2247                                      "to page directory page, va: 0x%lx", va));
 2248                         }
 2249                         ptepa += NBPDR;
 2250                 }
 2251                 pmap_invalidate_all(pmap);
 2252 out:
 2253                 PMAP_UNLOCK(pmap);
 2254         }
 2255 }
 2256 
 2257 /*
 2258  *      Routine:        pmap_change_wiring
 2259  *      Function:       Change the wiring attribute for a map/virtual-address
 2260  *                      pair.
 2261  *      In/out conditions:
 2262  *                      The mapping must already exist in the pmap.
 2263  */
 2264 void
 2265 pmap_change_wiring(pmap, va, wired)
 2266         register pmap_t pmap;
 2267         vm_offset_t va;
 2268         boolean_t wired;
 2269 {
 2270         register pt_entry_t *pte;
 2271 
 2272         /*
 2273          * Wiring is not a hardware characteristic so there is no need to
 2274          * invalidate TLB.
 2275          */
 2276         PMAP_LOCK(pmap);
 2277         pte = pmap_pte(pmap, va);
 2278         if (wired && (*pte & PG_W) == 0) {
 2279                 pmap->pm_stats.wired_count++;
 2280                 atomic_set_long(pte, PG_W);
 2281         } else if (!wired && (*pte & PG_W) != 0) {
 2282                 pmap->pm_stats.wired_count--;
 2283                 atomic_clear_long(pte, PG_W);
 2284         }
 2285         PMAP_UNLOCK(pmap);
 2286 }
 2287 
 2288 
 2289 
 2290 /*
 2291  *      Copy the range specified by src_addr/len
 2292  *      from the source map to the range dst_addr/len
 2293  *      in the destination map.
 2294  *
 2295  *      This routine is only advisory and need not do anything.
 2296  */
 2297 
 2298 void
 2299 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 2300           vm_offset_t src_addr)
 2301 {
 2302         vm_offset_t addr;
 2303         vm_offset_t end_addr = src_addr + len;
 2304         vm_offset_t va_next;
 2305         vm_page_t m;
 2306 
 2307         if (dst_addr != src_addr)
 2308                 return;
 2309 
 2310         if (!pmap_is_current(src_pmap))
 2311                 return;
 2312 
 2313         vm_page_lock_queues();
 2314         if (dst_pmap < src_pmap) {
 2315                 PMAP_LOCK(dst_pmap);
 2316                 PMAP_LOCK(src_pmap);
 2317         } else {
 2318                 PMAP_LOCK(src_pmap);
 2319                 PMAP_LOCK(dst_pmap);
 2320         }
 2321         for (addr = src_addr; addr < end_addr; addr = va_next) {
 2322                 pt_entry_t *src_pte, *dst_pte;
 2323                 vm_page_t dstmpde, dstmpte, srcmpte;
 2324                 pml4_entry_t *pml4e;
 2325                 pdp_entry_t *pdpe;
 2326                 pd_entry_t srcptepaddr, *pde;
 2327 
 2328                 if (addr >= UPT_MIN_ADDRESS)
 2329                         panic("pmap_copy: invalid to pmap_copy page tables");
 2330 
 2331                 /*
 2332                  * Don't let optional prefaulting of pages make us go
 2333                  * way below the low water mark of free pages or way
 2334                  * above high water mark of used pv entries.
 2335                  */
 2336                 if (cnt.v_free_count < cnt.v_free_reserved ||
 2337                     pv_entry_count > pv_entry_high_water)
 2338                         break;
 2339                 
 2340                 pml4e = pmap_pml4e(src_pmap, addr);
 2341                 if ((*pml4e & PG_V) == 0) {
 2342                         va_next = (addr + NBPML4) & ~PML4MASK;
 2343                         continue;
 2344                 }
 2345 
 2346                 pdpe = pmap_pml4e_to_pdpe(pml4e, addr);
 2347                 if ((*pdpe & PG_V) == 0) {
 2348                         va_next = (addr + NBPDP) & ~PDPMASK;
 2349                         continue;
 2350                 }
 2351 
 2352                 va_next = (addr + NBPDR) & ~PDRMASK;
 2353 
 2354                 pde = pmap_pdpe_to_pde(pdpe, addr);
 2355                 srcptepaddr = *pde;
 2356                 if (srcptepaddr == 0)
 2357                         continue;
 2358                         
 2359                 if (srcptepaddr & PG_PS) {
 2360                         dstmpde = pmap_allocpde(dst_pmap, addr, M_NOWAIT);
 2361                         if (dstmpde == NULL)
 2362                                 break;
 2363                         pde = (pd_entry_t *)
 2364                             PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpde));
 2365                         pde = &pde[pmap_pde_index(addr)];
 2366                         if (*pde == 0) {
 2367                                 *pde = srcptepaddr & ~PG_W;
 2368                                 dst_pmap->pm_stats.resident_count +=
 2369                                     NBPDR / PAGE_SIZE;
 2370                         } else
 2371                                 pmap_unwire_pte_hold(dst_pmap, addr, dstmpde);
 2372                         continue;
 2373                 }
 2374 
 2375                 srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
 2376                 if (srcmpte->wire_count == 0)
 2377                         panic("pmap_copy: source page table page is unused");
 2378 
 2379                 if (va_next > end_addr)
 2380                         va_next = end_addr;
 2381 
 2382                 src_pte = vtopte(addr);
 2383                 while (addr < va_next) {
 2384                         pt_entry_t ptetemp;
 2385                         ptetemp = *src_pte;
 2386                         /*
 2387                          * we only virtual copy managed pages
 2388                          */
 2389                         if ((ptetemp & PG_MANAGED) != 0) {
 2390                                 /*
 2391                                  * We have to check after allocpte for the
 2392                                  * pte still being around...  allocpte can
 2393                                  * block.
 2394                                  */
 2395                                 dstmpte = pmap_allocpte(dst_pmap, addr,
 2396                                     M_NOWAIT);
 2397                                 if (dstmpte == NULL)
 2398                                         break;
 2399                                 dst_pte = (pt_entry_t *)
 2400                                     PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte));
 2401                                 dst_pte = &dst_pte[pmap_pte_index(addr)];
 2402                                 if (*dst_pte == 0) {
 2403                                         /*
 2404                                          * Clear the wired, modified, and
 2405                                          * accessed (referenced) bits
 2406                                          * during the copy.
 2407                                          */
 2408                                         m = PHYS_TO_VM_PAGE(ptetemp & PG_FRAME);
 2409                                         *dst_pte = ptetemp & ~(PG_W | PG_M |
 2410                                             PG_A);
 2411                                         dst_pmap->pm_stats.resident_count++;
 2412                                         pmap_insert_entry(dst_pmap, addr, m);
 2413                                 } else
 2414                                         pmap_unwire_pte_hold(dst_pmap, addr, dstmpte);
 2415                                 if (dstmpte->wire_count >= srcmpte->wire_count)
 2416                                         break;
 2417                         }
 2418                         addr += PAGE_SIZE;
 2419                         src_pte++;
 2420                 }
 2421         }
 2422         vm_page_unlock_queues();
 2423         PMAP_UNLOCK(src_pmap);
 2424         PMAP_UNLOCK(dst_pmap);
 2425 }       
 2426 
 2427 /*
 2428  *      pmap_zero_page: clear the whole hardware page backing 'm'.
 2429  *      The page is reached through its direct map address and pagezero().
 2430  */
 2431 void
 2432 pmap_zero_page(vm_page_t m)
 2433 {
 2434         vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
 2435 
 2436         pagezero((void *)PHYS_TO_DMAP(pa));
 2437 }
 2438 
 2439 /*
 2440  *      pmap_zero_page_area: clear 'size' bytes starting 'off' bytes into
 2441  *      the hardware page backing 'm', working through the direct map.
 2442  *
 2443  *      off and size may not cover an area beyond a single hardware page.
 2444  */
 2445 void
 2446 pmap_zero_page_area(vm_page_t m, int off, int size)
 2447 {
 2448         char *page = (char *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 2449 
 2450         if (off != 0 || size != PAGE_SIZE)
 2451                 bzero(page + off, size);
 2452         else
 2453                 pagezero(page);         /* whole page requested */
 2454 }
 2455 
 2456 /*
 2457  *      pmap_zero_page_idle: clear the hardware page backing 'm' through
 2458  *      its direct map address, exactly as pmap_zero_page does.  This
 2459  *      is intended to be called from the vm_pagezero process only and
 2460  *      outside of Giant.
 2461  */
 2462 void
 2463 pmap_zero_page_idle(vm_page_t m)
 2464 {
 2465         vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
 2466 
 2467         pagezero((void *)PHYS_TO_DMAP(pa));
 2468 }
 2469 
 2470 /*
 2471  *      pmap_copy_page: copy the contents of the page backing 'msrc'
 2472  *      onto the page backing 'mdst'.  Both pages are addressed through
 2473  *      the direct map and the copy is done by a single pagecopy()
 2474  *      call.
 2475  */
 2476 void
 2477 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
 2478 {
 2479         void *from = (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
 2480         void *to = (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
 2481 
 2482         pagecopy(from, to);
 2483 }
 2484 
 2485 /*
 2486  * Report whether one of the first 16 pv entries on page m's pv
 2487  * list belongs to the given pmap.  The limit may be raised or
 2488  * lowered in the future; proper page aging only requires that
 2489  * true be returned for a small subset of the pmaps mapping the
 2490  * page.
 2491  */
 2492 boolean_t
 2493 pmap_page_exists_quick(pmap, m)
 2494         pmap_t pmap;
 2495         vm_page_t m;
 2496 {
 2497         pv_entry_t entry;
 2498         int examined;
 2499 
 2500         if ((m->flags & PG_FICTITIOUS) != 0)
 2501                 return (FALSE);
 2502 
 2503         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2504         examined = 0;
 2505         TAILQ_FOREACH(entry, &m->md.pv_list, pv_list) {
 2506                 if (entry->pv_pmap == pmap)
 2507                         return (TRUE);
 2508                 /* Only the first 16 entries are ever examined. */
 2509                 if (++examined >= 16)
 2510                         break;
 2511         }
 2512         return (FALSE);
 2513 }
 2514 
 2515 #define PMAP_REMOVE_PAGES_CURPROC_ONLY
 2516 /*
 2517  * Remove every non-wired mapping in [sva, eva) from the given pmap by
 2518  * walking the pmap's pv list; this aids process exit speeds and is much
 2519  * faster than pmap_remove when running down an entire address space.
 2520  * With PMAP_REMOVE_PAGES_CURPROC_ONLY defined (as it is just above) only
 2521  * the current process' pmap is accepted; any other pmap draws a warning
 2522  * and is left untouched.
 2523  */
 2524 void
 2525 pmap_remove_pages(pmap, sva, eva)
 2526         pmap_t pmap;
 2527         vm_offset_t sva, eva;
 2528 {
 2529         pt_entry_t *pte, tpte;
 2530         vm_page_t m;
 2531         pv_entry_t pv, npv;
 2532 
 2533 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2534         if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
 2535                 printf("warning: pmap_remove_pages called with non-current pmap\n");
 2536                 return;
 2537         }
 2538 #endif
 2539         vm_page_lock_queues();
 2540         PMAP_LOCK(pmap);
 2541         for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
 2542 
 2543                 if (pv->pv_va >= eva || pv->pv_va < sva) {     /* outside the range */
 2544                         npv = TAILQ_NEXT(pv, pv_plist);
 2545                         continue;
 2546                 }
 2547 
 2548 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2549                 pte = vtopte(pv->pv_va);
 2550 #else
 2551                 pte = pmap_pte(pmap, pv->pv_va);
 2552 #endif
 2553                 tpte = *pte;
 2554 
 2555                 if (tpte == 0) {        /* a pv entry without a pte is fatal */
 2556                         printf("TPTE at %p  IS ZERO @ VA %08lx\n",
 2557                                                         pte, pv->pv_va);
 2558                         panic("bad pte");
 2559                 }
 2560 
 2561 /*
 2562  * We cannot remove wired pages from a process' mapping at this time
 2563  */
 2564                 if (tpte & PG_W) {
 2565                         npv = TAILQ_NEXT(pv, pv_plist);
 2566                         continue;
 2567                 }
 2568 
 2569                 m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
 2570                 KASSERT(m < &vm_page_array[vm_page_array_size],        /* bound m before reading it */
 2571                         ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte));
 2572 
 2573                 KASSERT(m->phys_addr == (tpte & PG_FRAME),
 2574                     ("vm_page_t %p phys_addr mismatch %016jx %016jx",
 2575                     m, (uintmax_t)m->phys_addr, (uintmax_t)tpte));
 2576 
 2577                 pmap->pm_stats.resident_count--;
 2578 
 2579                 pte_clear(pte);
 2580 
 2581                 /*
 2582                  * Carry a set PG_M bit over to the vm_page_t as dirty.
 2583                  */
 2584                 if (tpte & PG_M) {
 2585                         vm_page_dirty(m);
 2586                 }
 2587 
 2588                 npv = TAILQ_NEXT(pv, pv_plist);        /* fetch successor before unlinking pv */
 2589                 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 2590 
 2591                 m->md.pv_list_count--;
 2592                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2593                 if (TAILQ_EMPTY(&m->md.pv_list))       /* last mapping of the page is gone */
 2594                         vm_page_flag_clear(m, PG_WRITEABLE);
 2595 
 2596                 pmap_unuse_pt(pmap, pv->pv_va, *vtopde(pv->pv_va));
 2597                 free_pv_entry(pv);
 2598         }
 2599         pmap_invalidate_all(pmap);      /* one invalidation after all ptes are cleared */
 2600         PMAP_UNLOCK(pmap);
 2601         vm_page_unlock_queues();
 2602 }
 2603 
 2604 /*
 2605  *      pmap_is_modified:
 2606  *
 2607  *      Return TRUE if any tracked mapping of page 'm' has PG_M set in its
 2608  *      pte, else FALSE.  The page queues mutex is asserted to be held.
 2609  */
 2610 boolean_t
 2611 pmap_is_modified(vm_page_t m)
 2612 {
 2613         pv_entry_t pv;
 2614         pt_entry_t *pte;
 2615         boolean_t rv;
 2616 
 2617         rv = FALSE;
 2618         if (m->flags & PG_FICTITIOUS)   /* fictitious pages always report FALSE */
 2619                 return (rv);
 2620 
 2621         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2622         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2623                 /*
 2624                  * Skip mappings whose virtual address
 2625                  * pmap_track_modified() rejects; their PG_M
 2626                  * state is never consulted.
 2627                  */
 2628                 if (!pmap_track_modified(pv->pv_va))
 2629                         continue;
 2630                 PMAP_LOCK(pv->pv_pmap);
 2631                 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
 2632                 rv = (*pte & PG_M) != 0;
 2633                 PMAP_UNLOCK(pv->pv_pmap);
 2634                 if (rv)                 /* one modified mapping is enough */
 2635                         break;
 2636         }
 2637         return (rv);
 2638 }
 2639 
 2640 /*
 2641  *      pmap_is_prefaultable:
 2642  *
 2643  *      Return TRUE when 'addr' is eligible for prefault in 'pmap': its page
 2644  *      directory entry is valid but its pte is not.  FALSE otherwise.
 2645  */
 2646 boolean_t
 2647 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 2648 {
 2649         pd_entry_t *pde;
 2650         pt_entry_t *pte;
 2651         boolean_t rv;
 2652 
 2653         rv = FALSE;
 2654         PMAP_LOCK(pmap);
 2655         pde = pmap_pde(pmap, addr);
 2656         if (pde != NULL && (*pde & PG_V)) {
 2657                 pte = vtopte(addr);     /* NOTE(review): vtopte() takes no pmap; confirm callers pass only the current pmap */
 2658                 rv = (*pte & PG_V) == 0;
 2659         }
 2660         PMAP_UNLOCK(pmap);
 2661         return (rv);
 2662 }
 2663 
 2664 /*
 2665  *      Clear 'bit' in each pte mapping page 'm'; clearing PG_RW also clears PG_M and may dirty 'm'.
 2666  */
 2667 static __inline void
 2668 pmap_clear_ptes(vm_page_t m, long bit)
 2669 {
 2670         register pv_entry_t pv;
 2671         pt_entry_t pbits, *pte;
 2672 
 2673         if ((m->flags & PG_FICTITIOUS) ||
 2674             (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0))  /* page not flagged writeable: nothing to revoke */
 2675                 return;
 2676 
 2677         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2678         /*
 2679          * Visit every mapping of the page, clearing the bit where it is
 2680          * set.  (Old open question: if setting RO, must the VAC be cleared?)
 2681          */
 2682         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2683                 /*
 2684                  * don't write protect mappings pmap_track_modified() rejects
 2685                  */
 2686                 if (bit == PG_RW) {
 2687                         if (!pmap_track_modified(pv->pv_va))
 2688                                 continue;
 2689                 }
 2690 
 2691                 PMAP_LOCK(pv->pv_pmap);
 2692                 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
 2693 retry:
 2694                 pbits = *pte;
 2695                 if (pbits & bit) {
 2696                         if (bit == PG_RW) {
 2697                                 if (!atomic_cmpset_long(pte, pbits,
 2698                                     pbits & ~(PG_RW | PG_M)))
 2699                                         goto retry;     /* pte changed since it was read: start over */
 2700                                 if (pbits & PG_M) {     /* keep the modification we just erased */
 2701                                         vm_page_dirty(m);
 2702                                 }
 2703                         } else {
 2704                                 atomic_clear_long(pte, bit);
 2705                         }
 2706                         pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
 2707                 }
 2708                 PMAP_UNLOCK(pv->pv_pmap);
 2709         }
 2710         if (bit == PG_RW)
 2711                 vm_page_flag_clear(m, PG_WRITEABLE);
 2712 }
 2713 
 2714 /*
 2715  *      pmap_page_protect:
 2716  *
 2717  *      Reduce every mapping of page 'm' to at most the access in 'prot'.
 2718  */
 2719 void
 2720 pmap_page_protect(vm_page_t m, vm_prot_t prot)
 2721 {
 2722         /* A request that keeps write access revokes nothing. */
 2723         if ((prot & VM_PROT_WRITE) != 0)
 2724                 return;
 2725         if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0)
 2726                 pmap_remove_all(m);
 2727         else
 2728                 pmap_clear_ptes(m, PG_RW);
 2729 }
 2730 
 2731 /*
 2732  *      pmap_ts_referenced:
 2733  *
 2734  *      Return a count of the PG_A (reference) bits found set in the ptes
 2735  *      mapping page 'm', clearing each bit counted.  Not every reference
 2736  *      bit need be cleared (the scan stops after counting five), but 0
 2737  *      must only be returned when there are truly no reference bits set.
 2738  *
 2739  *      XXX: The exact number of bits to check and clear is a matter that
 2740  *      should be tested and standardized at some point in the future for
 2741  *      optimal aging of shared pages.
 2742  */
 2743 int
 2744 pmap_ts_referenced(vm_page_t m)
 2745 {
 2746         register pv_entry_t pv, pvf, pvn;
 2747         pt_entry_t *pte;
 2748         pt_entry_t v;
 2749         int rtval = 0;
 2750 
 2751         if (m->flags & PG_FICTITIOUS)
 2752                 return (rtval);
 2753 
 2754         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2755         if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 2756 
 2757                 pvf = pv;       /* remember the first entry to detect a full lap */
 2758 
 2759                 do {
 2760                         pvn = TAILQ_NEXT(pv, pv_list); /* successor, taken before pv is moved */
 2761 
 2762                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2763 
 2764                         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);        /* rotate pv to the tail */
 2765 
 2766                         if (!pmap_track_modified(pv->pv_va))
 2767                                 continue;
 2768 
 2769                         PMAP_LOCK(pv->pv_pmap);
 2770                         pte = pmap_pte(pv->pv_pmap, pv->pv_va);
 2771 
 2772                         if (pte && ((v = pte_load(pte)) & PG_A) != 0) {        /* v is only used in this test */
 2773                                 atomic_clear_long(pte, PG_A);
 2774                                 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
 2775 
 2776                                 rtval++;
 2777                                 if (rtval > 4) {        /* five found: stop early */
 2778                                         PMAP_UNLOCK(pv->pv_pmap);
 2779                                         break;
 2780                                 }
 2781                         }
 2782                         PMAP_UNLOCK(pv->pv_pmap);
 2783                 } while ((pv = pvn) != NULL && pv != pvf);     /* end of list, or back at the start */
 2784         }
 2785 
 2786         return (rtval);
 2787 }
 2788 
 2789 /*
 2790  *      Clear PG_M in the ptes mapping page 'm', by way of pmap_clear_ptes().
 2791  */
 2792 void
 2793 pmap_clear_modify(vm_page_t m)
 2794 {
 2795         pmap_clear_ptes(m, PG_M);
 2796 }
 2797 
 2798 /*
 2799  *      pmap_clear_reference:
 2800  *
 2801  *      Clear PG_A in the ptes mapping page 'm', by way of pmap_clear_ptes().
 2802  */
 2803 void
 2804 pmap_clear_reference(vm_page_t m)
 2805 {
 2806         pmap_clear_ptes(m, PG_A);
 2807 }
 2808 
 2809 /*
 2810  * Miscellaneous support routines follow
 2811  */
 2812 
 2813 /*
 2814  * Make the physical range starting at 'pa' and 'size' bytes long
 2815  * addressable by the kernel and return a pointer to it.  A range
 2816  * below dmaplimit is served from the direct map; anything else gets
 2817  * newly allocated kernel VA.  Meant for device memory, NOT real memory.
 2818  */
 2819 void *
 2820 pmap_mapdev(pa, size)
 2821         vm_paddr_t pa;
 2822         vm_size_t size;
 2823 {
 2824         vm_offset_t base, cur, pgoff;
 2825 
 2826         /* Entirely below dmaplimit: the direct map already covers it. */
 2827         if (pa < dmaplimit && (pa + size) < dmaplimit)
 2828                 return ((void *)PHYS_TO_DMAP(pa));
 2829         /* Widen the request to whole pages around the unaligned start. */
 2830         pgoff = pa & PAGE_MASK;
 2831         size = roundup(pgoff + size, PAGE_SIZE);
 2832         base = kmem_alloc_nofault(kernel_map, size);
 2833         if (base == 0)
 2834                 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 2835         cur = base;
 2836         for (pa = trunc_page(pa); size > 0; size -= PAGE_SIZE) {
 2837                 pmap_kenter(cur, pa);
 2838                 cur += PAGE_SIZE;
 2839                 pa += PAGE_SIZE;
 2840         }
 2841         pmap_invalidate_range(kernel_pmap, base, cur);
 2842         return ((void *)(base + pgoff));
 2843 }
 2844 /* Undo a pmap_mapdev() mapping of 'size' bytes at 'va'; direct map addresses need no teardown. */
 2845 void
 2846 pmap_unmapdev(va, size)
 2847         vm_offset_t va;
 2848         vm_size_t size;
 2849 {
 2850         vm_offset_t first, cur;
 2851 
 2852         /* A direct map address was never separately mapped: nothing to do. */
 2853         if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS)
 2854                 return;
 2855         /* Recompute the page-aligned extent that pmap_mapdev() set up. */
 2856         first = trunc_page(va);
 2857         size = roundup((va & PAGE_MASK) + size, PAGE_SIZE);
 2858         for (cur = first; cur < first + size; cur += PAGE_SIZE)
 2859                 pmap_kremove(cur);
 2860         pmap_invalidate_range(kernel_pmap, va, cur);
 2861         kmem_free(kernel_map, first, size);
 2862 }
 2863 
 2864 /*
 2865  * Perform the pmap work for mincore: build the MINCORE_* flags for the page mapped at 'addr'.
 2866  */
 2867 int
 2868 pmap_mincore(pmap, addr)
 2869         pmap_t pmap;
 2870         vm_offset_t addr;
 2871 {
 2872         pt_entry_t *ptep, pte;
 2873         vm_page_t m;
 2874         int val = 0;
 2875         
 2876         PMAP_LOCK(pmap);
 2877         ptep = pmap_pte(pmap, addr);
 2878         pte = (ptep != NULL) ? *ptep : 0;       /* snapshot the pte; the lock is dropped right after */
 2879         PMAP_UNLOCK(pmap);
 2880 
 2881         if (pte != 0) {         /* a zero pte leaves val at 0 */
 2882                 vm_paddr_t pa;
 2883 
 2884                 val = MINCORE_INCORE;
 2885                 if ((pte & PG_MANAGED) == 0)    /* unmanaged mapping: report MINCORE_INCORE only */
 2886                         return val;
 2887 
 2888                 pa = pte & PG_FRAME;
 2889 
 2890                 m = PHYS_TO_VM_PAGE(pa);
 2891 
 2892                 /*
 2893                  * Modified by us: PG_M is set in this mapping's own pte
 2894                  */
 2895                 if (pte & PG_M)
 2896                         val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
 2897                 else {
 2898                         /*
 2899                          * Modified by someone else: page dirty or PG_M set in another mapping
 2900                          */
 2901                         vm_page_lock_queues();
 2902                         if (m->dirty || pmap_is_modified(m))
 2903                                 val |= MINCORE_MODIFIED_OTHER;
 2904                         vm_page_unlock_queues();
 2905                 }
 2906                 /*
 2907                  * Referenced by us: PG_A is set in this mapping's own pte
 2908                  */
 2909                 if (pte & PG_A)
 2910                         val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
 2911                 else {
 2912                         /*
 2913                          * Referenced by someone else: page flag or PG_A set in another mapping
 2914                          */
 2915                         vm_page_lock_queues();
 2916                         if ((m->flags & PG_REFERENCED) ||
 2917                             pmap_ts_referenced(m)) {   /* note: clears the PG_A bits it counts */
 2918                                 val |= MINCORE_REFERENCED_OTHER;
 2919                                 vm_page_flag_set(m, PG_REFERENCED);
 2920                         }
 2921                         vm_page_unlock_queues();
 2922                 }
 2923         } 
 2924         return val;
 2925 }
 2926 /* Mark the pmap of td's process active on this CPU and load its PML4 address into cr3. */
 2927 void
 2928 pmap_activate(struct thread *td)
 2929 {
 2930         struct proc *p = td->td_proc;
 2931         pmap_t newpmap, prevpmap;
 2932         u_int64_t cr3;
 2933 
 2934         critical_enter();
 2935         newpmap = vmspace_pmap(p->p_vmspace);
 2936         prevpmap = PCPU_GET(curpmap);
 2937 #ifdef SMP
 2938         if (prevpmap != NULL)   /* XXX FIXME */
 2939                 atomic_clear_int(&prevpmap->pm_active, PCPU_GET(cpumask));
 2940         atomic_set_int(&newpmap->pm_active, PCPU_GET(cpumask));
 2941 #else
 2942         if (prevpmap != NULL)   /* XXX FIXME */
 2943                 prevpmap->pm_active &= ~PCPU_GET(cpumask);
 2944         newpmap->pm_active |= PCPU_GET(cpumask);
 2945 #endif
 2946         cr3 = vtophys(newpmap->pm_pml4);
 2947         /* XXXKSE this is wrong: pmap_activate is for the current
 2948          * thread on the current cpu.
 2949          */
 2950         if ((p->p_flag & P_SA) == 0) {
 2951                 td->td_pcb->pcb_cr3 = cr3;
 2952         } else {
 2953                 /* Make sure all other cr3 entries are updated. */
 2954                 /* what if they are running?  XXXKSE (maybe abort them) */
 2955                 FOREACH_THREAD_IN_PROC(p, td) {
 2956                         td->td_pcb->pcb_cr3 = cr3;
 2957                 }
 2958         }
 2959         load_cr3(cr3);
 2960         critical_exit();
 2961 }
 2962 /* Round 'addr' up to an NBPDR boundary for device objects mapped with at least NBPDR bytes. */
 2963 vm_offset_t
 2964 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
 2965 {
 2966 
 2967         /* Only large device-backed mappings get a realigned hint. */
 2968         if (obj != NULL && size >= NBPDR && obj->type == OBJT_DEVICE)
 2969                 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
 2970 
 2971         /* Every other request keeps the caller's address unchanged. */
 2972         return (addr);
 2973 }

Cache object: 94fdee4930a7b88ba7c6bedfa6db9c63


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.